From 32264d8ccf314d83a4aa5b2ed62be7025f29dcdc Mon Sep 17 00:00:00 2001 From: Dennis Haupt Date: Tue, 3 Mar 2026 12:50:41 +0100 Subject: [PATCH 01/15] feat: Implement logic for Chromosome.php, GenomicPosition.php and GenomicRegion.php --- src/Chromosome.php | 38 ++++++++++++++++ src/GenomicPosition.php | 35 +++++++++++++++ src/GenomicRegion.php | 83 +++++++++++++++++++++++++++++++++++ src/ReferenzGenome.php | 16 +++++++ tests/ChromosomeTest.php | 35 +++++++++++++++ tests/GenomicPositionTest.php | 36 +++++++++++++++ tests/GenomicRegionTest.php | 79 +++++++++++++++++++++++++++++++++ 7 files changed, 322 insertions(+) create mode 100644 src/Chromosome.php create mode 100644 src/GenomicPosition.php create mode 100644 src/GenomicRegion.php create mode 100644 src/ReferenzGenome.php create mode 100644 tests/ChromosomeTest.php create mode 100644 tests/GenomicPositionTest.php create mode 100644 tests/GenomicRegionTest.php diff --git a/src/Chromosome.php b/src/Chromosome.php new file mode 100644 index 0000000..168498b --- /dev/null +++ b/src/Chromosome.php @@ -0,0 +1,38 @@ +referenceGenome = $matches[1] === 'chr' + ? new ReferenzGenome(ReferenzGenome::HG_19) + : new ReferenzGenome(ReferenzGenome::GRCH_37); + + $this->value = $matches[2]; + } + + public function toString(?ReferenzGenome $referenceGenome = null): string + { + $referenceGenome = $referenceGenome ?? $this->referenceGenome; + + switch($referenceGenome->value){ + case ReferenzGenome::HG_19: + return "chr{$this->value}"; + case ReferenzGenome::GRCH_37: + return $this->value; + default: + throw new \InvalidArgumentException("Invalid reference genome: {$referenceGenome->value}"); + } + } +} \ No newline at end of file diff --git a/src/GenomicPosition.php b/src/GenomicPosition.php new file mode 100644 index 0000000..11650e2 --- /dev/null +++ b/src/GenomicPosition.php @@ -0,0 +1,35 @@ +chromosome = $chromosome; + $this->position = $position; + } + + /** @example GenomicPosition::fromString('chr1:123456') */ + public static function parse(string $genomicPosition): self + { + if (\Safe\preg_match('/^(.+):(g|)(\d+)$/', $genomicPosition, $matches) === 0) { + throw new \InvalidArgumentException("Invalid genomic position format: {$genomicPosition}. Expected format: chr1:123456."); + } + + return new self(new Chromosome($matches[1]), (int) $matches[3]); + } + + public function toString(?ReferenzGenome $referenceGenome = null): string + { + return "{$this->chromosome->toString($referenceGenome)}:{$this->position}"; + } +} \ No newline at end of file diff --git a/src/GenomicRegion.php b/src/GenomicRegion.php new file mode 100644 index 0000000..97a2635 --- /dev/null +++ b/src/GenomicRegion.php @@ -0,0 +1,83 @@ + $end) { + throw new \InvalidArgumentException("End ({$end}) must be greater then start ({$start})"); + } + + $this->chromosome = $chromosome; + $this->start = $start; + $this->end = $end; + } + + public static function parse(?string $genomicRegion): ?self + { + if ($genomicRegion === null || $genomicRegion === '') { + return null; + } + + if (preg_match('/^(.+):(g|)(\d+)(-(\d+)|)$/', $genomicRegion, $matches) === 0) { + throw new \InvalidArgumentException("Invalid genomic region format: {$genomicRegion}. Expected format: chr1:123-456."); + } + + return new self( + new Chromosome($matches[1]), + (int) $matches[3], + (int) ($matches[5] ?? $matches[3]) + ); + } + + public function containsGenomicPosition(GenomicPosition $genomicPosition): bool + { + return $this->chromosome->toString() === $genomicPosition->chromosome->toString() + && $this->positionIsBetweenStartAndEnd($genomicPosition->position); + } + + public function containsGenomicRegion(GenomicRegion $genomicRegion): bool + { + return $this->chromosome->toString() === $genomicRegion->chromosome->toString() + && $this->positionIsBetweenStartAndEnd($genomicRegion->start) + && $this->positionIsBetweenStartAndEnd($genomicRegion->end); + } + + public function intersectsWithGenomicRegion(GenomicRegion $genomicRegion): bool + { + return $this->chromosome->toString() === $genomicRegion->chromosome->toString() + && ( + $this->positionIsBetweenStartAndEnd($genomicRegion->start) + || $this->positionIsBetweenStartAndEnd($genomicRegion->end) + ); + } + + private function positionIsBetweenStartAndEnd(int $position): bool + { + return $position >= $this->start && $position <= $this->end; + } + + public function toString(?ReferenzGenome $referenceGenome = null): string + { + return "{$this->chromosome->toString($referenceGenome)}:{$this->start}-{$this->end}"; + } +} diff --git a/src/ReferenzGenome.php b/src/ReferenzGenome.php new file mode 100644 index 0000000..e682eeb --- /dev/null +++ b/src/ReferenzGenome.php @@ -0,0 +1,16 @@ +value = $value; + } +} \ No newline at end of file diff --git a/tests/ChromosomeTest.php b/tests/ChromosomeTest.php new file mode 100644 index 0000000..ca83f95 --- /dev/null +++ b/tests/ChromosomeTest.php @@ -0,0 +1,35 @@ +toString()); + } + + public function testToStringWithGRC37(): void + { + $chromosome = new Chromosome('chr11'); + self::assertSame('11', $chromosome->toString(new ReferenzGenome(ReferenzGenome::GRCH_37))); + } + + public function testInitWithGRC37(): void + { + $chromosome = new Chromosome('11'); + self::assertSame('11', $chromosome->toString()); + } + + public function testFailedInit(): void + { + $chromosomeAsString = 'FOO11'; + self::expectException(\InvalidArgumentException::class); + self::expectExceptionMessage("Invalid chromosome: {$chromosomeAsString}. Expected format: chr1-chr22, chrX, chrY, chrM, or without chr prefix."); + new Chromosome($chromosomeAsString); + } +} diff --git a/tests/GenomicPositionTest.php b/tests/GenomicPositionTest.php new file mode 100644 index 0000000..b224262 --- /dev/null +++ b/tests/GenomicPositionTest.php @@ -0,0 +1,36 @@ +toString()); + } + + public function testParseOnSuccessGRC37(): void + { + $genomicPosition = GenomicPosition::parse('11:1'); + self::assertSame('11:1', $genomicPosition->toString()); + } + + public function testParseOnSuccessHGVSg(): void + { + $genomicPosition = GenomicPosition::parse('chr11:g1'); + self::assertSame('chr11:1', $genomicPosition->toString()); + } + + public function testParseOnError(): void + { + $genomicPositionAsString = '11:1test'; + self::expectException(\InvalidArgumentException::class); + self::expectExceptionMessage("Invalid genomic position format: {$genomicPositionAsString}. Expected format: chr1:123456."); + GenomicPosition::parse($genomicPositionAsString); + } +} diff --git a/tests/GenomicRegionTest.php b/tests/GenomicRegionTest.php new file mode 100644 index 0000000..b2fdb92 --- /dev/null +++ b/tests/GenomicRegionTest.php @@ -0,0 +1,79 @@ +toString()); + } + + public function testParseOnSuccessGRC37(): void + { + $genomicRegion = GenomicRegion::parse('11:1-2'); + self::assertSame('11:1-2', $genomicRegion->toString()); + } + + public function testParseOnSuccessHGVSg(): void + { + $genomicRegion = GenomicRegion::parse('chr11:g1-2'); + self::assertSame('chr11:1-2', $genomicRegion->toString()); + } + + public function testParseOnError(): void + { + $genomicRegionAsString = '11:1_2'; + self::expectException(\InvalidArgumentException::class); + self::expectExceptionMessage("Invalid genomic region format: {$genomicRegionAsString}. Expected format: chr1:123-456."); + GenomicRegion::parse($genomicRegionAsString); + } + + public function testStartIsGerateThenEnd(): void + { + $genomicRegionAsString = '11:2-1'; + self::expectException(\InvalidArgumentException::class); + self::expectExceptionMessage("End (1) must be greater then start (2)"); + GenomicRegion::parse($genomicRegionAsString); + } + + public function testContainsGenomicPositionIsTrue(): void + { + $genomicRegion = GenomicRegion::parse('chr11:g1-20'); + self::assertTrue($genomicRegion->containsGenomicPosition(GenomicPosition::parse('chr11:20'))); + } + + public function testContainsGenomicPositionIsFalse(): void + { + $genomicRegion = GenomicRegion::parse('chr11:g1-20'); + self::assertFalse($genomicRegion->containsGenomicPosition(GenomicPosition::parse('chr11:21'))); + } + + public function testContainsGenomicRegionIsTrue(): void + { + $genomicRegion = GenomicRegion::parse('chr11:g1-20'); + self::assertTrue($genomicRegion->containsGenomicRegion(GenomicRegion::parse('chr11:19-20'))); + } + + public function testContainsGenomicRegionIsFalse(): void + { + $genomicRegion = GenomicRegion::parse('chr11:g1-20'); + self::assertFalse($genomicRegion->containsGenomicRegion(GenomicRegion::parse('chr11:21-22'))); + } + + public function testIntersectsWithGenomicRegionIsTrue(): void + { + $genomicRegion = GenomicRegion::parse('chr11:g20-30'); + self::assertTrue($genomicRegion->intersectsWithGenomicRegion(GenomicRegion::parse('chr11:15-25'))); + } + + public function testIntersectsWithGenomicRegionIsFalse(): void + { + $genomicRegion = GenomicRegion::parse('chr11:g20-30'); + self::assertFalse($genomicRegion->intersectsWithGenomicRegion(GenomicRegion::parse('chr11:15-19'))); + } +} From e0c815e3df80d7bb487a383c5ff01c7ee3aa47be Mon Sep 17 00:00:00 2001 From: KingKong1213 <168984406+KingKong1213@users.noreply.github.com> Date: Tue, 3 Mar 2026 11:51:55 +0000 Subject: [PATCH 02/15] Apply php-cs-fixer changes --- src/Chromosome.php | 7 ++++--- src/GenomicPosition.php | 2 +- src/GenomicRegion.php | 2 ++ src/ReferenzGenome.php | 4 ++-- tests/ChromosomeTest.php | 1 - tests/GenomicPositionTest.php | 3 --- tests/GenomicRegionTest.php | 3 +-- 7 files changed, 10 insertions(+), 12 deletions(-) diff --git a/src/Chromosome.php b/src/Chromosome.php index 168498b..f7ff23d 100644 --- a/src/Chromosome.php +++ b/src/Chromosome.php @@ -8,6 +8,7 @@ class Chromosome public const CHROMOSOME_REGEX = '/^(chr)?(1[0-9]|[1-9]|2[0-2]|X|Y|M|MT)$/i'; private string $value; + private ReferenzGenome $referenceGenome; public function __construct(string $chromosomeAsString) @@ -24,9 +25,9 @@ public function __construct(string $chromosomeAsString) public function toString(?ReferenzGenome $referenceGenome = null): string { - $referenceGenome = $referenceGenome ?? $this->referenceGenome; + $referenceGenome ??= $this->referenceGenome; - switch($referenceGenome->value){ + switch ($referenceGenome->value) { case ReferenzGenome::HG_19: return "chr{$this->value}"; case ReferenzGenome::GRCH_37: @@ -35,4 +36,4 @@ public function toString(?ReferenzGenome $referenceGenome = null): string throw new \InvalidArgumentException("Invalid reference genome: {$referenceGenome->value}"); } } -} \ No newline at end of file +} diff --git a/src/GenomicPosition.php b/src/GenomicPosition.php index 11650e2..5c5fb4b 100644 --- a/src/GenomicPosition.php +++ b/src/GenomicPosition.php @@ -32,4 +32,4 @@ public function toString(?ReferenzGenome $referenceGenome = null): string { return "{$this->chromosome->toString($referenceGenome)}:{$this->position}"; } -} \ No newline at end of file +} diff --git a/src/GenomicRegion.php b/src/GenomicRegion.php index 97a2635..4b802d3 100644 --- a/src/GenomicRegion.php +++ b/src/GenomicRegion.php @@ -7,7 +7,9 @@ final class GenomicRegion { public Chromosome $chromosome; + public int $start; + public int $end; public function __construct( diff --git a/src/ReferenzGenome.php b/src/ReferenzGenome.php index e682eeb..97989a4 100644 --- a/src/ReferenzGenome.php +++ b/src/ReferenzGenome.php @@ -1,4 +1,4 @@ -value = $value; } -} \ No newline at end of file +} diff --git a/tests/ChromosomeTest.php b/tests/ChromosomeTest.php index ca83f95..e9a2e78 100644 --- a/tests/ChromosomeTest.php +++ b/tests/ChromosomeTest.php @@ -1,6 +1,5 @@ Date: Tue, 3 Mar 2026 14:18:28 +0100 Subject: [PATCH 03/15] Use NamingConvention.php instead of ReferenceGenome --- src/Chromosome.php | 16 ++++++++-------- src/GenomicPosition.php | 2 +- src/GenomicRegion.php | 8 ++------ src/{ReferenzGenome.php => NamingConvention.php} | 6 +++--- tests/ChromosomeTest.php | 4 ++-- tests/GenomicPositionTest.php | 1 + 6 files changed, 17 insertions(+), 20 deletions(-) rename src/{ReferenzGenome.php => NamingConvention.php} (65%) diff --git a/src/Chromosome.php b/src/Chromosome.php index f7ff23d..5158fc3 100644 --- a/src/Chromosome.php +++ b/src/Chromosome.php @@ -9,28 +9,28 @@ class Chromosome private string $value; - private ReferenzGenome $referenceGenome; + private NamingConvention $namingConvention; public function __construct(string $chromosomeAsString) { if (\Safe\preg_match(self::CHROMOSOME_REGEX, $chromosomeAsString, $matches) === 0) { throw new \InvalidArgumentException("Invalid chromosome: {$chromosomeAsString}. Expected format: chr1-chr22, chrX, chrY, chrM, or without chr prefix."); } - $this->referenceGenome = $matches[1] === 'chr' - ? new ReferenzGenome(ReferenzGenome::HG_19) - : new ReferenzGenome(ReferenzGenome::GRCH_37); + $this->namingConvention = $matches[1] === 'chr' + ? new NamingConvention(NamingConvention::ENSEMBL) + : new NamingConvention(NamingConvention::UCSC); $this->value = $matches[2]; } - public function toString(?ReferenzGenome $referenceGenome = null): string + public function toString(?NamingConvention $referenceGenome = null): string { - $referenceGenome ??= $this->referenceGenome; + $referenceGenome ??= $this->namingConvention; switch ($referenceGenome->value) { - case ReferenzGenome::HG_19: + case NamingConvention::ENSEMBL: return "chr{$this->value}"; - case ReferenzGenome::GRCH_37: + case NamingConvention::UCSC: return $this->value; default: throw new \InvalidArgumentException("Invalid reference genome: {$referenceGenome->value}"); diff --git a/src/GenomicPosition.php b/src/GenomicPosition.php index 5c5fb4b..3731be4 100644 --- a/src/GenomicPosition.php +++ b/src/GenomicPosition.php @@ -28,7 +28,7 @@ public static function parse(string $genomicPosition): self return new self(new Chromosome($matches[1]), (int) $matches[3]); } - public function toString(?ReferenzGenome $referenceGenome = null): string + public function toString(?NamingConvention $referenceGenome = null): string { return "{$this->chromosome->toString($referenceGenome)}:{$this->position}"; } diff --git a/src/GenomicRegion.php b/src/GenomicRegion.php index 4b802d3..0e2d3e0 100644 --- a/src/GenomicRegion.php +++ b/src/GenomicRegion.php @@ -34,12 +34,8 @@ public function __construct( $this->end = $end; } - public static function parse(?string $genomicRegion): ?self + public static function parse(string $genomicRegion): self { - if ($genomicRegion === null || $genomicRegion === '') { - return null; - } - if (preg_match('/^(.+):(g|)(\d+)(-(\d+)|)$/', $genomicRegion, $matches) === 0) { throw new \InvalidArgumentException("Invalid genomic region format: {$genomicRegion}. Expected format: chr1:123-456."); } @@ -78,7 +74,7 @@ private function positionIsBetweenStartAndEnd(int $position): bool return $position >= $this->start && $position <= $this->end; } - public function toString(?ReferenzGenome $referenceGenome = null): string + public function toString(?NamingConvention $referenceGenome = null): string { return "{$this->chromosome->toString($referenceGenome)}:{$this->start}-{$this->end}"; } diff --git a/src/ReferenzGenome.php b/src/NamingConvention.php similarity index 65% rename from src/ReferenzGenome.php rename to src/NamingConvention.php index 97989a4..44c851d 100644 --- a/src/ReferenzGenome.php +++ b/src/NamingConvention.php @@ -2,10 +2,10 @@ namespace MLL\Utils; -class ReferenzGenome +class NamingConvention { - public const HG_19 = 'HG_19'; - public const GRCH_37 = 'GRCH_37'; + public const ENSEMBL = 'ENSEMBL'; + public const UCSC = 'UCSC'; public string $value; diff --git a/tests/ChromosomeTest.php b/tests/ChromosomeTest.php index e9a2e78..af76694 100644 --- a/tests/ChromosomeTest.php +++ b/tests/ChromosomeTest.php @@ -1,7 +1,7 @@ toString(new ReferenzGenome(ReferenzGenome::GRCH_37))); + self::assertSame('11', $chromosome->toString(new NamingConvention(NamingConvention::UCSC))); } public function testInitWithGRC37(): void diff --git a/tests/GenomicPositionTest.php b/tests/GenomicPositionTest.php index 2d9074f..a886abe 100644 --- a/tests/GenomicPositionTest.php +++ b/tests/GenomicPositionTest.php @@ -1,6 +1,7 @@ Date: Tue, 3 Mar 2026 13:19:43 +0000 Subject: [PATCH 04/15] Apply php-cs-fixer changes --- tests/GenomicPositionTest.php | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/GenomicPositionTest.php b/tests/GenomicPositionTest.php index a886abe..2d9074f 100644 --- a/tests/GenomicPositionTest.php +++ b/tests/GenomicPositionTest.php @@ -1,7 +1,6 @@ Date: Tue, 3 Mar 2026 14:33:03 +0100 Subject: [PATCH 05/15] Fix Mitochondrial Chromosome --- src/Chromosome.php | 16 +++++++--------- tests/ChromosomeTest.php | 12 +++++++++--- 2 files changed, 16 insertions(+), 12 deletions(-) diff --git a/src/Chromosome.php b/src/Chromosome.php index 5158fc3..3fa796d 100644 --- a/src/Chromosome.php +++ b/src/Chromosome.php @@ -4,23 +4,21 @@ class Chromosome { - /** Matches human chromosomes with or without "chr" prefix: chr1-chr22, chrX, chrY, chrM, chrMT, or 1-22, X, Y, M, MT. */ - public const CHROMOSOME_REGEX = '/^(chr)?(1[0-9]|[1-9]|2[0-2]|X|Y|M|MT)$/i'; - private string $value; private NamingConvention $namingConvention; public function __construct(string $chromosomeAsString) { - if (\Safe\preg_match(self::CHROMOSOME_REGEX, $chromosomeAsString, $matches) === 0) { + /** Matches human chromosomes with or without "chr" prefix: chr1-chr22, chrX, chrY, chrM, chrMT, or 1-22, X, Y, M, MT. */ + if (\Safe\preg_match('/^(chr)?(1[0-9]|[1-9]|2[0-2]|X|Y|M|MT)$/i', $chromosomeAsString, $matches) === 0) { throw new \InvalidArgumentException("Invalid chromosome: {$chromosomeAsString}. Expected format: chr1-chr22, chrX, chrY, chrM, or without chr prefix."); } $this->namingConvention = $matches[1] === 'chr' - ? new NamingConvention(NamingConvention::ENSEMBL) - : new NamingConvention(NamingConvention::UCSC); + ? new NamingConvention(NamingConvention::UCSC) + : new NamingConvention(NamingConvention::ENSEMBL); - $this->value = $matches[2]; + $this->value = strtoupper($matches[2]); } public function toString(?NamingConvention $referenceGenome = null): string @@ -29,9 +27,9 @@ public function toString(?NamingConvention $referenceGenome = null): string switch ($referenceGenome->value) { case NamingConvention::ENSEMBL: - return "chr{$this->value}"; + return $this->value === 'M' ? 'MT' : $this->value; case NamingConvention::UCSC: - return $this->value; + return "chr{$this->value}"; default: throw new \InvalidArgumentException("Invalid reference genome: {$referenceGenome->value}"); } diff --git a/tests/ChromosomeTest.php b/tests/ChromosomeTest.php index af76694..da9ac22 100644 --- a/tests/ChromosomeTest.php +++ b/tests/ChromosomeTest.php @@ -12,18 +12,24 @@ public function testToStringWithDefault(): void self::assertSame('chr11', $chromosome->toString()); } - public function testToStringWithGRC37(): void + public function testToStringForEnsembl(): void { $chromosome = new Chromosome('chr11'); - self::assertSame('11', $chromosome->toString(new NamingConvention(NamingConvention::UCSC))); + self::assertSame('11', $chromosome->toString(new NamingConvention(NamingConvention::ENSEMBL))); } - public function testInitWithGRC37(): void + public function testInitWithUCSC(): void { $chromosome = new Chromosome('11'); self::assertSame('11', $chromosome->toString()); } + public function testToStringWithUCSCAndMitochondrialChromosome(): void + { + $chromosome = new Chromosome('chrM'); + self::assertSame('MT', $chromosome->toString(new NamingConvention(NamingConvention::ENSEMBL))); + } + public function testFailedInit(): void { $chromosomeAsString = 'FOO11'; From 731d8aa150c6f1c1ab3972f70224501b033b342a Mon Sep 17 00:00:00 2001 From: Dennis Haupt Date: Tue, 3 Mar 2026 14:34:36 +0100 Subject: [PATCH 06/15] Fix point in HGVSg Fix example --- src/GenomicPosition.php | 4 ++-- tests/GenomicPositionTest.php | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/GenomicPosition.php b/src/GenomicPosition.php index 3731be4..3642be1 100644 --- a/src/GenomicPosition.php +++ b/src/GenomicPosition.php @@ -18,10 +18,10 @@ public function __construct(Chromosome $chromosome, int $position) $this->position = $position; } - /** @example GenomicPosition::fromString('chr1:123456') */ + /** @example GenomicPosition::parse('chr1:123456') */ public static function parse(string $genomicPosition): self { - if (\Safe\preg_match('/^(.+):(g|)(\d+)$/', $genomicPosition, $matches) === 0) { + if (\Safe\preg_match('/^(.+):(g\.|)(\d+)$/', $genomicPosition, $matches) === 0) { throw new \InvalidArgumentException("Invalid genomic position format: {$genomicPosition}. Expected format: chr1:123456."); } diff --git a/tests/GenomicPositionTest.php b/tests/GenomicPositionTest.php index 2d9074f..2a9e06a 100644 --- a/tests/GenomicPositionTest.php +++ b/tests/GenomicPositionTest.php @@ -19,7 +19,7 @@ public function testParseOnSuccessGRC37(): void public function testParseOnSuccessHGVSg(): void { - $genomicPosition = GenomicPosition::parse('chr11:g1'); + $genomicPosition = GenomicPosition::parse('chr11:g.1'); self::assertSame('chr11:1', $genomicPosition->toString()); } From 900a024ca2c25e5bd2135aa732249d29d963d66f Mon Sep 17 00:00:00 2001 From: Dennis Haupt Date: Tue, 3 Mar 2026 14:35:29 +0100 Subject: [PATCH 07/15] Missspelling --- src/GenomicRegion.php | 2 +- tests/GenomicRegionTest.php | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/GenomicRegion.php b/src/GenomicRegion.php index 0e2d3e0..5fcd527 100644 --- a/src/GenomicRegion.php +++ b/src/GenomicRegion.php @@ -26,7 +26,7 @@ public function __construct( } if ($start > $end) { - throw new \InvalidArgumentException("End ({$end}) must be greater then start ({$start})"); + throw new \InvalidArgumentException("End ({$end}) must be greater than start ({$start})"); } $this->chromosome = $chromosome; diff --git a/tests/GenomicRegionTest.php b/tests/GenomicRegionTest.php index 048c586..d4b18be 100644 --- a/tests/GenomicRegionTest.php +++ b/tests/GenomicRegionTest.php @@ -36,7 +36,7 @@ public function testStartIsGerateThenEnd(): void { $genomicRegionAsString = '11:2-1'; self::expectException(\InvalidArgumentException::class); - self::expectExceptionMessage('End (1) must be greater then start (2)'); + self::expectExceptionMessage('End (1) must be greater than start (2)'); GenomicRegion::parse($genomicRegionAsString); } From 466c5e586b42bfc0c7428f786e73535422af394b Mon Sep 17 00:00:00 2001 From: Dennis Haupt Date: Tue, 3 Mar 2026 14:57:13 +0100 Subject: [PATCH 08/15] Bug Fix. Clarify differnces between containsGenomicRegion, intersectsWithGenomicRegion and isCoveredByGenomicRegion --- src/GenomicRegion.php | 12 ++++++++++-- tests/GenomicRegionTest.php | 28 +++++++++++++++++----------- 2 files changed, 27 insertions(+), 13 deletions(-) diff --git a/src/GenomicRegion.php b/src/GenomicRegion.php index 5fcd527..3486119 100644 --- a/src/GenomicRegion.php +++ b/src/GenomicRegion.php @@ -36,7 +36,7 @@ public function __construct( public static function parse(string $genomicRegion): self { - if (preg_match('/^(.+):(g|)(\d+)(-(\d+)|)$/', $genomicRegion, $matches) === 0) { + if (preg_match('/^(.+):(g\.|)(\d+)(-(\d+)|)$/', $genomicRegion, $matches) === 0) { throw new \InvalidArgumentException("Invalid genomic region format: {$genomicRegion}. Expected format: chr1:123-456."); } @@ -60,11 +60,19 @@ public function containsGenomicRegion(GenomicRegion $genomicRegion): bool && $this->positionIsBetweenStartAndEnd($genomicRegion->end); } + public function isCoveredByGenomicRegion(GenomicRegion $genomicRegion): bool + { + return $this->chromosome->toString() === $genomicRegion->chromosome->toString() + && $genomicRegion->start <= $this->start + && $genomicRegion->end >= $this->end; + } + public function intersectsWithGenomicRegion(GenomicRegion $genomicRegion): bool { return $this->chromosome->toString() === $genomicRegion->chromosome->toString() && ( - $this->positionIsBetweenStartAndEnd($genomicRegion->start) + $this->isCoveredByGenomicRegion($genomicRegion) + || $this->positionIsBetweenStartAndEnd($genomicRegion->start) || $this->positionIsBetweenStartAndEnd($genomicRegion->end) ); } diff --git a/tests/GenomicRegionTest.php b/tests/GenomicRegionTest.php index d4b18be..f98b9da 100644 --- a/tests/GenomicRegionTest.php +++ b/tests/GenomicRegionTest.php @@ -6,13 +6,13 @@ final class GenomicRegionTest extends TestCase { - public function testParseOnSuccessHG19(): void + public function testParseOnSuccessUCSC(): void { $genomicRegion = GenomicRegion::parse('chr11:1-2'); self::assertSame('chr11:1-2', $genomicRegion->toString()); } - public function testParseOnSuccessGRC37(): void + public function testParseOnSuccessEnsembl(): void { $genomicRegion = GenomicRegion::parse('11:1-2'); self::assertSame('11:1-2', $genomicRegion->toString()); @@ -20,7 +20,7 @@ public function testParseOnSuccessGRC37(): void public function testParseOnSuccessHGVSg(): void { - $genomicRegion = GenomicRegion::parse('chr11:g1-2'); + $genomicRegion = GenomicRegion::parse('chr11:g.1-2'); self::assertSame('chr11:1-2', $genomicRegion->toString()); } @@ -42,37 +42,43 @@ public function testStartIsGerateThenEnd(): void public function testContainsGenomicPositionIsTrue(): void { - $genomicRegion = GenomicRegion::parse('chr11:g1-20'); + $genomicRegion = GenomicRegion::parse('chr11:g.1-20'); self::assertTrue($genomicRegion->containsGenomicPosition(GenomicPosition::parse('chr11:20'))); } public function testContainsGenomicPositionIsFalse(): void { - $genomicRegion = GenomicRegion::parse('chr11:g1-20'); + $genomicRegion = GenomicRegion::parse('chr11:g.1-20'); self::assertFalse($genomicRegion->containsGenomicPosition(GenomicPosition::parse('chr11:21'))); } public function testContainsGenomicRegionIsTrue(): void { - $genomicRegion = GenomicRegion::parse('chr11:g1-20'); + $genomicRegion = GenomicRegion::parse('chr11:g.1-20'); self::assertTrue($genomicRegion->containsGenomicRegion(GenomicRegion::parse('chr11:19-20'))); } public function testContainsGenomicRegionIsFalse(): void { - $genomicRegion = GenomicRegion::parse('chr11:g1-20'); + $genomicRegion = GenomicRegion::parse('chr11:g.1-20'); self::assertFalse($genomicRegion->containsGenomicRegion(GenomicRegion::parse('chr11:21-22'))); } - public function testIntersectsWithGenomicRegionIsTrue(): void + public function testCoversGenomicRegionIsTrue(): void { - $genomicRegion = GenomicRegion::parse('chr11:g20-30'); - self::assertTrue($genomicRegion->intersectsWithGenomicRegion(GenomicRegion::parse('chr11:15-25'))); + $genomicRegion = GenomicRegion::parse('chr11:g.20-30'); + self::assertTrue($genomicRegion->isCoveredByGenomicRegion(GenomicRegion::parse('chr11:g.15-35'))); + } + + public function testIntersectsFullyWithGenomicRegionIsTrue(): void + { + $genomicRegion = GenomicRegion::parse('chr11:g.20-30'); + self::assertTrue($genomicRegion->intersectsWithGenomicRegion(GenomicRegion::parse('chr11:g.15-35'))); } public function testIntersectsWithGenomicRegionIsFalse(): void { - $genomicRegion = GenomicRegion::parse('chr11:g20-30'); + $genomicRegion = GenomicRegion::parse('chr11:g.20-30'); self::assertFalse($genomicRegion->intersectsWithGenomicRegion(GenomicRegion::parse('chr11:15-19'))); } } From d1a85a1c556b3ea1199c4dde7acfd4f10911bff6 Mon Sep 17 00:00:00 2001 From: Dennis Haupt Date: Tue, 3 Mar 2026 15:05:58 +0100 Subject: [PATCH 09/15] Add validation step for NamingConvention.php --- src/Chromosome.php | 8 ++++---- src/GenomicPosition.php | 4 ++-- src/GenomicRegion.php | 4 ++-- src/NamingConvention.php | 9 ++++++++- 4 files changed, 16 insertions(+), 9 deletions(-) diff --git a/src/Chromosome.php b/src/Chromosome.php index 3fa796d..784b271 100644 --- a/src/Chromosome.php +++ b/src/Chromosome.php @@ -21,17 +21,17 @@ public function __construct(string $chromosomeAsString) $this->value = strtoupper($matches[2]); } - public function toString(?NamingConvention $referenceGenome = null): string + public function toString(?NamingConvention $namingConvention = null): string { - $referenceGenome ??= $this->namingConvention; + $namingConvention ??= $this->namingConvention; - switch ($referenceGenome->value) { + switch ($namingConvention->value) { case NamingConvention::ENSEMBL: return $this->value === 'M' ? 'MT' : $this->value; case NamingConvention::UCSC: return "chr{$this->value}"; default: - throw new \InvalidArgumentException("Invalid reference genome: {$referenceGenome->value}"); + throw new \InvalidArgumentException("No toString logic implemented for valid naming convention: {$namingConvention->value}"); } } } diff --git a/src/GenomicPosition.php b/src/GenomicPosition.php index 3642be1..f354d9e 100644 --- a/src/GenomicPosition.php +++ b/src/GenomicPosition.php @@ -28,8 +28,8 @@ public static function parse(string $genomicPosition): self return new self(new Chromosome($matches[1]), (int) $matches[3]); } - public function toString(?NamingConvention $referenceGenome = null): string + public function toString(?NamingConvention $namingConvention = null): string { - return "{$this->chromosome->toString($referenceGenome)}:{$this->position}"; + return "{$this->chromosome->toString($namingConvention)}:{$this->position}"; } } diff --git a/src/GenomicRegion.php b/src/GenomicRegion.php index 3486119..0195bcd 100644 --- a/src/GenomicRegion.php +++ b/src/GenomicRegion.php @@ -82,8 +82,8 @@ private function positionIsBetweenStartAndEnd(int $position): bool return $position >= $this->start && $position <= $this->end; } - public function toString(?NamingConvention $referenceGenome = null): string + public function toString(?NamingConvention $namingConvention = null): string { - return "{$this->chromosome->toString($referenceGenome)}:{$this->start}-{$this->end}"; + return "{$this->chromosome->toString($namingConvention)}:{$this->start}-{$this->end}"; } } diff --git a/src/NamingConvention.php b/src/NamingConvention.php index 44c851d..be0d5cb 100644 --- a/src/NamingConvention.php +++ b/src/NamingConvention.php @@ -11,6 +11,13 @@ class NamingConvention public function __construct(string $value) { - $this->value = $value; + switch ($value) { + case NamingConvention::ENSEMBL: + case NamingConvention::UCSC: + $this->value = $value; + break; + default: + throw new \InvalidArgumentException("Invalid naming convention: {$value}"); + } } } From 414c336801c9dc63b55297537dd8d76da8805540 Mon Sep 17 00:00:00 2001 From: Dennis Haupt Date: Wed, 4 Mar 2026 09:16:52 +0100 Subject: [PATCH 10/15] Bug 1 - Case-Insensitive Prefix-Erkennung (Chromosome.php:17) Bug 2 - containsGenomicPosition() bricht bei gemischten Konventionen (GenomicRegion.php:52) Add tests --- src/Chromosome.php | 15 +++++++++++++-- src/GenomicRegion.php | 10 +++++----- tests/ChromosomeTest.php | 35 ++++++++++++++++++++++++++++++++++- tests/GenomicRegionTest.php | 24 ++++++++++++++++++++++++ 4 files changed, 76 insertions(+), 8 deletions(-) diff --git a/src/Chromosome.php b/src/Chromosome.php index 784b271..c133d68 100644 --- a/src/Chromosome.php +++ b/src/Chromosome.php @@ -14,11 +14,12 @@ public function __construct(string $chromosomeAsString) if (\Safe\preg_match('/^(chr)?(1[0-9]|[1-9]|2[0-2]|X|Y|M|MT)$/i', $chromosomeAsString, $matches) === 0) { throw new \InvalidArgumentException("Invalid chromosome: {$chromosomeAsString}. Expected format: chr1-chr22, chrX, chrY, chrM, or without chr prefix."); } - $this->namingConvention = $matches[1] === 'chr' + $this->namingConvention = $matches[1] !== '' ? new NamingConvention(NamingConvention::UCSC) : new NamingConvention(NamingConvention::ENSEMBL); - $this->value = strtoupper($matches[2]); + $value = strtoupper($matches[2]); + $this->value = $value === 'MT' ? 'M': $value; } public function toString(?NamingConvention $namingConvention = null): string @@ -34,4 +35,14 @@ public function toString(?NamingConvention $namingConvention = null): string throw new \InvalidArgumentException("No toString logic implemented for valid naming convention: {$namingConvention->value}"); } } + + public function getRawValue(): string + { + return $this->value; + } + + public function equals(Chromosome $chromosome): bool + { + return $this->value === $chromosome->getRawValue(); + } } diff --git a/src/GenomicRegion.php b/src/GenomicRegion.php index 0195bcd..1e71e08 100644 --- a/src/GenomicRegion.php +++ b/src/GenomicRegion.php @@ -4,7 +4,7 @@ use function Safe\preg_match; -final class GenomicRegion +class GenomicRegion { public Chromosome $chromosome; @@ -49,27 +49,27 @@ public static function parse(string $genomicRegion): self public function containsGenomicPosition(GenomicPosition $genomicPosition): bool { - return $this->chromosome->toString() === $genomicPosition->chromosome->toString() + return $this->chromosome->equals($genomicPosition->chromosome) && $this->positionIsBetweenStartAndEnd($genomicPosition->position); } public function containsGenomicRegion(GenomicRegion $genomicRegion): bool { - return $this->chromosome->toString() === $genomicRegion->chromosome->toString() + return $this->chromosome->equals($genomicRegion->chromosome) && $this->positionIsBetweenStartAndEnd($genomicRegion->start) && $this->positionIsBetweenStartAndEnd($genomicRegion->end); } public function isCoveredByGenomicRegion(GenomicRegion $genomicRegion): bool { - return $this->chromosome->toString() === $genomicRegion->chromosome->toString() + return $this->chromosome->equals($genomicRegion->chromosome) && $genomicRegion->start <= $this->start && $genomicRegion->end >= $this->end; } public function intersectsWithGenomicRegion(GenomicRegion $genomicRegion): bool { - return $this->chromosome->toString() === $genomicRegion->chromosome->toString() + return $this->chromosome->equals($genomicRegion->chromosome) && ( $this->isCoveredByGenomicRegion($genomicRegion) || $this->positionIsBetweenStartAndEnd($genomicRegion->start) diff --git a/tests/ChromosomeTest.php b/tests/ChromosomeTest.php index da9ac22..44924b5 100644 --- a/tests/ChromosomeTest.php +++ b/tests/ChromosomeTest.php @@ -18,12 +18,24 @@ public function testToStringForEnsembl(): void self::assertSame('11', $chromosome->toString(new NamingConvention(NamingConvention::ENSEMBL))); } - public function testInitWithUCSC(): void + public function testInitWithEnsembl(): void { $chromosome = new Chromosome('11'); self::assertSame('11', $chromosome->toString()); } + public function testUpperCaseChrPrefixIsDetectedAsUCSC(): void + { + $chromosome = new Chromosome('CHR11'); + self::assertSame('chr11', $chromosome->toString()); + } + + public function testMixedCaseChrPrefixIsDetectedAsUCSC(): void + { + $chromosome = new Chromosome('Chr11'); + self::assertSame('chr11', $chromosome->toString()); + } + public function testToStringWithUCSCAndMitochondrialChromosome(): void { $chromosome = new Chromosome('chrM'); @@ -37,4 +49,25 @@ public function testFailedInit(): void self::expectExceptionMessage("Invalid chromosome: {$chromosomeAsString}. Expected format: chr1-chr22, chrX, chrY, chrM, or without chr prefix."); new Chromosome($chromosomeAsString); } + + public function testNormalizesMTtoM(): void + { + $fromMT = new Chromosome('chrMT'); + self::assertSame('chrM', $fromMT->toString(new NamingConvention(NamingConvention::UCSC))); + self::assertSame('MT', $fromMT->toString(new NamingConvention(NamingConvention::ENSEMBL))); + } + + public function testEqualsIgnoresNamingConvention(): void + { + $ucsc = new Chromosome('chr11'); + $ensembl = new Chromosome('11'); + self::assertTrue($ucsc->equals($ensembl)); + } + + public function testEqualsCaseInsensitive(): void + { + $upper = new Chromosome('chrX'); + $lower = new Chromosome('chrx'); + self::assertTrue($upper->equals($lower)); + } } diff --git a/tests/GenomicRegionTest.php b/tests/GenomicRegionTest.php index f98b9da..55ff7f1 100644 --- a/tests/GenomicRegionTest.php +++ b/tests/GenomicRegionTest.php @@ -52,6 +52,18 @@ public function testContainsGenomicPositionIsFalse(): void self::assertFalse($genomicRegion->containsGenomicPosition(GenomicPosition::parse('chr11:21'))); } + public function testContainsGenomicPositionAcrossNamingConventions(): void + { + $genomicRegion = GenomicRegion::parse('chr11:1-20'); + self::assertTrue($genomicRegion->containsGenomicPosition(GenomicPosition::parse('11:15'))); + } + + public function testContainsGenomicPositionOnDifferentChromosome(): void + { + $genomicRegion = GenomicRegion::parse('chr11:1-20'); + self::assertFalse($genomicRegion->containsGenomicPosition(GenomicPosition::parse('chr12:15'))); + } + public function testContainsGenomicRegionIsTrue(): void { $genomicRegion = GenomicRegion::parse('chr11:g.1-20'); @@ -76,9 +88,21 @@ public function testIntersectsFullyWithGenomicRegionIsTrue(): void self::assertTrue($genomicRegion->intersectsWithGenomicRegion(GenomicRegion::parse('chr11:g.15-35'))); } + public function testIntersectsPartiallyWithGenomicRegion(): void + { + $genomicRegion = GenomicRegion::parse('chr11:20-30'); + self::assertTrue($genomicRegion->intersectsWithGenomicRegion(GenomicRegion::parse('chr11:25-35'))); + } + public function testIntersectsWithGenomicRegionIsFalse(): void { $genomicRegion = GenomicRegion::parse('chr11:g.20-30'); self::assertFalse($genomicRegion->intersectsWithGenomicRegion(GenomicRegion::parse('chr11:15-19'))); } + + public function testIntersectsWithGenomicRegionOnDifferentChromosome(): void + { + $genomicRegion = GenomicRegion::parse('chr11:20-30'); + self::assertFalse($genomicRegion->intersectsWithGenomicRegion(GenomicRegion::parse('chr12:20-30'))); + } } From b6bc89ade9550fa443cf28ccf6b44611e176eb42 Mon Sep 17 00:00:00 2001 From: KingKong1213 <168984406+KingKong1213@users.noreply.github.com> Date: Wed, 4 Mar 2026 08:17:30 +0000 Subject: [PATCH 11/15] Apply php-cs-fixer changes --- src/Chromosome.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Chromosome.php b/src/Chromosome.php index c133d68..086484f 100644 --- a/src/Chromosome.php +++ b/src/Chromosome.php @@ -19,7 +19,7 @@ public function __construct(string $chromosomeAsString) : new NamingConvention(NamingConvention::ENSEMBL); $value = strtoupper($matches[2]); - $this->value = $value === 'MT' ? 'M': $value; + $this->value = $value === 'MT' ? 'M' : $value; } public function toString(?NamingConvention $namingConvention = null): string From f004c8e63ecb0ce8155c2cbedf85add6919cd6ef Mon Sep 17 00:00:00 2001 From: Simon Bigelmayr Date: Wed, 4 Mar 2026 10:34:04 +0100 Subject: [PATCH 12/15] feat!: decouple NamingConvention from Chromosome, fix domain model MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Chromosome no longer stores a NamingConvention. It stores only the canonical value (e.g. "11", "X", "M") and requires an explicit NamingConvention parameter for toString(). This correctly models that naming convention is a serialization concern, not an intrinsic property of a chromosome. Changes: - Chromosome: remove stored NamingConvention, add equals(), normalize MT→M and case via strtoupper, extract MITOCHONDRIAL constants - GenomicRegion: remove final, use equals() instead of toString() for comparisons, simplify intersectsWithGenomicRegion to canonical interval overlap formula (start1 <= end2 && start2 <= end1) - GenomicPosition/GenomicRegion: toString() now requires NamingConvention, tighten parse regex from .+ to [^:]+ - Tests: cross-convention equality, MT normalization, case insensitivity, boundary chromosomes, single-point overlap, single-base region, identical region coverage, different chromosome rejection BREAKING CHANGE: toString() on Chromosome, GenomicPosition, and GenomicRegion now requires a NamingConvention argument instead of accepting an optional nullable parameter. Co-Authored-By: Claude Opus 4.6 --- src/Chromosome.php | 33 +++++------- src/GenomicPosition.php | 4 +- src/GenomicRegion.php | 17 +++---- tests/ChromosomeTest.php | 81 ++++++++++++++++++------------ tests/GenomicPositionTest.php | 20 +++++--- tests/GenomicRegionTest.php | 94 +++++++++++++++++++++-------------- 6 files changed, 142 insertions(+), 107 deletions(-) diff --git a/src/Chromosome.php b/src/Chromosome.php index 086484f..eb94a00 100644 --- a/src/Chromosome.php +++ b/src/Chromosome.php @@ -4,9 +4,10 @@ class Chromosome { - private string $value; + private const MITOCHONDRIAL = 'M'; + private const MITOCHONDRIAL_ENSEMBL = 'MT'; - private NamingConvention $namingConvention; + private string $value; public function __construct(string $chromosomeAsString) { @@ -14,35 +15,25 @@ public function __construct(string $chromosomeAsString) if (\Safe\preg_match('/^(chr)?(1[0-9]|[1-9]|2[0-2]|X|Y|M|MT)$/i', $chromosomeAsString, $matches) === 0) { throw new \InvalidArgumentException("Invalid chromosome: {$chromosomeAsString}. Expected format: chr1-chr22, chrX, chrY, chrM, or without chr prefix."); } - $this->namingConvention = $matches[1] !== '' - ? new NamingConvention(NamingConvention::UCSC) - : new NamingConvention(NamingConvention::ENSEMBL); $value = strtoupper($matches[2]); - $this->value = $value === 'MT' ? 'M' : $value; + $this->value = $value === self::MITOCHONDRIAL_ENSEMBL ? self::MITOCHONDRIAL : $value; } - public function toString(?NamingConvention $namingConvention = null): string + public function equals(self $other): bool { - $namingConvention ??= $this->namingConvention; + return $this->value === $other->value; + } + public function toString(NamingConvention $namingConvention): string + { switch ($namingConvention->value) { - case NamingConvention::ENSEMBL: - return $this->value === 'M' ? 'MT' : $this->value; case NamingConvention::UCSC: return "chr{$this->value}"; + case NamingConvention::ENSEMBL: + return $this->value === self::MITOCHONDRIAL ? self::MITOCHONDRIAL_ENSEMBL : $this->value; default: - throw new \InvalidArgumentException("No toString logic implemented for valid naming convention: {$namingConvention->value}"); + throw new \InvalidArgumentException("No toString logic implemented for naming convention: {$namingConvention->value}"); } } - - public function getRawValue(): string - { - return $this->value; - } - - public function equals(Chromosome $chromosome): bool - { - return $this->value === $chromosome->getRawValue(); - } } diff --git a/src/GenomicPosition.php b/src/GenomicPosition.php index f354d9e..a50b04d 100644 --- a/src/GenomicPosition.php +++ b/src/GenomicPosition.php @@ -21,14 +21,14 @@ public function __construct(Chromosome $chromosome, int $position) /** @example GenomicPosition::parse('chr1:123456') */ public static function parse(string $genomicPosition): self { - if (\Safe\preg_match('/^(.+):(g\.|)(\d+)$/', $genomicPosition, $matches) === 0) { + if (\Safe\preg_match('/^([^:]+):(g\.|)(\d+)$/', $genomicPosition, $matches) === 0) { throw new \InvalidArgumentException("Invalid genomic position format: {$genomicPosition}. Expected format: chr1:123456."); } return new self(new Chromosome($matches[1]), (int) $matches[3]); } - public function toString(?NamingConvention $namingConvention = null): string + public function toString(NamingConvention $namingConvention): string { return "{$this->chromosome->toString($namingConvention)}:{$this->position}"; } diff --git a/src/GenomicRegion.php b/src/GenomicRegion.php index 1e71e08..0c1c81e 100644 --- a/src/GenomicRegion.php +++ b/src/GenomicRegion.php @@ -36,7 +36,7 @@ public function __construct( public static function parse(string $genomicRegion): self { - if (preg_match('/^(.+):(g\.|)(\d+)(-(\d+)|)$/', $genomicRegion, $matches) === 0) { + if (preg_match('/^([^:]+):(g\.|)(\d+)(-(\d+)|)$/', $genomicRegion, $matches) === 0) { throw new \InvalidArgumentException("Invalid genomic region format: {$genomicRegion}. Expected format: chr1:123-456."); } @@ -53,28 +53,25 @@ public function containsGenomicPosition(GenomicPosition $genomicPosition): bool && $this->positionIsBetweenStartAndEnd($genomicPosition->position); } - public function containsGenomicRegion(GenomicRegion $genomicRegion): bool + public function containsGenomicRegion(self $genomicRegion): bool { return $this->chromosome->equals($genomicRegion->chromosome) && $this->positionIsBetweenStartAndEnd($genomicRegion->start) && $this->positionIsBetweenStartAndEnd($genomicRegion->end); } - public function isCoveredByGenomicRegion(GenomicRegion $genomicRegion): bool + public function isCoveredByGenomicRegion(self $genomicRegion): bool { return $this->chromosome->equals($genomicRegion->chromosome) && $genomicRegion->start <= $this->start && $genomicRegion->end >= $this->end; } - public function intersectsWithGenomicRegion(GenomicRegion $genomicRegion): bool + public function intersectsWithGenomicRegion(self $genomicRegion): bool { return $this->chromosome->equals($genomicRegion->chromosome) - && ( - $this->isCoveredByGenomicRegion($genomicRegion) - || $this->positionIsBetweenStartAndEnd($genomicRegion->start) - || $this->positionIsBetweenStartAndEnd($genomicRegion->end) - ); + && $this->start <= $genomicRegion->end + && $genomicRegion->start <= $this->end; } private function positionIsBetweenStartAndEnd(int $position): bool @@ -82,7 +79,7 @@ private function positionIsBetweenStartAndEnd(int $position): bool return $position >= $this->start && $position <= $this->end; } - public function toString(?NamingConvention $namingConvention = null): string + public function toString(NamingConvention $namingConvention): string { return "{$this->chromosome->toString($namingConvention)}:{$this->start}-{$this->end}"; } diff --git a/tests/ChromosomeTest.php b/tests/ChromosomeTest.php index 44924b5..4c60070 100644 --- a/tests/ChromosomeTest.php +++ b/tests/ChromosomeTest.php @@ -6,68 +6,87 @@ final class ChromosomeTest extends TestCase { - public function testToStringWithDefault(): void + public function testToStringUCSC(): void { $chromosome = new Chromosome('chr11'); - self::assertSame('chr11', $chromosome->toString()); + self::assertSame('chr11', $chromosome->toString(new NamingConvention(NamingConvention::UCSC))); } - public function testToStringForEnsembl(): void + public function testToStringEnsembl(): void { $chromosome = new Chromosome('chr11'); self::assertSame('11', $chromosome->toString(new NamingConvention(NamingConvention::ENSEMBL))); } - public function testInitWithEnsembl(): void + public function testToStringFromEnsemblInput(): void { $chromosome = new Chromosome('11'); - self::assertSame('11', $chromosome->toString()); + self::assertSame('11', $chromosome->toString(new NamingConvention(NamingConvention::ENSEMBL))); + self::assertSame('chr11', $chromosome->toString(new NamingConvention(NamingConvention::UCSC))); } - public function testUpperCaseChrPrefixIsDetectedAsUCSC(): void + public function testMitochondrialNormalization(): void { - $chromosome = new Chromosome('CHR11'); - self::assertSame('chr11', $chromosome->toString()); + $fromChrM = new Chromosome('chrM'); + $fromMT = new Chromosome('MT'); + $fromChrMT = new Chromosome('chrMT'); + + self::assertTrue($fromChrM->equals($fromMT)); + self::assertTrue($fromChrM->equals($fromChrMT)); + + self::assertSame('chrM', $fromChrM->toString(new NamingConvention(NamingConvention::UCSC))); + self::assertSame('MT', $fromChrM->toString(new NamingConvention(NamingConvention::ENSEMBL))); + self::assertSame('chrM', $fromChrMT->toString(new NamingConvention(NamingConvention::UCSC))); + self::assertSame('MT', $fromMT->toString(new NamingConvention(NamingConvention::ENSEMBL))); } - public function testMixedCaseChrPrefixIsDetectedAsUCSC(): void + public function testEqualsAcrossNamingConventions(): void { - $chromosome = new Chromosome('Chr11'); - self::assertSame('chr11', $chromosome->toString()); + $ucsc = new Chromosome('chr11'); + $ensembl = new Chromosome('11'); + self::assertTrue($ucsc->equals($ensembl)); } - public function testToStringWithUCSCAndMitochondrialChromosome(): void + public function testEqualsCaseInsensitive(): void { - $chromosome = new Chromosome('chrM'); - self::assertSame('MT', $chromosome->toString(new NamingConvention(NamingConvention::ENSEMBL))); + $upper = new Chromosome('chrX'); + $lower = new Chromosome('chrx'); + self::assertTrue($upper->equals($lower)); } - public function testFailedInit(): void + public function testCaseInsensitivePrefixDetection(): void { - $chromosomeAsString = 'FOO11'; - self::expectException(\InvalidArgumentException::class); - self::expectExceptionMessage("Invalid chromosome: {$chromosomeAsString}. Expected format: chr1-chr22, chrX, chrY, chrM, or without chr prefix."); - new Chromosome($chromosomeAsString); + $chr = new Chromosome('CHR11'); + self::assertSame('chr11', $chr->toString(new NamingConvention(NamingConvention::UCSC))); + + $chr2 = new Chromosome('Chr11'); + self::assertSame('chr11', $chr2->toString(new NamingConvention(NamingConvention::UCSC))); } - public function testNormalizesMTtoM(): void + public function testBoundaryChromosomes(): void { - $fromMT = new Chromosome('chrMT'); - self::assertSame('chrM', $fromMT->toString(new NamingConvention(NamingConvention::UCSC))); - self::assertSame('MT', $fromMT->toString(new NamingConvention(NamingConvention::ENSEMBL))); + $chr1 = new Chromosome('chr1'); + $chr22 = new Chromosome('chr22'); + $chrX = new Chromosome('X'); + $chrY = new Chromosome('chrY'); + + self::assertSame('chr1', $chr1->toString(new NamingConvention(NamingConvention::UCSC))); + self::assertSame('chr22', $chr22->toString(new NamingConvention(NamingConvention::UCSC))); + self::assertSame('chrX', $chrX->toString(new NamingConvention(NamingConvention::UCSC))); + self::assertSame('Y', $chrY->toString(new NamingConvention(NamingConvention::ENSEMBL))); } - public function testEqualsIgnoresNamingConvention(): void + public function testRejectsInvalidChromosomeNumbers(): void { - $ucsc = new Chromosome('chr11'); - $ensembl = new Chromosome('11'); - self::assertTrue($ucsc->equals($ensembl)); + self::expectException(\InvalidArgumentException::class); + new Chromosome('chr23'); } - public function testEqualsCaseInsensitive(): void + public function testFailedInit(): void { - $upper = new Chromosome('chrX'); - $lower = new Chromosome('chrx'); - self::assertTrue($upper->equals($lower)); + $chromosomeAsString = 'FOO11'; + self::expectException(\InvalidArgumentException::class); + self::expectExceptionMessage("Invalid chromosome: {$chromosomeAsString}. Expected format: chr1-chr22, chrX, chrY, chrM, or without chr prefix."); + new Chromosome($chromosomeAsString); } } diff --git a/tests/GenomicPositionTest.php b/tests/GenomicPositionTest.php index 2a9e06a..7b9ad11 100644 --- a/tests/GenomicPositionTest.php +++ b/tests/GenomicPositionTest.php @@ -1,26 +1,34 @@ toString()); + self::assertSame('chr11:1', $genomicPosition->toString(new NamingConvention(NamingConvention::UCSC))); } - public function testParseOnSuccessGRC37(): void + public function testParseEnsembl(): void { $genomicPosition = GenomicPosition::parse('11:1'); - self::assertSame('11:1', $genomicPosition->toString()); + self::assertSame('11:1', $genomicPosition->toString(new NamingConvention(NamingConvention::ENSEMBL))); } - public function testParseOnSuccessHGVSg(): void + public function testParseHGVSg(): void { $genomicPosition = GenomicPosition::parse('chr11:g.1'); - self::assertSame('chr11:1', $genomicPosition->toString()); + self::assertSame('chr11:1', $genomicPosition->toString(new NamingConvention(NamingConvention::UCSC))); + } + + public function testOutputInBothConventions(): void + { + $genomicPosition = GenomicPosition::parse('chr11:12345'); + self::assertSame('chr11:12345', $genomicPosition->toString(new NamingConvention(NamingConvention::UCSC))); + self::assertSame('11:12345', $genomicPosition->toString(new NamingConvention(NamingConvention::ENSEMBL))); } public function testParseOnError(): void diff --git a/tests/GenomicRegionTest.php b/tests/GenomicRegionTest.php index 55ff7f1..9002b9d 100644 --- a/tests/GenomicRegionTest.php +++ b/tests/GenomicRegionTest.php @@ -2,26 +2,27 @@ use MLL\Utils\GenomicPosition; use MLL\Utils\GenomicRegion; +use MLL\Utils\NamingConvention; use PHPUnit\Framework\TestCase; final class GenomicRegionTest extends TestCase { - public function testParseOnSuccessUCSC(): void + public function testParseUCSC(): void { $genomicRegion = GenomicRegion::parse('chr11:1-2'); - self::assertSame('chr11:1-2', $genomicRegion->toString()); + self::assertSame('chr11:1-2', $genomicRegion->toString(new NamingConvention(NamingConvention::UCSC))); } - public function testParseOnSuccessEnsembl(): void + public function testParseEnsembl(): void { $genomicRegion = GenomicRegion::parse('11:1-2'); - self::assertSame('11:1-2', $genomicRegion->toString()); + self::assertSame('11:1-2', $genomicRegion->toString(new NamingConvention(NamingConvention::ENSEMBL))); } - public function testParseOnSuccessHGVSg(): void + public function testParseHGVSg(): void { $genomicRegion = GenomicRegion::parse('chr11:g.1-2'); - self::assertSame('chr11:1-2', $genomicRegion->toString()); + self::assertSame('chr11:1-2', $genomicRegion->toString(new NamingConvention(NamingConvention::UCSC))); } public function testParseOnError(): void @@ -32,23 +33,17 @@ public function testParseOnError(): void GenomicRegion::parse($genomicRegionAsString); } - public function testStartIsGerateThenEnd(): void + public function testStartGreaterThanEnd(): void { - $genomicRegionAsString = '11:2-1'; self::expectException(\InvalidArgumentException::class); self::expectExceptionMessage('End (1) must be greater than start (2)'); - GenomicRegion::parse($genomicRegionAsString); + GenomicRegion::parse('11:2-1'); } - public function testContainsGenomicPositionIsTrue(): void + public function testContainsGenomicPosition(): void { $genomicRegion = GenomicRegion::parse('chr11:g.1-20'); self::assertTrue($genomicRegion->containsGenomicPosition(GenomicPosition::parse('chr11:20'))); - } - - public function testContainsGenomicPositionIsFalse(): void - { - $genomicRegion = GenomicRegion::parse('chr11:g.1-20'); self::assertFalse($genomicRegion->containsGenomicPosition(GenomicPosition::parse('chr11:21'))); } @@ -58,51 +53,76 @@ public function testContainsGenomicPositionAcrossNamingConventions(): void self::assertTrue($genomicRegion->containsGenomicPosition(GenomicPosition::parse('11:15'))); } - public function testContainsGenomicPositionOnDifferentChromosome(): void - { - $genomicRegion = GenomicRegion::parse('chr11:1-20'); - self::assertFalse($genomicRegion->containsGenomicPosition(GenomicPosition::parse('chr12:15'))); - } - - public function testContainsGenomicRegionIsTrue(): void + public function testContainsGenomicRegion(): void { $genomicRegion = GenomicRegion::parse('chr11:g.1-20'); self::assertTrue($genomicRegion->containsGenomicRegion(GenomicRegion::parse('chr11:19-20'))); - } - - public function testContainsGenomicRegionIsFalse(): void - { - $genomicRegion = GenomicRegion::parse('chr11:g.1-20'); self::assertFalse($genomicRegion->containsGenomicRegion(GenomicRegion::parse('chr11:21-22'))); } - public function testCoversGenomicRegionIsTrue(): void + public function testIsCoveredByGenomicRegion(): void { $genomicRegion = GenomicRegion::parse('chr11:g.20-30'); self::assertTrue($genomicRegion->isCoveredByGenomicRegion(GenomicRegion::parse('chr11:g.15-35'))); + self::assertFalse($genomicRegion->isCoveredByGenomicRegion(GenomicRegion::parse('chr11:g.22-35'))); } - public function testIntersectsFullyWithGenomicRegionIsTrue(): void + public function testIntersectsWithGenomicRegionPartialOverlap(): void { $genomicRegion = GenomicRegion::parse('chr11:g.20-30'); - self::assertTrue($genomicRegion->intersectsWithGenomicRegion(GenomicRegion::parse('chr11:g.15-35'))); + self::assertTrue($genomicRegion->intersectsWithGenomicRegion(GenomicRegion::parse('chr11:15-25'))); + self::assertTrue($genomicRegion->intersectsWithGenomicRegion(GenomicRegion::parse('chr11:25-35'))); } - public function testIntersectsPartiallyWithGenomicRegion(): void + public function testIntersectsWithGenomicRegionFullyWrapped(): void { - $genomicRegion = GenomicRegion::parse('chr11:20-30'); - self::assertTrue($genomicRegion->intersectsWithGenomicRegion(GenomicRegion::parse('chr11:25-35'))); + $genomicRegion = GenomicRegion::parse('chr11:g.20-30'); + self::assertTrue($genomicRegion->intersectsWithGenomicRegion(GenomicRegion::parse('chr11:g.15-35'))); } - public function testIntersectsWithGenomicRegionIsFalse(): void + public function testIntersectsWithGenomicRegionNoOverlap(): void { $genomicRegion = GenomicRegion::parse('chr11:g.20-30'); self::assertFalse($genomicRegion->intersectsWithGenomicRegion(GenomicRegion::parse('chr11:15-19'))); } - public function testIntersectsWithGenomicRegionOnDifferentChromosome(): void + public function testIntersectsWithAdjacentRegion(): void + { + $genomicRegion = GenomicRegion::parse('chr11:10-20'); + self::assertFalse($genomicRegion->intersectsWithGenomicRegion(GenomicRegion::parse('chr11:21-30'))); + } + + public function testIntersectsWithSinglePointOverlap(): void + { + $region = GenomicRegion::parse('chr1:10-20'); + self::assertTrue($region->intersectsWithGenomicRegion(GenomicRegion::parse('chr1:20-30'))); + } + + public function testParseSingleBaseRegion(): void + { + $region = GenomicRegion::parse('chr1:100'); + self::assertSame(100, $region->start); + self::assertSame(100, $region->end); + self::assertSame('chr1:100-100', $region->toString(new NamingConvention(NamingConvention::UCSC))); + } + + public function testIsCoveredByIdenticalRegion(): void + { + $region = GenomicRegion::parse('chr11:20-30'); + self::assertTrue($region->isCoveredByGenomicRegion(GenomicRegion::parse('chr11:20-30'))); + } + + public function testContainsPositionAtStartBoundary(): void + { + $region = GenomicRegion::parse('chr11:10-20'); + self::assertTrue($region->containsGenomicPosition(GenomicPosition::parse('chr11:10'))); + } + + public function testDifferentChromosomesNeverMatch(): void { - $genomicRegion = GenomicRegion::parse('chr11:20-30'); - self::assertFalse($genomicRegion->intersectsWithGenomicRegion(GenomicRegion::parse('chr12:20-30'))); + $region = GenomicRegion::parse('chr11:1-100'); + self::assertFalse($region->containsGenomicPosition(GenomicPosition::parse('chr12:50'))); + self::assertFalse($region->containsGenomicRegion(GenomicRegion::parse('chr12:10-20'))); + self::assertFalse($region->intersectsWithGenomicRegion(GenomicRegion::parse('chr12:10-20'))); } } From 1c99617cd952736e36cad2dc7b8b6b0085239362 Mon Sep 17 00:00:00 2001 From: Simon Bigelmayr Date: Wed, 4 Mar 2026 10:46:18 +0100 Subject: [PATCH 13/15] feat: add value(), equals(), length(), and overlap() to genomic types - Chromosome::value() getter for canonical chromosome identity - GenomicPosition::equals() for cross-convention comparison - GenomicRegion::equals(), length(), overlap() for interval operations - Simplify existing tests using new equals() helpers Co-Authored-By: Claude Opus 4.6 --- src/Chromosome.php | 5 +++ src/GenomicPosition.php | 6 +++ src/GenomicRegion.php | 26 +++++++++++++ tests/ChromosomeTest.php | 23 +++++++----- tests/GenomicPositionTest.php | 13 +++++++ tests/GenomicRegionTest.php | 69 +++++++++++++++++++++++++++++++++++ 6 files changed, 133 insertions(+), 9 deletions(-) diff --git a/src/Chromosome.php b/src/Chromosome.php index eb94a00..067217e 100644 --- a/src/Chromosome.php +++ b/src/Chromosome.php @@ -20,6 +20,11 @@ public function __construct(string $chromosomeAsString) $this->value = $value === self::MITOCHONDRIAL_ENSEMBL ? self::MITOCHONDRIAL : $value; } + public function value(): string + { + return $this->value; + } + public function equals(self $other): bool { return $this->value === $other->value; diff --git a/src/GenomicPosition.php b/src/GenomicPosition.php index a50b04d..2fd93aa 100644 --- a/src/GenomicPosition.php +++ b/src/GenomicPosition.php @@ -28,6 +28,12 @@ public static function parse(string $genomicPosition): self return new self(new Chromosome($matches[1]), (int) $matches[3]); } + public function equals(self $other): bool + { + return $this->chromosome->equals($other->chromosome) + && $this->position === $other->position; + } + public function toString(NamingConvention $namingConvention): string { return "{$this->chromosome->toString($namingConvention)}:{$this->position}"; diff --git a/src/GenomicRegion.php b/src/GenomicRegion.php index 0c1c81e..c57fdde 100644 --- a/src/GenomicRegion.php +++ b/src/GenomicRegion.php @@ -47,6 +47,32 @@ public static function parse(string $genomicRegion): self ); } + public function equals(self $other): bool + { + return $this->chromosome->equals($other->chromosome) + && $this->start === $other->start + && $this->end === $other->end; + } + + public function length(): int + { + return $this->end - $this->start + 1; + } + + /** Returns the overlapping region, or null if the regions do not intersect. */ + public function overlap(self $other): ?self + { + if (! $this->intersectsWithGenomicRegion($other)) { + return null; + } + + return new self( + $this->chromosome, + max($this->start, $other->start), + min($this->end, $other->end) + ); + } + public function containsGenomicPosition(GenomicPosition $genomicPosition): bool { return $this->chromosome->equals($genomicPosition->chromosome) diff --git a/tests/ChromosomeTest.php b/tests/ChromosomeTest.php index 4c60070..35cfd90 100644 --- a/tests/ChromosomeTest.php +++ b/tests/ChromosomeTest.php @@ -25,6 +25,16 @@ public function testToStringFromEnsemblInput(): void self::assertSame('chr11', $chromosome->toString(new NamingConvention(NamingConvention::UCSC))); } + public function testValueReturnsCanonicalForm(): void + { + self::assertSame('11', (new Chromosome('chr11'))->value()); + self::assertSame('11', (new Chromosome('11'))->value()); + self::assertSame('X', (new Chromosome('chrx'))->value()); + self::assertSame('M', (new Chromosome('chrM'))->value()); + self::assertSame('M', (new Chromosome('MT'))->value()); + self::assertSame('M', (new Chromosome('chrMT'))->value()); + } + public function testMitochondrialNormalization(): void { $fromChrM = new Chromosome('chrM'); @@ -65,15 +75,10 @@ public function testCaseInsensitivePrefixDetection(): void public function testBoundaryChromosomes(): void { - $chr1 = new Chromosome('chr1'); - $chr22 = new Chromosome('chr22'); - $chrX = new Chromosome('X'); - $chrY = new Chromosome('chrY'); - - self::assertSame('chr1', $chr1->toString(new NamingConvention(NamingConvention::UCSC))); - self::assertSame('chr22', $chr22->toString(new NamingConvention(NamingConvention::UCSC))); - self::assertSame('chrX', $chrX->toString(new NamingConvention(NamingConvention::UCSC))); - self::assertSame('Y', $chrY->toString(new NamingConvention(NamingConvention::ENSEMBL))); + self::assertSame('1', (new Chromosome('chr1'))->value()); + self::assertSame('22', (new Chromosome('chr22'))->value()); + self::assertSame('X', (new Chromosome('X'))->value()); + self::assertSame('Y', (new Chromosome('chrY'))->value()); } public function testRejectsInvalidChromosomeNumbers(): void diff --git a/tests/GenomicPositionTest.php b/tests/GenomicPositionTest.php index 7b9ad11..b389df6 100644 --- a/tests/GenomicPositionTest.php +++ b/tests/GenomicPositionTest.php @@ -31,6 +31,19 @@ public function testOutputInBothConventions(): void self::assertSame('11:12345', $genomicPosition->toString(new NamingConvention(NamingConvention::ENSEMBL))); } + public function testEquals(): void + { + self::assertTrue( + GenomicPosition::parse('chr11:100')->equals(GenomicPosition::parse('11:100')) + ); + self::assertFalse( + GenomicPosition::parse('chr11:100')->equals(GenomicPosition::parse('chr11:101')) + ); + self::assertFalse( + GenomicPosition::parse('chr11:100')->equals(GenomicPosition::parse('chr12:100')) + ); + } + public function testParseOnError(): void { $genomicPositionAsString = '11:1test'; diff --git a/tests/GenomicRegionTest.php b/tests/GenomicRegionTest.php index 9002b9d..05b56bf 100644 --- a/tests/GenomicRegionTest.php +++ b/tests/GenomicRegionTest.php @@ -103,6 +103,7 @@ public function testParseSingleBaseRegion(): void $region = GenomicRegion::parse('chr1:100'); self::assertSame(100, $region->start); self::assertSame(100, $region->end); + self::assertSame(1, $region->length()); self::assertSame('chr1:100-100', $region->toString(new NamingConvention(NamingConvention::UCSC))); } @@ -118,6 +119,74 @@ public function testContainsPositionAtStartBoundary(): void self::assertTrue($region->containsGenomicPosition(GenomicPosition::parse('chr11:10'))); } + public function testEquals(): void + { + self::assertTrue( + GenomicRegion::parse('chr11:10-20')->equals(GenomicRegion::parse('11:10-20')) + ); + self::assertFalse( + GenomicRegion::parse('chr11:10-20')->equals(GenomicRegion::parse('chr11:10-21')) + ); + self::assertFalse( + GenomicRegion::parse('chr11:10-20')->equals(GenomicRegion::parse('chr12:10-20')) + ); + } + + public function testLength(): void + { + self::assertSame(11, GenomicRegion::parse('chr11:20-30')->length()); + self::assertSame(1, GenomicRegion::parse('chr1:5-5')->length()); + self::assertSame(100, GenomicRegion::parse('chr1:1-100')->length()); + } + + public function testOverlapPartial(): void + { + $a = GenomicRegion::parse('chr11:10-20'); + $b = GenomicRegion::parse('chr11:15-25'); + + $overlap = $a->overlap($b); + self::assertNotNull($overlap); + self::assertTrue($overlap->equals(GenomicRegion::parse('chr11:15-20'))); + self::assertSame(6, $overlap->length()); + } + + public function testOverlapFullyContained(): void + { + $outer = GenomicRegion::parse('chr11:10-30'); + $inner = GenomicRegion::parse('chr11:15-20'); + + $overlap = $outer->overlap($inner); + self::assertNotNull($overlap); + self::assertTrue($overlap->equals($inner)); + } + + public function testOverlapSinglePoint(): void + { + $a = GenomicRegion::parse('chr1:10-20'); + $b = GenomicRegion::parse('chr1:20-30'); + + $overlap = $a->overlap($b); + self::assertNotNull($overlap); + self::assertTrue($overlap->equals(GenomicRegion::parse('chr1:20-20'))); + self::assertSame(1, $overlap->length()); + } + + public function testOverlapReturnsNullWhenNoIntersection(): void + { + $a = GenomicRegion::parse('chr11:10-20'); + $b = GenomicRegion::parse('chr11:21-30'); + + self::assertNull($a->overlap($b)); + } + + public function testOverlapReturnsNullForDifferentChromosomes(): void + { + $a = GenomicRegion::parse('chr11:10-20'); + $b = GenomicRegion::parse('chr12:10-20'); + + self::assertNull($a->overlap($b)); + } + public function testDifferentChromosomesNeverMatch(): void { $region = GenomicRegion::parse('chr11:1-100'); From 001769d1b87516afd9dfc1af38a8d9e4ec327eee Mon Sep 17 00:00:00 2001 From: Simon Bigelmayr Date: Wed, 4 Mar 2026 10:53:11 +0100 Subject: [PATCH 14/15] fix(release): remove example issue number from PR template The PR template contained "Resolves #236" as an example inside an HTML comment. issue-parser used by @semantic-release/github parsed this as a real issue reference, causing the success step to fail on every release. Co-Authored-By: Claude Opus 4.6 --- .github/PULL_REQUEST_TEMPLATE.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index 0807eec..f253d31 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -1,7 +1,7 @@ - [ ] Added automated tests - [ ] Documented for all relevant versions - + **Changes** From 456fadb31793ee09e45f4c630f2e4609ba281358 Mon Sep 17 00:00:00 2001 From: Simon Bigelmayr Date: Wed, 4 Mar 2026 11:17:03 +0100 Subject: [PATCH 15/15] feat: improve API naming, consistency, and test coverage MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Rename methods: containsGenomicPosition→containsPosition, containsGenomicRegion→containsRegion, isCoveredByGenomicRegion→isCoveredBy, intersectsWithGenomicRegion→intersects, overlap→intersection - Rename parameters to $other/$value for consistency - Reorder methods: construct→parse→equals→length→toString→spatial→private - Implement containsRegion as delegate to isCoveredBy (DRY) - Use protected for Chromosome::$value (extensibility) - Unify Safe\preg_match usage via use function import - Fix error message precision ("must not be less than") - Add NamingConventionTest - Add edge case tests (empty string, chr0, whitespace, position 0) - Refactor tests to use data providers - Remove redundant tests Co-Authored-By: Claude Opus 4.6 --- src/Chromosome.php | 10 +- src/GenomicPosition.php | 8 +- src/GenomicRegion.php | 66 +++++------ tests/ChromosomeTest.php | 71 +++++------ tests/GenomicPositionTest.php | 44 +++++-- tests/GenomicRegionTest.php | 209 +++++++++++++++++++-------------- tests/NamingConventionTest.php | 36 ++++++ 7 files changed, 270 insertions(+), 174 deletions(-) create mode 100644 tests/NamingConventionTest.php diff --git a/src/Chromosome.php b/src/Chromosome.php index 067217e..432add0 100644 --- a/src/Chromosome.php +++ b/src/Chromosome.php @@ -2,18 +2,20 @@ namespace MLL\Utils; +use function Safe\preg_match; + class Chromosome { private const MITOCHONDRIAL = 'M'; private const MITOCHONDRIAL_ENSEMBL = 'MT'; - private string $value; + protected string $value; - public function __construct(string $chromosomeAsString) + public function __construct(string $value) { /** Matches human chromosomes with or without "chr" prefix: chr1-chr22, chrX, chrY, chrM, chrMT, or 1-22, X, Y, M, MT. */ - if (\Safe\preg_match('/^(chr)?(1[0-9]|[1-9]|2[0-2]|X|Y|M|MT)$/i', $chromosomeAsString, $matches) === 0) { - throw new \InvalidArgumentException("Invalid chromosome: {$chromosomeAsString}. Expected format: chr1-chr22, chrX, chrY, chrM, or without chr prefix."); + if (preg_match('/^(chr)?(1[0-9]|[1-9]|2[0-2]|X|Y|M|MT)$/i', $value, $matches) === 0) { + throw new \InvalidArgumentException("Invalid chromosome: {$value}. Expected format: chr1-chr22, chrX, chrY, chrM, or without chr prefix."); } $value = strtoupper($matches[2]); diff --git a/src/GenomicPosition.php b/src/GenomicPosition.php index 2fd93aa..bdf4eb3 100644 --- a/src/GenomicPosition.php +++ b/src/GenomicPosition.php @@ -2,6 +2,8 @@ namespace MLL\Utils; +use function Safe\preg_match; + class GenomicPosition { public Chromosome $chromosome; @@ -19,10 +21,10 @@ public function __construct(Chromosome $chromosome, int $position) } /** @example GenomicPosition::parse('chr1:123456') */ - public static function parse(string $genomicPosition): self + public static function parse(string $value): self { - if (\Safe\preg_match('/^([^:]+):(g\.|)(\d+)$/', $genomicPosition, $matches) === 0) { - throw new \InvalidArgumentException("Invalid genomic position format: {$genomicPosition}. Expected format: chr1:123456."); + if (preg_match('/^([^:]+):(g\.|)(\d+)$/', $value, $matches) === 0) { + throw new \InvalidArgumentException("Invalid genomic position format: {$value}. Expected format: chr1:123456."); } return new self(new Chromosome($matches[1]), (int) $matches[3]); diff --git a/src/GenomicRegion.php b/src/GenomicRegion.php index c57fdde..3acf3a1 100644 --- a/src/GenomicRegion.php +++ b/src/GenomicRegion.php @@ -26,7 +26,7 @@ public function __construct( } if ($start > $end) { - throw new \InvalidArgumentException("End ({$end}) must be greater than start ({$start})"); + throw new \InvalidArgumentException("End ({$end}) must not be less than start ({$start})."); } $this->chromosome = $chromosome; @@ -34,10 +34,10 @@ public function __construct( $this->end = $end; } - public static function parse(string $genomicRegion): self + public static function parse(string $value): self { - if (preg_match('/^([^:]+):(g\.|)(\d+)(-(\d+)|)$/', $genomicRegion, $matches) === 0) { - throw new \InvalidArgumentException("Invalid genomic region format: {$genomicRegion}. Expected format: chr1:123-456."); + if (preg_match('/^([^:]+):(g\.|)(\d+)(-(\d+)|)$/', $value, $matches) === 0) { + throw new \InvalidArgumentException("Invalid genomic region format: {$value}. Expected format: chr1:123-456."); } return new self( @@ -59,54 +59,52 @@ public function length(): int return $this->end - $this->start + 1; } - /** Returns the overlapping region, or null if the regions do not intersect. */ - public function overlap(self $other): ?self + public function toString(NamingConvention $namingConvention): string { - if (! $this->intersectsWithGenomicRegion($other)) { - return null; - } - - return new self( - $this->chromosome, - max($this->start, $other->start), - min($this->end, $other->end) - ); + return "{$this->chromosome->toString($namingConvention)}:{$this->start}-{$this->end}"; } - public function containsGenomicPosition(GenomicPosition $genomicPosition): bool + public function containsPosition(GenomicPosition $other): bool { - return $this->chromosome->equals($genomicPosition->chromosome) - && $this->positionIsBetweenStartAndEnd($genomicPosition->position); + return $this->chromosome->equals($other->chromosome) + && $this->containsCoordinate($other->position); } - public function containsGenomicRegion(self $genomicRegion): bool + public function containsRegion(self $other): bool { - return $this->chromosome->equals($genomicRegion->chromosome) - && $this->positionIsBetweenStartAndEnd($genomicRegion->start) - && $this->positionIsBetweenStartAndEnd($genomicRegion->end); + return $other->isCoveredBy($this); } - public function isCoveredByGenomicRegion(self $genomicRegion): bool + public function isCoveredBy(self $other): bool { - return $this->chromosome->equals($genomicRegion->chromosome) - && $genomicRegion->start <= $this->start - && $genomicRegion->end >= $this->end; + return $this->chromosome->equals($other->chromosome) + && $other->start <= $this->start + && $other->end >= $this->end; } - public function intersectsWithGenomicRegion(self $genomicRegion): bool + public function intersects(self $other): bool { - return $this->chromosome->equals($genomicRegion->chromosome) - && $this->start <= $genomicRegion->end - && $genomicRegion->start <= $this->end; + return $this->chromosome->equals($other->chromosome) + && $this->start <= $other->end + && $other->start <= $this->end; } - private function positionIsBetweenStartAndEnd(int $position): bool + /** Returns the intersecting region, or null if the regions do not intersect. */ + public function intersection(self $other): ?self { - return $position >= $this->start && $position <= $this->end; + if (! $this->intersects($other)) { + return null; + } + + return new self( + $this->chromosome, + max($this->start, $other->start), + min($this->end, $other->end) + ); } - public function toString(NamingConvention $namingConvention): string + private function containsCoordinate(int $position): bool { - return "{$this->chromosome->toString($namingConvention)}:{$this->start}-{$this->end}"; + return $position >= $this->start && $position <= $this->end; } } diff --git a/tests/ChromosomeTest.php b/tests/ChromosomeTest.php index 35cfd90..2fb2786 100644 --- a/tests/ChromosomeTest.php +++ b/tests/ChromosomeTest.php @@ -2,22 +2,11 @@ use MLL\Utils\Chromosome; use MLL\Utils\NamingConvention; +use PHPUnit\Framework\Attributes\DataProvider; use PHPUnit\Framework\TestCase; final class ChromosomeTest extends TestCase { - public function testToStringUCSC(): void - { - $chromosome = new Chromosome('chr11'); - self::assertSame('chr11', $chromosome->toString(new NamingConvention(NamingConvention::UCSC))); - } - - public function testToStringEnsembl(): void - { - $chromosome = new Chromosome('chr11'); - self::assertSame('11', $chromosome->toString(new NamingConvention(NamingConvention::ENSEMBL))); - } - public function testToStringFromEnsemblInput(): void { $chromosome = new Chromosome('11'); @@ -25,14 +14,26 @@ public function testToStringFromEnsemblInput(): void self::assertSame('chr11', $chromosome->toString(new NamingConvention(NamingConvention::UCSC))); } - public function testValueReturnsCanonicalForm(): void + /** @return iterable */ + public static function canonicalValues(): iterable + { + yield ['chr11', '11']; + yield ['11', '11']; + yield ['chr1', '1']; + yield ['chr22', '22']; + yield ['X', 'X']; + yield ['chrx', 'X']; + yield ['chrY', 'Y']; + yield ['chrM', 'M']; + yield ['MT', 'M']; + yield ['chrMT', 'M']; + } + + /** @dataProvider canonicalValues */ + #[DataProvider('canonicalValues')] + public function testValueReturnsCanonicalForm(string $input, string $expected): void { - self::assertSame('11', (new Chromosome('chr11'))->value()); - self::assertSame('11', (new Chromosome('11'))->value()); - self::assertSame('X', (new Chromosome('chrx'))->value()); - self::assertSame('M', (new Chromosome('chrM'))->value()); - self::assertSame('M', (new Chromosome('MT'))->value()); - self::assertSame('M', (new Chromosome('chrMT'))->value()); + self::assertSame($expected, (new Chromosome($input))->value()); } public function testMitochondrialNormalization(): void @@ -64,6 +65,11 @@ public function testEqualsCaseInsensitive(): void self::assertTrue($upper->equals($lower)); } + public function testNotEqualsDifferentChromosome(): void + { + self::assertFalse((new Chromosome('chr1'))->equals(new Chromosome('chr2'))); + } + public function testCaseInsensitivePrefixDetection(): void { $chr = new Chromosome('CHR11'); @@ -73,25 +79,22 @@ public function testCaseInsensitivePrefixDetection(): void self::assertSame('chr11', $chr2->toString(new NamingConvention(NamingConvention::UCSC))); } - public function testBoundaryChromosomes(): void + /** @return iterable */ + public static function invalidChromosomes(): iterable { - self::assertSame('1', (new Chromosome('chr1'))->value()); - self::assertSame('22', (new Chromosome('chr22'))->value()); - self::assertSame('X', (new Chromosome('X'))->value()); - self::assertSame('Y', (new Chromosome('chrY'))->value()); - } - - public function testRejectsInvalidChromosomeNumbers(): void - { - self::expectException(\InvalidArgumentException::class); - new Chromosome('chr23'); + yield ['']; + yield ['chr0']; + yield ['chr23']; + yield ['FOO11']; + yield [' chr1']; + yield ['chr1 ']; } - public function testFailedInit(): void + /** @dataProvider invalidChromosomes */ + #[DataProvider('invalidChromosomes')] + public function testRejectsInvalidInput(string $input): void { - $chromosomeAsString = 'FOO11'; self::expectException(\InvalidArgumentException::class); - self::expectExceptionMessage("Invalid chromosome: {$chromosomeAsString}. Expected format: chr1-chr22, chrX, chrY, chrM, or without chr prefix."); - new Chromosome($chromosomeAsString); + new Chromosome($input); } } diff --git a/tests/GenomicPositionTest.php b/tests/GenomicPositionTest.php index b389df6..6523235 100644 --- a/tests/GenomicPositionTest.php +++ b/tests/GenomicPositionTest.php @@ -1,34 +1,36 @@ toString(new NamingConvention(NamingConvention::UCSC))); + $position = GenomicPosition::parse('chr11:1'); + self::assertSame('chr11:1', $position->toString(new NamingConvention(NamingConvention::UCSC))); } public function testParseEnsembl(): void { - $genomicPosition = GenomicPosition::parse('11:1'); - self::assertSame('11:1', $genomicPosition->toString(new NamingConvention(NamingConvention::ENSEMBL))); + $position = GenomicPosition::parse('11:1'); + self::assertSame('11:1', $position->toString(new NamingConvention(NamingConvention::ENSEMBL))); } public function testParseHGVSg(): void { - $genomicPosition = GenomicPosition::parse('chr11:g.1'); - self::assertSame('chr11:1', $genomicPosition->toString(new NamingConvention(NamingConvention::UCSC))); + $position = GenomicPosition::parse('chr11:g.1'); + self::assertSame('chr11:1', $position->toString(new NamingConvention(NamingConvention::UCSC))); } public function testOutputInBothConventions(): void { - $genomicPosition = GenomicPosition::parse('chr11:12345'); - self::assertSame('chr11:12345', $genomicPosition->toString(new NamingConvention(NamingConvention::UCSC))); - self::assertSame('11:12345', $genomicPosition->toString(new NamingConvention(NamingConvention::ENSEMBL))); + $position = GenomicPosition::parse('chr11:12345'); + self::assertSame('chr11:12345', $position->toString(new NamingConvention(NamingConvention::UCSC))); + self::assertSame('11:12345', $position->toString(new NamingConvention(NamingConvention::ENSEMBL))); } public function testEquals(): void @@ -44,11 +46,27 @@ public function testEquals(): void ); } - public function testParseOnError(): void + public function testConstructorRejectsNonPositivePosition(): void { - $genomicPositionAsString = '11:1test'; self::expectException(\InvalidArgumentException::class); - self::expectExceptionMessage("Invalid genomic position format: {$genomicPositionAsString}. Expected format: chr1:123456."); - GenomicPosition::parse($genomicPositionAsString); + self::expectExceptionMessage('Position must be positive, got: 0.'); + new GenomicPosition(new Chromosome('chr1'), 0); + } + + /** @return iterable */ + public static function invalidFormats(): iterable + { + yield ['11:1test']; + yield ['chr1:0']; + yield ['chr1:']; + yield ['chr1']; + } + + /** @dataProvider invalidFormats */ + #[DataProvider('invalidFormats')] + public function testParseRejectsInvalidFormat(string $value): void + { + self::expectException(\InvalidArgumentException::class); + GenomicPosition::parse($value); } } diff --git a/tests/GenomicRegionTest.php b/tests/GenomicRegionTest.php index 05b56bf..feced0c 100644 --- a/tests/GenomicRegionTest.php +++ b/tests/GenomicRegionTest.php @@ -3,195 +3,232 @@ use MLL\Utils\GenomicPosition; use MLL\Utils\GenomicRegion; use MLL\Utils\NamingConvention; +use PHPUnit\Framework\Attributes\DataProvider; use PHPUnit\Framework\TestCase; final class GenomicRegionTest extends TestCase { public function testParseUCSC(): void { - $genomicRegion = GenomicRegion::parse('chr11:1-2'); - self::assertSame('chr11:1-2', $genomicRegion->toString(new NamingConvention(NamingConvention::UCSC))); + $region = GenomicRegion::parse('chr11:1-2'); + self::assertSame('chr11:1-2', $region->toString(new NamingConvention(NamingConvention::UCSC))); } public function testParseEnsembl(): void { - $genomicRegion = GenomicRegion::parse('11:1-2'); - self::assertSame('11:1-2', $genomicRegion->toString(new NamingConvention(NamingConvention::ENSEMBL))); + $region = GenomicRegion::parse('11:1-2'); + self::assertSame('11:1-2', $region->toString(new NamingConvention(NamingConvention::ENSEMBL))); } public function testParseHGVSg(): void { - $genomicRegion = GenomicRegion::parse('chr11:g.1-2'); - self::assertSame('chr11:1-2', $genomicRegion->toString(new NamingConvention(NamingConvention::UCSC))); + $region = GenomicRegion::parse('chr11:g.1-2'); + self::assertSame('chr11:1-2', $region->toString(new NamingConvention(NamingConvention::UCSC))); + } + + public function testParseSingleBaseRegion(): void + { + $region = GenomicRegion::parse('chr1:100'); + self::assertSame(100, $region->start); + self::assertSame(100, $region->end); + self::assertSame(1, $region->length()); + self::assertSame('chr1:100-100', $region->toString(new NamingConvention(NamingConvention::UCSC))); } public function testParseOnError(): void { - $genomicRegionAsString = '11:1_2'; + $value = '11:1_2'; self::expectException(\InvalidArgumentException::class); - self::expectExceptionMessage("Invalid genomic region format: {$genomicRegionAsString}. Expected format: chr1:123-456."); - GenomicRegion::parse($genomicRegionAsString); + self::expectExceptionMessage("Invalid genomic region format: {$value}. Expected format: chr1:123-456."); + GenomicRegion::parse($value); } public function testStartGreaterThanEnd(): void { self::expectException(\InvalidArgumentException::class); - self::expectExceptionMessage('End (1) must be greater than start (2)'); + self::expectExceptionMessage('End (1) must not be less than start (2).'); GenomicRegion::parse('11:2-1'); } - public function testContainsGenomicPosition(): void + public function testEquals(): void { - $genomicRegion = GenomicRegion::parse('chr11:g.1-20'); - self::assertTrue($genomicRegion->containsGenomicPosition(GenomicPosition::parse('chr11:20'))); - self::assertFalse($genomicRegion->containsGenomicPosition(GenomicPosition::parse('chr11:21'))); + self::assertTrue( + GenomicRegion::parse('chr11:10-20')->equals(GenomicRegion::parse('11:10-20')) + ); + self::assertFalse( + GenomicRegion::parse('chr11:10-20')->equals(GenomicRegion::parse('chr11:10-21')) + ); + self::assertFalse( + GenomicRegion::parse('chr11:10-20')->equals(GenomicRegion::parse('chr12:10-20')) + ); } - public function testContainsGenomicPositionAcrossNamingConventions(): void + /** @return iterable */ + public static function lengths(): iterable { - $genomicRegion = GenomicRegion::parse('chr11:1-20'); - self::assertTrue($genomicRegion->containsGenomicPosition(GenomicPosition::parse('11:15'))); + yield ['chr11:20-30', 11]; + yield ['chr1:5-5', 1]; + yield ['chr1:1-100', 100]; } - public function testContainsGenomicRegion(): void + /** @dataProvider lengths */ + #[DataProvider('lengths')] + public function testLength(string $region, int $expected): void { - $genomicRegion = GenomicRegion::parse('chr11:g.1-20'); - self::assertTrue($genomicRegion->containsGenomicRegion(GenomicRegion::parse('chr11:19-20'))); - self::assertFalse($genomicRegion->containsGenomicRegion(GenomicRegion::parse('chr11:21-22'))); + self::assertSame($expected, GenomicRegion::parse($region)->length()); } - public function testIsCoveredByGenomicRegion(): void + public function testContainsPosition(): void { - $genomicRegion = GenomicRegion::parse('chr11:g.20-30'); - self::assertTrue($genomicRegion->isCoveredByGenomicRegion(GenomicRegion::parse('chr11:g.15-35'))); - self::assertFalse($genomicRegion->isCoveredByGenomicRegion(GenomicRegion::parse('chr11:g.22-35'))); + $region = GenomicRegion::parse('chr11:g.1-20'); + self::assertTrue($region->containsPosition(GenomicPosition::parse('chr11:20'))); + self::assertFalse($region->containsPosition(GenomicPosition::parse('chr11:21'))); } - public function testIntersectsWithGenomicRegionPartialOverlap(): void + public function testContainsPositionAcrossNamingConventions(): void { - $genomicRegion = GenomicRegion::parse('chr11:g.20-30'); - self::assertTrue($genomicRegion->intersectsWithGenomicRegion(GenomicRegion::parse('chr11:15-25'))); - self::assertTrue($genomicRegion->intersectsWithGenomicRegion(GenomicRegion::parse('chr11:25-35'))); + $region = GenomicRegion::parse('chr11:1-20'); + self::assertTrue($region->containsPosition(GenomicPosition::parse('11:15'))); } - public function testIntersectsWithGenomicRegionFullyWrapped(): void + public function testContainsPositionAtStartBoundary(): void { - $genomicRegion = GenomicRegion::parse('chr11:g.20-30'); - self::assertTrue($genomicRegion->intersectsWithGenomicRegion(GenomicRegion::parse('chr11:g.15-35'))); + $region = GenomicRegion::parse('chr11:10-20'); + self::assertTrue($region->containsPosition(GenomicPosition::parse('chr11:10'))); } - public function testIntersectsWithGenomicRegionNoOverlap(): void + public function testContainsRegion(): void { - $genomicRegion = GenomicRegion::parse('chr11:g.20-30'); - self::assertFalse($genomicRegion->intersectsWithGenomicRegion(GenomicRegion::parse('chr11:15-19'))); + $region = GenomicRegion::parse('chr11:g.1-20'); + self::assertTrue($region->containsRegion(GenomicRegion::parse('chr11:19-20'))); + self::assertFalse($region->containsRegion(GenomicRegion::parse('chr11:21-22'))); } - public function testIntersectsWithAdjacentRegion(): void + public function testIsCoveredBy(): void { - $genomicRegion = GenomicRegion::parse('chr11:10-20'); - self::assertFalse($genomicRegion->intersectsWithGenomicRegion(GenomicRegion::parse('chr11:21-30'))); + $region = GenomicRegion::parse('chr11:g.20-30'); + self::assertTrue($region->isCoveredBy(GenomicRegion::parse('chr11:g.15-35'))); + self::assertFalse($region->isCoveredBy(GenomicRegion::parse('chr11:g.22-35'))); } - public function testIntersectsWithSinglePointOverlap(): void + public function testIsCoveredByIdenticalRegion(): void { - $region = GenomicRegion::parse('chr1:10-20'); - self::assertTrue($region->intersectsWithGenomicRegion(GenomicRegion::parse('chr1:20-30'))); + $region = GenomicRegion::parse('chr11:20-30'); + self::assertTrue($region->isCoveredBy(GenomicRegion::parse('chr11:20-30'))); } - public function testParseSingleBaseRegion(): void + public function testIntersectsPartialOverlap(): void { - $region = GenomicRegion::parse('chr1:100'); - self::assertSame(100, $region->start); - self::assertSame(100, $region->end); - self::assertSame(1, $region->length()); - self::assertSame('chr1:100-100', $region->toString(new NamingConvention(NamingConvention::UCSC))); + $region = GenomicRegion::parse('chr11:g.20-30'); + self::assertTrue($region->intersects(GenomicRegion::parse('chr11:15-25'))); + self::assertTrue($region->intersects(GenomicRegion::parse('chr11:25-35'))); } - public function testIsCoveredByIdenticalRegion(): void + public function testIntersectsNoOverlap(): void { - $region = GenomicRegion::parse('chr11:20-30'); - self::assertTrue($region->isCoveredByGenomicRegion(GenomicRegion::parse('chr11:20-30'))); + $region = GenomicRegion::parse('chr11:g.20-30'); + self::assertFalse($region->intersects(GenomicRegion::parse('chr11:15-19'))); } - public function testContainsPositionAtStartBoundary(): void + public function testIntersectsAdjacentRegion(): void { $region = GenomicRegion::parse('chr11:10-20'); - self::assertTrue($region->containsGenomicPosition(GenomicPosition::parse('chr11:10'))); + self::assertFalse($region->intersects(GenomicRegion::parse('chr11:21-30'))); } - public function testEquals(): void + public function testIntersectsSinglePointOverlap(): void { - self::assertTrue( - GenomicRegion::parse('chr11:10-20')->equals(GenomicRegion::parse('11:10-20')) - ); - self::assertFalse( - GenomicRegion::parse('chr11:10-20')->equals(GenomicRegion::parse('chr11:10-21')) - ); - self::assertFalse( - GenomicRegion::parse('chr11:10-20')->equals(GenomicRegion::parse('chr12:10-20')) - ); - } - - public function testLength(): void - { - self::assertSame(11, GenomicRegion::parse('chr11:20-30')->length()); - self::assertSame(1, GenomicRegion::parse('chr1:5-5')->length()); - self::assertSame(100, GenomicRegion::parse('chr1:1-100')->length()); + $region = GenomicRegion::parse('chr1:10-20'); + self::assertTrue($region->intersects(GenomicRegion::parse('chr1:20-30'))); } - public function testOverlapPartial(): void + public function testIntersectionPartial(): void { $a = GenomicRegion::parse('chr11:10-20'); $b = GenomicRegion::parse('chr11:15-25'); - $overlap = $a->overlap($b); + $overlap = $a->intersection($b); self::assertNotNull($overlap); self::assertTrue($overlap->equals(GenomicRegion::parse('chr11:15-20'))); self::assertSame(6, $overlap->length()); } - public function testOverlapFullyContained(): void + public function testIntersectionFullyContained(): void { $outer = GenomicRegion::parse('chr11:10-30'); $inner = GenomicRegion::parse('chr11:15-20'); - $overlap = $outer->overlap($inner); + $overlap = $outer->intersection($inner); self::assertNotNull($overlap); self::assertTrue($overlap->equals($inner)); } - public function testOverlapSinglePoint(): void + public function testIntersectionSinglePoint(): void { $a = GenomicRegion::parse('chr1:10-20'); $b = GenomicRegion::parse('chr1:20-30'); - $overlap = $a->overlap($b); + $overlap = $a->intersection($b); self::assertNotNull($overlap); self::assertTrue($overlap->equals(GenomicRegion::parse('chr1:20-20'))); self::assertSame(1, $overlap->length()); } - public function testOverlapReturnsNullWhenNoIntersection(): void + public function testIntersectionReturnsNullWhenNoIntersection(): void { $a = GenomicRegion::parse('chr11:10-20'); $b = GenomicRegion::parse('chr11:21-30'); - self::assertNull($a->overlap($b)); + self::assertNull($a->intersection($b)); } - public function testOverlapReturnsNullForDifferentChromosomes(): void + public function testDifferentChromosomesNeverMatch(): void { - $a = GenomicRegion::parse('chr11:10-20'); - $b = GenomicRegion::parse('chr12:10-20'); + $region = GenomicRegion::parse('chr11:1-100'); + $other = GenomicRegion::parse('chr12:10-20'); + self::assertFalse($region->containsPosition(GenomicPosition::parse('chr12:50'))); + self::assertFalse($region->containsRegion($other)); + self::assertFalse($region->intersects($other)); + self::assertNull($region->intersection($other)); + } - self::assertNull($a->overlap($b)); + public function testParseRejectsPositionZero(): void + { + self::expectException(\InvalidArgumentException::class); + GenomicRegion::parse('chr1:0-10'); } - public function testDifferentChromosomesNeverMatch(): void + public function testContainsRegionIsInverseOfIsCoveredBy(): void { - $region = GenomicRegion::parse('chr11:1-100'); - self::assertFalse($region->containsGenomicPosition(GenomicPosition::parse('chr12:50'))); - self::assertFalse($region->containsGenomicRegion(GenomicRegion::parse('chr12:10-20'))); - self::assertFalse($region->intersectsWithGenomicRegion(GenomicRegion::parse('chr12:10-20'))); + $outer = GenomicRegion::parse('chr11:10-30'); + $inner = GenomicRegion::parse('chr11:15-20'); + + self::assertSame( + $outer->containsRegion($inner), + $inner->isCoveredBy($outer) + ); + self::assertSame( + $inner->containsRegion($outer), + $outer->isCoveredBy($inner) + ); + } + + public function testIsCoveredByDifferentChromosomes(): void + { + $region = GenomicRegion::parse('chr11:10-20'); + self::assertFalse($region->isCoveredBy(GenomicRegion::parse('chr12:1-100'))); + } + + public function testIntersectionIsCommutative(): void + { + $a = GenomicRegion::parse('chr11:10-20'); + $b = GenomicRegion::parse('chr11:15-25'); + + $ab = $a->intersection($b); + $ba = $b->intersection($a); + + self::assertNotNull($ab); + self::assertNotNull($ba); + self::assertTrue($ab->equals($ba)); } } diff --git a/tests/NamingConventionTest.php b/tests/NamingConventionTest.php new file mode 100644 index 0000000..df4e59f --- /dev/null +++ b/tests/NamingConventionTest.php @@ -0,0 +1,36 @@ +value); + } + + public function testEnsembl(): void + { + $convention = new NamingConvention(NamingConvention::ENSEMBL); + self::assertSame('ENSEMBL', $convention->value); + } + + /** @return iterable */ + public static function invalidValues(): iterable + { + yield ['HGVS']; + yield ['']; + yield ['ucsc']; + } + + /** @dataProvider invalidValues */ + #[DataProvider('invalidValues')] + public function testRejectsInvalidValue(string $value): void + { + self::expectException(\InvalidArgumentException::class); + new NamingConvention($value); + } +}