diff --git a/src/Chromosome.php b/src/Chromosome.php new file mode 100644 index 0000000..784b271 --- /dev/null +++ b/src/Chromosome.php @@ -0,0 +1,37 @@ +namingConvention = $matches[1] === 'chr' + /* A chr prefix in capture group 1 selects UCSC naming; every other value falls through to ENSEMBL. */ ? new NamingConvention(NamingConvention::UCSC) + : new NamingConvention(NamingConvention::ENSEMBL); + + /* Keep the captured identifier upper-cased; toString() compares it against the upper-case literal M. */ $this->value = strtoupper($matches[2]); + } + + /** Renders the chromosome name. Uses the given naming convention, or the one chosen in the constructor when none is passed. ENSEMBL yields the bare value (with M written as MT); UCSC yields the value prefixed with chr. @throws \InvalidArgumentException for a convention value that has no case in the switch */ public function toString(?NamingConvention $namingConvention = null): string + { + $namingConvention ??= $this->namingConvention; + + switch ($namingConvention->value) { + case NamingConvention::ENSEMBL: + return $this->value === 'M' ? 'MT' : $this->value; + case NamingConvention::UCSC: + return "chr{$this->value}"; + default: + throw new \InvalidArgumentException("No toString logic implemented for valid naming convention: {$namingConvention->value}"); + } + } +} diff --git a/src/GenomicPosition.php b/src/GenomicPosition.php new file mode 100644 index 0000000..f354d9e --- /dev/null +++ b/src/GenomicPosition.php @@ -0,0 +1,35 @@ +chromosome = $chromosome; + $this->position = $position; + } + + /** Parses a string of the form CHROMOSOME:POSITION, where the position may carry an optional g. prefix (as in chr11:g.1). @example GenomicPosition::parse('chr1:123456') @throws \InvalidArgumentException when the string does not match the pattern */ + public static function parse(string $genomicPosition): self + { + if (\Safe\preg_match('/^(.+):(g\.|)(\d+)$/', $genomicPosition, $matches) === 0) { + throw new \InvalidArgumentException("Invalid genomic position format: {$genomicPosition}. 
Expected format: chr1:123456."); + } + + /* Group 1 is handed to Chromosome, group 3 holds the digits; group 2 (the optional g. prefix) is discarded. */ return new self(new Chromosome($matches[1]), (int) $matches[3]); + } + + /** Formats the position as CHROMOSOME:POSITION; the optional naming convention is forwarded to Chromosome::toString(). */ public function toString(?NamingConvention $namingConvention = null): string + { + return "{$this->chromosome->toString($namingConvention)}:{$this->position}"; + } +} diff --git a/src/GenomicRegion.php b/src/GenomicRegion.php new file mode 100644 index 0000000..0195bcd --- /dev/null +++ b/src/GenomicRegion.php @@ -0,0 +1,89 @@ + $end) { + /* NOTE(review): the left side of this condition lies outside the visible text. The message says greater than, yet parse() passes the start value as end for single-position input - confirm that equal bounds are meant to be accepted and whether the wording should say so. */ throw new \InvalidArgumentException("End ({$end}) must be greater than start ({$start})"); + } + + $this->chromosome = $chromosome; + $this->start = $start; + $this->end = $end; + } + + /** Parses CHROMOSOME:START-END with an optional g. prefix before START. The -END part is optional in the pattern. @throws \InvalidArgumentException when the string does not match the pattern, or from the constructor guard when the bounds are rejected. NOTE(review): this calls the global preg_match while GenomicPosition::parse uses \Safe\preg_match; the global function returns false on a regex error, which the === 0 check does not catch - confirm whether the Safe variant was intended. */ public static function parse(string $genomicRegion): self + { + if (preg_match('/^(.+):(g\.|)(\d+)(-(\d+)|)$/', $genomicRegion, $matches) === 0) { + throw new \InvalidArgumentException("Invalid genomic region format: {$genomicRegion}. Expected format: chr1:123-456."); + } + + return new self( + new Chromosome($matches[1]), + (int) $matches[3], + /* When $matches[5] is not set, the start value is reused as the end. */ (int) ($matches[5] ?? 
$matches[3]) + ); + } + + /** True when the position is on the same chromosome and lies within start..end, both bounds inclusive. NOTE(review): the chromosomes are compared through toString() without an explicit naming convention, so each side renders in its own convention and chr11 compares as different from 11 - confirm whether that is intended. */ public function containsGenomicPosition(GenomicPosition $genomicPosition): bool + { + return $this->chromosome->toString() === $genomicPosition->chromosome->toString() + && $this->positionIsBetweenStartAndEnd($genomicPosition->position); + } + + /** True when the given region is on the same chromosome and both its start and its end lie within this region (bounds inclusive). Chromosomes are compared by their own-convention string form. */ public function containsGenomicRegion(GenomicRegion $genomicRegion): bool + { + return $this->chromosome->toString() === $genomicRegion->chromosome->toString() + && $this->positionIsBetweenStartAndEnd($genomicRegion->start) + && $this->positionIsBetweenStartAndEnd($genomicRegion->end); + } + + /** True when the given region is on the same chromosome, starts at or before this start and ends at or after this end. Chromosomes are compared by their own-convention string form. */ public function isCoveredByGenomicRegion(GenomicRegion $genomicRegion): bool + { + return $this->chromosome->toString() === $genomicRegion->chromosome->toString() + && $genomicRegion->start <= $this->start + && $genomicRegion->end >= $this->end; + } + + /** True when both regions are on the same chromosome and either this region is covered by the given one, or the start or the end of the given region falls inside this region. Chromosomes are compared by their own-convention string form. */ public function intersectsWithGenomicRegion(GenomicRegion $genomicRegion): bool + { + return $this->chromosome->toString() === $genomicRegion->chromosome->toString() + && ( + $this->isCoveredByGenomicRegion($genomicRegion) + || $this->positionIsBetweenStartAndEnd($genomicRegion->start) + || $this->positionIsBetweenStartAndEnd($genomicRegion->end) + ); + } + + /** Inclusive bounds check of a position against this start and end. */ private function positionIsBetweenStartAndEnd(int $position): bool + { + return $position >= $this->start && $position <= $this->end; + } + + /** Formats the region as CHROMOSOME:START-END; the optional naming convention is forwarded to Chromosome::toString(). */ public function toString(?NamingConvention $namingConvention = null): string + { + return "{$this->chromosome->toString($namingConvention)}:{$this->start}-{$this->end}"; + } +} diff --git a/src/NamingConvention.php b/src/NamingConvention.php new file mode 100644 index 0000000..be0d5cb --- /dev/null +++ b/src/NamingConvention.php @@ -0,0 +1,23 @@ +value = $value; + break; + /* A value that reaches this branch is rejected with an exception. */ default: + throw new \InvalidArgumentException("Invalid naming convention: {$value}"); + } + } +} diff --git a/tests/ChromosomeTest.php b/tests/ChromosomeTest.php new file mode 100644 index 0000000..da9ac22 --- /dev/null +++ b/tests/ChromosomeTest.php @@ -0,0 +1,40 @@ +toString()); + } + + /** A chr-prefixed input rendered under ENSEMBL loses its prefix. */ public 
function testToStringForEnsembl(): void + { + $chromosome = new Chromosome('chr11'); + self::assertSame('11', $chromosome->toString(new NamingConvention(NamingConvention::ENSEMBL))); + } + + /** NOTE(review): the input has no chr prefix, which the Chromosome constructor maps to ENSEMBL rather than UCSC - the test name looks mismatched; confirm. */ public function testInitWithUCSC(): void + { + $chromosome = new Chromosome('11'); + self::assertSame('11', $chromosome->toString()); + } + + /** The input chrM must render as MT under ENSEMBL. */ public function testToStringWithUCSCAndMitochondrialChromosome(): void + { + $chromosome = new Chromosome('chrM'); + self::assertSame('MT', $chromosome->toString(new NamingConvention(NamingConvention::ENSEMBL))); + } + + /** An unrecognised input is rejected by the constructor with the exact message below. */ public function testFailedInit(): void + { + $chromosomeAsString = 'FOO11'; + self::expectException(\InvalidArgumentException::class); + self::expectExceptionMessage("Invalid chromosome: {$chromosomeAsString}. Expected format: chr1-chr22, chrX, chrY, chrM, or without chr prefix."); + new Chromosome($chromosomeAsString); + } +} diff --git a/tests/GenomicPositionTest.php b/tests/GenomicPositionTest.php new file mode 100644 index 0000000..2a9e06a --- /dev/null +++ b/tests/GenomicPositionTest.php @@ -0,0 +1,33 @@ +toString()); + } + + /** An unprefixed chromosome round-trips unchanged through parse() and toString(). */ public function testParseOnSuccessGRC37(): void + { + $genomicPosition = GenomicPosition::parse('11:1'); + self::assertSame('11:1', $genomicPosition->toString()); + } + + /** The g. prefix is accepted on input but is not reproduced by toString(). */ public function testParseOnSuccessHGVSg(): void + { + $genomicPosition = GenomicPosition::parse('chr11:g.1'); + self::assertSame('chr11:1', $genomicPosition->toString()); + } + + /** Trailing non-digit characters after the position make parse() throw. */ public function testParseOnError(): void + { + $genomicPositionAsString = '11:1test'; + self::expectException(\InvalidArgumentException::class); + self::expectExceptionMessage("Invalid genomic position format: {$genomicPositionAsString}. 
Expected format: chr1:123456."); + GenomicPosition::parse($genomicPositionAsString); + } +} diff --git a/tests/GenomicRegionTest.php b/tests/GenomicRegionTest.php new file mode 100644 index 0000000..f98b9da --- /dev/null +++ b/tests/GenomicRegionTest.php @@ -0,0 +1,84 @@ +toString()); + } + + /** An unprefixed region round-trips unchanged through parse() and toString(). */ public function testParseOnSuccessEnsembl(): void + { + $genomicRegion = GenomicRegion::parse('11:1-2'); + self::assertSame('11:1-2', $genomicRegion->toString()); + } + + /** The g. prefix is accepted on input but is not reproduced by toString(). */ public function testParseOnSuccessHGVSg(): void + { + $genomicRegion = GenomicRegion::parse('chr11:g.1-2'); + self::assertSame('chr11:1-2', $genomicRegion->toString()); + } + + /** A separator other than the hyphen between start and end makes parse() throw. */ public function testParseOnError(): void + { + $genomicRegionAsString = '11:1_2'; + self::expectException(\InvalidArgumentException::class); + self::expectExceptionMessage("Invalid genomic region format: {$genomicRegionAsString}. Expected format: chr1:123-456."); + GenomicRegion::parse($genomicRegionAsString); + } + + /** A start above the end is rejected. NOTE(review): the method name contains a typo (Gerate, presumably Greater). */ public function testStartIsGerateThenEnd(): void + { + $genomicRegionAsString = '11:2-1'; + self::expectException(\InvalidArgumentException::class); + self::expectExceptionMessage('End (1) must be greater than start (2)'); + GenomicRegion::parse($genomicRegionAsString); + } + + /** Position 20 equals the region end, so this pins the upper bound as inclusive. */ public function testContainsGenomicPositionIsTrue(): void + { + $genomicRegion = GenomicRegion::parse('chr11:g.1-20'); + self::assertTrue($genomicRegion->containsGenomicPosition(GenomicPosition::parse('chr11:20'))); + } + + /** Position 21 is one past the region end. */ public function testContainsGenomicPositionIsFalse(): void + { + $genomicRegion = GenomicRegion::parse('chr11:g.1-20'); + self::assertFalse($genomicRegion->containsGenomicPosition(GenomicPosition::parse('chr11:21'))); + } + + /** The inner region 19-20 ends exactly on the outer end. */ public function testContainsGenomicRegionIsTrue(): void + { + $genomicRegion = GenomicRegion::parse('chr11:g.1-20'); + self::assertTrue($genomicRegion->containsGenomicRegion(GenomicRegion::parse('chr11:19-20'))); + } + + /** The region 21-22 lies entirely after the outer end. */ public function testContainsGenomicRegionIsFalse(): void + { + $genomicRegion = 
GenomicRegion::parse('chr11:g.1-20'); + self::assertFalse($genomicRegion->containsGenomicRegion(GenomicRegion::parse('chr11:21-22'))); + } + + /** Region 20-30 is covered by the wider region 15-35. */ public function testCoversGenomicRegionIsTrue(): void + { + $genomicRegion = GenomicRegion::parse('chr11:g.20-30'); + self::assertTrue($genomicRegion->isCoveredByGenomicRegion(GenomicRegion::parse('chr11:g.15-35'))); + } + + /** Full coverage by a wider region counts as an intersection. */ public function testIntersectsFullyWithGenomicRegionIsTrue(): void + { + $genomicRegion = GenomicRegion::parse('chr11:g.20-30'); + self::assertTrue($genomicRegion->intersectsWithGenomicRegion(GenomicRegion::parse('chr11:g.15-35'))); + } + + /** Region 15-19 ends one position before 20-30 begins. NOTE(review): this test mixes a chr-prefixed and an unprefixed chromosome, so the chromosome string comparison alone already yields false - confirm the position logic is what is meant to be exercised. */ public function testIntersectsWithGenomicRegionIsFalse(): void + { + $genomicRegion = GenomicRegion::parse('chr11:g.20-30'); + self::assertFalse($genomicRegion->intersectsWithGenomicRegion(GenomicRegion::parse('chr11:15-19'))); + } +}