diff --git a/app/Services/QdrantService.php b/app/Services/QdrantService.php index f4028d4..eadf9c7 100644 --- a/app/Services/QdrantService.php +++ b/app/Services/QdrantService.php @@ -141,6 +141,25 @@ public function upsert(array $entry, string $project = 'default', bool $checkDup // Check for duplicates when requested (for new entries) if ($checkDuplicates) { + // Fingerprint dedup: if entry has a fingerprint tag, check for existing entries with same fingerprint + $fingerprint = $this->extractFingerprint($entry['tags'] ?? []); + if ($fingerprint !== null) { + $existing = $this->findByFingerprint($fingerprint, $project); + if ($existing !== null) { + throw DuplicateEntryException::hashMatch($existing, $fingerprint); + } + } + + // Title+commit dedup: same title and commit hash means same CI event captured twice + $commitHash = $entry['commit'] ?? null; + if (is_string($commitHash) && $commitHash !== '') { + $existing = $this->findByTitleAndCommit($entry['title'], $commitHash, $project); + if ($existing !== null) { + throw DuplicateEntryException::hashMatch($existing, $entry['title'].'@'.$commitHash); + } + } + + // Content hash dedup (existing behavior) $contentHash = hash('sha256', $entry['title'].$entry['content']); $similar = $this->findSimilar($vector, $project, 0.95); @@ -172,6 +191,7 @@ public function upsert(array $entry, string $project = 'default', bool $checkDup 'updated_at' => $entry['updated_at'] ?? now()->toIso8601String(), 'last_verified' => $entry['last_verified'] ?? null, 'evidence' => $entry['evidence'] ?? null, + 'commit' => $entry['commit'] ?? null, 'superseded_by' => $entry['superseded_by'] ?? null, 'superseded_date' => $entry['superseded_date'] ?? null, 'superseded_reason' => $entry['superseded_reason'] ?? null, @@ -881,6 +901,75 @@ public function listCollections(): array )); } + /** + * Extract fingerprint value from tags array. + * + * Fingerprint tags follow the format "fingerprint:{hash}". + * + * @param array $tags + */ + private function extractFingerprint(array $tags): ?string + { + foreach ($tags as $tag) { + if (str_starts_with($tag, 'fingerprint:')) { + return $tag; + } + } + + return null; + } + + /** + * Find an existing entry with the same fingerprint tag. + */ + private function findByFingerprint(string $fingerprint, string $project): string|int|null + { + $filter = [ + 'must' => [ + ['key' => 'tags', 'match' => ['value' => $fingerprint]], + ['is_empty' => ['key' => 'superseded_by']], + ], + ]; + + $response = $this->connector->send( + new ScrollPoints($this->getCollectionName($project), 1, $filter, null) + ); + + if (! $response->successful()) { + return null; + } + + $points = $response->json()['result']['points'] ?? []; + + return $points !== [] ? $points[0]['id'] : null; + } + + /** + * Find an existing entry with the same title and commit hash. + */ + private function findByTitleAndCommit(string $title, string $commit, string $project): string|int|null + { + $filter = [ + 'must' => [ + ['key' => 'title', 'match' => ['text' => $title]], + ['key' => 'commit', 'match' => ['value' => $commit]], + ['is_empty' => ['key' => 'superseded_by']], + ], + ]; + + $response = $this->connector->send( + new ScrollPoints($this->getCollectionName($project), 1, $filter, null) + ); + + if (! $response->successful()) { + return null; + } + + $points = $response->json()['result']['points'] ?? []; + + return $points !== [] ? $points[0]['id'] : null; + } + /** * Get collection name for project namespace. */ diff --git a/tests/Unit/Services/QdrantServiceTest.php b/tests/Unit/Services/QdrantServiceTest.php index 7698068..5252e8c 100644 --- a/tests/Unit/Services/QdrantServiceTest.php +++ b/tests/Unit/Services/QdrantServiceTest.php @@ -1150,6 +1150,134 @@ function mockCollectionExists(Mockery\MockInterface $connector, int $times = 1): expect($this->service->upsert($entry, 'default', true))->toBeTrue(); }); + it('throws when fingerprint tag matches existing entry', function (): void { + $this->mockEmbedding->shouldReceive('generate') + ->with('Test Title Test content') + ->once() + ->andReturn([0.1, 0.2, 0.3]); + + mockCollectionExists($this->mockConnector); + + // Mock findByFingerprint scroll returning a match + $scrollResponse = createMockResponse(true, 200, [ + 'result' => [ + 'points' => [ + ['id' => 'existing-fingerprint-id'], + ], + ], + ]); + $this->mockConnector->shouldReceive('send') + ->with(Mockery::type(ScrollPoints::class)) + ->once() + ->andReturn($scrollResponse); + + $entry = [ + 'id' => 'new-id', + 'title' => 'Test Title', + 'content' => 'Test content', + 'tags' => ['fingerprint:abc123', 'other-tag'], + ]; + + expect(fn () => $this->service->upsert($entry, 'default', true)) + ->toThrow(DuplicateEntryException::class); + }); + + it('throws when title and commit hash match existing entry', function (): void { + $this->mockEmbedding->shouldReceive('generate') + ->with('Test Title Test content') + ->once() + ->andReturn([0.1, 0.2, 0.3]); + + mockCollectionExists($this->mockConnector); + + // Mock findByTitleAndCommit scroll returning a match + $scrollResponse = createMockResponse(true, 200, [ + 'result' => [ + 'points' => [ + ['id' => 'existing-commit-id'], + ], + ], + ]); + $this->mockConnector->shouldReceive('send') + ->with(Mockery::type(ScrollPoints::class)) + ->once() + ->andReturn($scrollResponse); + + $entry = [ + 'id' => 'new-id', + 'title' => 'Test Title', + 'content' => 'Test content', + 'commit' => 'abc1234', + ]; + + expect(fn () => $this->service->upsert($entry, 'default', true)) + ->toThrow(DuplicateEntryException::class); + }); + + it('proceeds when fingerprint has no match', function (): void { + $this->mockEmbedding->shouldReceive('generate') + ->with('Unique Title Unique content') + ->once() + ->andReturn([0.1, 0.2, 0.3]); + + mockCollectionExists($this->mockConnector, 2); + + // Mock findByFingerprint scroll returning no match + $scrollResponse = createMockResponse(true, 200, [ + 'result' => ['points' => []], + ]); + $this->mockConnector->shouldReceive('send') + ->with(Mockery::type(ScrollPoints::class)) + ->once() + ->andReturn($scrollResponse); + + // Mock findSimilar returning no results (content hash check) + $searchResponse = createMockResponse(true, 200, ['result' => []]); + $this->mockConnector->shouldReceive('send') + ->with(Mockery::type(SearchPoints::class)) + ->once() + ->andReturn($searchResponse); + + $upsertResponse = createMockResponse(true); + $this->mockConnector->shouldReceive('send') + ->with(Mockery::type(UpsertPoints::class)) + ->once() + ->andReturn($upsertResponse); + + $entry = [ + 'id' => 'new-id', + 'title' => 'Unique Title', + 'content' => 'Unique content', + 'tags' => ['fingerprint:unique123'], + ]; + + expect($this->service->upsert($entry, 'default', true))->toBeTrue(); + }); + + it('stores commit field in payload', function (): void { + $this->mockEmbedding->shouldReceive('generate') + ->with('Test Title Test content') + ->once() + ->andReturn([0.1, 0.2, 0.3]); + + mockCollectionExists($this->mockConnector); + + $upsertResponse = createMockResponse(true); + $this->mockConnector->shouldReceive('send') + ->with(Mockery::type(UpsertPoints::class)) + ->once() + ->andReturn($upsertResponse); + + $entry = [ + 'id' => 'test-id', + 'title' => 'Test Title', + 'content' => 'Test content', + 'commit' => 'abc1234def', + ]; + + expect($this->service->upsert($entry, 'default', false))->toBeTrue(); + }); + it('stores superseded fields in payload', function (): void { $this->mockEmbedding->shouldReceive('generate') ->with('Test Title Test content')