Changes from all commits
20 commits
8fc46f3
Refactor AI connection handling and improve job deletion logic
rostilos Jan 26, 2026
7c78057
feat: Add pre-acquired lock key to prevent double-locking in PR analy…
rostilos Jan 26, 2026
6d80d71
feat: Implement handling for AnalysisLockedException and DiffTooLarge…
rostilos Jan 26, 2026
e2c1474
feat: Re-fetch job entities in transaction methods to handle detached…
rostilos Jan 27, 2026
342c4fa
feat: Update JobService and WebhookAsyncProcessor to manage job entit…
rostilos Jan 27, 2026
409c42d
feat: Enable transaction management in processWebhookAsync to support…
rostilos Jan 27, 2026
11c983c
feat: Re-fetch job entities in JobService methods to ensure consisten…
rostilos Jan 27, 2026
c75eaba
feat: Add @Transactional annotation to processWebhookAsync for lazy l…
rostilos Jan 27, 2026
8afc0ad
feat: Implement self-injection in WebhookAsyncProcessor for proper tr…
rostilos Jan 27, 2026
402486b
feat: Enhance logging and error handling in processWebhookAsync for i…
rostilos Jan 27, 2026
fdcdca0
feat: Implement webhook deduplication service to prevent duplicate co…
rostilos Jan 27, 2026
e321361
feat: Enhance job deletion process with logging and persistence conte…
rostilos Jan 27, 2026
ebd0fad
feat: Improve job deletion process with enhanced logging and error ha…
rostilos Jan 27, 2026
092b361
feat: Add method to delete job by ID in JobRepository and update JobS…
rostilos Jan 27, 2026
61d2620
feat: Simplify job handling by marking ignored jobs as SKIPPED instea…
rostilos Jan 27, 2026
704a7a2
feat: Enhance AI connection logging and refactor placeholder manageme…
rostilos Jan 28, 2026
2e42ebc
feat: Add logging for LLM creation and enhance diff snippet extractio…
rostilos Jan 28, 2026
d036fa9
feat: Implement AST-based code splitter and scoring configuration
rostilos Jan 28, 2026
642bda0
feat: Enhance lock management in PullRequestAnalysisProcessor and imp…
rostilos Jan 28, 2026
5add89c
feat: Enhance AST processing and metadata extraction in RAG pipeline …
rostilos Jan 28, 2026
6 changes: 6 additions & 0 deletions java-ecosystem/libs/analysis-engine/pom.xml
@@ -68,6 +68,12 @@
<artifactId>okhttp</artifactId>
</dependency>

<!-- JTokkit for token counting -->
<dependency>
<groupId>com.knuddels</groupId>
<artifactId>jtokkit</artifactId>
</dependency>

<!-- Test Dependencies -->
<dependency>
<groupId>org.junit.jupiter</groupId>
@@ -18,6 +18,7 @@
requires com.fasterxml.jackson.annotation;
requires jakarta.persistence;
requires kotlin.stdlib;
requires jtokkit;

exports org.rostilos.codecrow.analysisengine.aiclient;
exports org.rostilos.codecrow.analysisengine.config;
@@ -35,6 +35,13 @@ public class PrProcessRequest implements AnalysisProcessRequest {
public String prAuthorId;

public String prAuthorUsername;

/**
* Optional pre-acquired lock key. If set, the processor will skip lock acquisition
* and use this lock key directly. This prevents double-locking when the webhook handler
* has already acquired the lock before calling the processor.
*/
public String preAcquiredLockKey;


public Long getProjectId() {
@@ -64,4 +71,6 @@
public String getPrAuthorId() { return prAuthorId; }

public String getPrAuthorUsername() { return prAuthorUsername; }

public String getPreAcquiredLockKey() { return preAcquiredLockKey; }
}
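
A minimal caller-side sketch of how the new `preAcquiredLockKey` field might be populated by a webhook handler that already holds the analysis lock. The constructor, variable names, and payload values below are illustrative assumptions, not code from this diff; only the public fields of `PrProcessRequest` come from the change above.

```java
// Hypothetical sketch: a webhook handler that has already acquired the analysis lock
// forwards the key so the processor skips re-acquisition.
PrProcessRequest request = new PrProcessRequest();   // assumes a no-arg constructor
request.prAuthorId = authorId;                       // illustrative values from the webhook payload
request.prAuthorUsername = authorUsername;
request.preAcquiredLockKey = heldLockKey;            // null/blank makes the processor acquire its own lock
```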
@@ -0,0 +1,47 @@
package org.rostilos.codecrow.analysisengine.exception;

/**
* Exception thrown when a diff exceeds the configured token limit for analysis.
* This is a soft skip - the analysis is not performed but the job is not marked as failed.
*/
public class DiffTooLargeException extends RuntimeException {

private final int estimatedTokens;
private final int maxAllowedTokens;
private final Long projectId;
private final Long pullRequestId;

public DiffTooLargeException(int estimatedTokens, int maxAllowedTokens, Long projectId, Long pullRequestId) {
super(String.format(
"PR diff exceeds token limit: estimated %d tokens, max allowed %d tokens (project=%d, PR=%d)",
estimatedTokens, maxAllowedTokens, projectId, pullRequestId
));
this.estimatedTokens = estimatedTokens;
this.maxAllowedTokens = maxAllowedTokens;
this.projectId = projectId;
this.pullRequestId = pullRequestId;
}

public int getEstimatedTokens() {
return estimatedTokens;
}

public int getMaxAllowedTokens() {
return maxAllowedTokens;
}

public Long getProjectId() {
return projectId;
}

public Long getPullRequestId() {
return pullRequestId;
}

/**
* Returns the percentage of the token limit that would be used.
*/
public double getUtilizationPercentage() {
return maxAllowedTokens > 0 ? (estimatedTokens * 100.0 / maxAllowedTokens) : 0;
}
}
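
Since the Javadoc describes this exception as a soft skip, a hedged sketch of how a caller might handle it follows. The processor call, `jobService`, `markSkipped` helper, and SKIPPED status are assumptions loosely based on this PR's commit messages ("marking ignored jobs as SKIPPED"); only the `DiffTooLargeException` API comes from this diff.

```java
// Hypothetical handling sketch: treat DiffTooLargeException as a soft skip, not a failure.
try {
    analysisProcessor.process(prProcessRequest, progressConsumer);   // method name illustrative
} catch (DiffTooLargeException e) {
    log.info("Skipping analysis: {} tokens of {} allowed ({}%)",
            e.getEstimatedTokens(), e.getMaxAllowedTokens(),
            Math.round(e.getUtilizationPercentage()));
    jobService.markSkipped(jobId, e.getMessage());                   // hypothetical helper; job is not FAILED
}
```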
@@ -91,34 +91,45 @@
// Publish analysis started event
publishAnalysisStartedEvent(project, request, correlationId);

-        Optional<String> lockKey = analysisLockService.acquireLockWithWait(
-                project,
-                request.getSourceBranchName(),
-                AnalysisLockType.PR_ANALYSIS,
-                request.getCommitHash(),
-                request.getPullRequestId(),
-                consumer::accept
-        );
-
-        if (lockKey.isEmpty()) {
-            String message = String.format(
-                    "Failed to acquire lock after %d minutes for project=%s, PR=%d, branch=%s. Another analysis is still in progress.",
-                    analysisLockService.getLockWaitTimeoutMinutes(),
-                    project.getId(),
-                    request.getPullRequestId(),
-                    request.getSourceBranchName()
-            );
-            log.warn(message);
-
-            // Publish failed event due to lock timeout
-            publishAnalysisCompletedEvent(project, request, correlationId, startTime,
-                    AnalysisCompletedEvent.CompletionStatus.FAILED, 0, 0, "Lock acquisition timeout");
-
-            throw new AnalysisLockedException(
-                    AnalysisLockType.PR_ANALYSIS.name(),
-                    request.getSourceBranchName(),
-                    project.getId()
-            );
-        }
+        // Check if a lock was already acquired by the caller (e.g., webhook handler)
+        // to prevent double-locking which causes unnecessary 2-minute waits
+        String lockKey;
+        boolean isPreAcquired = false;
+        if (request.getPreAcquiredLockKey() != null && !request.getPreAcquiredLockKey().isBlank()) {
+            lockKey = request.getPreAcquiredLockKey();
+            isPreAcquired = true;
+            log.info("Using pre-acquired lock: {} for project={}, PR={}", lockKey, project.getId(), request.getPullRequestId());
+        } else {
+            Optional<String> acquiredLock = analysisLockService.acquireLockWithWait(
+                    project,
+                    request.getSourceBranchName(),
+                    AnalysisLockType.PR_ANALYSIS,
+                    request.getCommitHash(),
+                    request.getPullRequestId(),
+                    consumer::accept
+            );
+
+            if (acquiredLock.isEmpty()) {
+                String message = String.format(
+                        "Failed to acquire lock after %d minutes for project=%s, PR=%d, branch=%s. Another analysis is still in progress.",
+                        analysisLockService.getLockWaitTimeoutMinutes(),
+                        project.getId(),
+                        request.getPullRequestId(),
+                        request.getSourceBranchName()
+                );
+                log.warn(message);
+
+                // Publish failed event due to lock timeout
+                publishAnalysisCompletedEvent(project, request, correlationId, startTime,
+                        AnalysisCompletedEvent.CompletionStatus.FAILED, 0, 0, "Lock acquisition timeout");
+
+                throw new AnalysisLockedException(
+                        AnalysisLockType.PR_ANALYSIS.name(),
+                        request.getSourceBranchName(),
+                        project.getId()
+                );
+            }
+            lockKey = acquiredLock.get();
+        }

try {
@@ -216,7 +227,9 @@

return Map.of("status", "error", "message", e.getMessage());
} finally {
-            analysisLockService.releaseLock(lockKey.get());
+            if (!isPreAcquired) {
+                analysisLockService.releaseLock(lockKey);
+            }
        }
    }

Check warning on line 230 in java-ecosystem/libs/analysis-engine/src/main/java/org/rostilos/codecrow/analysisengine/processor/analysis/PullRequestAnalysisProcessor.java (CodeCrow-Local / CodeCrow Analysis, MEDIUM severity issue):
The current implementation skips lock release if 'isPreAcquired' is true. While this avoids double-releasing, it creates a split responsibility where the owner of the lock must be perfectly synchronized with the processor lifecycle. If the processor is the primary executor of the analysis, it is often safer for it to own the lifecycle of the lock it uses, or explicitly document that the caller must release it.
Suggested fix: Ensure that the documentation for the API/Service explicitly states that callers providing a pre-acquired lock are responsible for its cleanup even if this processor fails.

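One way to read the suggested fix above: the component that acquires the lock also releases it, in its own finally block, even when the processor throws. A hedged caller-side sketch, with illustrative variable and method names that are not taken from this diff; only the lock-service and exception signatures come from the code above.

```java
// Hypothetical webhook-handler sketch: the caller owns the lock lifecycle end to end.
String lockKey = analysisLockService.acquireLockWithWait(
                project, branch, AnalysisLockType.PR_ANALYSIS, commitHash, pullRequestId, progress::accept)
        .orElseThrow(() -> new AnalysisLockedException(
                AnalysisLockType.PR_ANALYSIS.name(), branch, project.getId()));
try {
    prProcessRequest.preAcquiredLockKey = lockKey;          // processor skips acquisition and release
    analysisProcessor.process(prProcessRequest, progress);  // method name illustrative
} finally {
    analysisLockService.releaseLock(lockKey);               // caller releases, even if the processor fails
}
```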
@@ -0,0 +1,83 @@
package org.rostilos.codecrow.analysisengine.util;

import com.knuddels.jtokkit.Encodings;
import com.knuddels.jtokkit.api.Encoding;
import com.knuddels.jtokkit.api.EncodingRegistry;
import com.knuddels.jtokkit.api.EncodingType;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
* Utility class for estimating token counts in text content.
* Uses the cl100k_base encoding (used by GPT-4 and other OpenAI models); counts for other providers are approximate.
*/
public class TokenEstimator {
private static final Logger log = LoggerFactory.getLogger(TokenEstimator.class);

private static final EncodingRegistry ENCODING_REGISTRY = Encodings.newDefaultEncodingRegistry();
private static final Encoding ENCODING = ENCODING_REGISTRY.getEncoding(EncodingType.CL100K_BASE);

/**
* Estimate the number of tokens in the given text.
*
* @param text The text to estimate tokens for
* @return The estimated token count, or 0 if text is null/empty
*/
public static int estimateTokens(String text) {
if (text == null || text.isEmpty()) {
return 0;
}
try {
return ENCODING.countTokens(text);
} catch (Exception e) {
log.warn("Failed to count tokens, using fallback estimation: {}", e.getMessage());
// Fallback: rough estimate of ~4 characters per token
return text.length() / 4;
}
}

/**
* Check if the estimated token count exceeds the given limit.
*
* @param text The text to check
* @param maxTokens The maximum allowed tokens
* @return true if the text exceeds the limit, false otherwise
*/
public static boolean exceedsLimit(String text, int maxTokens) {
return estimateTokens(text) > maxTokens;
}

/**
* Result of a token estimation check with details.
*/
public record TokenEstimationResult(
int estimatedTokens,
int maxAllowedTokens,
boolean exceedsLimit,
double utilizationPercentage
) {
public String toLogString() {
return String.format("Tokens: %d / %d (%.1f%%) - %s",
estimatedTokens, maxAllowedTokens, utilizationPercentage,
exceedsLimit ? "EXCEEDS LIMIT" : "within limit");
}
}

/**
* Estimate tokens and check against limit, returning detailed result.
*
* @param text The text to check
* @param maxTokens The maximum allowed tokens
* @return Detailed estimation result
*/
public static TokenEstimationResult estimateAndCheck(String text, int maxTokens) {
int estimated = estimateTokens(text);
double utilization = maxTokens > 0 ? (estimated * 100.0 / maxTokens) : 0;

return new TokenEstimationResult(
estimated,
maxTokens,
estimated > maxTokens,
utilization
);
}
}

Check notice on line 75 in java-ecosystem/libs/analysis-engine/src/main/java/org/rostilos/codecrow/analysisengine/util/TokenEstimator.java (CodeCrow-Local / CodeCrow Analysis, LOW severity issue):
The utilization percentage calculation uses integer division for the ratio (estimated / maxTokens) if not careful, though here '100.0' forces double. However, it doesn't handle the case where maxTokens is zero or negative gracefully beyond returning 0, and the logic for 'exceedsLimit' is duplicated.
Suggested fix: Ensure the division logic is robust and use the existing exceedsLimit method to maintain DRY principles.
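
A short usage sketch for the new utility, tying it to the token-limit check this PR introduces. Only the `TokenEstimator` and `DiffTooLargeException` APIs come from this diff; `diffText`, `projectId`, `pullRequestId`, the hard-coded limit, and the location of the guard are assumptions.

```java
// Hypothetical guard before building the analysis prompt.
int maxTokens = 100_000;   // illustrative; the PR sources this limit from project configuration
TokenEstimator.TokenEstimationResult result = TokenEstimator.estimateAndCheck(diffText, maxTokens);
log.info(result.toLogString());   // e.g. "Tokens: 120000 / 100000 (120.0%) - EXCEEDS LIMIT"
if (result.exceedsLimit()) {
    throw new DiffTooLargeException(result.estimatedTokens(), maxTokens, projectId, pullRequestId);
}
```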
@@ -11,8 +11,7 @@ public record AIConnectionDTO(
AIProviderKey providerKey,
String aiModel,
OffsetDateTime createdAt,
- OffsetDateTime updatedAt,
- int tokenLimitation
+ OffsetDateTime updatedAt
) {

public static AIConnectionDTO fromAiConnection(AIConnection aiConnection) {
@@ -22,8 +21,7 @@ public static AIConnectionDTO fromAiConnection(AIConnection aiConnection) {
aiConnection.getProviderKey(),
aiConnection.getAiModel(),
aiConnection.getCreatedAt(),
- aiConnection.getUpdatedAt(),
- aiConnection.getTokenLimitation()
+ aiConnection.getUpdatedAt()
);
}
}
@@ -32,7 +32,8 @@ public record ProjectDTO(
String installationMethod,
CommentCommandsConfigDTO commentCommandsConfig,
Boolean webhooksConfigured,
- Long qualityGateId
+ Long qualityGateId,
+ Integer maxAnalysisTokenLimit
) {
public static ProjectDTO fromProject(Project project) {
Long vcsConnectionId = null;
@@ -123,6 +124,9 @@ public static ProjectDTO fromProject(Project project) {
if (project.getVcsRepoBinding() != null) {
webhooksConfigured = project.getVcsRepoBinding().isWebhooksConfigured();
}

+ // Get maxAnalysisTokenLimit from config
+ Integer maxAnalysisTokenLimit = config != null ? config.maxAnalysisTokenLimit() : ProjectConfig.DEFAULT_MAX_ANALYSIS_TOKEN_LIMIT;

return new ProjectDTO(
project.getId(),
@@ -146,7 +150,8 @@ public static ProjectDTO fromProject(Project project) {
installationMethod,
commentCommandsConfigDTO,
webhooksConfigured,
- project.getQualityGate() != null ? project.getQualityGate().getId() : null
+ project.getQualityGate() != null ? project.getQualityGate().getId() : null,
+ maxAnalysisTokenLimit
);
}

@@ -39,9 +39,6 @@ public class AIConnection {
@Column(name = "updated_at", nullable = false)
private OffsetDateTime updatedAt = OffsetDateTime.now();

@Column(name= "token_limitation", nullable = false)
private int tokenLimitation = 100000;

@PreUpdate
public void onUpdate() {
this.updatedAt = OffsetDateTime.now();
@@ -98,12 +95,4 @@ public OffsetDateTime getCreatedAt() {
public OffsetDateTime getUpdatedAt() {
return updatedAt;
}

- public void setTokenLimitation(int tokenLimitation) {
- this.tokenLimitation = tokenLimitation;
- }
-
- public int getTokenLimitation() {
- return tokenLimitation;
- }
}
@@ -222,6 +222,15 @@
this.configuration = configuration;
}

/**
* Returns the effective project configuration.
* If configuration is null, returns a new default ProjectConfig.
* This ensures callers always get a valid config with default values.
*/
public org.rostilos.codecrow.core.model.project.config.ProjectConfig getEffectiveConfig() {
return configuration != null ? configuration : new org.rostilos.codecrow.core.model.project.config.ProjectConfig();
}

Check notice on line 230 in java-ecosystem/libs/core/src/main/java/org/rostilos/codecrow/core/model/project/Project.java (CodeCrow-Local / CodeCrow Analysis, LOW severity issue):
The method 'getEffectiveConfig' creates a new 'ProjectConfig' instance every time it is called if 'configuration' is null. If this method is called frequently (e.g., in a loop or during high-throughput analysis), it may lead to unnecessary object allocations.
Suggested fix: Consider initializing 'configuration' with a default value at the field level if the project is always expected to have at least default settings, or document the instantiation behavior.

public org.rostilos.codecrow.core.model.branch.Branch getDefaultBranch() {
return defaultBranch;
}
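
An illustrative call-site comparison showing why `getEffectiveConfig()` helps: the null check collapses into one place. The `getConfiguration()` accessor is assumed; `maxAnalysisTokenLimit()` and `ProjectConfig.DEFAULT_MAX_ANALYSIS_TOKEN_LIMIT` appear in the ProjectDTO change earlier in this PR.

```java
// Before: every caller had to null-check the configuration.
Integer limitBefore = project.getConfiguration() != null
        ? project.getConfiguration().maxAnalysisTokenLimit()
        : ProjectConfig.DEFAULT_MAX_ANALYSIS_TOKEN_LIMIT;

// After: the default lives in one place.
Integer limitAfter = project.getEffectiveConfig().maxAnalysisTokenLimit();
```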