diff --git a/src/main/java/com/techfork/domain/recommendation/config/RecommendationProperties.java b/src/main/java/com/techfork/domain/recommendation/config/RecommendationProperties.java index 2505bf8b..15d6a996 100644 --- a/src/main/java/com/techfork/domain/recommendation/config/RecommendationProperties.java +++ b/src/main/java/com/techfork/domain/recommendation/config/RecommendationProperties.java @@ -15,16 +15,20 @@ @ConfigurationProperties(prefix = "recommendation") public class RecommendationProperties { - private Integer knnSearchSize = 80; + private Integer knnSearchSize = 50; - private Integer numCandidates = 180; + private Integer numCandidates = 150; - private Integer mmrCandidateSize = 80; + private Integer mmrCandidateSize = 60; private Integer mmrFinalSize = 30; private Double lambda = 0.95; + private Integer mmrFirstTopK = 5; + + private Integer mmrTopK = 3; + private Integer activeUserHours = 24; private EmbeddingWeights embeddingWeights = new EmbeddingWeights(); @@ -36,8 +40,8 @@ public class RecommendationProperties { @NoArgsConstructor @AllArgsConstructor public static class EmbeddingWeights { - private Float title = 0.4f; - private Float summary = 0.4f; + private Float title = 0.6f; + private Float summary = 0.2f; private Float content = 0.2f; } diff --git a/src/main/java/com/techfork/domain/recommendation/service/MmrService.java b/src/main/java/com/techfork/domain/recommendation/service/MmrService.java index 90565d76..3b393d61 100644 --- a/src/main/java/com/techfork/domain/recommendation/service/MmrService.java +++ b/src/main/java/com/techfork/domain/recommendation/service/MmrService.java @@ -75,9 +75,9 @@ public List applyMmr(List candidates) { log.debug("MMR 선택 시작: candidates={}, finalSize={}, lambda={}", candidates.size(), finalSize, lambda); - // 첫 번째는 상위 K개 중에서 랜덤하게 선택 (다양성 증가) - int topK = Math.min(5, remainingCandidates.size()); - int randomIndex = random.nextInt(topK); + // 첫 번째는 상위 K개 중에서 랜덤하게 선택 (다양성 증가, mmrFirstTopK=1이면 결정적) + int topK = Math.min(properties.getMmrFirstTopK(), remainingCandidates.size()); + int randomIndex = topK <= 1 ? 0 : random.nextInt(topK); MmrCandidate first = remainingCandidates.remove(randomIndex); selectedResults.add(MmrResult.builder() .postId(first.getPostId()) @@ -99,9 +99,9 @@ public List applyMmr(List candidates) { // MMR 점수 내림차순 정렬 scoredCandidates.sort((a, b) -> Double.compare(b.mmrScore, a.mmrScore)); - // 상위 K개 중에서 랜덤 선택 - int topKForSelection = Math.min(3, scoredCandidates.size()); - int randomIdx = random.nextInt(topKForSelection); + // 상위 K개 중에서 랜덤 선택 (mmrTopK=1이면 결정적) + int topKForSelection = Math.min(properties.getMmrTopK(), scoredCandidates.size()); + int randomIdx = topKForSelection <= 1 ? 0 : random.nextInt(topKForSelection); ScoredCandidate selected = scoredCandidates.get(randomIdx); remainingCandidates.remove(selected.originalIndex); diff --git a/src/test/java/com/techfork/evaluation/recommendation/KValueComparisonTest.java b/src/test/java/com/techfork/evaluation/recommendation/KValueComparisonTest.java index 0ecb8138..6e504d31 100644 --- a/src/test/java/com/techfork/evaluation/recommendation/KValueComparisonTest.java +++ b/src/test/java/com/techfork/evaluation/recommendation/KValueComparisonTest.java @@ -18,10 +18,12 @@ @Slf4j public class KValueComparisonTest extends RecommendationTestBase { + private static final String REPORT_FILE = "evaluation-report-recommendation-phase2.json"; + @Test - @DisplayName("knnSearchSize와 numCandidates 값 비교 평가") - void compareKValues() { - log.info("===== K 값에 따른 성능 및 품질 비교 ====="); + @DisplayName("knnSearchSize와 numCandidates 값 비교 평가 (1차 후보군, MMR bypass)") + void compareKValues() throws Exception { + log.info("===== K 값에 따른 성능 및 품질 비교 (1차 후보군, MMR bypass) ====="); log.info("Ground-Truth: {} 명 사용자", cachedGroundTruth.size()); List kConfigs = createKConfigs(); @@ -31,6 +33,9 @@ void compareKValues() { printKComparisonHeader(); List results = evaluateAllKConfigs(kConfigs, testUsers); printBestKResult(results); + + // JSON 리포트 저장 + saveKValueReport(REPORT_FILE, "K값 성능 비교 (1차 후보군)", false, toReportEntries(results)); } /** @@ -38,23 +43,20 @@ void compareKValues() { */ private List createKConfigs() { return Arrays.asList( - // 현재 기본값 - KConfig.builder().name("현재 (50/100)") - .knnSearchSize(50).numCandidates(100).build(), + KConfig.builder().name("소형 (30/90)") + .knnSearchSize(30).numCandidates(90).build(), - KConfig.builder().name("중간-하 (60/120)") - .knnSearchSize(60).numCandidates(120).build(), + KConfig.builder().name("중간-하 (40/120)") + .knnSearchSize(40).numCandidates(120).build(), - // 중간 값 - KConfig.builder().name("중간 (70/150)") - .knnSearchSize(70).numCandidates(150).build(), + KConfig.builder().name("현재 (50/150)") + .knnSearchSize(50).numCandidates(150).build(), - KConfig.builder().name("중간-상 (80/180)") - .knnSearchSize(80).numCandidates(180).build(), + KConfig.builder().name("중간 (60/180)") + .knnSearchSize(60).numCandidates(180).build(), - // 이전 값 - KConfig.builder().name("이전 (100/200)") - .knnSearchSize(100).numCandidates(200).build() + KConfig.builder().name("중간-상 (70/210)") + .knnSearchSize(70).numCandidates(210).build() ); } @@ -75,16 +77,16 @@ private List evaluateAllKConfigs(List kConfigs, List tes properties.setMmrFinalSize(30); properties.setLambda(1.0); // 다양성 제외, 관련성만 - // 가중치는 최적값으로 고정 (제목+요약 중심) + // 가중치는 최적값으로 고정 (제목 중심) RecommendationProperties.EmbeddingWeights weights = new RecommendationProperties.EmbeddingWeights(); - weights.setTitle(0.4f); - weights.setSummary(0.4f); + weights.setTitle(0.6f); + weights.setSummary(0.2f); weights.setContent(0.2f); properties.setEmbeddingWeights(weights); - // 평가 수행 - UserMetrics 수집 + // 평가 수행 - MMR bypass, 1차 후보군만 List userMetrics = testUsers.stream() - .map(user -> evaluateUserWithGroundTruth(user, properties)) + .map(user -> evaluateUserCandidatesOnly(user, properties)) .filter(Optional::isPresent) .map(Optional::get) .toList(); @@ -116,6 +118,7 @@ private KMetrics calculateAverageKMetrics(List userMetrics) { double n8 = userMetrics.stream().mapToDouble(UserMetrics::getNdcg8).average().orElse(0.0); double r30 = userMetrics.stream().mapToDouble(UserMetrics::getRecall30).average().orElse(0.0); double n30 = userMetrics.stream().mapToDouble(UserMetrics::getNdcg30).average().orElse(0.0); + double latency = userMetrics.stream().mapToDouble(UserMetrics::getLatencyMs).average().orElse(0.0); return KMetrics.builder() .recallAt4(r4) @@ -124,17 +127,18 @@ private KMetrics calculateAverageKMetrics(List userMetrics) { .ndcgAt8(n8) .recallAt30(r30) .ndcgAt30(n30) + .avgLatencyMs(latency) .build(); } private void printKComparisonHeader() { log.info(""); - log.info("설정 | K값 | Candidates | R@4 | R@8 | R@30 | nDCG@4 | nDCG@8 | nDCG@30 | 실행시간"); - log.info("----------------------------------------------------------------------------------------------"); + log.info("설정 | K값 | Candidates | R@4 | R@8 | R@30 | nDCG@4 | nDCG@8 | nDCG@30 | Latency | 실행시간"); + log.info("-----------------------------------------------------------------------------------------------------------"); } private void printKResult(KResult result) { - log.info(String.format("%-30s | %-9s | %-10s | %.4f | %.4f | %.4f | %.4f | %.4f | %.4f | %dms", + log.info(String.format("%-30s | %-9s | %-10s | %.4f | %.4f | %.4f | %.4f | %.4f | %.4f | %.0fms | %dms", result.name, result.knnSearchSize, result.numCandidates, @@ -144,6 +148,7 @@ private void printKResult(KResult result) { result.metrics.ndcgAt4, result.metrics.ndcgAt8, result.metrics.ndcgAt30, + result.metrics.avgLatencyMs, result.executionTimeMs )); } @@ -219,6 +224,25 @@ private void printBestKResult(List results) { }); } + private List> toReportEntries(List results) { + List> entries = new ArrayList<>(); + for (KResult r : results) { + Map entry = new LinkedHashMap<>(); + entry.put("configName", r.name); + entry.put("knnSearchSize", r.knnSearchSize); + entry.put("numCandidates", r.numCandidates); + entry.put("averageRecall4", Math.round(r.metrics.recallAt4 * 10000.0) / 10000.0); + entry.put("averageRecall8", Math.round(r.metrics.recallAt8 * 10000.0) / 10000.0); + entry.put("averageRecall30", Math.round(r.metrics.recallAt30 * 10000.0) / 10000.0); + entry.put("averageNDCG4", Math.round(r.metrics.ndcgAt4 * 10000.0) / 10000.0); + entry.put("averageNDCG8", Math.round(r.metrics.ndcgAt8 * 10000.0) / 10000.0); + entry.put("averageNDCG30", Math.round(r.metrics.ndcgAt30 * 10000.0) / 10000.0); + entry.put("avgLatencyMs", Math.round(r.metrics.avgLatencyMs * 100.0) / 100.0); + entries.add(entry); + } + return entries; + } + @Getter @Builder private static class KConfig { @@ -236,6 +260,7 @@ private static class KMetrics { private double ndcgAt8; private double recallAt30; private double ndcgAt30; + private double avgLatencyMs; } @Getter diff --git a/src/test/java/com/techfork/evaluation/recommendation/LambdaOptimizationTest.java b/src/test/java/com/techfork/evaluation/recommendation/LambdaOptimizationTest.java index 08ca095b..c236ede7 100644 --- a/src/test/java/com/techfork/evaluation/recommendation/LambdaOptimizationTest.java +++ b/src/test/java/com/techfork/evaluation/recommendation/LambdaOptimizationTest.java @@ -1,86 +1,131 @@ package com.techfork.evaluation.recommendation; +import com.techfork.domain.recommendation.config.RecommendationProperties; import com.techfork.domain.user.entity.User; import lombok.extern.slf4j.Slf4j; import org.junit.jupiter.api.DisplayName; import org.junit.jupiter.api.Tag; import org.junit.jupiter.api.Test; -import java.util.ArrayList; -import java.util.Comparator; -import java.util.List; +import java.util.*; /** - * MMR Lambda 파라미터 최적화 테스트 + * MMR Lambda 파라미터 최적화 테스트 (Phase 4) + * + * Phase 1~3에서 결정된 최적값을 고정하고, lambda만 변화시켜 테스트. + * MMR Top-K 샘플링을 비활성화(결정적)하여 재현 가능한 평가. */ @Tag("evaluation") @Slf4j public class LambdaOptimizationTest extends RecommendationTestBase { - @Test - @DisplayName("Lambda 최적화 - Ground-Truth 기반 평가") - void optimizeLambdaWithGroundTruth() { - log.info("===== Lambda 최적화 테스트 (Ground-Truth 기반) ====="); + private static final String REPORT_FILE = "evaluation-report-recommendation-phase4.json"; - if (cachedGroundTruth == null || cachedGroundTruth.isEmpty()) { - log.warn("Ground-Truth 데이터가 없습니다. Fixture 로드를 확인하세요."); - return; - } + // Phase 1 최적 가중치 + private static final float BEST_TITLE_WEIGHT = 0.6f; + private static final float BEST_SUMMARY_WEIGHT = 0.2f; + private static final float BEST_CONTENT_WEIGHT = 0.2f; + + // Phase 2 최적 K값 + private static final int BEST_KNN_SEARCH_SIZE = 50; + private static final int BEST_NUM_CANDIDATES = 150; - log.info("가중치 고정: 제목(0.5) + 요약(0.5)"); - log.info("Lambda 범위: 0.0 ~ 1.0 (0.1 단위)"); + // Phase 3 최적 후보군 크기 + private static final int BEST_MMR_CANDIDATE_SIZE = 60; + private static final int BEST_MMR_FINAL_SIZE = 30; + + @Test + @DisplayName("Lambda 최적화 - Phase 1~3 최적값 고정, 결정적 MMR") + void optimizeLambda() throws Exception { + log.info("===== Lambda 최적화 테스트 (Phase 4) ====="); + log.info("Ground-Truth: {} 명 사용자", cachedGroundTruth.size()); + log.info("고정값: title={}, summary={}, content={}, knnSearchSize={}, numCandidates={}, mmrCandidateSize={}", + BEST_TITLE_WEIGHT, BEST_SUMMARY_WEIGHT, BEST_CONTENT_WEIGHT, + BEST_KNN_SEARCH_SIZE, BEST_NUM_CANDIDATES, BEST_MMR_CANDIDATE_SIZE); - List configs = createLambdaTestConfigs(); + List lambdaValues = List.of(0.80, 0.85, 0.90, 0.93, 0.95, 0.97, 1.0); List testUsers = getTestUsers(); log.info("테스트 사용자: {} 명", testUsers.size()); + log.info("Lambda 범위: {}", lambdaValues); - printLambdaOptimizationHeader(); - List results = configs.stream() - .map(config -> { - EvaluationResult result = evaluateConfigWithGroundTruthAndILD(config, testUsers); - printLambdaOptimizationResult(result); - return result; - }) - .toList(); + printHeader(); + List results = evaluateAll(lambdaValues, testUsers); + printBestResult(results); - printBestLambdaResults(results); + saveRecommendationReport(REPORT_FILE, "Lambda 최적화", true, results); } - private List createLambdaTestConfigs() { - List configs = new ArrayList<>(); - // Lambda 0.0 ~ 1.0 (0.1 단위) - for (int i = 0; i <= 10; i++) { - double lambda = i / 10.0; - configs.add(ConfigCombo.builder() - .name(String.format("T0.5/S0.5 λ=%.1f", lambda)) - .titleWeight(0.5f) - .summaryWeight(0.5f) - .contentWeight(0.0f) - .mmrLambda(lambda) - .build()); + private List evaluateAll(List lambdaValues, List testUsers) { + List results = new ArrayList<>(); + + for (Double lambda : lambdaValues) { + RecommendationProperties props = new RecommendationProperties(); + props.setKnnSearchSize(BEST_KNN_SEARCH_SIZE); + props.setNumCandidates(BEST_NUM_CANDIDATES); + props.setMmrCandidateSize(BEST_MMR_CANDIDATE_SIZE); + props.setMmrFinalSize(BEST_MMR_FINAL_SIZE); + props.setLambda(lambda); + props.setMmrFirstTopK(1); + props.setMmrTopK(1); + + RecommendationProperties.EmbeddingWeights weights = new RecommendationProperties.EmbeddingWeights(); + weights.setTitle(BEST_TITLE_WEIGHT); + weights.setSummary(BEST_SUMMARY_WEIGHT); + weights.setContent(BEST_CONTENT_WEIGHT); + props.setEmbeddingWeights(weights); + + String configName = String.format("λ=%.2f", lambda); + + List metrics = testUsers.stream() + .map(user -> evaluateUserWithGroundTruthAndILD(user, props)) + .filter(Optional::isPresent) + .map(Optional::get) + .toList(); + + EvaluationResult result = calculateAverageMetrics(configName, metrics); + results.add(result); + printResult(result); } - return configs; + + return results; + } + + private void printHeader() { + log.info(""); + log.info(String.format("%-12s | %-8s | %-8s | %-8s | %-8s | %-8s | %-8s | %-8s | %-10s | %-8s", + "설정", "R@4", "R@8", "R@30", "nDCG@4", "nDCG@8", "nDCG@30", "ILD", "Composite", "Latency")); + log.info("-".repeat(115)); } - private void printBestLambdaResults(List results) { - log.info("\n===== Lambda 최적화 결과 요약 (K=8 기준) ====="); + private void printResult(EvaluationResult result) { + log.info(String.format("%-12s | %.4f | %.4f | %.4f | %.4f | %.4f | %.4f | %.4f | %.4f | %.0fms", + result.getConfigName(), + result.getAvgRecall4(), result.getAvgRecall8(), result.getAvgRecall30(), + result.getAvgNdcg4(), result.getAvgNdcg8(), result.getAvgNdcg30(), + result.getAvgIld(), result.getCompositeScore(), result.getAvgLatencyMs())); + } + + private void printBestResult(List results) { + log.info(""); + log.info("===== 최적 Lambda ====="); - // 복합 점수 최고 results.stream() .max(Comparator.comparingDouble(EvaluationResult::getCompositeScore)) - .ifPresent(best -> log.info(String.format("[복합 점수 최고] %s | R@8: %.4f | nDCG@8: %.4f | ILD: %.4f | Score: %.4f", - best.getConfigName(), best.getAvgRecall8(), best.getAvgNdcg8(), best.getAvgIld(), best.getCompositeScore()))); + .ifPresent(best -> log.info(String.format( + "[Composite 최고] %s | R@8: %.4f | nDCG@8: %.4f | ILD: %.4f | Score: %.4f | Latency: %.0fms", + best.getConfigName(), best.getAvgRecall8(), best.getAvgNdcg8(), + best.getAvgIld(), best.getCompositeScore(), best.getAvgLatencyMs()))); - // 다양성(ILD) 최고 results.stream() - .max(Comparator.comparingDouble(EvaluationResult::getAvgIld)) - .ifPresent(best -> log.info(String.format("[다양성(ILD) 최고] %s | ILD: %.4f", - best.getConfigName(), best.getAvgIld()))); + .max(Comparator.comparingDouble(r -> (r.getAvgRecall8() + r.getAvgNdcg8()) / 2.0)) + .ifPresent(best -> log.info(String.format( + "[정확성 최고 (R@8+nDCG@8)] %s | R@8: %.4f | nDCG@8: %.4f | ILD: %.4f", + best.getConfigName(), best.getAvgRecall8(), best.getAvgNdcg8(), best.getAvgIld()))); - // Recall@8 최고 results.stream() - .max(Comparator.comparingDouble(EvaluationResult::getAvgRecall8)) - .ifPresent(best -> log.info(String.format("[Recall@8 최고] %s | R@8: %.4f", - best.getConfigName(), best.getAvgRecall8()))); + .max(Comparator.comparingDouble(EvaluationResult::getAvgIld)) + .ifPresent(best -> log.info(String.format( + "[다양성 최고 (ILD)] %s | ILD: %.4f | R@8: %.4f | nDCG@8: %.4f", + best.getConfigName(), best.getAvgIld(), best.getAvgRecall8(), best.getAvgNdcg8()))); } -} +} \ No newline at end of file diff --git a/src/test/java/com/techfork/evaluation/recommendation/MmrCandidateSizeComparisonTest.java b/src/test/java/com/techfork/evaluation/recommendation/MmrCandidateSizeComparisonTest.java new file mode 100644 index 00000000..c1565a41 --- /dev/null +++ b/src/test/java/com/techfork/evaluation/recommendation/MmrCandidateSizeComparisonTest.java @@ -0,0 +1,145 @@ +package com.techfork.evaluation.recommendation; + +import com.techfork.domain.recommendation.config.RecommendationProperties; +import com.techfork.domain.user.entity.User; +import lombok.Builder; +import lombok.Getter; +import lombok.extern.slf4j.Slf4j; +import org.junit.jupiter.api.DisplayName; +import org.junit.jupiter.api.Tag; +import org.junit.jupiter.api.Test; + +import java.util.*; + +/** + * MMR 후보군 크기(mmrCandidateSize)에 따른 성능 비교 테스트 (Phase 3) + * + * MMR은 O(n²)이므로 후보군이 클수록 다양성은 높아지지만 성능이 저하됨. + * 정확성(Recall, nDCG)과 다양성(ILD)의 균형을 찾기 위한 테스트. + */ +@Tag("evaluation") +@Slf4j +public class MmrCandidateSizeComparisonTest extends RecommendationTestBase { + + private static final String REPORT_FILE = "evaluation-report-recommendation-phase3.json"; + + // Phase 1에서 결정된 최적 가중치 고정 + private static final float BEST_TITLE_WEIGHT = 0.6f; + private static final float BEST_SUMMARY_WEIGHT = 0.2f; + private static final float BEST_CONTENT_WEIGHT = 0.2f; + + // Phase 2에서 결정된 최적 K값 고정 + private static final int BEST_KNN_SEARCH_SIZE = 50; + private static final int BEST_NUM_CANDIDATES = 150; + + private static final double FIXED_LAMBDA = 0.95; + + @Test + @DisplayName("MMR 후보군 크기별 성능 비교 (MMR 포함, ILD 측정)") + void compareMmrCandidateSizes() throws Exception { + log.info("===== MMR 후보군 크기별 성능 비교 ====="); + log.info("Ground-Truth: {} 명 사용자", cachedGroundTruth.size()); + log.info("고정값: title={}, summary={}, content={}, knnSearchSize={}, numCandidates={}, lambda={}", + BEST_TITLE_WEIGHT, BEST_SUMMARY_WEIGHT, BEST_CONTENT_WEIGHT, + BEST_KNN_SEARCH_SIZE, BEST_NUM_CANDIDATES, FIXED_LAMBDA); + + List configs = createConfigs(); + List testUsers = getTestUsers(); + log.info("테스트 사용자: {} 명", testUsers.size()); + + printHeader(); + List results = evaluateAll(configs, testUsers); + printBestResult(results); + + saveRecommendationReport(REPORT_FILE, "MMR 후보군 크기 비교", true, results); + } + + private List createConfigs() { + return Arrays.asList( + MmrCandidateConfig.builder().name("후보 40개").mmrCandidateSize(40).mmrFinalSize(30).build(), + MmrCandidateConfig.builder().name("후보 60개").mmrCandidateSize(60).mmrFinalSize(30).build(), + MmrCandidateConfig.builder().name("현재 (80개)").mmrCandidateSize(80).mmrFinalSize(30).build(), + MmrCandidateConfig.builder().name("후보 100개").mmrCandidateSize(100).mmrFinalSize(30).build() + ); + } + + private List evaluateAll(List configs, List testUsers) { + List results = new ArrayList<>(); + + for (MmrCandidateConfig config : configs) { + RecommendationProperties props = new RecommendationProperties(); + props.setKnnSearchSize(BEST_KNN_SEARCH_SIZE); + props.setNumCandidates(BEST_NUM_CANDIDATES); + props.setMmrCandidateSize(config.mmrCandidateSize); + props.setMmrFinalSize(config.mmrFinalSize); + props.setLambda(FIXED_LAMBDA); + props.setMmrFirstTopK(1); + props.setMmrTopK(1); + + RecommendationProperties.EmbeddingWeights weights = new RecommendationProperties.EmbeddingWeights(); + weights.setTitle(BEST_TITLE_WEIGHT); + weights.setSummary(BEST_SUMMARY_WEIGHT); + weights.setContent(BEST_CONTENT_WEIGHT); + props.setEmbeddingWeights(weights); + + List metrics = testUsers.stream() + .map(user -> evaluateUserWithGroundTruthAndILD(user, props)) + .filter(Optional::isPresent) + .map(Optional::get) + .toList(); + + EvaluationResult result = calculateAverageMetrics(config.name, metrics); + results.add(result); + printResult(result); + } + + return results; + } + + private void printHeader() { + log.info(""); + log.info(String.format("%-20s | %-8s | %-8s | %-8s | %-8s | %-8s | %-8s | %-8s | %-10s | %-8s", + "설정", "R@4", "R@8", "R@30", "nDCG@4", "nDCG@8", "nDCG@30", "ILD", "Composite", "Latency")); + log.info("-".repeat(120)); + } + + private void printResult(EvaluationResult result) { + log.info(String.format("%-20s | %.4f | %.4f | %.4f | %.4f | %.4f | %.4f | %.4f | %.4f | %.0fms", + result.getConfigName(), + result.getAvgRecall4(), result.getAvgRecall8(), result.getAvgRecall30(), + result.getAvgNdcg4(), result.getAvgNdcg8(), result.getAvgNdcg30(), + result.getAvgIld(), result.getCompositeScore(), result.getAvgLatencyMs())); + } + + private void printBestResult(List results) { + log.info(""); + log.info("===== 최적 MMR 후보군 크기 ====="); + + results.stream() + .max(Comparator.comparingDouble(EvaluationResult::getCompositeScore)) + .ifPresent(best -> log.info(String.format( + "[Composite 최고] %s | R@8: %.4f | nDCG@8: %.4f | ILD: %.4f | Score: %.4f | Latency: %.0fms", + best.getConfigName(), best.getAvgRecall8(), best.getAvgNdcg8(), + best.getAvgIld(), best.getCompositeScore(), best.getAvgLatencyMs()))); + + results.stream() + .max(Comparator.comparingDouble(r -> (r.getAvgRecall8() + r.getAvgNdcg8()) / 2.0)) + .ifPresent(best -> log.info(String.format( + "[정확성 최고 (R@8+nDCG@8)] %s | R@8: %.4f | nDCG@8: %.4f | ILD: %.4f", + best.getConfigName(), best.getAvgRecall8(), best.getAvgNdcg8(), best.getAvgIld()))); + + results.stream() + .max(Comparator.comparingDouble(EvaluationResult::getAvgIld)) + .ifPresent(best -> log.info(String.format( + "[다양성 최고 (ILD)] %s | ILD: %.4f | R@8: %.4f | nDCG@8: %.4f", + best.getConfigName(), best.getAvgIld(), best.getAvgRecall8(), best.getAvgNdcg8()))); + } + + @Getter + @Builder + private static class MmrCandidateConfig { + private String name; + private int mmrCandidateSize; + private int mmrFinalSize; + } +} diff --git a/src/test/java/com/techfork/evaluation/recommendation/RecommendationConfigComparisonTest.java b/src/test/java/com/techfork/evaluation/recommendation/RecommendationConfigComparisonTest.java index 9ef1253f..f6e3d38c 100644 --- a/src/test/java/com/techfork/evaluation/recommendation/RecommendationConfigComparisonTest.java +++ b/src/test/java/com/techfork/evaluation/recommendation/RecommendationConfigComparisonTest.java @@ -18,10 +18,12 @@ @Slf4j public class RecommendationConfigComparisonTest extends RecommendationTestBase { + private static final String REPORT_FILE = "evaluation-report-recommendation-phase1.json"; + @Test - @DisplayName("여러 설정 비교 평가 (Ground-Truth 기반)") - void compareConfigurationsWithGroundTruth() { - log.info("===== 설정별 성능 비교 (Ground-Truth 기반) ====="); + @DisplayName("여러 설정 비교 평가 (1차 후보군, MMR bypass)") + void compareConfigurationsWithGroundTruth() throws Exception { + log.info("===== 설정별 성능 비교 (1차 후보군, MMR bypass) ====="); log.info("Ground-Truth: {} 명 사용자", cachedGroundTruth.size()); List configs = createTestConfigs(); @@ -29,8 +31,11 @@ void compareConfigurationsWithGroundTruth() { log.info("테스트 사용자: {} 명", testUsers.size()); printWeightComparisonHeader(); - List results = evaluateAllConfigsWithGroundTruth(configs, testUsers); + List results = evaluateAllConfigsCandidatesOnly(configs, testUsers); printBestWeightResult(results); + + // JSON 리포트 저장 + saveRecommendationReport(REPORT_FILE, "설정별 성능 비교 (1차 후보군)", false, results); } /** @@ -64,15 +69,15 @@ private List createTestConfigs() { } /** - * 모든 설정 평가 (Ground-Truth 기반 - ILD 제외) + * 모든 설정 평가 (1차 후보군만 - MMR bypass) */ - private List evaluateAllConfigsWithGroundTruth( + private List evaluateAllConfigsCandidatesOnly( List configs, List testUsers) { List results = new ArrayList<>(); for (ConfigCombo config : configs) { - EvaluationResult result = evaluateConfigWithGroundTruth(config, testUsers); + EvaluationResult result = evaluateConfigCandidatesOnly(config, testUsers); results.add(result); printWeightComparisonResult(result); } diff --git a/src/test/java/com/techfork/evaluation/recommendation/RecommendationEvaluationService.java b/src/test/java/com/techfork/evaluation/recommendation/RecommendationEvaluationService.java index a0b7426f..11506b59 100644 --- a/src/test/java/com/techfork/evaluation/recommendation/RecommendationEvaluationService.java +++ b/src/test/java/com/techfork/evaluation/recommendation/RecommendationEvaluationService.java @@ -7,7 +7,6 @@ import co.elastic.clients.elasticsearch._types.KnnSearch; import com.techfork.domain.activity.repository.ReadPostRepository; import com.techfork.domain.post.document.PostDocument; -import com.techfork.domain.post.entity.Post; import com.techfork.domain.post.repository.PostRepository; import com.techfork.domain.recommendation.config.RecommendationProperties; import com.techfork.domain.recommendation.entity.RecommendedPost; @@ -17,6 +16,7 @@ import com.techfork.domain.recommendation.service.MmrService; import com.techfork.domain.recommendation.service.MmrService.MmrCandidate; import com.techfork.domain.recommendation.service.MmrService.MmrResult; +import com.techfork.global.util.RrfScorer; import com.techfork.domain.user.document.UserProfileDocument; import com.techfork.domain.user.entity.User; import com.techfork.domain.user.repository.UserProfileDocumentRepository; @@ -113,6 +113,73 @@ public List generateRecommendationsForEvaluation(User user, Set trai } } + /** + * 1차 후보군만 반환 (MMR bypass) - RRF 결과를 similarityScore 내림차순으로 반환 + */ + public List generateCandidatesOnly(User user, Set trainPostIds, RecommendationProperties properties) { + Optional profileOpt = userProfileDocumentRepository.findByUserId(user.getId()); + if (profileOpt.isEmpty() || profileOpt.get().getProfileVector() == null) { + return Collections.emptyList(); + } + + UserProfileDocument profile = profileOpt.get(); + float[] userProfileVector = profile.getProfileVector(); + List keyKeywords = profile.getKeyKeywords(); + + try { + List candidates = searchCandidatesWithCustomReadHistory( + userProfileVector, keyKeywords, trainPostIds, properties); + + if (candidates.isEmpty()) { + return Collections.emptyList(); + } + + // MMR 없이 similarityScore 내림차순으로 반환 + return candidates.stream() + .sorted(Comparator.comparingDouble(MmrCandidate::getSimilarityScore).reversed()) + .map(MmrCandidate::getPostId) + .toList(); + + } catch (Exception e) { + log.error("사용자 {} 1차 후보군 생성 실패", user.getId(), e); + return Collections.emptyList(); + } + } + + /** + * 부모의 applyRrf를 오버라이드하여 mmrCandidateSize limit을 제거. + * 테스트에서 다양한 mmrCandidateSize를 시도할 수 있도록 호출 측에서 limit 적용. + */ + @Override + protected List applyRrf(List> vectorHits, List> keywordHits) { + List vectorPostIds = vectorHits.stream() + .filter(hit -> hit.source() != null) + .map(hit -> hit.source().getPostId()) + .toList(); + + List keywordPostIds = keywordHits.stream() + .filter(hit -> hit.source() != null) + .map(hit -> hit.source().getPostId()) + .toList(); + + Map rrfScores = RrfScorer.calculateRrfScores(vectorPostIds, keywordPostIds); + + Map> hitMap = new HashMap<>(); + vectorHits.stream() + .filter(hit -> hit.source() != null) + .forEach(hit -> hitMap.putIfAbsent(hit.source().getPostId(), hit)); + keywordHits.stream() + .filter(hit -> hit.source() != null) + .forEach(hit -> hitMap.putIfAbsent(hit.source().getPostId(), hit)); + + // limit 없이 전체 후보 반환 + return rrfScores.entrySet().stream() + .sorted(Map.Entry.comparingByValue().reversed()) + .map(entry -> mapToMmrCandidate(hitMap.get(entry.getKey()), entry.getValue())) + .filter(candidate -> candidate.getSummaryVector() != null) + .toList(); + } + private List searchCandidatesWithCustomReadHistory( float[] userProfileVector, List keyKeywords, @@ -194,9 +261,15 @@ private List searchCandidatesWithCustomReadHistory( // 5. RRF로 결합 (부모 클래스의 protected 메서드 사용) long rrfStartTime = System.currentTimeMillis(); - List candidates = applyRrf(vectorHits, keywordHits); + List allCandidates = applyRrf(vectorHits, keywordHits); long rrfElapsedTime = System.currentTimeMillis() - rrfStartTime; - log.info("[EVAL] RRF 결합 실행 시간: {}ms (결과: {}개)", rrfElapsedTime, candidates.size()); + + // 테스트 properties의 mmrCandidateSize로 limit (부모 applyRrf는 기본 properties를 사용하므로) + List candidates = allCandidates.stream() + .limit(properties.getMmrCandidateSize()) + .toList(); + log.info("[EVAL] RRF 결합 실행 시간: {}ms (전체: {}개, mmrCandidateSize: {}개)", + rrfElapsedTime, allCandidates.size(), candidates.size()); return candidates; } diff --git a/src/test/java/com/techfork/evaluation/recommendation/RecommendationTestBase.java b/src/test/java/com/techfork/evaluation/recommendation/RecommendationTestBase.java index a9dcf54b..af42acf4 100644 --- a/src/test/java/com/techfork/evaluation/recommendation/RecommendationTestBase.java +++ b/src/test/java/com/techfork/evaluation/recommendation/RecommendationTestBase.java @@ -1,12 +1,17 @@ package com.techfork.evaluation.recommendation; +import co.elastic.clients.elasticsearch.ElasticsearchClient; import com.techfork.domain.activity.repository.ReadPostRepository; import com.techfork.domain.post.repository.PostDocumentRepository; import com.techfork.domain.recommendation.config.RecommendationProperties; import com.techfork.evaluation.recommendation.util.EvaluationFixtureLoader; import com.techfork.domain.user.entity.User; import com.techfork.global.common.IntegrationTestBase; +import com.techfork.global.config.ElasticsearchCacheManager; import com.techfork.global.util.VectorUtil; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.fasterxml.jackson.databind.SerializationFeature; +import com.fasterxml.jackson.datatype.jsr310.JavaTimeModule; import lombok.AllArgsConstructor; import lombok.Builder; import lombok.Data; @@ -15,6 +20,10 @@ import org.junit.jupiter.api.TestInstance; import org.springframework.beans.factory.annotation.Autowired; +import java.io.File; +import java.io.IOException; +import java.time.LocalDateTime; +import java.time.format.DateTimeFormatter; import java.util.*; /** @@ -39,10 +48,12 @@ public abstract class RecommendationTestBase extends IntegrationTestBase { @Autowired protected EvaluationFixtureLoader fixtureLoader; @Autowired protected RecommendationQualityService qualityService; - @Autowired protected RecommendationEvaluationService evaluationService; // 새로운 서비스 + @Autowired protected RecommendationEvaluationService evaluationService; @Autowired protected PostDocumentRepository postDocumentRepository; @Autowired protected ReadPostRepository readPostRepository; @Autowired protected com.techfork.domain.user.repository.UserRepository userRepository; + @Autowired protected ElasticsearchClient elasticsearchClient; + @Autowired protected ElasticsearchCacheManager elasticsearchCacheManager; protected static List cachedTestUsers; protected static Map> cachedGroundTruth; @@ -55,6 +66,28 @@ void loadFixtures() { cachedGroundTruth = fixtureLoader.loadAll(); fixturesLoaded = true; log.info("===== Fixture 데이터 로드 완료: {} 명 =====", cachedGroundTruth.size()); + + // ES 세그먼트 병합 + HNSW 웜업 (프로덕션과 동일한 조건) + forceMergeIndices(); + // 웜업을 여러 번 실행하여 HNSW 그래프 + OS 캐시를 확실히 로드 + for (int i = 0; i < 3; i++) { + elasticsearchCacheManager.keepAliveWarmup(); + } + log.info("===== ES forcemerge + warmup 완료 ====="); + } + } + + private void forceMergeIndices() { + try { + long start = System.currentTimeMillis(); + elasticsearchClient.indices().forcemerge(f -> f + .index("posts", "user_profiles") + .maxNumSegments(1L) + ); + log.info("[TEST] forcemerge 완료 (posts, user_profiles → 1 segment). Time={}ms", + System.currentTimeMillis() - start); + } catch (Exception e) { + log.warn("[TEST] forcemerge 실패: {}", e.getMessage()); } } @@ -79,6 +112,7 @@ protected static class EvaluationResult { double avgRecall30; double avgNdcg30; double avgIld; double compositeScore; + double avgLatencyMs; } @Data @@ -88,6 +122,7 @@ protected static class UserMetrics { double recall8; double ndcg8; double recall30; double ndcg30; double ild; + double latencyMs; } protected List getTestUsers() { @@ -119,6 +154,7 @@ protected EvaluationResult calculateAverageMetrics(String configName, List evaluateUserWithGroundTruth(User user, Recommend double r30 = qualityService.calculateRecall(recIds, groundTruth.keySet(), K_DEEP_EXPLORE); double n30 = qualityService.calculateNDCG(recIds, groundTruth, K_DEEP_EXPLORE); - return Optional.of(new UserMetrics(r4, n4, r8, n8, r30, n30, 0.0)); + return Optional.of(new UserMetrics(r4, n4, r8, n8, r30, n30, 0.0, 0.0)); } catch (Exception e) { return Optional.empty(); } @@ -164,8 +201,9 @@ protected Optional evaluateUserWithGroundTruthAndILD(User user, Rec Set readIds = readPostRepository.findRecentReadPostsByUserIdWithMinDuration(user.getId(), org.springframework.data.domain.PageRequest.of(0, 10000)) .stream().map(rp -> rp.getPost().getId()).collect(java.util.stream.Collectors.toSet()); - // 새로운 서비스 사용 + long start = System.currentTimeMillis(); List recIds = evaluationService.generateRecommendationsForEvaluation(user, readIds, props); + double latencyMs = System.currentTimeMillis() - start; if (recIds.isEmpty()) return Optional.empty(); double r4 = qualityService.calculateRecall(recIds, groundTruth.keySet(), K_FIRST_ROW); @@ -180,7 +218,7 @@ protected Optional evaluateUserWithGroundTruthAndILD(User user, Rec .filter(Objects::nonNull).toList(); double ild = qualityService.calculateILD(vectors); - return Optional.of(new UserMetrics(r4, n4, r8, n8, r30, n30, ild)); + return Optional.of(new UserMetrics(r4, n4, r8, n8, r30, n30, ild, latencyMs)); } catch (Exception e) { log.warn("사용자 {} 평가 중 오류: {}", user.getId(), e.getMessage()); return Optional.empty(); @@ -233,4 +271,137 @@ protected void printLambdaOptimizationResult(EvaluationResult result) { log.info(String.format("%-25s | %.4f | %.4f | %.4f | %.4f", result.getConfigName(), result.getAvgRecall8(), result.getAvgNdcg8(), result.getAvgIld(), result.getCompositeScore())); } + + // ----------------------------------------------------------------------- + // MMR bypass (1차 후보군만 평가) + // ----------------------------------------------------------------------- + + protected Optional evaluateUserCandidatesOnly(User user, RecommendationProperties props) { + try { + Map groundTruth = cachedGroundTruth.get(user.getId()); + if (groundTruth == null || groundTruth.isEmpty()) return Optional.empty(); + + Set readIds = readPostRepository.findRecentReadPostsByUserIdWithMinDuration(user.getId(), org.springframework.data.domain.PageRequest.of(0, 10000)) + .stream().map(rp -> rp.getPost().getId()).collect(java.util.stream.Collectors.toSet()); + + long start = System.currentTimeMillis(); + List recIds = evaluationService.generateCandidatesOnly(user, readIds, props); + double latencyMs = System.currentTimeMillis() - start; + if (recIds.isEmpty()) return Optional.empty(); + + double r4 = qualityService.calculateRecall(recIds, groundTruth.keySet(), K_FIRST_ROW); + double n4 = qualityService.calculateNDCG(recIds, groundTruth, K_FIRST_ROW); + double r8 = qualityService.calculateRecall(recIds, groundTruth.keySet(), K_FIRST_SCREEN); + double n8 = qualityService.calculateNDCG(recIds, groundTruth, K_FIRST_SCREEN); + double r30 = qualityService.calculateRecall(recIds, groundTruth.keySet(), K_DEEP_EXPLORE); + double n30 = qualityService.calculateNDCG(recIds, groundTruth, K_DEEP_EXPLORE); + + return Optional.of(new UserMetrics(r4, n4, r8, n8, r30, n30, 0.0, latencyMs)); + } catch (Exception e) { + log.warn("사용자 {} candidates-only 평가 중 오류: {}", user.getId(), e.getMessage()); + return Optional.empty(); + } + } + + protected EvaluationResult evaluateConfigCandidatesOnly(ConfigCombo config, List testUsers) { + RecommendationProperties props = createProperties(config.getTitleWeight(), config.getSummaryWeight(), config.getContentWeight(), config.getMmrLambda()); + + List metrics = testUsers.stream() + .map(user -> evaluateUserCandidatesOnly(user, props)) + .filter(Optional::isPresent) + .map(Optional::get) + .toList(); + + return calculateAverageMetrics(config.getName(), metrics); + } + + // ----------------------------------------------------------------------- + // JSON 리포트 저장 + // ----------------------------------------------------------------------- + + protected void saveRecommendationReport(String fileName, String testName, boolean mmrApplied, + List results) throws IOException { + List> configList = new ArrayList<>(); + for (EvaluationResult r : results) { + Map entry = new LinkedHashMap<>(); + entry.put("configName", r.getConfigName()); + entry.put("averageRecall4", round4(r.getAvgRecall4())); + entry.put("averageRecall8", round4(r.getAvgRecall8())); + entry.put("averageRecall30", round4(r.getAvgRecall30())); + entry.put("averageNDCG4", round4(r.getAvgNdcg4())); + entry.put("averageNDCG8", round4(r.getAvgNdcg8())); + entry.put("averageNDCG30", round4(r.getAvgNdcg30())); + if (mmrApplied) { + entry.put("averageILD", round4(r.getAvgIld())); + entry.put("compositeScore", round4(r.getCompositeScore())); + } + entry.put("avgLatencyMs", Math.round(r.getAvgLatencyMs() * 100.0) / 100.0); + configList.add(entry); + } + + // Best 설정 계산 + String bestByRecall8 = results.stream() + .max(Comparator.comparingDouble(EvaluationResult::getAvgRecall8)) + .map(EvaluationResult::getConfigName).orElse(""); + String bestByNdcg8 = results.stream() + .max(Comparator.comparingDouble(EvaluationResult::getAvgNdcg8)) + .map(EvaluationResult::getConfigName).orElse(""); + String bestByBalanced = results.stream() + .max(Comparator.comparingDouble(r -> (r.getAvgRecall8() + r.getAvgNdcg8()) / 2.0)) + .map(EvaluationResult::getConfigName).orElse(""); + + Map report = new LinkedHashMap<>(); + report.put("evaluatedAt", LocalDateTime.now().format(DateTimeFormatter.ISO_LOCAL_DATE_TIME)); + report.put("groundTruthUsers", cachedGroundTruth.size()); + report.put("testName", testName); + report.put("mmrApplied", mmrApplied); + report.put("configs", configList); + report.put("bestByRecall8", bestByRecall8); + report.put("bestByNDCG8", bestByNdcg8); + report.put("bestByBalanced", bestByBalanced); + + ObjectMapper writer = new ObjectMapper() + .registerModule(new JavaTimeModule()) + .enable(SerializationFeature.INDENT_OUTPUT); + + File outputFile = new File("src/test/resources/" + fileName); + writer.writeValue(outputFile, report); + log.info("리포트 저장 완료: {}", outputFile.getAbsolutePath()); + } + + protected void saveKValueReport(String fileName, String testName, boolean mmrApplied, + List> kResults) throws IOException { + // Best 설정 계산 + String bestByRecall8 = kResults.stream() + .max(Comparator.comparingDouble(r -> (double) r.get("averageRecall8"))) + .map(r -> (String) r.get("configName")).orElse(""); + String bestByNdcg8 = kResults.stream() + .max(Comparator.comparingDouble(r -> (double) r.get("averageNDCG8"))) + .map(r -> (String) r.get("configName")).orElse(""); + String bestByBalanced = kResults.stream() + .max(Comparator.comparingDouble(r -> ((double) r.get("averageRecall8") + (double) r.get("averageNDCG8")) / 2.0)) + .map(r -> (String) r.get("configName")).orElse(""); + + Map report = new LinkedHashMap<>(); + report.put("evaluatedAt", LocalDateTime.now().format(DateTimeFormatter.ISO_LOCAL_DATE_TIME)); + report.put("groundTruthUsers", cachedGroundTruth.size()); + report.put("testName", testName); + report.put("mmrApplied", mmrApplied); + report.put("configs", kResults); + report.put("bestByRecall8", bestByRecall8); + report.put("bestByNDCG8", bestByNdcg8); + report.put("bestByBalanced", bestByBalanced); + + ObjectMapper writer = new ObjectMapper() + .registerModule(new JavaTimeModule()) + .enable(SerializationFeature.INDENT_OUTPUT); + + File outputFile = new File("src/test/resources/" + fileName); + writer.writeValue(outputFile, report); + log.info("리포트 저장 완료: {}", outputFile.getAbsolutePath()); + } + + private static double round4(double v) { + return Math.round(v * 10000.0) / 10000.0; + } } diff --git a/src/test/resources/evaluation-report-recommendation-phase1.json b/src/test/resources/evaluation-report-recommendation-phase1.json new file mode 100644 index 00000000..0b3fda82 --- /dev/null +++ b/src/test/resources/evaluation-report-recommendation-phase1.json @@ -0,0 +1,66 @@ +{ + "evaluatedAt" : "2026-03-08T17:19:29.2938803", + "groundTruthUsers" : 15, + "testName" : "설정별 성능 비교 (1차 후보군)", + "mmrApplied" : false, + "configs" : [ { + "configName" : "균등 가중치", + "averageRecall4" : 0.0422, + "averageRecall8" : 0.0911, + "averageRecall30" : 0.2356, + "averageNDCG4" : 0.2628, + "averageNDCG8" : 0.2777, + "averageNDCG30" : 0.2665 + }, { + "configName" : "제목 중심", + "averageRecall4" : 0.0444, + "averageRecall8" : 0.1, + "averageRecall30" : 0.2444, + "averageNDCG4" : 0.2619, + "averageNDCG8" : 0.2934, + "averageNDCG30" : 0.2783 + }, { + "configName" : "요약 중심", + "averageRecall4" : 0.04, + "averageRecall8" : 0.0689, + "averageRecall30" : 0.1956, + "averageNDCG4" : 0.256, + "averageNDCG8" : 0.2319, + "averageNDCG30" : 0.2257 + }, { + "configName" : "컨텐츠 중심", + "averageRecall4" : 0.0444, + "averageRecall8" : 0.0822, + "averageRecall30" : 0.2156, + "averageNDCG4" : 0.2744, + "averageNDCG8" : 0.2685, + "averageNDCG30" : 0.2537 + }, { + "configName" : "현재 기본값", + "averageRecall4" : 0.0467, + "averageRecall8" : 0.0911, + "averageRecall30" : 0.2356, + "averageNDCG4" : 0.2883, + "averageNDCG8" : 0.2892, + "averageNDCG30" : 0.2725 + }, { + "configName" : "제목+요약 중심", + "averageRecall4" : 0.0467, + "averageRecall8" : 0.0911, + "averageRecall30" : 0.2356, + "averageNDCG4" : 0.2883, + "averageNDCG8" : 0.2888, + "averageNDCG30" : 0.2725 + }, { + "configName" : "제목+요약만 (컨텐츠 0)", + "averageRecall4" : 0.0467, + "averageRecall8" : 0.0867, + "averageRecall30" : 0.2333, + "averageNDCG4" : 0.2906, + "averageNDCG8" : 0.281, + "averageNDCG30" : 0.27 + } ], + "bestByRecall8" : "제목 중심", + "bestByNDCG8" : "제목 중심", + "bestByBalanced" : "제목 중심" +} \ No newline at end of file diff --git a/src/test/resources/evaluation-report-recommendation-phase2.json b/src/test/resources/evaluation-report-recommendation-phase2.json new file mode 100644 index 00000000..a5918fef --- /dev/null +++ b/src/test/resources/evaluation-report-recommendation-phase2.json @@ -0,0 +1,65 @@ +{ + "evaluatedAt" : "2026-03-08T20:11:18.7722936", + "groundTruthUsers" : 15, + "testName" : "K값 성능 비교 (1차 후보군)", + "mmrApplied" : false, + "configs" : [ { + "configName" : "소형 (30/90)", + "knnSearchSize" : 30, + "numCandidates" : 90, + "averageRecall4" : 0.0356, + "averageRecall8" : 0.08, + "averageRecall30" : 0.2178, + "averageNDCG4" : 0.2065, + "averageNDCG8" : 0.2318, + "averageNDCG30" : 0.2338, + "avgLatencyMs" : 476.87 + }, { + "configName" : "중간-하 (40/120)", + "knnSearchSize" : 40, + "numCandidates" : 120, + "averageRecall4" : 0.04, + "averageRecall8" : 0.0867, + "averageRecall30" : 0.2244, + "averageNDCG4" : 0.2493, + "averageNDCG8" : 0.2618, + "averageNDCG30" : 0.2535, + "avgLatencyMs" : 549.73 + }, { + "configName" : "현재 (50/150)", + "knnSearchSize" : 50, + "numCandidates" : 150, + "averageRecall4" : 0.04, + "averageRecall8" : 0.0867, + "averageRecall30" : 0.2244, + "averageNDCG4" : 0.2542, + "averageNDCG8" : 0.2655, + "averageNDCG30" : 0.258, + "avgLatencyMs" : 618.47 + }, { + "configName" : "중간 (60/180)", + "knnSearchSize" : 60, + "numCandidates" : 180, + "averageRecall4" : 0.0422, + "averageRecall8" : 0.0844, + "averageRecall30" : 0.22, + "averageNDCG4" : 0.2625, + "averageNDCG8" : 0.2634, + "averageNDCG30" : 0.2568, + "avgLatencyMs" : 767.2 + }, { + "configName" : "중간-상 (70/210)", + "knnSearchSize" : 70, + "numCandidates" : 210, + "averageRecall4" : 0.0422, + "averageRecall8" : 0.0978, + "averageRecall30" : 0.2356, + "averageNDCG4" : 0.2536, + "averageNDCG8" : 0.2825, + "averageNDCG30" : 0.2697, + "avgLatencyMs" : 928.87 + } ], + "bestByRecall8" : "중간-상 (70/210)", + "bestByNDCG8" : "중간-상 (70/210)", + "bestByBalanced" : "중간-상 (70/210)" +} \ No newline at end of file diff --git a/src/test/resources/evaluation-report-recommendation-phase3.json b/src/test/resources/evaluation-report-recommendation-phase3.json new file mode 100644 index 00000000..923e81fb --- /dev/null +++ b/src/test/resources/evaluation-report-recommendation-phase3.json @@ -0,0 +1,54 @@ +{ + "evaluatedAt" : "2026-03-08T20:43:54.2551289", + "groundTruthUsers" : 15, + "testName" : "MMR 후보군 크기 비교", + "mmrApplied" : true, + "configs" : [ { + "configName" : "후보 40개", + "averageRecall4" : 0.04, + "averageRecall8" : 0.0889, + "averageRecall30" : 0.2089, + "averageNDCG4" : 0.229, + "averageNDCG8" : 0.2721, + "averageNDCG30" : 0.2385, + "averageILD" : 0.6376, + "compositeScore" : 0.2719, + "avgLatencyMs" : 866.27 + }, { + "configName" : "후보 60개", + "averageRecall4" : 0.0378, + "averageRecall8" : 0.0933, + "averageRecall30" : 0.2067, + "averageNDCG4" : 0.2144, + "averageNDCG8" : 0.279, + "averageNDCG30" : 0.2358, + "averageILD" : 0.653, + "compositeScore" : 0.2795, + "avgLatencyMs" : 853.8 + }, { + "configName" : "현재 (80개)", + "averageRecall4" : 0.0378, + "averageRecall8" : 0.0889, + "averageRecall30" : 0.2044, + "averageNDCG4" : 0.2189, + "averageNDCG8" : 0.2692, + "averageNDCG30" : 0.2332, + "averageILD" : 0.6633, + "compositeScore" : 0.2759, + "avgLatencyMs" : 932.07 + }, { + "configName" : "후보 100개", + "averageRecall4" : 0.0378, + "averageRecall8" : 0.0889, + "averageRecall30" : 0.2044, + "averageNDCG4" : 0.2189, + "averageNDCG8" : 0.2692, + "averageNDCG30" : 0.2324, + "averageILD" : 0.6657, + "compositeScore" : 0.2764, + "avgLatencyMs" : 944.6 + } ], + "bestByRecall8" : "후보 60개", + "bestByNDCG8" : "후보 60개", + "bestByBalanced" : "후보 60개" +} \ No newline at end of file diff --git a/src/test/resources/evaluation-report-recommendation-phase4.json b/src/test/resources/evaluation-report-recommendation-phase4.json new file mode 100644 index 00000000..42ef68b6 --- /dev/null +++ b/src/test/resources/evaluation-report-recommendation-phase4.json @@ -0,0 +1,87 @@ +{ + "evaluatedAt" : "2026-03-08T20:53:33.0543158", + "groundTruthUsers" : 15, + "testName" : "Lambda 최적화", + "mmrApplied" : true, + "configs" : [ { + "configName" : "λ=0.80", + "averageRecall4" : 0.0267, + "averageRecall8" : 0.0533, + "averageRecall30" : 0.2044, + "averageNDCG4" : 0.1664, + "averageNDCG8" : 0.1714, + "averageNDCG30" : 0.2087, + "averageILD" : 0.6936, + "compositeScore" : 0.2286, + "avgLatencyMs" : 972.0 + }, { + "configName" : "λ=0.85", + "averageRecall4" : 0.0311, + "averageRecall8" : 0.0622, + "averageRecall30" : 0.2, + "averageNDCG4" : 0.1779, + "averageNDCG8" : 0.1887, + "averageNDCG30" : 0.2087, + "averageILD" : 0.6897, + "compositeScore" : 0.2383, + "avgLatencyMs" : 848.87 + }, { + "configName" : "λ=0.90", + "averageRecall4" : 0.0378, + "averageRecall8" : 0.0711, + "averageRecall30" : 0.2044, + "averageNDCG4" : 0.2148, + "averageNDCG8" : 0.2212, + "averageNDCG30" : 0.2209, + "averageILD" : 0.6822, + "compositeScore" : 0.2534, + "avgLatencyMs" : 836.93 + }, { + "configName" : "λ=0.93", + "averageRecall4" : 0.04, + "averageRecall8" : 0.0867, + "averageRecall30" : 0.2022, + "averageNDCG4" : 0.2241, + "averageNDCG8" : 0.2618, + "averageNDCG30" : 0.2268, + "averageILD" : 0.6631, + "compositeScore" : 0.272, + "avgLatencyMs" : 813.2 + }, { + "configName" : "λ=0.95", + "averageRecall4" : 0.04, + "averageRecall8" : 0.0933, + "averageRecall30" : 0.2022, + "averageNDCG4" : 0.229, + "averageNDCG8" : 0.2816, + "averageNDCG30" : 0.2335, + "averageILD" : 0.6506, + "compositeScore" : 0.2801, + "avgLatencyMs" : 808.6 + }, { + "configName" : "λ=0.97", + "averageRecall4" : 0.04, + "averageRecall8" : 0.0911, + "averageRecall30" : 0.2111, + "averageNDCG4" : 0.2324, + "averageNDCG8" : 0.2733, + "averageNDCG30" : 0.2416, + "averageILD" : 0.6375, + "compositeScore" : 0.2733, + "avgLatencyMs" : 799.8 + }, { + "configName" : "λ=1.00", + "averageRecall4" : 0.0356, + "averageRecall8" : 0.0889, + "averageRecall30" : 0.2244, + "averageNDCG4" : 0.213, + "averageNDCG8" : 0.2582, + "averageNDCG30" : 0.25, + "averageILD" : 0.6136, + "compositeScore" : 0.2616, + "avgLatencyMs" : 828.6 + } ], + "bestByRecall8" : "λ=0.95", + "bestByNDCG8" : "λ=0.95", + "bestByBalanced" : "λ=0.95" +} \ No newline at end of file diff --git a/src/test/resources/logback-test.xml b/src/test/resources/logback-test.xml new file mode 100644 index 00000000..e70d203f --- /dev/null +++ b/src/test/resources/logback-test.xml @@ -0,0 +1,21 @@ + + + + + + %d{HH:mm:ss.SSS} %highlight(%-5level) [%cyan(%thread)] %boldWhite(%logger{36}) - %msg%n + UTF-8 + + + + + + + + + + + + + + \ No newline at end of file