37 commits
07a3268  NCU profiling wrapper generation and execution (Jan 7, 2026)
3c4b124  Refactor profiling components and add kernel_perf_util (Jan 7, 2026)
11f4e79  Refactor profiling components and add kernel_perf_util (Jan 7, 2026)
251f419  Refactor profiling components and add kernel_perf_util (Jan 7, 2026)
b789660  update directory name and add package in pyproject (Jan 7, 2026)
4d35d57  Remove kernel_perf_util directory (Jan 7, 2026)
d871678  move gpu spec.py to future PR and fix import (Jan 7, 2026)
db0c754  Add copyright header (Jan 7, 2026)
cd29759  fix ruff (Jan 7, 2026)
bbfa6cd  address previous comments (Jan 13, 2026)
543453a  fix ruff (Jan 13, 2026)
706c9cc  Add unified benchmarking module for kernel performance measurement (Jan 8, 2026)
4febdd6  Introducing benchmarking infra for kernel performance (Jan 8, 2026)
d92a7b7  fix ruff (Jan 9, 2026)
2994315  fix ruff (Jan 9, 2026)
1378fc3  address comments (Jan 14, 2026)
45fec80  Diagnose module - prompt constructor (Jan 11, 2026)
b640cde  Refactors the diagnose_prompt module into a modular architecture (Jan 13, 2026)
e952123  fix diff issue (Jan 13, 2026)
e7ba29a  fix ruff issue (Jan 13, 2026)
72ac4d1  fix (Jan 15, 2026)
e2c599e  fix ruff (Jan 15, 2026)
8ab907c  Merge branch 'main' into kaiming/opt_component_3 (kaiming-cheng, Jan 27, 2026)
e350802  fix gpu_spec based on feedback and remove judger_prompt for future PR (Jan 29, 2026)
8541299  Remove judger_prompts.py changes from this PR (Jan 29, 2026)
313a84f  Merge branch 'main' into kaiming/opt_component_3 (kaiming-cheng, Jan 29, 2026)
9e608ac  Update gpu_specs_database.py (kaiming-cheng, Jan 29, 2026)
f3220e1  address feedback (Jan 29, 2026)
4443f33  ruff fix (Jan 29, 2026)
b12b138  Merge branch 'main' into kaiming/opt_component_3 (kaiming-cheng, Jan 29, 2026)
31d0d70  introduce roofline analyzer (Jan 29, 2026)
3c607b5  update doc string in init and fix ncu_roofline (Jan 29, 2026)
1aad0ad  introduce judger prompt (Jan 31, 2026)
c0bd09c  add optimization template (Jan 31, 2026)
56fba36  update prompt manager (Jan 31, 2026)
c4a3641  Introduce bottleneck_analyzer (Feb 2, 2026)
672fe42  Merge branch 'main' into kaiming/prescriber (kaiming-cheng, Feb 16, 2026)
19 changes: 19 additions & 0 deletions triton_kernel_agent/opt_worker_component/prescribing/__init__.py
@@ -0,0 +1,19 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Prescribing module for kernel optimization."""

from .bottleneck_analyzer import BottleneckAnalyzer

__all__ = ["BottleneckAnalyzer"]
159 changes: 159 additions & 0 deletions triton_kernel_agent/opt_worker_component/prescribing/bottleneck_analyzer.py
@@ -0,0 +1,159 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""
Bottleneck Analyzer - LLM-based NCU profiling analysis.

This module orchestrates LLM calls for bottleneck analysis using:
- judger_prompt.py: Prompt template, parsing, BottleneckResult dataclass
- ncu_roofline.py: Roofline analysis using NCU SOL metrics

Bottleneck Categories:
- memory: Memory bandwidth is the limiting factor
- compute: Compute throughput is the limiting factor
- underutilized: Neither memory nor compute is saturated (both below 60%), indicating stalls or occupancy issues
"""

from __future__ import annotations

import logging
from pathlib import Path
from typing import Any

from kernel_perf_agent.kernel_opt.diagnose_prompt.judger_prompt import (
    BottleneckResult,
    build_bottleneck_prompt,
    parse_bottleneck_response,
)
from kernel_perf_agent.kernel_opt.roofline.ncu_roofline import RooflineAnalyzer
from triton_kernel_agent.worker_util import _call_llm, _save_debug_file
[Review comment, Contributor] This file doesn't exist anymore fyi
from utils.providers.base import BaseProvider


class BottleneckAnalyzer:
    """LLM-based bottleneck analyzer using NCU metrics."""

    def __init__(
        self,
        provider: BaseProvider,
        model: str,
        gpu_specs: dict[str, Any],
        logs_dir: Path | None = None,
        logger: logging.Logger | None = None,
        num_bottlenecks: int = 1,
        num_causes: int = 2,
        num_fixes: int = 1,
        enable_debug: bool = True,
    ):
        """
        Initialize the bottleneck analyzer.

        Args:
            provider: LLM provider instance
            model: Model name for LLM calls
            gpu_specs: GPU hardware specifications
            logs_dir: Directory for saving debug files
            logger: Logger instance
            num_bottlenecks: Number of bottlenecks to request from the LLM
            num_causes: Number of root causes per bottleneck
            num_fixes: Number of recommended fixes per bottleneck
            enable_debug: Whether to save debug files (prompts/responses)
        """
        self.provider = provider
        self.model = model
        self.gpu_specs = gpu_specs
        self.logs_dir = logs_dir
        self.logger = logger or logging.getLogger(__name__)
        self.num_bottlenecks = num_bottlenecks
        self.num_causes = num_causes
        self.num_fixes = num_fixes
        self.enable_debug = enable_debug
        # Pass the resolved logger so RooflineAnalyzer never receives None.
        self.roofline = RooflineAnalyzer(logger=self.logger)

    def analyze(
        self,
        kernel_code: str,
        ncu_metrics: dict[str, Any],
        round_num: int = 0,
        roofline_result: Any = None,
    ) -> list[BottleneckResult]:
        """
        Analyze kernel bottlenecks using the LLM.

        Args:
            kernel_code: The Triton kernel source code
            ncu_metrics: NCU profiling metrics dictionary
            round_num: Current optimization round (for logging)
            roofline_result: Pre-computed RooflineResult (if None, computed internally)

        Returns:
            List of BottleneckResult, ordered by importance.
            Empty list if analysis fails.
        """
        if roofline_result is None:
            # Filter out PyTorch kernels (at::*) to isolate the Triton kernel metrics
            if ncu_metrics:
                triton_kernels = {
                    name: metrics
                    for name, metrics in ncu_metrics.items()
                    if not name.startswith("at::") and not name.startswith("void at::")
                }
                # Use the first Triton kernel's metrics; fall back to the first
                # entry overall if no Triton kernel was captured.
                flat_metrics = (
                    next(iter(triton_kernels.values()))
                    if triton_kernels
                    else next(iter(ncu_metrics.values()), {})
                )
            else:
                flat_metrics = {}
            roofline_result = self.roofline.analyze(flat_metrics)

        prompt = build_bottleneck_prompt(
            kernel_code=kernel_code,
            ncu_metrics=ncu_metrics,
            roofline=roofline_result,
            gpu_specs=self.gpu_specs,
            num_bottlenecks=self.num_bottlenecks,
            num_causes=self.num_causes,
            num_fixes=self.num_fixes,
        )

        response = _call_llm(
            provider=self.provider,
            model=self.model,
            messages=[{"role": "user", "content": prompt}],
            logger=self.logger,
            max_tokens=16384,
[Review comment, Contributor] Why this max_tokens?
        )

        if self.enable_debug and self.logs_dir:
            _save_debug_file(
                self.logs_dir / f"round{round_num:03d}_bottleneck_prompt.txt",
                prompt,
                self.logger,
            )
            _save_debug_file(
                self.logs_dir / f"round{round_num:03d}_bottleneck_response.txt",
                response,
                self.logger,
            )

        results = parse_bottleneck_response(response)

        if results:
            categories = [r.category for r in results]
            self.logger.info(f"[{round_num}] Bottlenecks: {', '.join(categories)}")
        else:
            self.logger.warning(f"[{round_num}] Failed to parse bottleneck response")

        return results
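
To make analyze() concrete, a hedged end-to-end sketch reusing the analyzer built in the earlier snippet. The kernel names and the single NCU metric key below are illustrative stand-ins for real profiler output, which carries many more fields:

from pathlib import Path

# The "void at::..." entry is filtered out as a PyTorch kernel; the Triton
# kernel's metrics feed the roofline analysis. Names and keys are invented examples.
ncu_metrics = {
    "void at::native::vectorized_elementwise_kernel": {
        "dram__throughput.avg.pct_of_peak_sustained_elapsed": 12.0,
    },
    "triton_add_kernel": {
        "dram__throughput.avg.pct_of_peak_sustained_elapsed": 85.0,
    },
}

results = analyzer.analyze(
    kernel_code=Path("kernel.py").read_text(),  # hypothetical kernel source file
    ncu_metrics=ncu_metrics,
    round_num=1,
)
for r in results:
    print(r.category)  # "memory", "compute", or "underutilized"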