Skip to content

Commit 0a889da

Browse files
Rangeet Pan
authored and committed
add support for symbol table call graph for getting callees and callers
1 parent e4524d5 commit 0a889da

5 files changed

Lines changed: 197 additions & 20 deletions

File tree

cldk/analysis/java/codeanalyzer/codeanalyzer.py

Lines changed: 115 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -399,7 +399,7 @@ def get_call_graph_json(self) -> str:
399399
callgraph_list.append(callgraph_dict)
400400
return json.dumps(callgraph_list)
401401

402-
def get_all_callers(self, target_class_name: str, target_method_signature: str) -> Dict:
402+
def get_all_callers(self, target_class_name: str, target_method_signature: str, using_symbol_table: bool) -> Dict:
403403
"""
404404
Get all the caller details for a given java method.
405405
@@ -410,27 +410,33 @@ def get_all_callers(self, target_class_name: str, target_method_signature: str)
410410
"""
411411

412412
caller_detail_dict = {}
413-
if (target_method_signature, target_class_name) not in self.call_graph.nodes():
413+
call_graph = None
414+
if using_symbol_table:
415+
call_graph = self.__raw_call_graph_using_symbol_table_target_method(target_class_name=target_class_name,
416+
target_method_signature=target_method_signature)
417+
else:
418+
call_graph = self.call_graph
419+
if (target_method_signature, target_class_name) not in call_graph.nodes():
414420
return caller_detail_dict
415421

416-
in_edge_view = self.call_graph.in_edges(
422+
in_edge_view = call_graph.in_edges(
417423
nbunch=(
418424
target_method_signature,
419425
target_class_name,
420426
),
421427
data=True,
422428
)
423429
caller_detail_dict["caller_details"] = []
424-
caller_detail_dict["target_method"] = self.call_graph.nodes[(target_method_signature, target_class_name)][
430+
caller_detail_dict["target_method"] = call_graph.nodes[(target_method_signature, target_class_name)][
425431
"method_detail"]
426432

427433
for source, target, data in in_edge_view:
428-
cm = {"caller_method": self.call_graph.nodes[source]["method_detail"],
434+
cm = {"caller_method": call_graph.nodes[source]["method_detail"],
429435
"calling_lines": data["calling_lines"]}
430436
caller_detail_dict["caller_details"].append(cm)
431437
return caller_detail_dict
432438

433-
def get_all_callees(self, source_class_name: str, source_method_signature: str) -> Dict:
439+
def get_all_callees(self, source_class_name: str, source_method_signature: str, using_symbol_table: bool) -> Dict:
434440
"""
435441
Get all the callee details for a given java method.
436442
@@ -440,16 +446,22 @@ def get_all_callees(self, source_class_name: str, source_method_signature: str)
440446
Callee details in a dictionary.
441447
"""
442448
callee_detail_dict = {}
443-
if (source_method_signature, source_class_name) not in self.call_graph.nodes():
449+
call_graph = None
450+
if using_symbol_table:
451+
call_graph = self.__call_graph_using_symbol_table(qualified_class_name=source_class_name,
452+
method_signature=source_method_signature)
453+
else:
454+
call_graph = self.call_graph
455+
if (source_method_signature, source_class_name) not in call_graph.nodes():
444456
return callee_detail_dict
445457

446-
out_edge_view = self.call_graph.out_edges(nbunch=(source_method_signature, source_class_name), data=True)
458+
out_edge_view = call_graph.out_edges(nbunch=(source_method_signature, source_class_name), data=True)
447459

448460
callee_detail_dict["callee_details"] = []
449-
callee_detail_dict["source_method"] = self.call_graph.nodes[(source_method_signature, source_class_name)][
461+
callee_detail_dict["source_method"] = call_graph.nodes[(source_method_signature, source_class_name)][
450462
"method_detail"]
451463
for source, target, data in out_edge_view:
452-
cm = {"callee_method": self.call_graph.nodes[target]["method_detail"]}
464+
cm = {"callee_method": call_graph.nodes[target]["method_detail"]}
453465
cm["calling_lines"] = data["calling_lines"]
454466
callee_detail_dict["callee_details"].append(cm)
455467
return callee_detail_dict
@@ -738,9 +750,23 @@ def get_class_call_graph_using_symbol_table(self, qualified_class_name: str,
738750

739751
def __call_graph_using_symbol_table(self,
740752
qualified_class_name: str,
741-
method_signature: str):
753+
method_signature: str, is_target_method: bool = False)-> DiGraph:
754+
"""
755+
Generate call graph using symbol table
756+
Args:
757+
qualified_class_name: qualified class name
758+
method_signature: method signature
759+
is_target_method: is the input method is a target method. By default, it is the source method
760+
761+
Returns:
762+
DiGraph: call graph
763+
"""
742764
cg = nx.DiGraph()
743-
sdg = self.__raw_call_graph_using_symbol_table(qualified_class_name=qualified_class_name,
765+
sdg = None
766+
if is_target_method:
767+
sdg = None
768+
else:
769+
sdg = self.__raw_call_graph_using_symbol_table(qualified_class_name=qualified_class_name,
744770
method_signature=method_signature)
745771
tsu = JavaSitter()
746772
edge_list = [
@@ -767,6 +793,83 @@ def __call_graph_using_symbol_table(self,
767793
cg.add_edges_from(edge_list)
768794
return cg
769795

796+
def __raw_call_graph_using_symbol_table_target_method(self,
                                                      target_class_name: str,
                                                      target_method_signature: str,
                                                      cg: list[JGraphEdgesST] | None = None) -> list[JGraphEdgesST]:
    """
    Collect the call edges whose callee is the given target method, using symbol table information.

    Every call site of every method of every class returned by ``get_all_classes`` is inspected;
    a method is recorded as a caller when one of its call sites matches the target signature and
    is either dispatched on the target class or located inside the target class itself.

    Args:
        target_class_name: qualified class name of the class declaring the target method
        target_method_signature: signature of the target (callee) method
        cg: optional list of already collected call edges; it is extended in place.
            A fresh list is created when omitted.

    Returns:
        list[JGraphEdgesST]: list of call edges, each going from a caller to the target method

    NOTE(review): the result is a plain list of edges, not a graph object; a caller that needs
    graph operations such as ``nodes()`` / ``in_edges()`` has to build the graph from it first.
    """
    # A mutable default ([]) would be shared by every call and leak edges between queries.
    if cg is None:
        cg = []

    target_method_details = self.get_method(qualified_class_name=target_class_name,
                                            method_signature=target_method_signature)
    if target_method_details is None:
        # No target to point edges at.
        # NOTE(review): assumes get_method yields None for an unknown method -- confirm.
        return cg

    # Matches a dotted qualified name (replaced by its last segment) or a generic
    # argument list <...> (dropped). Compiled once instead of once per call site.
    qualified_name_or_generics = re.compile(r'\b(?:[a-zA-Z_][\w\.]*\.)+([a-zA-Z_][\w]*)\b|<[^>]*>')

    def simplify_signature(signature: str) -> str:
        """Strip package qualifiers and generics from the parameter types of a signature."""
        start = signature.find('(') + 1
        end = signature.rfind(')')
        parameters = signature[start:end].split(',')
        simplified = [qualified_name_or_generics.sub(r'\1', parameter.strip()) for parameter in parameters]
        return f"{signature[:start]}{', '.join(simplified)}{signature[end:]}"

    for class_name in self.get_all_classes():
        declared_in_target_class = class_name == target_class_name
        for method in self.get_all_methods_in_class(qualified_class_name=class_name):
            method_details = self.get_method(qualified_class_name=class_name,
                                             method_signature=method)
            for call_site in method_details.call_sites:
                callee_signature = ''
                if call_site.callee_signature != '':
                    callee_signature = simplify_signature(call_site.callee_signature)
                if callee_signature != target_method_signature:
                    continue

                # Case 1: the call is dispatched on a known class that is the target class.
                receiver_type = call_site.receiver_type
                called_on_target_class = (receiver_type != ""
                                          and receiver_type == target_class_name
                                          and bool(self.get_class(qualified_class_name=receiver_type)))
                # Case 2: the call site lives inside the target class itself. This covers both
                # private calls and calls whose receiver type is blank.
                if not (called_on_target_class or declared_in_target_class):
                    continue

                # method_details already describes the calling method, so it is reused
                # rather than looked up a second time.
                call_edge = JGraphEdgesST(
                    source=JMethodDetail(method_declaration=method_details.declaration,
                                         klass=class_name,
                                         method=method_details),
                    target=JMethodDetail(method_declaration=target_method_details.declaration,
                                         klass=target_class_name,
                                         method=target_method_details),
                    type='CALL_DEP',
                    weight='1')
                if call_edge not in cg:
                    cg.append(call_edge)
    return cg
872+
770873
def __raw_call_graph_using_symbol_table(self,
771874
qualified_class_name: str,
772875
method_signature: str,

cldk/analysis/java/java.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -157,7 +157,8 @@ def get_call_graph_json(self) -> str:
157157
raise NotImplementedError("Producing a call graph over a single file is not implemented yet.")
158158
return self.backend.get_call_graph_json()
159159

160-
def get_callers(self, target_class_name: str, target_method_declaration: str):
160+
def get_callers(self, target_class_name: str, target_method_declaration: str,
161+
using_symbol_table: bool = False) -> Dict:
161162
"""
162163
Get all the caller details for a given java method.
163164
@@ -168,7 +169,7 @@ def get_callers(self, target_class_name: str, target_method_declaration: str):
168169
"""
169170
if self.source_code:
170171
raise NotImplementedError("Generating all callers over a single file is not implemented yet.")
171-
return self.backend.get_all_callers(target_class_name, target_method_declaration)
172+
return self.backend.get_all_callers(target_class_name, target_method_declaration, using_symbol_table)
172173

173174
def get_callees(self, source_class_name: str, source_method_declaration: str):
174175
"""

cldk/core.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ def analysis(
3737
analysis_backend: str | None = "codeanalyzer",
3838
analysis_level: str = "symbol_table",
3939
analysis_backend_path: str | None = None,
40-
analysis_json_path: str | Path | None = None,
40+
analysis_json_path: str | Path = '.',
4141
use_graalvm_binary: bool = False,
4242
) -> JavaAnalysis:
4343
"""

docs/examples/java/generate_unit_tests.ipynb

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,6 @@
1919
"execution_count": null,
2020
"outputs": [],
2121
"source": [
22-
"from pathlib import Path\n",
2322
"import ollama\n",
2423
"from cldk import CLDK\n",
2524
"from cldk.analysis import AnalysisLevel"
@@ -94,7 +93,7 @@
9493
{
9594
"cell_type": "markdown",
9695
"source": [
97-
"(Step 3) Third, collect all the information needed for each method. "
96+
"(Step 4) Fourth, collect all the information needed for each method. In this process, we go through all the classes in the application, and then for each class, we collect the signature of all the constructors. If there is no constructor present, we add the signature of the default constructor. Then, we go through all the non-private methods of the class and formulate the prompt using the constructor and the method information. Finally, we use the prompt to call the LLM and get the final output."
9897
],
9998
"metadata": {
10099
"collapsed": false

docs/examples/java/validating_code_translation.ipynb

Lines changed: 77 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,88 @@
11
{
22
"cells": [
3+
{
4+
"cell_type": "markdown",
5+
"source": [
6+
"Code translation aims to convert source code from one programming language (PL) to another. Given the promising abilities of large language models (LLMs) in code synthesis, researchers are exploring their potential to automate code translation. In our recent paper [https://dl.acm.org/doi/10.1145/3597503.3639226] published at ICSE'24, we found that LLM-based code translation is very promising. In this example, we will walk through the steps of translating each Java class to Python and checking various properties of translated code, such as the number of methods, number of fields, formal arguments, etc.\n",
7+
"\n",
8+
"(Step 1) First, we will import all the necessary libraries"
9+
],
10+
"metadata": {
11+
"collapsed": false
12+
},
13+
"id": "47af1410ab0a3b4d"
14+
},
15+
{
16+
"cell_type": "code",
17+
"execution_count": null,
18+
"outputs": [],
19+
"source": [
20+
"import ollama\n",
21+
"from cldk import CLDK\n",
22+
"from cldk.analysis import AnalysisLevel"
23+
],
24+
"metadata": {
25+
"collapsed": false
26+
},
27+
"id": "47a78f61a53b2b55"
28+
},
29+
{
30+
"cell_type": "markdown",
31+
"source": [
32+
"(Step 2) Second, we will form the prompt for the model, which will include the body of the Java class after removing all the comments and the import statements."
33+
],
34+
"metadata": {
35+
"collapsed": false
36+
},
37+
"id": "c6d2f67e1a17cf1"
38+
},
339
{
440
"cell_type": "code",
541
"execution_count": null,
6-
"id": "initial_id",
42+
"outputs": [],
43+
"source": [
44+
"def format_inst(code, focal_class, language):\n",
45+
" \"\"\"\n",
46+
" Format the instruction for the given focal method and class.\n",
47+
" \"\"\"\n",
48+
" inst = f\"Question: Can you translate the Java class `{focal_class}` below to Python and generate under code block (```)?\\n\"\n",
49+
"\n",
50+
" inst += \"\\n\"\n",
51+
" inst += f\"```{language}\\n\"\n",
52+
" inst += code\n",
53+
" inst += \"```\" if code.endswith(\"\\n\") else \"\\n```\"\n",
54+
" inst += \"\\n\"\n",
55+
" return inst"
56+
],
57+
"metadata": {
58+
"collapsed": false
59+
},
60+
"id": "dc1ec56e92e90c15"
61+
},
62+
{
63+
"cell_type": "markdown",
64+
"source": [
65+
"(Step 3) Create a function to call an LLM. There are various ways to achieve that. However, for illustrative purposes, we use ollama, a library to communicate with models downloaded locally."
66+
],
767
"metadata": {
8-
"collapsed": true
68+
"collapsed": false
969
},
70+
"id": "1239041c3315e5e5"
71+
},
72+
{
73+
"cell_type": "code",
74+
"execution_count": null,
1075
"outputs": [],
11-
"source": []
76+
"source": [
77+
"def prompt_ollama(message: str, model_id: str = \"granite-code:8b-instruct\") -> str:\n",
78+
" \"\"\"Prompt local model on Ollama\"\"\"\n",
79+
" response_object = ollama.generate(model=model_id, prompt=message)\n",
80+
" return response_object[\"response\"]"
81+
],
82+
"metadata": {
83+
"collapsed": false
84+
},
85+
"id": "1c86224032a6eb70"
1286
}
1387
],
1488
"metadata": {

0 commit comments

Comments
 (0)