Skip to content

Commit 25d7c5d

Browse files
authored
Merge pull request #101 from codellm-devkit/issue-98-crud-in-javaee-apps
Issue 98: CLDK now supports getting crud operations from Java JPA applications
2 parents 2589257 + c4ba0dc commit 25d7c5d

15 files changed

Lines changed: 54017 additions & 178 deletions

File tree

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@
3131

3232
# Don't ignore test fixture zip files
3333
!tests/resources/java/application/daytrader8-1.2.zip
34+
!tests/resources/java/application/plantsbywebsphere.zip
3435

3536
# virtual machine crash logs, see http://www.java.com/en/download/help/error_hotspot.xml
3637
hs_err_pid*

cldk/analysis/java/codeanalyzer/codeanalyzer.py

Lines changed: 131 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -13,33 +13,28 @@
1313
# See the License for the specific language governing permissions and
1414
# limitations under the License.
1515
################################################################################
16-
from itertools import chain, groupby
17-
from pdb import set_trace
18-
import re
1916
import json
17+
import logging
18+
import re
2019
import shlex
21-
import requests
22-
import networkx as nx
23-
from pathlib import Path
2420
import subprocess
25-
from subprocess import CompletedProcess
26-
from urllib.request import urlretrieve
27-
from datetime import datetime
2821
from importlib import resources
22+
from itertools import chain, groupby
23+
from pathlib import Path
24+
from subprocess import CompletedProcess
25+
from typing import Any, Dict, List, Tuple
26+
from typing import Union
2927

28+
import networkx as nx
3029
from networkx import DiGraph
3130

3231
from cldk.analysis import AnalysisLevel
3332
from cldk.analysis.java.treesitter import JavaSitter
3433
from cldk.models.java import JGraphEdges
35-
from cldk.models.java.models import JApplication, JCallable, JField, JMethodDetail, JType, JCompilationUnit, JGraphEdgesST
36-
from typing import Dict, List, Tuple
37-
from typing import Union
38-
34+
from cldk.models.java.enums import CRUDOperationType
35+
from cldk.models.java.models import JApplication, JCRUDOperation, JCallable, JField, JMethodDetail, JType, JCompilationUnit, JGraphEdgesST
3936
from cldk.utils.exceptions.exceptions import CodeanalyzerExecutionException
4037

41-
import logging
42-
4338
logger = logging.getLogger(__name__)
4439

4540

@@ -143,28 +138,31 @@ def _get_codeanalyzer_exec(self) -> List[str]:
143138
with resources.as_file(resources.files("cldk.analysis.java.codeanalyzer.bin") / "codeanalyzer") as codeanalyzer_bin_path:
144139
codeanalyzer_exec = shlex.split(codeanalyzer_bin_path.__str__())
145140
else:
146-
147141
if self.analysis_backend_path:
148142
analysis_backend_path = Path(self.analysis_backend_path)
149143
logger.info(f"Using codeanalyzer jar from {analysis_backend_path}")
150144
codeanalyzer_jar_file = next(analysis_backend_path.rglob("codeanalyzer-*.jar"), None)
145+
if codeanalyzer_jar_file is None:
146+
raise CodeanalyzerExecutionException("Codeanalyzer jar not found in the provided path.")
151147
codeanalyzer_exec = shlex.split(f"java -jar {codeanalyzer_jar_file}")
152148
else:
153-
# Since the path to codeanalyzer.jar was not provided, we'll download the latest version from GitHub.
149+
# Since the path to codeanalyzer.jar was not provided, we will use the default jar from the cldk/analysis/java/codeanalyzer/jar folder.
154150
with resources.as_file(resources.files("cldk.analysis.java.codeanalyzer.jar")) as codeanalyzer_jar_path:
155-
# Download the codeanalyzer jar if it doesn't exist, update if it's outdated,
156-
# do nothing if it's up-to-date.
157151
codeanalyzer_jar_file = next(codeanalyzer_jar_path.rglob("codeanalyzer-*.jar"), None)
158152
codeanalyzer_exec = shlex.split(f"java -jar {codeanalyzer_jar_file}")
159153
return codeanalyzer_exec
160154

161-
def init_japplication(self, data: str) -> JApplication:
155+
@staticmethod
156+
def _init_japplication(data: str) -> JApplication:
162157
"""Return a JApplication given the stringified JSON as input.
163158
Returns
164159
-------
165160
JApplication
166161
The application view of the Java code with the analysis results.
167162
"""
163+
# from ipdb import set_trace
164+
165+
# set_trace()
168166
return JApplication(**json.loads(data))
169167

170168
def _init_codeanalyzer(self, analysis_level=1) -> JApplication:
@@ -197,7 +195,7 @@ def _init_codeanalyzer(self, analysis_level=1) -> JApplication:
197195
text=True,
198196
check=True,
199197
)
200-
return JApplication(**json.loads(console_out.stdout))
198+
return self._init_japplication(console_out.stdout)
201199
except Exception as e:
202200
raise CodeanalyzerExecutionException(str(e)) from e
203201
else:
@@ -217,7 +215,7 @@ def _init_codeanalyzer(self, analysis_level=1) -> JApplication:
217215
# flag is set, we'll run the analysis every time the object is created. This will happen regardless
218216
# of the existence of the analysis file.
219217
# Create the executable command for codeanalyzer.
220-
codeanalyzer_args = codeanalyzer_exec + shlex.split(f"-i {Path(self.project_dir)} --analysis-level={analysis_level} -o {self.analysis_json_path}")
218+
codeanalyzer_args = codeanalyzer_exec + shlex.split(f"-i {Path(self.project_dir)} --analysis-level={analysis_level} -o {self.analysis_json_path} -v")
221219
is_run_code_analyzer = True
222220

223221
if is_run_code_analyzer:
@@ -236,7 +234,7 @@ def _init_codeanalyzer(self, analysis_level=1) -> JApplication:
236234
raise CodeanalyzerExecutionException(str(e)) from e
237235
with open(analysis_json_path_file) as f:
238236
data = json.load(f)
239-
return JApplication(**data)
237+
return self._init_japplication(json.dumps(data))
240238

241239
def _codeanalyzer_single_file(self) -> JApplication:
242240
"""Invokes codeanalyzer in a single file mode.
@@ -248,12 +246,11 @@ def _codeanalyzer_single_file(self) -> JApplication:
248246
codeanalyzer_args = ["--source-analysis", self.source_code]
249247
codeanalyzer_cmd = codeanalyzer_exec + codeanalyzer_args
250248
try:
251-
print(f"Running {' '.join(codeanalyzer_cmd)}")
252249
logger.info(f"Running {' '.join(codeanalyzer_cmd)}")
253250
console_out: CompletedProcess[str] = subprocess.run(codeanalyzer_cmd, capture_output=True, text=True, check=True)
254251
if console_out.returncode != 0:
255252
raise CodeanalyzerExecutionException(console_out.stderr)
256-
return JApplication(**json.loads(console_out.stdout))
253+
return self._init_japplication(console_out.stdout)
257254
except Exception as e:
258255
raise CodeanalyzerExecutionException(str(e)) from e
259256

@@ -870,14 +867,9 @@ def get_all_entry_point_methods(self) -> Dict[str, Dict[str, JCallable]]:
870867
Dict[str, Dict[str, JCallable]]: A dictionary of all entry point methods in the Java code.
871868
"""
872869
methods = chain.from_iterable(
873-
((typename, method, callable)
874-
for method, callable in methods.items() if callable.is_entrypoint)
875-
for typename, methods in self.get_all_methods_in_application().items()
870+
((typename, method, callable) for method, callable in methods.items() if callable.is_entrypoint) for typename, methods in self.get_all_methods_in_application().items()
876871
)
877-
return {
878-
typename: {method: callable for _, method, callable in group}
879-
for typename, group in groupby(methods, key=lambda x: x[0])
880-
}
872+
return {typename: {method: callable for _, method, callable in group} for typename, group in groupby(methods, key=lambda x: x[0])}
881873

882874
def get_all_entry_point_classes(self) -> Dict[str, JType]:
883875
"""Returns a dictionary of all entry point classes in the Java code.
@@ -887,8 +879,110 @@ def get_all_entry_point_classes(self) -> Dict[str, JType]:
887879
with qualified class names as keys.
888880
"""
889881

890-
return {
891-
typename: klass
892-
for typename, klass in self.get_all_classes().items()
893-
if klass.is_entrypoint_class
894-
}
882+
return {typename: klass for typename, klass in self.get_all_classes().items() if klass.is_entrypoint_class}
883+
884+
def get_all_crud_operations(self) -> List[Dict[str, Union[JType, JCallable, List[JCRUDOperation]]]]:
885+
"""Returns a list of all CRUD operations in the source code.
886+
887+
Raises:
888+
NotImplementedError: Raised when current AnalysisEngine does not support this function.
889+
890+
Returns:
891+
List[Dict[str, Union[JType, JCallable, List[JCRUDOperation]]]]: A list of all CRUD operations in the source code.
892+
"""
893+
894+
crud_operations = []
895+
for class_name, class_details in self.get_all_classes().items():
896+
for method_name, method_details in class_details.callable_declarations.items():
897+
if len(method_details.crud_operations) > 0:
898+
crud_operations.append({class_name: class_details, method_name: method_details, "crud_operations": method_details.crud_operations})
899+
return crud_operations
900+
901+
def get_all_read_operations(self) -> List[Dict[str, Union[JType, JCallable, List[JCRUDOperation]]]]:
902+
"""Returns a list of all read operations in the source code.
903+
904+
Raises:
905+
NotImplementedError: Raised when current AnalysisEngine does not support this function.
906+
907+
Returns:
908+
List[Dict[str, Union[JType, JCallable, List[JCRUDOperation]]]]: A list of all read operations in the source code.
909+
"""
910+
crud_read_operations = []
911+
for class_name, class_details in self.get_all_classes().items():
912+
for method_name, method_details in class_details.callable_declarations.items():
913+
if len(method_details.crud_operations) > 0:
914+
crud_read_operations.append(
915+
{
916+
class_name: class_details,
917+
method_name: method_details,
918+
"crud_operations": [crud_op for crud_op in method_details.crud_operations if crud_op.operation_type == CRUDOperationType.READ],
919+
}
920+
)
921+
return crud_read_operations
922+
923+
def get_all_create_operations(self) -> List[Dict[str, Union[JType, JCallable, List[JCRUDOperation]]]]:
924+
"""Returns a list of all create operations in the source code.
925+
926+
Raises:
927+
NotImplementedError: Raised when current AnalysisEngine does not support this function.
928+
929+
Returns:
930+
List[Dict[str, Union[JType, JCallable, List[JCRUDOperation]]]]: A list of all create operations in the source code.
931+
"""
932+
crud_create_operations = []
933+
for class_name, class_details in self.get_all_classes().items():
934+
for method_name, method_details in class_details.callable_declarations.items():
935+
if len(method_details.crud_operations) > 0:
936+
crud_create_operations.append(
937+
{
938+
class_name: class_details,
939+
method_name: method_details,
940+
"crud_operations": [crud_op for crud_op in method_details.crud_operations if crud_op.operation_type == CRUDOperationType.CREATE],
941+
}
942+
)
943+
return crud_create_operations
944+
945+
def get_all_update_operations(self) -> List[Dict[str, Union[JType, JCallable, List[JCRUDOperation]]]]:
946+
"""Returns a list of all update operations in the source code.
947+
948+
Raises:
949+
NotImplementedError: Raised when current AnalysisEngine does not support this function.
950+
951+
Returns:
952+
List[Dict[str, Union[JType, JCallable, List[JCRUDOperation]]]]: A list of all update operations in the source code.
953+
"""
954+
crud_update_operations = []
955+
for class_name, class_details in self.get_all_classes().items():
956+
for method_name, method_details in class_details.callable_declarations.items():
957+
if len(method_details.crud_operations) > 0:
958+
crud_update_operations.append(
959+
{
960+
class_name: class_details,
961+
method_name: method_details,
962+
"crud_operations": [crud_op for crud_op in method_details.crud_operations if crud_op.operation_type == CRUDOperationType.UPDATE],
963+
}
964+
)
965+
966+
return crud_update_operations
967+
968+
def get_all_delete_operations(self) -> List[Dict[str, Union[JType, JCallable, List[JCRUDOperation]]]]:
969+
"""Returns a list of all delete operations in the source code.
970+
971+
Raises:
972+
NotImplementedError: Raised when current AnalysisEngine does not support this function.
973+
974+
Returns:
975+
List[Dict[str, Union[JType, JCallable, List[JCRUDOperation]]]]: A list of all delete operations in the source code.
976+
"""
977+
crud_delete_operations = []
978+
for class_name, class_details in self.get_all_classes().items():
979+
for method_name, method_details in class_details.callable_declarations.items():
980+
if len(method_details.crud_operations) > 0:
981+
crud_delete_operations.append(
982+
{
983+
class_name: class_details,
984+
method_name: method_details,
985+
"crud_operations": [crud_op for crud_op in method_details.crud_operations if crud_op.operation_type == CRUDOperationType.DELETE],
986+
}
987+
)
988+
return crud_delete_operations
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
*.jar
1+
!codeanalyzer-*.jar
26.9 MB
Binary file not shown.

cldk/analysis/java/java_analysis.py

Lines changed: 42 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -19,15 +19,15 @@
1919
"""
2020

2121
from pathlib import Path
22-
from typing import Dict, List, Tuple, Set
22+
from typing import Any, Dict, List, Tuple, Set, Union
2323
from networkx import DiGraph
2424
from tree_sitter import Tree
2525

2626
from cldk.analysis import SymbolTable, CallGraph, AnalysisLevel
2727
from cldk.analysis.java.treesitter import JavaSitter
2828
from cldk.models.java import JCallable
2929
from cldk.models.java import JApplication
30-
from cldk.models.java.models import JCompilationUnit, JMethodDetail, JType, JField
30+
from cldk.models.java.models import JCRUDOperation, JCompilationUnit, JMethodDetail, JType, JField
3131
from cldk.analysis.java.codeanalyzer import JCodeanalyzer
3232
from cldk.analysis.java.codeql import JCodeQL
3333
from cldk.utils.analysis_engine import AnalysisEngine
@@ -619,8 +619,6 @@ def get_calling_lines(self, target_method_name: str) -> List[int]:
619619
List[int]: List of line numbers within the source method code block.
620620
"""
621621

622-
if self.analysis_backend in [AnalysisEngine.CODEQL, AnalysisEngine.TREESITTER]:
623-
raise NotImplementedError("Support for this functionality has not been implemented yet.")
624622
return self.backend.get_calling_lines(self.source_code, target_method_name)
625623

626624
def get_call_targets(self, declared_methods: dict) -> Set[str]:
@@ -629,12 +627,47 @@ def get_call_targets(self, declared_methods: dict) -> Set[str]:
629627
Args:
630628
declared_methods (dict): A dictionary of all declared methods in the class.
631629
632-
Raises:
633-
NotImplementedError: Raised when current AnalysisEngine does not support this function.
634-
635630
Returns:
636631
Set[str]: A set of call targets (methods).
637632
"""
638-
if self.analysis_backend in [AnalysisEngine.CODEQL, AnalysisEngine.TREESITTER]:
639-
raise NotImplementedError("Support for this functionality has not been implemented yet.")
640633
return self.backend.get_call_targets(self.source_code, declared_methods)
634+
635+
def get_all_crud_operations(self) -> List[Dict[str, Union[JType, JCallable, List[JCRUDOperation]]]]:
636+
"""Returns a list of all CRUD operations in the source code.
637+
638+
Returns:
639+
List[Dict[str, Union[JType, JCallable, List[JCRUDOperation]]]]: A list of all CRUD operations in the source code.
640+
"""
641+
return self.backend.get_all_crud_operations()
642+
643+
def get_all_create_operations(self) -> List[Dict[str, Union[JType, JCallable, List[JCRUDOperation]]]]:
644+
"""Returns a list of all create operations in the source code.
645+
646+
Returns:
647+
List[Dict[str, Union[JType, JCallable, List[JCRUDOperation]]]]: A list of all create operations in the source code.
648+
"""
649+
return self.backend.get_all_create_operations()
650+
651+
def get_all_read_operations(self) -> List[Dict[str, Union[JType, JCallable, List[JCRUDOperation]]]]:
652+
"""Returns a list of all read operations in the source code.
653+
654+
Returns:
655+
List[Dict[str, Union[JType, JCallable, List[JCRUDOperation]]]]: A list of all read operations in the source code.
656+
"""
657+
return self.backend.get_all_read_operations()
658+
659+
def get_all_update_operations(self) -> List[Dict[str, Union[JType, JCallable, List[JCRUDOperation]]]]:
660+
"""Returns a list of all update operations in the source code.
661+
662+
Returns:
663+
List[Dict[str, Union[JType, JCallable, List[JCRUDOperation]]]]: A list of all update operations in the source code.
664+
"""
665+
return self.backend.get_all_update_operations()
666+
667+
def get_all_delete_operations(self) -> List[Dict[str, Union[JType, JCallable, List[JCRUDOperation]]]]:
668+
"""Returns a list of all delete operations in the source code.
669+
670+
Returns:
671+
List[Dict[str, Union[JType, JCallable, List[JCRUDOperation]]]]: A list of all delete operations in the source code.
672+
"""
673+
return self.backend.get_all_delete_operations()

cldk/models/java/__init__.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,4 +26,6 @@
2626
JGraphEdges,
2727
)
2828

29-
__all__ = ["JApplication", "JCallable", "JType", "JCompilationUnit", "JGraphEdges", "ConstantsNamespace"]
29+
from .enums import CRUDOperationType, CRUDQueryType
30+
31+
__all__ = ["JApplication", "JCallable", "JType", "JCompilationUnit", "JGraphEdges", "CRUDOperationType", "CRUDQueryType"]

cldk/models/java/enums.py

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
from enum import Enum
2+
3+
4+
class CRUDOperationType(Enum):
5+
"""An enumeration of CRUD operation types.
6+
7+
Attributes:
8+
CREATE (str): The create operation type.
9+
READ (str): The read operation type.
10+
UPDATE (str): The update operation type.
11+
DELETE (str): The delete operation type.
12+
"""
13+
CREATE = "CREATE"
14+
READ = "READ"
15+
UPDATE = "UPDATE"
16+
DELETE = "DELETE"
17+
18+
class CRUDQueryType(Enum):
19+
"""An enumeration of CRUD query types.
20+
21+
Attributes:
22+
READ (str): The read query type.
23+
WRITE (str): The write query type.
24+
NAMED (str): The named query type.
25+
"""
26+
READ = "READ"
27+
WRITE = "WRITE"
28+
NAMED = "NAMED"

0 commit comments

Comments
 (0)