-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathjava_cfg_builder.py
More file actions
3060 lines (2460 loc) · 135 KB
/
Copy pathjava_cfg_builder.py
File metadata and controls
3060 lines (2460 loc) · 135 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
import subprocess
import sys
import json
import tempfile
import os
from pathlib import Path
from loguru import logger
from typing import Dict, List, Set, Optional, Tuple, Any
import copy
import re
class JavaCFG:
def __init__(self, source_path: str, target_method: str = None, target_class: str = None):
    """Build a function-level control-flow graph for one Java method.

    Args:
        source_path: Path of the Java source file to analyse.
        target_method: Bare method name (no parameter list). When omitted,
            the first method recorded for the target class is used.
        target_class: Class name. When omitted, the first class found in
            the file is used.

    Raises:
        ValueError: If the requested class or method was not found by the
            parser, or if no method could be selected at all.
    """
    self.source_path = source_path
    self.source_code = Path(source_path).read_text(encoding='utf-8')
    self.source_lines = self.source_code.splitlines()

    # Reserved words; used to reject regex hits such as "else if (...) {"
    # that look like method declarations.
    self.java_keywords = {
        'abstract', 'assert', 'boolean', 'break', 'byte',
        'case', 'catch', 'char', 'class', 'const',
        'continue', 'default', 'do', 'double', 'else',
        'enum', 'extends', 'final', 'finally', 'float',
        'for', 'goto', 'if', 'implements', 'import',
        'instanceof', 'int', 'interface', 'long', 'native',
        'new', 'package', 'private', 'protected', 'public',
        'return', 'short', 'static', 'strictfp', 'super',
        'switch', 'synchronized', 'this', 'throw', 'throws',
        'transient', 'try', 'void', 'volatile', 'while',
    }

    # Parse once, then expose the class and method tables separately.
    self.java_ast = self._parse_java_ast()
    self.all_classes = self._parse_all_classes()
    self.all_methods = self._parse_all_methods()

    # Resolve the target class.
    if target_class:
        if target_class not in self.all_classes:
            raise ValueError(f"目标类 '{target_class}' 在源代码中未找到")
        self.target_class = target_class
    else:
        self.target_class = next(iter(self.all_classes), None)

    # Resolve the target method.
    if target_method:
        if target_method not in self.all_methods:
            raise ValueError(f"目标方法 '{target_method}' 在源代码中未找到")
        self.target_method = target_method
    else:
        # Default to the first method recorded for the target class.
        self.target_method = next(
            (name for name, info in self.all_methods.items()
             if info['class'] == self.target_class),
            None,
        )
    if not self.target_method:
        raise ValueError("未找到任何方法定义")

    # Graph storage.
    self.blocks = []
    self.connections = []
    self.method_signature = self._get_method_signature(self.target_method)

    # Context stacks consulted while walking statements.
    self.loop_stack = []  # enclosing loops, for break/continue
    self.try_stack = []   # enclosing try statements, for exception edges

    self._build_complete_cfg()

    # Derived views of the finished graph.
    self.cfg_text = self._generate_cfg_text()
    self.block_num = len(self.blocks)
    self.block_code_list = [entry['code'] for entry in self.blocks]
def _parse_java_ast(self) -> Dict:
"""使用改进的Java解析方法"""
return self._improved_parse()
def _improved_parse(self) -> Dict:
"""改进的Java代码解析方法"""
classes = {}
methods = {}
# 解析类定义 - 更精确的正则表达式
class_pattern = r'(?:public\s+|private\s+|protected\s+)?(?:abstract\s+|final\s+)?class\s+(\w+)(?:\s+extends\s+\w+)?(?:\s+implements\s+[\w,\s]+)?\s*\{'
for match in re.finditer(class_pattern, self.source_code):
class_name = match.group(1)
classes[class_name] = {
'name': class_name,
'start_line': self.source_code[:match.start()].count('\n') + 1,
'start_pos': match.start(),
'end_pos': self._find_class_end(match.start())
}
# 解析方法定义 - 更精确的正则表达式
method_pattern = r'(?:public\s+|private\s+|protected\s+)?(?:static\s+)?(?:final\s+)?(?:\w+(?:\[\])?)\s+(\w+)\s*\([^)]*\)\s*(?:throws\s+[\w,\s]+)?\s*\{'
for match in re.finditer(method_pattern, self.source_code):
method_name = match.group(1)
# 过滤Java关键字和常见的非方法名
if method_name in self.java_keywords:
continue
method_line = self.source_code[:match.start()].count('\n') + 1
# 找到方法所属的类
belonging_class = None
for class_name, class_info in classes.items():
if (match.start() > class_info['start_pos'] and
match.start() < class_info['end_pos']):
belonging_class = class_name
break
if belonging_class: # 只添加属于某个类的方法
methods[method_name] = {
'name': method_name,
'class': belonging_class,
'start_line': method_line,
'body_start': match.end(),
'body_end': self._find_method_end(match.start())
}
return {
'classes': classes,
'methods': methods,
'source_lines': self.source_lines
}
def _find_class_end(self, start_pos: int) -> int:
"""找到类定义的结束位置"""
brace_count = 0
in_class = False
for i in range(start_pos, len(self.source_code)):
char = self.source_code[i]
if char == '{':
in_class = True
brace_count += 1
elif char == '}' and in_class:
brace_count -= 1
if brace_count == 0:
return i
return len(self.source_code)
def _find_method_end(self, start_pos: int) -> int:
"""找到方法定义的结束位置"""
brace_count = 0
in_method = False
for i in range(start_pos, len(self.source_code)):
char = self.source_code[i]
if char == '{':
in_method = True
brace_count += 1
elif char == '}' and in_method:
brace_count -= 1
if brace_count == 0:
return i
return len(self.source_code)
def _parse_all_classes(self) -> Dict[str, Dict]:
"""解析所有类定义"""
return self.java_ast.get('classes', {})
def _parse_all_methods(self) -> Dict[str, Dict]:
"""解析所有方法定义"""
return self.java_ast.get('methods', {})
def _get_method_signature(self, method_name: str) -> str:
"""获取带参数的方法签名"""
if method_name in self.all_methods:
method_info = self.all_methods[method_name]
class_name = method_info.get('class', '')
return f"{class_name}.{method_name}()"
return f"{method_name}()"
def _build_complete_cfg(self):
    """Build the whole graph for the target method.

    First the target method is expanded into blocks; once every block
    exists, the control-structure connections are added in one pass.
    """
    in_progress = set()
    self._build_method_cfg(self.target_method, in_progress)
    # Control-structure edges are added only after all blocks exist.
    self._add_java_control_structure_connections()
def _build_method_cfg(self, method_name: str, visited_methods: Set[str]):
"""递归构建方法的CFG"""
if method_name in visited_methods:
# #logger.warning(f"检测到递归调用: {method_name}")
return
if method_name not in self.all_methods:
# #logger.warning(f"方法 {method_name} 未找到定义,跳过")
return
visited_methods.add(method_name)
method_info = self.all_methods[method_name]
# #logger.info(f"🏗️ 处理方法: {method_name}")
# #logger.info(f"📋 Method info keys: {list(method_info.keys())}")
# #logger.info(f"📋 Method info: body_start={method_info['body_start']}, body_end={method_info['body_end']}")
# 从源代码中提取方法体语句
body_start = method_info['body_start']
body_end = method_info['body_end']
method_body = self.source_code[body_start:body_end]
# #logger.info(f"📝 Method body content: {method_body[:200]}...")
# 将方法体分解为语句
statements = self._extract_statements_from_body(method_body)
# #logger.info(f"📋 方法 {method_name} 包含 {len(statements)} 个语句")
# 显示前几个语句
# for i, stmt in enumerate(statements[:10]):
# #logger.info(f"📝 语句 {i}: '{stmt.strip()}'")
# 解析方法体
main_blocks = self._process_java_statements(statements, visited_methods, method_name)
# 处理方法调用
self._process_method_calls_in_blocks(visited_methods)
# 完成try-catch-finally连接
self._finalize_try_catch_finally()
visited_methods.remove(method_name)
def _extract_statements_from_body(self, method_body: str) -> List[str]:
"""从方法体字符串中提取语句,参考Python CFG构建器的逻辑,每行一个语句"""
# #logger.info(f"🔍 Extracting statements from method body...")
# 去掉开头和结尾的大括号
method_body = method_body.strip()
if method_body.startswith('{'):
method_body = method_body[1:]
if method_body.endswith('}'):
method_body = method_body[:-1]
# 按行分割
lines = method_body.split('\n')
statements = []
for line in lines:
line = line.strip()
# 过滤掉空行和注释行
if not line:
continue
if line.startswith('//'):
continue
if line.startswith('/*') or line.startswith('*'):
continue
if line == '}' or line == '{':
continue
# 去掉行内注释
line = self._remove_inline_comments(line)
if not line.strip():
continue
# 检查是否是控制结构语句
is_control_structure = any(line.startswith(keyword) for keyword in
['if ', 'else', 'for ', 'while ', 'do ', 'switch ', 'try', 'catch', 'finally'])
# 对于控制结构,只提取条件部分
if is_control_structure and '{' in line:
brace_pos = line.find('{')
control_condition = line[:brace_pos + 1].strip()
statements.append(control_condition)
# 处理{后面的内容(如果有的话)
remaining_content = line[brace_pos + 1:].strip()
if remaining_content and remaining_content != '}':
statements.append(remaining_content)
else:
# 普通语句,直接添加
statements.append(line)
# #logger.info(f"✅ Extracted {len(statements)} statements")
return statements
def _remove_inline_comments(self, line: str) -> str:
"""移除行内注释,但要注意字符串中的//"""
in_string = False
quote_char = None
i = 0
while i < len(line):
char = line[i]
if not in_string:
if char in ['"', "'"]:
in_string = True
quote_char = char
elif char == '/' and i + 1 < len(line) and line[i + 1] == '/':
# 找到注释,返回注释前的部分
return line[:i].strip()
else:
if char == quote_char and (i == 0 or line[i-1] != '\\'):
in_string = False
quote_char = None
i += 1
return line
def _extract_method_body(self, method_info: Dict) -> List[str]:
"""提取方法体的语句"""
start_line = method_info['start_line']
# 找到方法体的开始和结束
lines = []
brace_count = 0
in_method_body = False
for i, line in enumerate(self.source_lines[start_line - 1:], start=start_line):
stripped = line.strip()
# 跳过空行和注释
if not stripped or stripped.startswith('//') or stripped.startswith('/*'):
continue
if not in_method_body and '{' in line:
in_method_body = True
brace_count += line.count('{') - line.count('}')
# 如果开始行有代码(除了{),也要包含
content_before_brace = line[:line.index('{')].strip()
if content_before_brace and not content_before_brace.endswith(')'):
lines.append(line)
continue
if in_method_body:
brace_count += line.count('{') - line.count('}')
if brace_count > 0:
lines.append(line)
else:
# 方法结束
break
return lines
def _process_java_statements(self, statements: List[str], visited_methods: Set[str], method_name: str) -> List[int]:
    """Turn a list of statements into CFG blocks.

    Each non-blank statement is passed, together with everything after
    it, to ``_process_single_java_statement``, which reports how many
    statements it consumed. The resulting block ids are then handed to
    ``_connect_sequential_blocks``.

    Returns:
        Ids of all blocks created, in creation order.
    """
    block_ids = []
    cursor = 0
    total = len(statements)
    while cursor < total:
        if not statements[cursor].strip():
            cursor += 1
            continue
        # The line number passed on is the 1-based position in this list.
        created, consumed = self._process_single_java_statement(
            statements[cursor:], visited_methods, method_name, cursor + 1)
        block_ids.extend(created)
        cursor += consumed

    self._connect_sequential_blocks(block_ids)
    return block_ids
def _process_single_java_statement(self, statements: List[str], visited_methods: Set[str],
method_name: str, line_number: int) -> Tuple[List[int], int]:
"""处理单个Java语句"""
stmt = statements[0].strip()
# 跳过只有大括号的行
if stmt in ['{', '}']:
return [], 1
# if语句
if stmt.startswith('if'):
return self._process_java_if(statements, visited_methods, method_name, line_number)
# else语句(单独的else)
elif stmt.startswith('} else if') or stmt.startswith('else if'):
return self._process_java_else_if(statements, visited_methods, method_name, line_number)
elif stmt.startswith('} else') or stmt.startswith('else'):
return self._process_java_else(statements, visited_methods, method_name, line_number)
# for循环
elif stmt.startswith('for'):
return self._process_java_for(statements, visited_methods, method_name, line_number)
# while循环
elif stmt.startswith('while'):
return self._process_java_while(statements, visited_methods, method_name, line_number)
# do-while循环
elif stmt.startswith('do'):
return self._process_java_do_while(statements, visited_methods, method_name, line_number)
# switch语句
elif stmt.startswith('switch'):
return self._process_java_switch(statements, visited_methods, method_name, line_number)
# try语句
elif stmt.startswith('try'):
return self._process_java_try(statements, visited_methods, method_name, line_number)
# catch语句
elif stmt.startswith('} catch') or stmt.startswith('catch'):
return self._process_java_catch(statements, visited_methods, method_name, line_number)
# finally语句
elif stmt.startswith('} finally') or stmt.startswith('finally'):
return self._process_java_finally(statements, visited_methods, method_name, line_number)
# return语句
elif stmt.startswith('return'):
return self._process_java_return(statements, visited_methods, method_name, line_number)
# break语句
elif stmt.startswith('break'):
return self._process_java_break(stmt, method_name, line_number)
# continue语句
elif stmt.startswith('continue'):
return self._process_java_continue(stmt, method_name, line_number)
# throw语句
elif stmt.startswith('throw'):
return self._process_java_throw(stmt, method_name, line_number)
# 变量声明或赋值
else:
return self._process_java_assignment(stmt, visited_methods, method_name, line_number)
def _process_java_if(self, statements: List[str], visited_methods: Set[str],
                     method_name: str, line_number: int) -> Tuple[List[int], int]:
    """Create the blocks for an ``if`` header and its then-branch.

    Only the ``condition_true`` edge is added here. The then-branch block
    ids are stored on the if block under ``'then_blocks'`` for later use.

    Returns:
        ``(block_ids, consumed)``: the if block followed by the
        then-branch blocks, and the number of statements consumed.
    """
    header = statements[0].strip()
    condition = self._extract_condition(header)

    if_block_id = self._create_java_block(header, 'if_statement', method_name, line_number, {
        'condition': condition,
        'is_control_structure': True,  # marks the block as a control header
    })

    # Gather and process the then-branch.
    then_statements, then_consumed = self._extract_block_statements(statements[1:])
    then_blocks = []
    if then_statements:
        then_blocks = self._process_java_statements(then_statements, visited_methods, method_name)

    if then_blocks:
        self._add_connection(if_block_id, then_blocks[0], f'condition_true:{condition}')

    # Kept on the block for later processing.
    self.blocks[if_block_id]['then_blocks'] = then_blocks

    return [if_block_id] + then_blocks, 1 + then_consumed
def _process_java_else_if(self, statements: List[str], visited_methods: Set[str],
                          method_name: str, line_number: int) -> Tuple[List[int], int]:
    """Handle ``else if`` by rewriting the header to ``if`` and delegating.

    Only the leading ``else if`` (optionally preceded by ``}``) is
    rewritten. Any ``else if`` text later in the line, such as inside a
    string literal in the condition, is left untouched.

    Returns:
        Whatever ``_process_java_if`` returns for the rewritten statements.
    """
    else_if_line = statements[0].strip()
    # Anchored and limited to one substitution so the condition is never altered.
    if_part = re.sub(r'^(?:\}\s*)?else\s+if\b', 'if', else_if_line, count=1)
    rewritten = [if_part] + statements[1:]
    return self._process_java_if(rewritten, visited_methods, method_name, line_number)
def _process_java_else(self, statements: List[str], visited_methods: Set[str],
                       method_name: str, line_number: int) -> Tuple[List[int], int]:
    """Create the blocks for the body of an ``else`` branch.

    No block is created for the ``else`` line itself; it only counts as
    one consumed statement.

    Returns:
        ``(block_ids, consumed)`` for the else body.
    """
    else_statements, else_consumed = self._extract_block_statements(statements[1:])

    produced = []
    if else_statements:
        produced.extend(
            self._process_java_statements(else_statements, visited_methods, method_name))

    # +1 accounts for the else line.
    return produced, 1 + else_consumed
def _process_java_for(self, statements: List[str], visited_methods: Set[str],
                      method_name: str, line_number: int) -> Tuple[List[int], int]:
    """Create the blocks for a ``for`` loop header and its body.

    While the body is processed, the loop is on ``self.loop_stack``. The
    body block ids are stored on the header block under ``'body_blocks'``.

    Returns:
        ``(block_ids, consumed)``: the header block followed by the body
        blocks, and the number of statements consumed.
    """
    header = statements[0].strip()
    condition = self._extract_condition(header)
    extra = {'condition': condition, 'is_control_structure': True}
    for_block_id = self._create_java_block(header, 'for_statement', method_name, line_number, extra)

    # Register the loop before descending into its body.
    self.loop_stack.append({
        'type': 'for',
        'header_id': for_block_id,
        'line': header,
    })

    body_statements, body_consumed = self._extract_java_for_body(statements)
    body_blocks = []
    if body_statements:
        body_blocks = self._process_java_statements(body_statements, visited_methods, method_name)

    # header -> first body block
    self._connect_java_for_loop(for_block_id, body_blocks, condition)
    self.blocks[for_block_id]['body_blocks'] = body_blocks

    self.loop_stack.pop()

    # +1 accounts for the header line.
    return [for_block_id, *body_blocks], 1 + body_consumed
def _extract_java_for_body(self, statements: List[str]) -> Tuple[List[str], int]:
"""提取Java for循环体语句"""
# #logger.info(f"🔍 Extracting for body from {len(statements)} total statements")
# #logger.info(f"📝 Available statements: {[s.strip() for s in statements[:5]]}")
for_header = statements[0].strip()
# 如果for头包含开大括号,从后续语句中提取循环体
if '{' in for_header:
body_statements = []
brace_count = for_header.count('{') - for_header.count('}')
consumed_lines = 0
# #logger.info(f"🔢 Initial brace_count from header: {brace_count}")
# 从第二行开始提取循环体
i = 1
while i < len(statements) and brace_count > 0:
stmt = statements[i]
stmt_stripped = stmt.strip()
# #logger.debug(f"🔍 Processing statement {i}: '{stmt_stripped}' (brace_count: {brace_count})")
if not stmt_stripped:
i += 1
consumed_lines += 1
continue
# 计算大括号
open_braces = stmt.count('{')
close_braces = stmt.count('}')
brace_count += open_braces - close_braces
#logger.debug(f"🔢 Statement {i}: +{open_braces} -{close_braces} = {brace_count}")
if brace_count > 0:
body_statements.append(stmt)
#logger.info(f"📋 Added body statement: '{stmt_stripped}'")
elif brace_count == 0 and stmt_stripped == '}':
#logger.info(f"✅ Found closing brace, ending body extraction")
consumed_lines += 1
break
i += 1
consumed_lines += 1
#logger.info(f"✅ Extracted {len(body_statements)} for body statements")
return body_statements, consumed_lines
else:
# for头没有大括号,可能是单行循环
#logger.info(f"🔄 For header has no brace, using _extract_block_statements")
body_statements, body_consumed = self._extract_block_statements(statements[1:])
return body_statements, body_consumed
def _connect_java_for_loop(self, for_block_id: int, body_blocks: List[int], condition: str):
"""建立Java for循环的连接(参考Python CFG思路)"""
# for -> 循环体(condition_true)
if body_blocks:
#logger.info(f"🔗 Creating for_match connection: {for_block_id} -> {body_blocks[0]}")
self._add_connection(for_block_id, body_blocks[0], f'condition_true:{condition}')
# condition_false连接会在后续的_add_loop_condition_false_connections中处理
def _process_java_while(self, statements: List[str], visited_methods: Set[str],
                        method_name: str, line_number: int) -> Tuple[List[int], int]:
    """Create the blocks for a ``while`` loop header and its body.

    While the body is processed, the loop is on ``self.loop_stack``. The
    ``condition_true`` edge is added only when the body produced at least
    one block. Previously a non-empty body that yielded no blocks raised
    ``IndexError``. The body block ids are stored on the header block
    under ``'body_blocks'``.

    Returns:
        ``(block_ids, consumed)``: the header block followed by the body
        blocks, and the number of statements consumed.
    """
    while_line = statements[0].strip()
    condition = self._extract_condition(while_line)

    while_block_id = self._create_java_block(while_line, 'while_statement', method_name, line_number, {
        'condition': condition,
        'is_control_structure': True,  # marks the block as a control header
    })
    all_blocks = [while_block_id]

    # Register the loop before descending into its body.
    self.loop_stack.append({
        'type': 'while',
        'header_id': while_block_id,
        'line': while_line,
    })

    body_statements, body_consumed = self._extract_block_statements(statements[1:])
    body_blocks = []
    if body_statements:
        body_blocks = self._process_java_statements(body_statements, visited_methods, method_name)
        all_blocks.extend(body_blocks)

    # Statements made only of braces yield no blocks, so check the blocks.
    if body_blocks:
        self._add_connection(while_block_id, body_blocks[0], f'condition_true:{condition}')

    # Kept on the block for later processing.
    self.blocks[while_block_id]['body_blocks'] = body_blocks
    # A loop header must not carry if-style branch data.
    self.blocks[while_block_id].pop('then_blocks', None)

    self.loop_stack.pop()

    # +1 accounts for the header line.
    return all_blocks, 1 + body_consumed
def _process_java_do_while(self, statements: List[str], visited_methods: Set[str],
                           method_name: str, line_number: int) -> Tuple[List[int], int]:
    """Create the blocks for a ``do { ... } while (cond);`` loop.

    A ``do_statement`` block is created for the ``do`` line, followed by
    the body blocks. When the statement after the body starts with
    ``} while``, a ``while_condition`` block is added with a
    ``condition_true`` edge back to the ``do`` block.

    ``body_blocks`` is always initialised and the edges into the body are
    guarded, so a body that produces no blocks no longer raises
    ``IndexError``.

    Returns:
        ``(block_ids, consumed)``: all blocks created and the number of
        statements consumed.
    """
    do_line = statements[0].strip()
    do_block_id = self._create_java_block(do_line, 'do_statement', method_name, line_number)
    all_blocks = [do_block_id]
    consumed_lines = 1

    body_statements, body_consumed = self._extract_do_while_body(statements[1:])
    body_blocks = []
    if body_statements:
        body_blocks = self._process_java_statements(body_statements, visited_methods, method_name)
        all_blocks.extend(body_blocks)
    if body_blocks:
        # do -> first body block
        self._add_connection(do_block_id, body_blocks[0], 'sequential')
    consumed_lines += body_consumed

    # The closing "} while (...)" line, if present, follows the body.
    while_line_index = consumed_lines
    if while_line_index < len(statements):
        while_line = statements[while_line_index].strip()
        if while_line.startswith('} while'):
            condition = self._extract_condition(while_line)
            while_block_id = self._create_java_block(
                while_line, 'while_condition', method_name,
                line_number + while_line_index, {'condition': condition})
            all_blocks.append(while_block_id)
            consumed_lines += 1

            if body_statements:
                # end of body -> condition (the do block stands in when
                # the body produced no blocks)
                last_body_block = body_blocks[-1] if body_blocks else do_block_id
                self._add_connection(last_body_block, while_block_id, 'sequential')
            # Back edge: the condition holds, run the body again.
            self._add_connection(while_block_id, do_block_id, f'condition_true:{condition}')

    return all_blocks, consumed_lines
def _extract_do_while_body(self, statements: List[str]) -> Tuple[List[str], int]:
"""提取do-while循环体"""
body_statements = []
consumed_lines = 0
brace_count = 0
for i, line in enumerate(statements):
stripped = line.strip()
if stripped.startswith('} while'):
break
# 计算大括号
brace_count += line.count('{') - line.count('}')
if stripped == '{':
consumed_lines += 1
continue
elif brace_count >= 0:
body_statements.append(line)
consumed_lines += 1
return body_statements, consumed_lines
def _process_java_switch(self, statements: List[str], visited_methods: Set[str],
                         method_name: str, line_number: int) -> Tuple[List[int], int]:
    """Create the blocks for a ``switch`` statement and its cases.

    A ``switch_statement`` block is created for the header. Each
    ``case``/``default`` line gets a ``case_statement`` block, linked
    ``sequential`` to the first block of its body. The switch block is
    linked to every case with ``case_match:<value>`` or ``default_case``.

    The opening brace normally sits on the ``switch (...) {`` header,
    which ``_extract_switch_body`` never sees. It is re-injected as a
    synthetic ``{`` line so that depth tracking starts inside the body.
    Without it no ``case`` line was collected.

    Returns:
        ``(block_ids, consumed)``: all blocks created and the number of
        statements consumed.
    """
    switch_line = statements[0].strip()
    condition = self._extract_condition(switch_line)
    switch_block_id = self._create_java_block(
        switch_line, 'switch_statement', method_name, line_number,
        {'condition': condition})
    all_blocks = [switch_block_id]
    consumed_lines = 1

    remaining = statements[1:]
    if switch_line.count('{') > switch_line.count('}'):
        # The header opened the body: feed the brace to the extractor and
        # discount the synthetic line from the consumed count.
        switch_body, switch_consumed = self._extract_switch_body(['{'] + remaining)
        switch_consumed -= 1
    else:
        switch_body, switch_consumed = self._extract_switch_body(remaining)
    consumed_lines += switch_consumed

    case_blocks = []
    i = 0
    while i < len(switch_body):
        line = switch_body[i].strip()
        if not line.startswith(('case', 'default')):
            i += 1
            continue

        case_block_id = self._create_java_block(
            line, 'case_statement', method_name, line_number + consumed_lines + i)
        all_blocks.append(case_block_id)
        case_blocks.append((case_block_id, line))
        i += 1

        # Everything up to the next label belongs to this case.
        case_statements = []
        while i < len(switch_body):
            case_line = switch_body[i].strip()
            if case_line.startswith(('case', 'default')):
                break
            if case_line and case_line != '}':
                case_statements.append(switch_body[i])
            i += 1

        if case_statements:
            case_body_blocks = self._process_java_statements(
                case_statements, visited_methods, method_name)
            all_blocks.extend(case_body_blocks)
            if case_body_blocks:
                # label -> first block of its body
                self._add_connection(case_block_id, case_body_blocks[0], 'sequential')

    # switch -> each label
    for case_block_id, case_line in case_blocks:
        if case_line.startswith('case'):
            parts = case_line.split()
            # Guard against a label with no value token.
            case_value = parts[1].rstrip(':') if len(parts) > 1 else ''
            self._add_connection(switch_block_id, case_block_id, f'case_match:{case_value}')
        else:
            self._add_connection(switch_block_id, case_block_id, 'default_case')

    return all_blocks, consumed_lines
def _extract_switch_body(self, statements: List[str]) -> Tuple[List[str], int]:
"""提取switch体"""
body_statements = []
consumed_lines = 0
brace_count = 0
for i, line in enumerate(statements):
stripped = line.strip()
# 计算大括号
brace_count += line.count('{') - line.count('}')
if stripped == '{':
consumed_lines += 1
continue
elif stripped == '}' and brace_count == 0:
consumed_lines += 1
break
elif brace_count > 0:
body_statements.append(line)
consumed_lines += 1
return body_statements, consumed_lines
def _process_java_try(self, statements: List[str], visited_methods: Set[str],
                      method_name: str, line_number: int) -> Tuple[List[int], int]:
    """Create the blocks for the body of a ``try`` statement.

    No block is created for the ``try`` line itself. A record holding the
    try-body block ids, with empty ``catch_blocks`` and
    ``finally_blocks`` lists, is pushed onto ``self.try_stack``.

    Returns:
        ``(block_ids, consumed)`` for the try body, counting the ``try`` line.
    """
    try_statements, try_consumed = self._extract_block_statements(statements[1:])

    try_blocks = []
    if try_statements:
        try_blocks = self._process_java_statements(try_statements, visited_methods, method_name)

    # Context record for the catch handlers that follow.
    self.try_stack.append({
        'try_blocks': try_blocks,
        'catch_blocks': [],
        'finally_blocks': [],
    })

    # +1 accounts for the "try {" line.
    return list(try_blocks), 1 + try_consumed
def _process_java_catch(self, statements: List[str], visited_methods: Set[str],
                        method_name: str, line_number: int) -> Tuple[List[int], int]:
    """Create the blocks for a ``catch`` clause and its body.

    A ``catch_statement`` block is created for the clause line and linked
    ``sequential`` to the first body block. When a try record is on
    ``self.try_stack``, the clause is registered on it and the try/catch
    connection helpers are invoked.

    Returns:
        ``(block_ids, consumed)``: the catch block followed by its body
        blocks, and the number of statements consumed.
    """
    catch_line = statements[0].strip()
    catch_block_id = self._create_java_block(catch_line, 'catch_statement', method_name, line_number)

    catch_statements, catch_consumed = self._extract_block_statements(statements[1:])
    catch_blocks = []
    if catch_statements:
        catch_blocks = self._process_java_statements(catch_statements, visited_methods, method_name)
        if catch_blocks:
            # catch header -> first block of its body
            self._add_connection(catch_block_id, catch_blocks[0], 'sequential')

    if self.try_stack:
        # Attach this handler to the innermost open try.
        current_try = self.try_stack[-1]
        current_try['catch_blocks'].append({
            'catch_block_id': catch_block_id,
            'catch_body_blocks': catch_blocks,
            'all_catch_blocks': [catch_block_id] + catch_blocks,
        })
        # Exception edges from the try body into this handler.
        self._add_try_catch_exception_connections(current_try, catch_block_id)
        # Normal-completion edges that bypass the handler.
        self._add_try_normal_completion_connections(current_try)

    return [catch_block_id] + catch_blocks, 1 + catch_consumed
def _process_java_finally(self, statements: List[str], visited_methods: Set[str],
                          method_name: str, line_number: int) -> Tuple[List[int], int]:
    """Create the blocks for a ``finally`` clause and its body.

    A ``finally_statement`` block is created for the clause line and
    linked ``sequential`` to the first body block, if there is one.

    Returns:
        ``(block_ids, consumed)``: the finally block followed by its body
        blocks, and the number of statements consumed.
    """
    finally_line = statements[0].strip()
    finally_block_id = self._create_java_block(finally_line, 'finally_statement', method_name, line_number)
    produced = [finally_block_id]

    finally_statements, finally_consumed = self._extract_block_statements(statements[1:])
    if finally_statements:
        body_ids = self._process_java_statements(finally_statements, visited_methods, method_name)
        produced.extend(body_ids)
        if body_ids:
            # finally header -> first block of its body
            self._add_connection(finally_block_id, body_ids[0], 'sequential')

    return produced, 1 + finally_consumed
def _process_java_return(self, statements: List[str], visited_methods: Set[str],
                         method_name: str, line_number: int) -> Tuple[List[int], int]:
    """Create a single ``return`` block from the first statement.

    Returns:
        ``([block_id], 1)``.
    """
    code = statements[0].strip()
    return [self._create_java_block(code, 'return', method_name, line_number)], 1
def _process_java_break(self, stmt: str, method_name: str, line_number: int) -> Tuple[List[int], int]:
    """Create a ``break`` block.

    When a loop is on ``self.loop_stack``, the innermost loop record is
    stored on the block under ``'break_target'``; no connection is added here.

    Returns:
        ``([block_id], 1)``.
    """
    block_id = self._create_java_block(stmt, 'break', method_name, line_number)
    if self.loop_stack:
        # Remember which loop this break leaves.
        self.blocks[block_id]['break_target'] = self.loop_stack[-1]
    return [block_id], 1
def _process_java_continue(self, stmt: str, method_name: str, line_number: int) -> Tuple[List[int], int]:
    """Create a ``continue`` block.

    When a loop is on ``self.loop_stack``, a ``continue`` connection is
    added from the block to the innermost loop's header.

    Returns:
        ``([block_id], 1)``.
    """
    block_id = self._create_java_block(stmt, 'continue', method_name, line_number)
    if self.loop_stack:
        # Jump back to the header of the innermost loop.
        header_id = self.loop_stack[-1]['header_id']
        self._add_connection(block_id, header_id, 'continue')
    return [block_id], 1
def _process_java_throw(self, stmt: str, method_name: str, line_number: int) -> Tuple[List[int], int]:
    """Create a single ``throw`` block; no connections are added here.

    Returns:
        ``([block_id], 1)``.
    """
    created = self._create_java_block(stmt, 'throw', method_name, line_number)
    return [created], 1
def _process_java_assignment(self, stmt: str, visited_methods: Set[str],
                             method_name: str, line_number: int) -> Tuple[List[int], int]:
    """Create a block for a declaration, assignment or expression statement.

    The block type is ``'assignment'`` when the statement contains ``=``,
    contains none of ``== != <= >= ++ --``, and does not end with ``;``.
    Otherwise it is ``'expression'``.

    Returns:
        ``([block_id], 1)``.
    """
    # NOTE(review): because of the "does not end with ';'" clause, an
    # ordinary Java assignment such as "x = 1;" is typed 'expression' --
    # confirm whether that clause is intentionally negated.
    excluded_ops = ('==', '!=', '<=', '>=', '++', '--')
    looks_like_assignment = (
        '=' in stmt
        and all(op not in stmt for op in excluded_ops)
        and not stmt.strip().endswith(';')
    )
    block_type = 'assignment' if looks_like_assignment else 'expression'

    return [self._create_java_block(stmt, block_type, method_name, line_number)], 1
def _create_java_block(self, code: str, block_type: str, method_name: str,
                       line_number: int, extra_info: Dict = None) -> int:
    """Append a new block record to ``self.blocks`` and return its id.

    Args:
        code: Statement text; stored stripped.
        block_type: Type tag stored on the block.
        method_name: Name of the method the block belongs to.
        line_number: Position value stored on the block.
        extra_info: Optional extra keys merged into the record. They are
            applied last, so they override the standard keys on collision.

    Returns:
        The new block's id, equal to its index in ``self.blocks``.
    """
    block_id = len(self.blocks)
    record = dict(
        id=block_id,
        type=block_type,
        code=code.strip(),
        line_number=line_number,
        method=method_name,
        method_calls=self._extract_java_method_calls(code),
    )
    record.update(extra_info or {})
    self.blocks.append(record)
    return block_id
def _extract_condition(self, line: str) -> str:
"""提取条件表达式"""
# 匹配完整的条件表达式,处理嵌套括号
if '(' in line and ')' in line:
start = line.find('(')
if start != -1:
# 找到匹配的右括号,处理嵌套括号
paren_count = 0
end = start
for i in range(start, len(line)):
if line[i] == '(':
paren_count += 1
elif line[i] == ')':
paren_count -= 1
if paren_count == 0:
end = i
break
if end > start:
return line[start+1:end]
return ""
def _extract_block_statements(self, statements: List[str]) -> Tuple[List[str], int]:
"""提取块语句(处理大括号),正确处理控制结构"""
block_statements = []