@@ -1024,85 +1024,76 @@ def parse_all(self, states: list[State], post_parse: bool = True) -> None:
10241024 self .post_parse_all (states )
10251025 return
10261026
1027- sequential_states = []
10281027 parallel_states = []
10291028 for state in states :
1029+ if not self .fscache .exists (state .xpath , real_only = True ):
1030+ state .source = state .get_source ()
10301031 if state .tree is not None :
10311032 # The file was already parsed.
1032- continue
1033- if not self .fscache .exists (state .xpath , real_only = True ):
1034- # New parser only supports parsing on-disk files.
1035- sequential_states .append (state )
1033+ state .needs_parse = False
10361034 continue
10371035 parallel_states .append (state )
1036+
10381037 if len (parallel_states ) > 1 :
1039- self .parse_parallel (sequential_states , parallel_states )
1040- else :
1041- # Avoid using executor when there is no parallelism.
1042- for state in states :
1043- state .parse_file ()
1044- if post_parse :
1045- self .post_parse_all (states )
1038+ # This duplicates a bit of logic from State.parse_file(). This is done as an
1039+ # optimization to parallelize only those parts of the code that can be
1040+ # parallelized efficiently.
10461041
1047- def parse_parallel (self , sequential_states : list [State ], parallel_states : list [State ]) -> None :
1048- """Perform parallel parsing of states.
1042+ parallel_parsed_states , parallel_parsed_states_set = self .parse_files_threaded_raw (
1043+ parallel_states
1044+ )
10491045
1050- Note: this duplicates a bit of logic from State.parse_file(). This is done
1051- as an optimization to parallelize only those parts of the code that can be
1052- parallelized efficiently.
1053- """
1054- parallel_parsed_states , parallel_parsed_states_set = self .parse_files_threaded_raw (
1055- sequential_states , parallel_states
1056- )
1046+ for state in parallel_parsed_states :
1047+ # New parser only returns serialized ASTs
1048+ with state .wrap_context ():
1049+ assert state .tree is not None
1050+ raw_data = state .tree .raw_data
1051+ if raw_data is not None :
1052+ # Apply inline mypy config before deserialization, since
1053+ # some options (e.g. implicit_optional) affect how the
1054+ # AST is built during deserialization.
1055+ state .source_hash = raw_data .source_hash
1056+ state .apply_inline_configuration (raw_data .mypy_comments )
1057+ state .tree = load_from_raw (
1058+ state .xpath ,
1059+ state .id ,
1060+ raw_data ,
1061+ self .errors ,
1062+ state .options ,
1063+ imports_only = bool (self .workers ),
1064+ )
1065+ if self .errors .is_blockers ():
1066+ self .log ("Bailing due to parse errors" )
1067+ self .errors .raise_error ()
10571068
1058- for state in parallel_parsed_states :
1059- # New parser returns serialized ASTs. Deserialize full trees only if not using
1060- # parallel workers.
1061- with state .wrap_context ():
1069+ for state in parallel_states :
10621070 assert state .tree is not None
1063- raw_data = state .tree .raw_data
1064- if raw_data is not None :
1065- # Apply inline mypy config before deserialization, since
1066- # some options (e.g. implicit_optional) affect deserialization
1067- state .source_hash = raw_data .source_hash
1068- state .apply_inline_configuration (raw_data .mypy_comments )
1069- state .tree = load_from_raw (
1070- state .xpath ,
1071- state .id ,
1072- raw_data ,
1073- self .errors ,
1074- state .options ,
1075- imports_only = bool (self .workers ),
1076- )
1077- if self .errors .is_blockers ():
1078- self .log ("Bailing due to parse errors" )
1079- self .errors .raise_error ()
1080-
1081- for state in parallel_states :
1082- assert state .tree is not None
1083- if state in parallel_parsed_states_set :
1071+ if state in parallel_parsed_states_set :
1072+ if state .tree .raw_data is not None :
1073+ # source_hash was already extracted above, but raw_data
1074+ # may have been preserved for workers (imports_only=True).
1075+ pass
1076+ elif state .source_hash is None :
1077+ # At least namespace packages may not have source.
1078+ state .get_source ()
1079+ state .early_errors = list (self .errors .error_info_map .get (state .xpath , []))
1080+ state .semantic_analysis_pass1 ()
1081+ self .ast_cache [state .id ] = (state .tree , state .early_errors , state .source_hash )
1082+ self .modules [state .id ] = state .tree
10841083 if state .tree .raw_data is not None :
1085- # source_hash was already extracted above, but raw_data
1086- # may have been preserved for workers (imports_only=True).
1087- pass
1088- elif state .source_hash is None :
1089- # At least namespace packages may not have source.
1090- state .get_source ()
1091- state .early_errors = list (self .errors .error_info_map .get (state .xpath , []))
1092- state .semantic_analysis_pass1 ()
1093- self .ast_cache [state .id ] = (state .tree , state .early_errors , state .source_hash )
1094- self .modules [state .id ] = state .tree
1095- if state .tree .raw_data is not None :
1096- state .size_hint = len (state .tree .raw_data .defs ) + MIN_SIZE_HINT
1097- state .check_blockers ()
1098- state .setup_errors ()
1099-
1100- def parse_files_threaded_raw (
1101- self , sequential_states : list [State ], parallel_states : list [State ]
1102- ) -> tuple [list [State ], set [State ]]:
1103- """Parse files using a thread pool.
1104-
1105- Also parse sequential states while waiting for the parallel results.
1084+ state .size_hint = len (state .tree .raw_data .defs ) + MIN_SIZE_HINT
1085+ state .check_blockers ()
1086+ state .setup_errors ()
1087+ elif len (parallel_states ) == 1 :
1088+ # Avoid using executor when there is no parallelism.
1089+ parallel_states [0 ].parse_file ()
1090+
1091+ if post_parse :
1092+ self .post_parse_all (states )
1093+
1094+ def parse_files_threaded_raw (self , states : list [State ]) -> tuple [list [State ], set [State ]]:
1095+ """Parse files in parallel using a thread pool.
1096+
11061097 Trees from the new parser are left in raw (serialized) form.
11071098
11081099 Return (list, set) of states that were actually parsed (not cached).
@@ -1118,25 +1109,21 @@ def parse_files_threaded_raw(
11181109 # parse_file_inner() results in no visible improvement with more than 8 threads.
11191110 # TODO: reuse thread pool and/or batch small files in single submit() call.
11201111 with ThreadPoolExecutor (max_workers = min (available_threads , 8 )) as executor :
1121- for state in parallel_states :
1112+ for state in states :
11221113 state .needs_parse = False
11231114 if state .id not in self .ast_cache :
11241115 self .log (f"Parsing { state .xpath } ({ state .id } )" )
11251116 ignore_errors = state .ignore_all or state .options .ignore_errors
11261117 if ignore_errors :
11271118 self .errors .ignored_files .add (state .xpath )
1128- futures .append (executor .submit (state .parse_file_inner , "" ))
1119+ futures .append (executor .submit (state .parse_file_inner , state . source ))
11291120 parallel_parsed_states .append (state )
11301121 parallel_parsed_states_set .add (state )
11311122 else :
11321123 self .log (f"Using cached AST for { state .xpath } ({ state .id } )" )
11331124 state .tree , state .early_errors , source_hash = self .ast_cache [state .id ]
11341125 state .source_hash = source_hash
11351126
1136- # Parse sequential before waiting on parallel.
1137- for state in sequential_states :
1138- state .parse_file ()
1139-
11401127 for fut in wait (futures ).done :
11411128 fut .result ()
11421129
@@ -1279,21 +1266,20 @@ def parse_file(
12791266 self ,
12801267 id : str ,
12811268 path : str ,
1282- source : str ,
1269+ source : str | None ,
12831270 options : Options ,
12841271 raw_data : FileRawData | None = None ,
12851272 ) -> MypyFile :
12861273 """Parse the source of a file with the given name.
12871274
12881275 Raise CompileError if there is a parse error.
12891276 """
1290- file_exists = self .fscache .exists (path , real_only = True )
12911277 t0 = time .time ()
12921278 if raw_data :
12931279 # If possible, deserialize from known binary data instead of parsing from scratch.
12941280 tree = load_from_raw (path , id , raw_data , self .errors , options )
12951281 else :
1296- tree = parse (source , path , id , self .errors , options = options , file_exists = file_exists )
1282+ tree = parse (source , path , id , self .errors , options = options )
12971283 tree ._fullname = id
12981284 if self .stats_enabled :
12991285 with self .stats_lock :
@@ -3179,7 +3165,7 @@ def get_source(self) -> str:
31793165 else :
31803166 err = f"{ self .path } : error: Cannot decode file: { str (decodeerr )} "
31813167 raise CompileError ([err ], module_with_blocker = self .id ) from decodeerr
3182- elif self .path and self . manager .fscache .isdir (self .path ):
3168+ elif self .path and manager .fscache .isdir (self .path ):
31833169 source = ""
31843170 self .source_hash = ""
31853171 else :
@@ -3192,7 +3178,7 @@ def get_source(self) -> str:
31923178 self .time_spent_us += time_spent_us (t0 )
31933179 return source
31943180
3195- def parse_file_inner (self , source : str , raw_data : FileRawData | None = None ) -> None :
3181+ def parse_file_inner (self , source : str | None , raw_data : FileRawData | None = None ) -> None :
31963182 t0 = time_ref ()
31973183 self .tree = self .manager .parse_file (
31983184 self .id , self .xpath , source , options = self .options , raw_data = raw_data
@@ -3319,9 +3305,7 @@ def semantic_analysis_pass1(self) -> None:
33193305 #
33203306 # TODO: This should not be considered as a semantic analysis
33213307 # pass -- it's an independent pass.
3322- if not options .native_parser or not self .manager .fscache .exists (
3323- self .xpath , real_only = True
3324- ):
3308+ if not options .native_parser :
33253309 analyzer = SemanticAnalyzerPreAnalysis ()
33263310 with self .wrap_context ():
33273311 analyzer .visit_file (self .tree , self .xpath , self .id , options )
0 commit comments