Skip to content

Commit c1dd208

Browse files
committed
More CR
1 parent 91368a3 commit c1dd208

6 files changed

Lines changed: 113 additions & 73 deletions

File tree

mypy/build.py

Lines changed: 84 additions & 56 deletions
Original file line number | Diff line number | Diff line change
@@ -959,66 +959,87 @@ def dump_stats(self) -> None:
959959
print(f"{key + ':':24}{value}")
960960

961961
def parse_all(self, states: list[State]) -> None:
962-
"""Parse multiple files in parallel (if possible) and compute dependencies.
962+
"""Parse multiple files in parallel (if possible) and compute dependencies."""
963+
if not self.options.native_parser:
964+
# Old parser cannot be parallelized.
965+
for state in states:
966+
state.parse_file()
967+
self.post_parse_all(states)
968+
return
969+
970+
sequential_states = []
971+
parallel_states = []
972+
for state in states:
973+
if state.tree is not None:
974+
# The file was already parsed.
975+
continue
976+
if not self.fscache.exists(state.xpath):
977+
# New parser only supports parsing on-disk files.
978+
sequential_states.append(state)
979+
continue
980+
parallel_states.append(state)
981+
self.parse_parallel(sequential_states, parallel_states)
982+
self.post_parse_all(states)
983+
984+
def parse_parallel(self, sequential_states: list[State], parallel_states: list[State]) -> None:
985+
"""Perform parallel parsing of states.
963986
964987
Note: this duplicates a bit of logic from State.parse_file(). This is done
965-
as a micro-optimization to parallelize only those parts of the code that
966-
can be parallelized efficiently.
988+
as an optimization to parallelize only those parts of the code that can be
989+
parallelized efficiently.
967990
"""
968-
if self.options.native_parser:
969-
futures = []
970-
parsed_states = {}
971-
# Use at least --num-workers if specified by user.
972-
available_threads = max(get_available_threads(), self.options.num_workers)
973-
# Overhead from trying to parallelize (small) blocking portion of
974-
# parse_file_inner() results in no visible improvement with more than 8 threads.
975-
# TODO: reuse thread pool and/or batch small files in single submit() call.
976-
with ThreadPoolExecutor(max_workers=min(available_threads, 8)) as executor:
977-
for state in states:
978-
state.needs_parse = False
979-
if state.tree is not None:
980-
# The file was already parsed.
981-
continue
982-
# New parser reads source from file directly, we do this only for
983-
# the side effect of parsing inline mypy configurations.
984-
state.get_source()
985-
if state.id not in self.ast_cache:
986-
self.log(f"Parsing {state.xpath} ({state.id})")
987-
ignore_errors = state.ignore_all or state.options.ignore_errors
988-
if ignore_errors:
989-
self.errors.ignored_files.add(state.xpath)
990-
futures.append(executor.submit(state.parse_file_inner, state.source or ""))
991-
parsed_states[state.id] = state
992-
else:
993-
self.log(f"Using cached AST for {state.xpath} ({state.id})")
994-
state.tree, state.early_errors = self.ast_cache[state.id]
995-
for fut in wait(futures).done:
996-
state_id, parse_errors = fut.result()
997-
if parse_errors:
998-
state = parsed_states[state_id]
999-
with state.wrap_context():
1000-
self.errors.set_file(state.xpath, state.id, options=state.options)
1001-
for error in parse_errors:
1002-
# New parser reports errors lazily.
1003-
report_parse_error(error, self.errors)
1004-
if self.errors.is_blockers():
1005-
self.log("Bailing due to parse errors")
1006-
self.errors.raise_error()
991+
futures = []
992+
parallel_parsed_states = {}
993+
# Use at least --num-workers if specified by user.
994+
available_threads = max(get_available_threads(), self.options.num_workers)
995+
# Overhead from trying to parallelize (small) blocking portion of
996+
# parse_file_inner() results in no visible improvement with more than 8 threads.
997+
# TODO: reuse thread pool and/or batch small files in single submit() call.
998+
with ThreadPoolExecutor(max_workers=min(available_threads, 8)) as executor:
999+
for state in parallel_states:
1000+
state.needs_parse = False
1001+
# New parser reads source from file directly, we do this only for
1002+
# the side effect of parsing inline mypy configurations.
1003+
state.get_source()
1004+
if state.id not in self.ast_cache:
1005+
self.log(f"Parsing {state.xpath} ({state.id})")
1006+
ignore_errors = state.ignore_all or state.options.ignore_errors
1007+
if ignore_errors:
1008+
self.errors.ignored_files.add(state.xpath)
1009+
futures.append(executor.submit(state.parse_file_inner, state.source or ""))
1010+
parallel_parsed_states[state.id] = state
1011+
else:
1012+
self.log(f"Using cached AST for {state.xpath} ({state.id})")
1013+
state.tree, state.early_errors = self.ast_cache[state.id]
10071014

1008-
for state in states:
1009-
assert state.tree is not None
1010-
if state.id in parsed_states:
1011-
state.early_errors = list(self.errors.error_info_map.get(state.xpath, []))
1012-
state.semantic_analysis_pass1()
1013-
self.ast_cache[state.id] = (state.tree, state.early_errors)
1014-
self.modules[state.id] = state.tree
1015-
state.check_blockers()
1016-
state.setup_errors()
1017-
else:
1018-
# Old parser cannot be parallelized.
1019-
for state in states:
1015+
# Parse sequential before waiting on parallel.
1016+
for state in sequential_states:
10201017
state.parse_file()
10211018

1019+
for fut in wait(futures).done:
1020+
state_id, parse_errors = fut.result()
1021+
# New parser reports errors lazily, add them if any.
1022+
if parse_errors:
1023+
state = parallel_parsed_states[state_id]
1024+
with state.wrap_context():
1025+
self.errors.set_file(state.xpath, state.id, options=state.options)
1026+
for error in parse_errors:
1027+
report_parse_error(error, self.errors)
1028+
if self.errors.is_blockers():
1029+
self.log("Bailing due to parse errors")
1030+
self.errors.raise_error()
1031+
1032+
for state in parallel_states:
1033+
assert state.tree is not None
1034+
if state.id in parallel_parsed_states:
1035+
state.early_errors = list(self.errors.error_info_map.get(state.xpath, []))
1036+
state.semantic_analysis_pass1()
1037+
self.ast_cache[state.id] = (state.tree, state.early_errors)
1038+
self.modules[state.id] = state.tree
1039+
state.check_blockers()
1040+
state.setup_errors()
1041+
1042+
def post_parse_all(self, states: list[State]) -> None:
10221043
for state in states:
10231044
state.compute_dependencies()
10241045
if self.workers and state.tree:
@@ -1152,7 +1173,8 @@ def parse_file(
11521173
Raise CompileError if there is a parse error.
11531174
"""
11541175
imports_only = False
1155-
if self.workers and self.fscache.exists(path):
1176+
file_exists = self.fscache.exists(path)
1177+
if self.workers and file_exists:
11561178
# Currently, we can use the native parser only for actual files.
11571179
imports_only = True
11581180
t0 = time.time()
@@ -1162,7 +1184,13 @@ def parse_file(
11621184
tree = load_from_raw(path, id, raw_data, self.errors, options)
11631185
else:
11641186
tree, parse_errors = parse(
1165-
source, path, id, self.errors, options=options, imports_only=imports_only
1187+
source,
1188+
path,
1189+
id,
1190+
self.errors,
1191+
options=options,
1192+
file_exists=file_exists,
1193+
imports_only=imports_only,
11661194
)
11671195
tree._fullname = id
11681196
if self.stats_enabled:

mypy/checkstrformat.py

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@
4444
TempNode,
4545
TupleExpr,
4646
)
47-
from mypy.parse import parse, report_parse_error
47+
from mypy.parse import parse
4848
from mypy.subtypes import is_subtype
4949
from mypy.typeops import custom_special_method
5050
from mypy.types import (
@@ -581,12 +581,14 @@ def apply_field_accessors(
581581

582582
temp_errors = Errors(self.chk.options)
583583
dummy = DUMMY_FIELD_NAME + spec.field[len(spec.key) :]
584-
temp_ast, parse_errors = parse(
585-
dummy, fnam="<format>", module=None, options=self.chk.options, errors=temp_errors
584+
temp_ast, _ = parse(
585+
dummy,
586+
fnam="<format>",
587+
module=None,
588+
options=self.chk.options,
589+
errors=temp_errors,
590+
file_exists=False,
586591
)
587-
for error in parse_errors:
588-
# New parser reports errors lazily.
589-
report_parse_error(error, temp_errors)
590592
if temp_errors.is_errors():
591593
self.msg.fail(
592594
f'Syntax error in format specifier "{spec.field}"',

mypy/nativeparse.py

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -271,8 +271,11 @@ def parse_to_binary_ast(
271271
) -> tuple[bytes, list[ParseError], TypeIgnores, bytes, bool, bool]:
272272
# This is a horrible hack to work around a mypyc bug where imported
273273
# module may be not ready in a thread sometimes.
274+
t0 = time.time()
274275
while ast_serialize is None:
275276
time.sleep(0.0001) # type: ignore[unreachable]
277+
if time.time() - t0 > 10.0:
278+
raise ImportError("Cannot import ast_serialize")
276279
ast_bytes, errors, ignores, import_bytes, ast_data = ast_serialize.parse(
277280
filename,
278281
skip_function_bodies=skip_function_bodies,

mypy/parse.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
from __future__ import annotations
22

3-
import os
43
import re
54

65
from librt.internal import ReadBuffer
@@ -18,6 +17,7 @@ def parse(
1817
module: str | None,
1918
errors: Errors,
2019
options: Options,
20+
file_exists: bool,
2121
imports_only: bool = False,
2222
) -> tuple[MypyFile, list[ParseError]]:
2323
"""Parse a source file, without doing any semantic analysis.
@@ -30,7 +30,7 @@ def parse(
3030
if options.native_parser:
3131
# Native parser only works with actual files on disk
3232
# Fall back to fastparse for in-memory source or non-existent files
33-
if os.path.exists(fnam):
33+
if file_exists:
3434
import mypy.nativeparse
3535

3636
ignore_errors = options.ignore_errors or fnam in errors.ignored_files

mypy/stubgen.py

Lines changed: 6 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1745,7 +1745,12 @@ def parse_source_file(mod: StubSource, mypy_options: MypyOptions) -> None:
17451745
source = mypy.util.decode_python_encoding(data)
17461746
errors = Errors(mypy_options)
17471747
mod.ast, errs = mypy.parse.parse(
1748-
source, fnam=mod.path, module=mod.module, errors=errors, options=mypy_options
1748+
source,
1749+
fnam=mod.path,
1750+
module=mod.module,
1751+
errors=errors,
1752+
options=mypy_options,
1753+
file_exists=True,
17491754
)
17501755
mod.ast._fullname = mod.module
17511756
for err in errs:

mypy/test/testparse.py

Lines changed: 10 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
from mypy.config_parser import parse_mypy_comments
1111
from mypy.errors import CompileError, Errors
1212
from mypy.options import Options
13-
from mypy.parse import parse, report_parse_error
13+
from mypy.parse import parse
1414
from mypy.test.data import DataDrivenTestCase, DataSuite
1515
from mypy.test.helpers import assert_string_arrays_equal, find_test_files, parse_options
1616
from mypy.util import get_mypy_comments
@@ -60,11 +60,14 @@ def test_parser(testcase: DataDrivenTestCase) -> None:
6060

6161
try:
6262
errors = Errors(options)
63-
n, errs = parse(
64-
bytes(source, "ascii"), fnam="main", module="__main__", errors=errors, options=options
63+
n, _ = parse(
64+
bytes(source, "ascii"),
65+
fnam="main",
66+
module="__main__",
67+
errors=errors,
68+
options=options,
69+
file_exists=False,
6570
)
66-
for err in errs:
67-
report_parse_error(err, errors)
6871
if errors.is_errors():
6972
errors.raise_error()
7073
a = n.str_with_options(options).split("\n")
@@ -98,15 +101,14 @@ def test_parse_error(testcase: DataDrivenTestCase) -> None:
98101
skip()
99102
# Compile temporary file. The test file contains non-ASCII characters.
100103
errors = Errors(options)
101-
_, errs = parse(
104+
parse(
102105
bytes("\n".join(testcase.input), "utf-8"),
103106
INPUT_FILE_NAME,
104107
"__main__",
105108
errors=errors,
106109
options=options,
110+
file_exists=False,
107111
)
108-
for err in errs:
109-
report_parse_error(err, errors)
110112
if errors.is_errors():
111113
errors.raise_error()
112114
raise AssertionError("No errors reported")

0 commit comments

Comments
 (0)