diff --git a/.gitignore b/.gitignore index 7ebd0ef65..f921e89ca 100644 --- a/.gitignore +++ b/.gitignore @@ -31,3 +31,13 @@ TECHNICAL_BRIEF.md /compare-baseline Tools/bytecodes_gen/bytecodes_gen /bytecodes_gen + +# test_zipimport scratch artifacts +junk*.zip +junk*/ + +ziptestmodule +ziptestmodule.py + +# test_module_with_large_stack writes this into the cwd +longlist.py diff --git a/builtins/compile.go b/builtins/compile.go index 6eb198678..2388516c0 100644 --- a/builtins/compile.go +++ b/builtins/compile.go @@ -109,7 +109,7 @@ func parseCompileArgs(args []objects.Object, kwargs map[string]objects.Object) ( return compileArgs{}, err } } - filename, err := stringArg(bound[1], "filename") + filename, err := compileFilenameArg(bound[1]) if err != nil { return compileArgs{}, err } @@ -150,6 +150,31 @@ func parseCompileArgs(args []objects.Object, kwargs map[string]objects.Object) ( }, nil } +// compileFilenameArg decodes the filename argument. compile() runs it +// through PyUnicode_FSDecoder, which accepts str, bytes, or any +// os.PathLike (pathlib.Path) by invoking __fspath__. importlib's source +// loaders pass a pathlib.Path here, so a bare str check is too strict. +// +// CPython: Python/bltinmodule.c builtin_compile (filename: object, +// +// PyUnicode_FSDecoder) and Objects/unicodeobject.c PyOS_FSPath +func compileFilenameArg(o objects.Object) (string, error) { + switch v := o.(type) { + case *objects.Unicode: + return v.Value(), nil + case *objects.Bytes: + return string(v.Bytes()), nil + } + if fspath, err := objects.GetAttr(o, objects.NewStr("__fspath__")); err == nil { + result, callErr := objects.CallNoArgs(fspath) + if callErr != nil { + return "", callErr + } + return compileFilenameArg(result) + } + return "", fmt.Errorf("TypeError: compile() filename must be str, bytes or os.PathLike, not %s", o.Type().Name) +} + // compileSourceArg accepts the first positional argument to compile(). // str routes through ParseString. bytes / bytearray route through // ParseBytes so the PEP 263 coding cookie controls the decode. AST diff --git a/builtins/eval.go b/builtins/eval.go index 7e23c1dd0..d6c28351d 100644 --- a/builtins/eval.go +++ b/builtins/eval.go @@ -329,3 +329,23 @@ func runCode(code *objects.Code, globals, locals, closure objects.Object) (objec } return currentEvaluator(code, globals, locals, closure) } + +// RunInFreshNamespace compiles and runs source in a brand-new __main__ +// namespace and returns PyRun_SimpleStringFlags's result code: 0 when the +// code runs to completion, -1 when it raises. It backs the subinterpreter +// test entries (_testcapi.run_in_subinterp and +// _testinternalcapi.run_in_subinterp_with_config). Every gopy extension is +// a Go builtin compiled into the runtime (multi-phase by construction), so +// importing one inside a subinterpreter behaves exactly like a fresh- +// namespace exec in the current process; the only observable result the +// callers read is the integer status. +// +// CPython: Python/pythonrun.c:592 PyRun_SimpleStringFlags +func RunInFreshNamespace(source string) int { + ns := objects.NewDict() + _ = ns.SetItem(objects.NewStr("__name__"), objects.NewStr("__main__")) + if _, err := Exec([]objects.Object{objects.NewStr(source), ns}, nil); err != nil { + return -1 + } + return 0 +} diff --git a/builtins/import.go b/builtins/import.go index 5b6d08e8a..332a680c2 100644 --- a/builtins/import.go +++ b/builtins/import.go @@ -20,15 +20,22 @@ import ( ) // Importer resolves a module by name, with pkgname as the anchor for -// relative imports and level as the dot-count. fromlist is empty for -// `import a.b.c` and non-empty for `from a.b import c, d`. The hook -// returns the resolved module along with the same chain CPython hands -// back: when fromlist is empty the caller wants the top-level package, -// when fromlist is non-empty the caller wants the deepest module so +// relative imports and level as the dot-count. fromlist is the raw +// object the caller passed (None for `import a.b.c`, a sequence for +// `from a.b import c, d`); it is handed to _handle_fromlist unchanged, +// so a non-str entry surfaces as the TypeError _handle_fromlist raises +// rather than an early gopy-only rejection, and an arbitrary iterable +// is iterated the same way CPython iterates it. globals is the dict the +// caller handed to __import__ (or nil); the live importlib re-derives +// the package anchor from it via _calc___package__, so it must be the +// caller's explicit globals, not the running frame's. The hook returns +// the resolved module along with the same chain CPython hands back: +// when fromlist is empty the caller wants the top-level package, when +// fromlist is non-empty the caller wants the deepest module so // IMPORT_FROM can grab attributes off it. // // CPython: Python/import.c:1561 PyImport_ImportModuleLevelObject -type Importer func(name, pkgname string, level int, fromlist []string) (objects.Object, error) +type Importer func(name, pkgname string, level int, fromlist objects.Object, globals objects.Object) (objects.Object, error) var currentImporter Importer @@ -73,13 +80,13 @@ func Import(args []objects.Object, kwargs map[string]objects.Object) (objects.Ob } } pkgname := pkgnameFromGlobals(parsed.globals) - return currentImporter(parsed.name, pkgname, parsed.level, parsed.fromlist) + return currentImporter(parsed.name, pkgname, parsed.level, parsed.fromlist, parsed.globals) } type importArgs struct { name string globals objects.Object - fromlist []string + fromlist objects.Object level int } @@ -130,9 +137,15 @@ func parseImportArgs(args []objects.Object, kwargs map[string]objects.Object) (i return importArgs{}, fmt.Errorf("ValueError: level must be >= 0") } } - fromlist, err := fromlistArg(bound[3]) - if err != nil { - return importArgs{}, err + // fromlist reaches the import machinery untouched. CPython's + // builtin___import___impl performs no type or element check; an empty + // tuple stands in for a missing argument, and _handle_fromlist raises + // the TypeError for any non-str entry or iterates a custom iterable. + // + // CPython: Python/bltinmodule.c:259 builtin___import___impl + fromlist := bound[3] + if fromlist == nil { + fromlist = objects.NewTuple(nil) } return importArgs{ name: name, @@ -142,42 +155,6 @@ func parseImportArgs(args []objects.Object, kwargs map[string]objects.Object) (i }, nil } -// fromlistArg unpacks the fromlist argument into a flat []string. -// None and missing both mean "empty"; a tuple or list is iterated; any -// other type is a TypeError. The element check matches CPython's -// import.c which rejects non-str entries before lookup. -// -// CPython: Python/import.c:1726 import_from -func fromlistArg(o objects.Object) ([]string, error) { - if o == nil || objects.IsNone(o) { - return nil, nil - } - var raw []objects.Object - switch v := o.(type) { - case *objects.Tuple: - raw = make([]objects.Object, v.Len()) - for i := range raw { - raw[i] = v.Item(i) - } - case *objects.List: - raw = make([]objects.Object, v.Len()) - for i := range raw { - raw[i] = v.Item(i) - } - default: - return nil, fmt.Errorf("TypeError: fromlist must be a tuple or list") - } - out := make([]string, 0, len(raw)) - for _, item := range raw { - s, err := stringArg(item, "fromlist item") - if err != nil { - return nil, err - } - out = append(out, s) - } - return out, nil -} - // stringArg coerces o to a Go string, raising TypeError when o isn't a // Python str. The label is the argument name used in the error. func stringArg(o objects.Object, label string) (string, error) { diff --git a/builtins/import_test.go b/builtins/import_test.go index f01b80b48..2bc5a292a 100644 --- a/builtins/import_test.go +++ b/builtins/import_test.go @@ -15,17 +15,40 @@ type importCall struct { fromlist []string } +// fromlistStrings flattens the raw fromlist object the hook now +// receives into the []string the assertions below compare against. It +// mirrors how _handle_fromlist iterates the object, stopping at the +// first non-str entry (none of these tests pass one). +func fromlistStrings(o objects.Object) []string { + var out []string + switch v := o.(type) { + case *objects.Tuple: + for i := 0; i < v.Len(); i++ { + if u, ok := v.Item(i).(*objects.Unicode); ok { + out = append(out, u.Value()) + } + } + case *objects.List: + for i := 0; i < v.Len(); i++ { + if u, ok := v.Item(i).(*objects.Unicode); ok { + out = append(out, u.Value()) + } + } + } + return out +} + func captureImporter(t *testing.T, mod objects.Object, returnErr error) *importCall { t.Helper() prev := currentImporter t.Cleanup(func() { SetImporter(prev) }) got := &importCall{} - SetImporter(func(name, pkgname string, level int, fromlist []string) (objects.Object, error) { + SetImporter(func(name, pkgname string, level int, fromlist objects.Object, _ objects.Object) (objects.Object, error) { got.name = name got.pkgname = pkgname got.level = level - got.fromlist = fromlist + got.fromlist = fromlistStrings(fromlist) return mod, returnErr }) return got @@ -198,16 +221,26 @@ func TestImportNegativeLevel(t *testing.T) { } } -func TestImportFromlistRejectsString(t *testing.T) { - captureImporter(t, nil, nil) - _, err := Import([]objects.Object{ +func TestImportFromlistPassesThroughRawObject(t *testing.T) { + // CPython's builtin___import__ never type-checks fromlist; it hands the + // object straight to _handle_fromlist, which iterates it. A str is a + // valid (if unusual) fromlist, so __import__ must not reject it early. + mod := objects.NewModule("a") + got := captureImporter(t, mod, nil) + out, err := Import([]objects.Object{ objects.NewStr("a"), objects.None(), objects.None(), - objects.NewStr("notalist"), + objects.NewStr("xy"), }, nil) - if err == nil || !strings.Contains(err.Error(), "fromlist must be a tuple or list") { - t.Fatalf("__import__: err=%v, want fromlist TypeError", err) + if err != nil { + t.Fatalf("__import__: %v", err) + } + if out != mod { + t.Fatalf("__import__ returned %v, want %v", out, mod) + } + if got.name != "a" { + t.Fatalf("hook name = %q, want a", got.name) } } diff --git a/builtins/init.go b/builtins/init.go index 87576b9c2..034997686 100644 --- a/builtins/init.go +++ b/builtins/init.go @@ -23,6 +23,13 @@ import ( var wireOnce sync.Once +// DefaultImport holds the interpreter's original __import__ builtin so the +// IMPORT_NAME fast path can recognize it by identity even after user code +// rebinds builtins.__import__. +// +// CPython: pycore_interp.h interp->imports.import_func +var DefaultImport objects.Object + // Init constructs the builtins dict and stamps the v0.6 surface into // it: None / True / False / NotImplemented as named constants, and // print as the single callable. defaultFile is the io.Writer the @@ -151,6 +158,13 @@ func Init(defaultFile io.Writer) (*objects.Dict, error) { if err := setBuiltin(dict, "__import__", importFn); err != nil { return nil, err } + // Capture the interpreter's original __import__ so the IMPORT_NAME fast + // path can compare against it by identity. Re-reading the builtins + // module is wrong: a test that swaps builtins.__import__ would make the + // swapped callable compare equal to "the default" and never get called. + // + // CPython: pycore_interp.h interp->imports.import_func (captured at init) + DefaultImport = importFn // breakpoint() forwards to sys.breakpointhook. Register the builtin // here and hand the default hook to sys so sys.breakpointhook and diff --git a/cmd/gopy/main.go b/cmd/gopy/main.go index 1c79b6dfc..24bc26b1a 100644 --- a/cmd/gopy/main.go +++ b/cmd/gopy/main.go @@ -17,6 +17,7 @@ import ( "github.com/tamnd/gopy/builtins" "github.com/tamnd/gopy/codecs" "github.com/tamnd/gopy/compile" + pyerrors "github.com/tamnd/gopy/errors" "github.com/tamnd/gopy/getopt" "github.com/tamnd/gopy/imp" "github.com/tamnd/gopy/module/gc" @@ -58,7 +59,16 @@ func mainWithProfile() int { _ = f.Close() }() } - return run(os.Args[1:], os.Stdout, os.Stderr) + exitcode := run(os.Args[1:], os.Stdout, os.Stderr) + // bpo-1054041: if a KeyboardInterrupt went unhandled, exit through + // the default SIGINT handler so a calling shell sees the ^C and the + // process reports death-by-signal rather than a plain error code. + // + // CPython: Modules/main.c:786 Py_RunMain (unhandled_keyboard_interrupt) + if pyerrors.UnhandledKeyboardInterrupt() { + exitcode = exitSigint() + } + return exitcode } // run drives _PyOS_GetOpt the same way pymain_init walks argv before @@ -93,6 +103,7 @@ func run(args []string, stdout, stderr *os.File) int { modName string hasC, hasM bool xOptions []string + safePath bool ) opts: @@ -118,6 +129,18 @@ opts: break opts case 'X': xOptions = append(xOptions, st.OptArg) + case 'P': + // -P sets safe_path: the script directory / cwd / '' is not + // prepended to sys.path[0]. + // + // CPython: Python/initconfig.c:2098 config_parse_cmdline ('P') + safePath = true + case 'I': + // -I (isolated) implies -P plus -E/-s; the safe_path effect on + // sys.path[0] is what the shadowing tests exercise. + // + // CPython: Python/initconfig.c:2080 config_parse_cmdline ('I') + safePath = true default: // Other CPython flags (-b, -B, -O, -W, ...) are accepted for // option-set parity. Wiring each to the runtime config lands @@ -134,6 +157,17 @@ opts: codecs.SetDevMode(true) } + // safe_path: -P, -I, or PYTHONSAFEPATH suppresses prepending the + // script directory / cwd / "" to sys.path[0], and is exposed as + // sys.flags.safe_path. installPathFinder reads safePathMode to skip + // the unsafe leading entry. + // + // CPython: Python/initconfig.c:1828 config_init_safe_path + if safePath || os.Getenv("PYTHONSAFEPATH") != "" { + safePathMode = true + sys.SetSafePath(true) + } + switch { case showVersion: fmt.Fprintln(stdout, build.VersionString()) @@ -154,6 +188,25 @@ opts: return runInteractive(stdout, stderr) } +// safePathMode records whether -P / -I / PYTHONSAFEPATH was supplied, so +// installPathFinder omits the unsafe leading sys.path[0] entry. +// +// CPython: Python/initconfig.c:1828 config_init_safe_path +var safePathMode bool + +// sysPath0Entry / sysPath0Present hold the leading sys.path entry +// (script directory, or "" for -c / -m / interactive). CPython prepends +// config->sys_path_0 to sys.path AFTER site.main() runs, so +// site.removeduppaths() never rewrites a "" entry into an absolute cwd. +// gopy mirrors that: installPathFinder records the entry here, and +// prependSysPath0 inserts it once site has run. +// +// CPython: Modules/main.c:pymain_run_python (sys.path[0] insertion) +var ( + sysPath0Entry string + sysPath0Present bool +) + // hasXOption reports whether the -X option named key was supplied, // matching against the part before any '=' so "-X dev" and "-X dev=1" // both count. @@ -179,12 +232,37 @@ func hasXOption(xOptions []string, key string) bool { // CPython: Python/initconfig.c:1734 _PyConfig_InitPathConfig // CPython: Lib/importlib/_bootstrap_external.py:1196 PathFinder func installPathFinder(scriptPath string) { + // The leading sys.path entry (script dir, or "" for -c / -m / + // interactive) is NOT placed in `paths` here: CPython inserts + // config->sys_path_0 after site.main() runs, so site.removeduppaths() + // does not rewrite a "" entry into an absolute cwd. prependSysPath0 + // adds it once site has run. var paths []string switch { + case safePathMode: + // safe_path drops the leading script-dir / cwd / "" entry so an + // importable name is resolved only from PYTHONPATH and the stdlib. + // CPython leaves config->sys_path_0 unset, which disables the + // module-shadowing heuristic. + // + // CPython: Python/initconfig.c:1828 config_init_safe_path + sysPath0Present = false + imp.SetConfigSysPath0("", false) case scriptPath != "": - paths = append(paths, filepath.Dir(scriptPath)) + // config->sys_path_0 for a script is the ABSOLUTE directory of the + // script file (CPython resolves it), so the shadowing check and the + // live sys.path[0] both use an absolute path. + dir := filepath.Dir(scriptPath) + if abs, err := filepath.Abs(dir); err == nil { + dir = abs + } + sysPath0Entry = dir + sysPath0Present = true + imp.SetConfigSysPath0(dir, true) default: - paths = append(paths, "") + sysPath0Entry = "" + sysPath0Present = true + imp.SetConfigSysPath0("", true) } if env := os.Getenv("PYTHONPATH"); env != "" { for _, p := range strings.Split(env, string(os.PathListSeparator)) { @@ -195,11 +273,49 @@ func installPathFinder(scriptPath string) { } if root := findStdlibRoot(); root != "" { paths = append(paths, root) + // Materialize the compiled-in extension modules as stub files in a + // lib-dynload directory and add it to sys.path, the gopy analog of + // CPython's /lib-dynload. The real PathFinder -> FileFinder + // discovers them by suffix and routes them through ExtensionFileLoader + // -> _imp.create_dynamic, so module.__spec__.loader is an + // ExtensionFileLoader exactly as for a CPython .so. The stub lives + // outside the vendored stdlib tree so that tree stays pristine. + // + // CPython: Modules/getpath.py (lib-dynload on sys.path) + dynload := filepath.Join(os.TempDir(), "gopy-lib-dynload") + if err := imp.MaterializeExtensions(dynload); err == nil { + paths = append(paths, dynload) + } + // Expose the resolved stdlib root as sys._stdlib_dir so + // FrozenImporter._resolve_filename can compute __file__ and a + // frozen package's __path__ against the on-disk Lib copy, letting + // an unfrozen submodule (e.g. __phello__.spam from disk) be found + // when its parent was loaded frozen. + // + // CPython: Lib/importlib/_bootstrap.py:1108 _resolve_filename + sys.SetStdlibDir(root) + // Pin the resolved root into the environment so any subprocess + // this interpreter spawns through sys.executable bootstraps from + // the same stdlib, even when it runs in an unrelated cwd (e.g. + // subprocess.run(cwd=tmpdir)). CPython's child interpreters + // self-locate from the executable's prefix; gopy carries it + // explicitly via GOPY_STDLIB. + // + // CPython: Modules/getpath.py:550 calculate_path (prefix inherited) + if os.Getenv("GOPY_STDLIB") == "" { + _ = os.Setenv("GOPY_STDLIB", root) + } } imp.SetPathFinder(&imp.PathFinder{ Paths: paths, Compiler: gopyCompile, }) + // Frozen test modules (__hello__, __phello__ and friends) keep their + // source verbatim and compile lazily through the same compiler the + // path finder uses. + // + // CPython: Python/frozen.c _PyImport_FrozenModules + imp.FrozenCompiler = gopyCompile sys.SetPath(paths) // Wire the meta-path finder to consult the live sys.path so // `sys.path.insert(0, x)` from user code is honored on the next @@ -210,6 +326,20 @@ func installPathFinder(scriptPath string) { imp.SetLivePathHook(sys.LivePath) } +// prependSysPath0 inserts the leading sys.path entry (config->sys_path_0) +// recorded by installPathFinder. CPython does this after site.main() +// runs, so the entry (notably "" for -c) is never absolutized by +// site.removeduppaths(). Call it once the site bootstrap has completed. +// +// CPython: Modules/main.c:pymain_run_python (sys.path[0] insertion) +func prependSysPath0() { + if !sysPath0Present { + return + } + cur := sys.LivePath() + sys.SetPath(append([]string{sysPath0Entry}, cur...)) +} + // bootstrapEncodings imports the encodings package so its // search_function lands in the codec search path. CPython does this // from _PyCodec_Init at the tail of interpreter startup, after the @@ -225,6 +355,86 @@ func installPathFinder(scriptPath string) { // // CPython: Python/codecs.c:1690 _PyCodec_Init (PyImport_ImportModule "encodings") func bootstrapEncodings(ts *state.Thread, globals *objects.Dict, stderr *os.File) int { + // Initialize the importlib bootstrap before any Python-level import. + // CPython freezes importlib._bootstrap / _bootstrap_external and runs + // init_importlib well before _PyCodec_Init. gopy loads them as regular + // .py modules on first reference; the encodings preload below pulls + // _bootstrap_external in transitively (encodings -> codecs -> + // importlib.util -> _bootstrap_external). Importing it here first means + // it is fully cached before encodings runs, so its own load does not + // re-enter the import system while the encodings package is still + // half-initialized (which would strand `from . import aliases`). + // + // CPython: Python/pylifecycle.c:1041 init_importlib_external + // Two-phase importlib install, mirroring init_importlib / + // init_importlib_external. importlib/__init__.py self-bootstraps via + // its `except ImportError` branch (gopy has no frozen _frozen_importlib), + // which runs _bootstrap._setup(sys, _imp) and binds _bootstrap_external. + // Phase 2 then calls _bootstrap_external._install(_bootstrap) directly + // (CPython's _install_external_importers imports _frozen_importlib_external, + // which gopy lacks), appending PathFinder to sys.meta_path and the + // FileFinder path hook to sys.path_hooks. + // + // CPython: Python/pylifecycle.c:1041 init_importlib_external + // CPython runs init_importlib exactly once, against a fresh per-process + // interpreter. gopy reuses one process-wide sys.modules across every run() + // invocation (the cmd/gopy tests call run() several times in a single + // binary), so the install must be idempotent. Once the import system is + // live, sys.modules already holds _frozen_importlib aliased to the source + // _bootstrap module, which carries no __origname__; re-running + // _bootstrap._install would make _setup re-scan sys.modules and trip the + // frozen fix-up assert on it. Guard the whole install on the first run. + // + // CPython: Python/pylifecycle.c:1041 init_importlib_external + install := "import sys\n" + + "if '_frozen_importlib' not in sys.modules:\n" + + " import importlib, _imp\n" + + " from importlib import _bootstrap, _bootstrap_external\n" + + " _bootstrap._install(sys, _imp)\n" + + " _bootstrap_external._install(_bootstrap)\n" + + // CPython's C bootstrap freezes _bootstrap / _bootstrap_external and + // publishes them under the _frozen_importlib* names; importlib then + // aliases those exact objects to importlib._bootstrap[_external]. gopy + // loads them as plain .py modules, so re-publish the same objects + // under the frozen names to keep sys.modules['_frozen_importlib'] and + // importlib._bootstrap identical (issue #15386 / bootstrap tests). + // + // CPython: Lib/importlib/__init__.py:50 (_bootstrap aliasing) + " sys.modules['_frozen_importlib'] = _bootstrap\n" + + " sys.modules['_frozen_importlib_external'] = _bootstrap_external\n" + + // CPython registers the zipimporter path hook ahead of FileFinder + // (C-side, _PyImportZip_Init) so a sys.path entry pointing at a zip + // archive is claimed before the directory finder rejects it. + // CPython: Python/pylifecycle.c init_importlib_external (zipimport) + " try:\n" + + " import zipimport\n" + + " sys.path_hooks.insert(0, zipimport.zipimporter)\n" + + " except ImportError:\n" + + " pass\n" + + // CPython freezes importlib._bootstrap[_external] and the importlib + // package, so _setup gives them a __spec__ via the frozen loader + // before any user import runs. gopy loads these as plain .py files + // through the Go-side driver during this bootstrap, before the + // machinery is live, so they reach sys.modules without __spec__. + // Rebuild a SourceFileLoader spec for every still-spec-less module + // that carries a __file__ (importlib, importlib._bootstrap, + // _bootstrap_external, importlib.util), matching the spec PathFinder + // would have produced. Without __spec__ on the importlib package, + // `import importlib.util` raises AttributeError at _bootstrap.py:1325. + // + // CPython: Lib/importlib/_bootstrap.py:1517 _setup (spec fix-up loop) + " for _n in list(sys.modules):\n" + + " _m = sys.modules[_n]\n" + + " if getattr(_m, '__spec__', None) is None and getattr(_m, '__file__', None):\n" + + " try:\n" + + " _sp = _bootstrap_external.spec_from_file_location(_n, _m.__file__)\n" + + " _bootstrap._init_module_attrs(_sp, _m, override=True)\n" + + " except Exception:\n" + + " pass\n" + if _, err := pythonrun.RunString(ts, install, "", parser.ModeFile, globals, nil); err != nil { + fmt.Fprintln(stderr, "preload importlib:", err) + return 1 + } if _, err := pythonrun.RunString(ts, "import encodings", "", parser.ModeFile, globals, nil); err != nil { fmt.Fprintln(stderr, "preload encodings:", err) return 1 @@ -232,6 +442,24 @@ func bootstrapEncodings(ts *state.Thread, globals *objects.Dict, stderr *os.File return 0 } +// bootstrapSite imports the site module, which runs site.main() at import +// time (the no_site flag is clear) to install the interpreter builtins +// exit / quit / help / copyright / credits / license via setquit / +// setcopyright / sethelper. CPython drives this from init_import_site +// during Py_Initialize after the import system is online; without it +// sys.flags.no_site reads 0 (claiming site loaded) while the builtins it +// installs are missing, so code.InteractiveConsole(local_exit=True) and +// other site-dependent paths diverge. +// +// CPython: Python/pylifecycle.c:1255 init_import_site (PyImport_ImportModule "site") +func bootstrapSite(ts *state.Thread, globals *objects.Dict, stderr *os.File) int { + if _, err := pythonrun.RunString(ts, "import site", "", parser.ModeFile, globals, nil); err != nil { + fmt.Fprintln(stderr, "preload site:", err) + return 1 + } + return 0 +} + // findStdlibRoot locates the vendored gopy stdlib tree. CPython's // equivalent is Modules/getpath.py's prefix discovery; the gopy port // (pathconfig/) targets the CPython install layout, not the gopy @@ -311,6 +539,15 @@ func gopyCompile(src []byte, filename string) (*objects.Code, error) { if len(src) == 0 || src[len(src)-1] != '\n' { src = append(src, '\n') } + // CPython freezes importlib._bootstrap[_external], so the code objects of + // the import machinery carry the synthetic co_filename + // "" rather than a source path. gopy loads + // them from source; stamp the same frozen name so tracebacks that pass + // through the machinery read identically (test_import_bug) and + // remove_importlib_frames can recognize them. + // + // CPython: Python/pylifecycle.c:1041 init_importlib (frozen modules) + filename = frozenImportlibName(filename) mod, err := parser.ParseBytes(src, filename, parser.ModeFile) if err != nil { return nil, err @@ -345,6 +582,21 @@ func gopyCompile(src []byte, filename string) (*objects.Code, error) { return out, nil } +// frozenImportlibName maps the source paths of the two importlib bootstrap +// modules to the synthetic co_filename CPython gives their frozen code +// objects. Any other path is returned unchanged. +// +// CPython: Python/import.c:3501 remove_importlib_frames (frozen names) +func frozenImportlibName(filename string) string { + switch { + case strings.HasSuffix(filename, "importlib/_bootstrap_external.py"): + return "" + case strings.HasSuffix(filename, "importlib/_bootstrap.py"): + return "" + } + return filename +} + // runSource is the gopy -c entry. It dispatches to // pythonrun.RunSimpleString, the port of CPython's // PyRun_SimpleStringFlags. @@ -362,6 +614,10 @@ func runSource(src string, stdout, stderr *os.File) int { if rc := bootstrapEncodings(ts, mainGlobals, stderr); rc != 0 { return rc } + if rc := bootstrapSite(ts, mainGlobals, stderr); rc != 0 { + return rc + } + prependSysPath0() rc := pythonrun.RunSimpleString(ts, src, mainGlobals, stderr) gc.RunShutdownFinalizers() pythonrun.FlushStdFiles() @@ -388,6 +644,10 @@ func runModule(modName string, modArgs []string, stdout, stderr *os.File) int { if rc := bootstrapEncodings(ts, mainGlobals, stderr); rc != 0 { return rc } + if rc := bootstrapSite(ts, mainGlobals, stderr); rc != 0 { + return rc + } + prependSysPath0() // Equivalent of CPython's pymain_run_module which calls // runpy._run_module_as_main(modName) on the Python side. src := fmt.Sprintf("import runpy\nrunpy._run_module_as_main(%q)\n", modName) @@ -416,11 +676,70 @@ func runFile(path string, stdout, stderr *os.File) int { return 1 } installPathFinder(path) - mainGlobals := newMainGlobals(g, mainModuleName(path)) + modName := mainModuleName(path) + mainGlobals := newMainGlobals(g, modName) + // Anchor relative imports inside a vendored test package. CPython's + // import machinery stamps __package__ when it loads the module; a + // synthesized main module would otherwise have no anchor and any + // `from . import x` inside it would raise "no known parent package". + // + // CPython: Lib/importlib/_bootstrap.py:1350 _calc___package__ + if pkg := mainPackageName(path, modName); pkg != "" { + _ = mainGlobals.SetItem(objects.NewStr("__package__"), objects.NewStr(pkg)) + // A package's __init__ also carries __path__ pointing at its dir, + // so submodule imports (`from .data import x`) find sibling files. + if filepath.Base(path) == "__init__.py" { + if abs, absErr := filepath.Abs(filepath.Dir(path)); absErr == nil { + _ = mainGlobals.SetItem(objects.NewStr("__path__"), + objects.NewList([]objects.Object{objects.NewStr(abs)})) + } + } + } ts := state.NewThread() if rc := bootstrapEncodings(ts, mainGlobals, stderr); rc != 0 { return rc } + if rc := bootstrapSite(ts, mainGlobals, stderr); rc != 0 { + return rc + } + prependSysPath0() + // A vendored test runs under "test."; regrtest imports it as a + // normal module, so its __spec__ is a real ModuleSpec. Build the same + // file-location spec here so code that resolves the module by name and + // reads __spec__ (pyclbr, runpy, inspect) matches CPython. The plain + // "__main__" run keeps __spec__ None, like `python script.py`. The + // snippet runs through pythonrun so importlib.util loads under a real + // Executor, the same way bootstrapEncodings drives the encodings import. + // + // CPython: Lib/test/libregrtest/runtest.py (imports test.) + if modName != "__main__" { + abs, absErr := filepath.Abs(path) + if absErr != nil { + abs = path + } + // regrtest imports the test under "test." the normal way, so + // the import machinery runs setattr(parent_package, child, module): + // the `test` package ends up with a `test_import` attribute. gopy + // pre-injects the gate module into sys.modules without that parent + // binding, so a test like data/circular_imports/.../child.py that + // evaluates `test.test_import.<...>` as an expression would fail its + // first hop getattr(test, 'test_import'). Import the parent package + // and bind the leaf to mirror what _find_and_load does. + // + // CPython: Lib/importlib/_bootstrap.py:1350 setattr(parent_module, child, module) + src := fmt.Sprintf("import importlib, importlib.util as _u, sys as _s\n"+ + "_m = _s.modules.get(%q)\n"+ + "if _m is not None and getattr(_m, '__spec__', None) is None:\n"+ + " _m.__spec__ = _u.spec_from_file_location(%q, %q)\n"+ + " _m.__loader__ = _m.__spec__.loader\n"+ + "_parent, _, _child = %q.rpartition('.')\n"+ + "if _m is not None and _parent:\n"+ + " setattr(importlib.import_module(_parent), _child, _m)\n"+ + "del importlib, _u, _s, _m, _parent, _child\n", modName, modName, abs, modName) + if _, err := pythonrun.RunString(ts, src, "", parser.ModeFile, mainGlobals, nil); err != nil { + fmt.Fprintln(stderr, "attach main spec:", err) + } + } var rc int if suffix, ok := unittestRunnerSuffix(path); ok { src, readErr := os.ReadFile(path) //nolint:gosec // reading a caller-supplied test file path is the entire contract @@ -451,7 +770,12 @@ func runFile(path string, stdout, stderr *os.File) int { // CPython: Lib/test/libregrtest/runtest.py unittest.main func unittestRunnerSuffix(path string) (string, bool) { base := filepath.Base(path) - if !strings.HasPrefix(base, "test_") || !strings.HasSuffix(base, ".py") { + // A package test is laid out as test_xxx/__init__.py; accept it too so + // the runner fires even though its basename is not test_*.py. The + // module runs under "test.test_xxx" (not "__main__"), so its own + // `if __name__ == '__main__'` guard never triggers the suite. + isPkgInit := base == "__init__.py" && strings.HasPrefix(filepath.Base(filepath.Dir(path)), "test_") + if !isPkgInit && (!strings.HasPrefix(base, "test_") || !strings.HasSuffix(base, ".py")) { return "", false } src, err := os.ReadFile(path) //nolint:gosec // reading a caller-supplied test file path is the entire contract @@ -485,12 +809,42 @@ func unittestRunnerSuffix(path string) (string, bool) { // CPython: Lib/test/libregrtest/runtest.py (imports test.) func mainModuleName(path string) string { base := filepath.Base(path) + // A package laid out as test_xxx/__init__.py runs under the dotted + // name "test.test_xxx": regrtest imports the directory as a package, so + // the __init__ body sees __name__ == "test.test_xxx" and relative + // imports inside it resolve against that anchor. + if base == "__init__.py" { + parent := filepath.Base(filepath.Dir(path)) + if strings.HasPrefix(parent, "test_") { + return "test." + parent + } + return "__main__" + } if strings.HasPrefix(base, "test_") && strings.HasSuffix(base, ".py") { return "test." + strings.TrimSuffix(base, ".py") } return "__main__" } +// mainPackageName returns the __package__ anchor for the main module at +// path. A package __init__ anchors at its own dotted name; a plain module +// anchors at its parent package. Relative imports inside the file resolve +// against this value. +// +// CPython: Lib/importlib/_bootstrap.py:1350 _calc___package__ +func mainPackageName(path, modName string) string { + if modName == "__main__" { + return "" + } + if filepath.Base(path) == "__init__.py" { + return modName + } + if dot := strings.LastIndex(modName, "."); dot >= 0 { + return modName[:dot] + } + return "" +} + // runInteractive is the gopy bare-invocation entry: print the banner // and hand control to pythonrun.InteractiveLoop. Mirrors // pymain_run_stdin. @@ -509,6 +863,10 @@ func runInteractive(stdout, stderr *os.File) int { if rc := bootstrapEncodings(ts, mainGlobals, stderr); rc != 0 { return rc } + if rc := bootstrapSite(ts, mainGlobals, stderr); rc != 0 { + return rc + } + prependSysPath0() rc := pythonrun.InteractiveLoop(ts, os.Stdin, stdout, stderr, mainGlobals) pythonrun.FlushStdFiles() if rc != 0 { @@ -556,7 +914,25 @@ func bootstrapBuiltins(stdout, stderr *os.File) (*objects.Dict, error) { func newMainGlobals(builtinsDict *objects.Dict, name string) *objects.Dict { mainDict := objects.NewDict() _ = mainDict.SetItem(objects.NewStr("__name__"), objects.NewStr(name)) - _ = mainDict.SetItem(objects.NewStr("__builtins__"), builtinsDict) + // CPython binds __main__.__builtins__ to the builtins *module* object; + // every other module receives the builtins dict instead. The frame + // builder unwraps the module back to its dict for LOAD_GLOBAL, so the + // only observable difference is that `del __builtins__.__import__` + // reaches a module attribute, after which the import machinery raises + // ImportError (test_import.test_delete_builtins_import). + // + // CPython: Python/pylifecycle.c init_interp_main (binds __main__.__builtins__) + var builtinsBinding objects.Object = builtinsDict + if bm, ok := imp.GetModule("builtins"); ok { + builtinsBinding = bm + } + _ = mainDict.SetItem(objects.NewStr("__builtins__"), builtinsBinding) + // CPython always binds __main__.__spec__: None for `-c`/script runs, + // a real ModuleSpec under `-m`. runFile overwrites this with a + // file-location spec for vendored "test." runs. + // + // CPython: Python/pylifecycle.c init_interp_main (sets __main__.__spec__) + _ = mainDict.SetItem(objects.NewStr("__spec__"), objects.None()) mod := objects.NewModuleWithDict(name, mainDict) if _, ok := imp.GetModule(name); !ok { imp.AddModule(name, mod) diff --git a/cmd/gopy/sigint_unix.go b/cmd/gopy/sigint_unix.go new file mode 100644 index 000000000..b71976c69 --- /dev/null +++ b/cmd/gopy/sigint_unix.go @@ -0,0 +1,24 @@ +//go:build !windows + +package main + +import ( + "os/signal" + "syscall" +) + +// exitSigint resets SIGINT to its default disposition and delivers it +// to this process, so an unhandled KeyboardInterrupt terminates the +// interpreter by signal (exit status -SIGINT / 128+SIGINT). +// +// CPython: Modules/main.c:730 exit_sigint +func exitSigint() int { + signal.Reset(syscall.SIGINT) + if err := syscall.Kill(syscall.Getpid(), syscall.SIGINT); err != nil { + // Impossible in normal environments; fall back to the code + // CPython returns when the signal could not be delivered. + return int(syscall.SIGINT) + 128 + } + // Give the signal a moment to be delivered before falling through. + select {} +} diff --git a/cmd/gopy/sigint_windows.go b/cmd/gopy/sigint_windows.go new file mode 100644 index 000000000..a2f7209b9 --- /dev/null +++ b/cmd/gopy/sigint_windows.go @@ -0,0 +1,13 @@ +//go:build windows + +package main + +// exitSigint mirrors the Windows branch of CPython's exit_sigint: there is +// no POSIX kill(getpid, SIGINT), so the interpreter exits with SIGINT+128 +// (the value CPython returns when raise(SIGINT) does not abort the process). +// +// CPython: Modules/main.c:730 exit_sigint +func exitSigint() int { + // SIGINT is 2 on Windows; 2 + 128 = 130. + return 2 + 128 +} diff --git a/errors/api.go b/errors/api.go index 86994b350..aa932215c 100644 --- a/errors/api.go +++ b/errors/api.go @@ -24,6 +24,68 @@ func SetString(ts *state.Thread, t *objects.Type, msg string) { Set(ts, t, args) } +// MakeModuleNotFound builds (without raising) a ModuleNotFoundError +// instance carrying the `name` member, so a caller that returns it as a +// Go error preserves the attribute through synthesizeException. +// +// CPython: Python/import.c:1759 import_name (ModuleNotFoundError, name=) +func MakeModuleNotFound(name string) *Exception { + msg := "No module named '" + name + "'" + exc := New(PyExc_ModuleNotFoundError, objects.NewTuple([]objects.Object{objects.NewStr(msg)})) + _ = exc.EnsureAttrDict().SetItem(objects.NewStr("name"), objects.NewStr(name)) + return exc +} + +// SetModuleNotFound raises ModuleNotFoundError("No module named %r", +// name=name), stamping the `name` member the import machinery promises +// on every miss so callers like runpy can read exc.name. +// +// CPython: Python/import.c:1759 import_name (ModuleNotFoundError, name=) +func SetModuleNotFound(ts *state.Thread, name string) { + msg := "No module named '" + name + "'" + exc := New(PyExc_ModuleNotFoundError, objects.NewTuple([]objects.Object{objects.NewStr(msg)})) + _ = exc.EnsureAttrDict().SetItem(objects.NewStr("name"), objects.NewStr(name)) + Raise(ts, exc) +} + +// SetModuleNotFoundHalted raises ModuleNotFoundError(f'import of {name} +// halted; None in sys.modules', name=name), the exact exception CPython's +// _bootstrap._find_and_load produces when sys.modules[name] is None. The +// `name` member is what importlib/abc.py reads to recognize a blocked +// _frozen_importlib import. +// +// CPython: Lib/importlib/_bootstrap.py:1387 _find_and_load (None sentinel) +func SetModuleNotFoundHalted(ts *state.Thread, name string) { + msg := "import of " + name + " halted; None in sys.modules" + exc := New(PyExc_ModuleNotFoundError, objects.NewTuple([]objects.Object{objects.NewStr(msg)})) + _ = exc.EnsureAttrDict().SetItem(objects.NewStr("name"), objects.NewStr(name)) + Raise(ts, exc) +} + +// SetImportErrorWithNameFrom raises ImportError(msg, name=modName, +// path=origin, name_from=nameFrom), stamping the three members the +// IMPORT_FROM diagnostic promises so a caught exception exposes +// exc.name / exc.path / exc.name_from. Empty modName/origin leave the +// corresponding member unset (read back as None), matching the NULL +// arguments _PyErr_SetImportErrorWithNameFrom forwards to new_importerror. +// +// CPython: Python/errors.c:1152 _PyErr_SetImportErrorWithNameFrom +func SetImportErrorWithNameFrom(ts *state.Thread, msg, modName, origin, nameFrom string) { + exc := New(PyExc_ImportError, objects.NewTuple([]objects.Object{objects.NewStr(msg)})) + d := exc.EnsureAttrDict() + _ = d.SetItem(objects.NewStr("msg"), objects.NewStr(msg)) + if modName != "" { + _ = d.SetItem(objects.NewStr("name"), objects.NewStr(modName)) + } + if origin != "" { + _ = d.SetItem(objects.NewStr("path"), objects.NewStr(origin)) + } + if nameFrom != "" { + _ = d.SetItem(objects.NewStr("name_from"), objects.NewStr(nameFrom)) + } + Raise(ts, exc) +} + // Format raises an exception built from a printf-style template. // Returns nil so callers can `return errors.Format(ts, ...)`. // diff --git a/errors/builtins.go b/errors/builtins.go index 7332c428a..094203449 100644 --- a/errors/builtins.go +++ b/errors/builtins.go @@ -117,6 +117,11 @@ func init() { // CPython: Objects/exceptions.c:684 StopIteration_init objects.SetTypeDescr(PyExc_StopIteration, "value", objects.NewGetSetDescr("value", stopIterValueGet, stopIterValueSet)) + // SystemExit exposes a dedicated `code` member seeded by SystemExit_init. + // + // CPython: Objects/exceptions.c:880 SystemExit_members + objects.SetTypeDescr(PyExc_SystemExit, "code", + objects.NewGetSetDescr("code", sysExitCodeGet, sysExitCodeSet)) // AsyncGenStopIterationHook lets objects/async_gen.go raise a typed // StopIteration(value) without importing this package. Mirrors // _PyGen_SetStopIterationValue in the async_gen_unwrap_value path. @@ -280,6 +285,50 @@ func excTpNew(cls *objects.Type, args []objects.Object, kwargs map[string]object return exc, nil } +// sysExitCodeGet returns SystemExit's `code`. An explicit assignment is +// preserved in the dedicated slot; otherwise the value is derived from the +// constructor args exactly as SystemExit_init seeds it (args[0] for one +// arg, the args tuple for several, None for none). +// +// CPython: Objects/exceptions.c:866 SystemExit_init +// CPython: Objects/exceptions.c:880 SystemExit_members (code) +func sysExitCodeGet(owner objects.Object) (objects.Object, error) { + e, ok := owner.(*Exception) + if !ok { + return objects.None(), nil + } + if e.SysExitCode != nil { + return e.SysExitCode, nil + } + if e.Args == nil { + return objects.None(), nil + } + switch e.Args.Len() { + case 0: + return objects.None(), nil + case 1: + return e.Args.Item(0), nil + default: + return e.Args, nil + } +} + +// sysExitCodeSet writes only the dedicated SysExitCode slot, leaving +// args untouched. Mirrors the _Py_T_OBJECT member on PySystemExitObject. +// +// CPython: Objects/exceptions.c:880 SystemExit_members (code) +func sysExitCodeSet(owner objects.Object, value objects.Object) error { + e, ok := owner.(*Exception) + if !ok { + return stderrors.New("TypeError: descriptor 'code' requires SystemExit") + } + if value == nil { + value = objects.None() + } + e.SysExitCode = value + return nil +} + // excStr ports BaseException_str: empty for no args, str(args[0]) for // a single arg, repr(args) otherwise. // diff --git a/errors/errnocodes_other.go b/errors/errnocodes_other.go new file mode 100644 index 000000000..1ed571010 --- /dev/null +++ b/errors/errnocodes_other.go @@ -0,0 +1,33 @@ +//go:build !windows + +package errors + +import "syscall" + +// errno codes for the errnomap promotion table. On non-Windows +// platforms Go's syscall package carries the real POSIX values, which +// vary across systems (ETIMEDOUT is 110 on Linux, 60 on macOS), so we +// take them from syscall rather than hard-coding. +// +// CPython: Objects/exceptions.c:4470 _PyExc_InitState ADD_ERRNO panel +var ( + errEAGAIN = int(syscall.EAGAIN) + errEALREADY = int(syscall.EALREADY) + errEINPROGRESS = int(syscall.EINPROGRESS) + errEWOULDBLOCK = int(syscall.EWOULDBLOCK) + errEPIPE = int(syscall.EPIPE) + errESHUTDOWN = int(syscall.ESHUTDOWN) + errECHILD = int(syscall.ECHILD) + errECONNABORTED = int(syscall.ECONNABORTED) + errECONNREFUSED = int(syscall.ECONNREFUSED) + errECONNRESET = int(syscall.ECONNRESET) + errEEXIST = int(syscall.EEXIST) + errENOENT = int(syscall.ENOENT) + errEISDIR = int(syscall.EISDIR) + errENOTDIR = int(syscall.ENOTDIR) + errEINTR = int(syscall.EINTR) + errEACCES = int(syscall.EACCES) + errEPERM = int(syscall.EPERM) + errESRCH = int(syscall.ESRCH) + errETIMEDOUT = int(syscall.ETIMEDOUT) +) diff --git a/errors/errnocodes_windows.go b/errors/errnocodes_windows.go new file mode 100644 index 000000000..5d5b5a08f --- /dev/null +++ b/errors/errnocodes_windows.go @@ -0,0 +1,35 @@ +//go:build windows + +package errors + +// errno codes for the errnomap promotion table on Windows. Go's +// syscall package fabricates E* constants as 1<<29+iota there, so we +// hard-code the Universal CRT values CPython actually uses +// (EEXIST == 17). These line up with the winerror->errno translation +// the VM applies before promotion and with the errno module's table. +// ESHUTDOWN has no ucrt definition, so CPython omits it on Windows; the +// negative sentinel makes errnomap skip it. +// +// CPython: Objects/exceptions.c:4470 _PyExc_InitState ADD_ERRNO panel +// (values from ucrt ) +const ( + errEAGAIN = 11 + errEALREADY = 103 + errEINPROGRESS = 112 + errEWOULDBLOCK = 140 + errEPIPE = 32 + errESHUTDOWN = -1 + errECHILD = 10 + errECONNABORTED = 106 + errECONNREFUSED = 107 + errECONNRESET = 108 + errEEXIST = 17 + errENOENT = 2 + errEISDIR = 21 + errENOTDIR = 20 + errEINTR = 4 + errEACCES = 13 + errEPERM = 1 + errESRCH = 3 + errETIMEDOUT = 138 +) diff --git a/errors/exc_import_init.go b/errors/exc_import_init.go index 2e22359fa..c3e59026c 100644 --- a/errors/exc_import_init.go +++ b/errors/exc_import_init.go @@ -21,6 +21,58 @@ func init() { objects.SetTypeDescr(PyExc_ImportError, "__init__", objects.NewMethodDescr(PyExc_ImportError, "__init__", importErrorInit). WithKwParams("ImportError", importErrorKwlist, len(importErrorKwlist))) + + // msg / name / path / name_from are Py_T_OBJECT members on + // PyImportErrorObject: reading a member that was never set yields + // None rather than raising AttributeError, and writing stores the + // value. runpy/importlib both read e.name on a caught ImportError, + // so the attribute must always exist. msg is set from the single + // positional arg by ImportError_init rather than via a keyword. + // + // CPython: Objects/exceptions.c:1932 ImportError_members + for _, name := range append([]string{"msg"}, importErrorKwlist...) { + field := name + objects.SetTypeDescr(PyExc_ImportError, field, objects.NewGetSetDescr(field, + func(o objects.Object) (objects.Object, error) { return importErrorMember(o, field) }, + func(o, v objects.Object) error { return importErrorMemberSet(o, field, v) })) + } +} + +// importErrorMember reads an ImportError member from the instance attr +// dict, returning None when unset to mirror Py_T_OBJECT's NULL->None. +// +// CPython: Include/descrobject.h Py_T_OBJECT (member_get NULL -> None) +func importErrorMember(o objects.Object, field string) (objects.Object, error) { + e, ok := o.(*Exception) + if !ok { + return objects.None(), nil + } + d := e.AttrDict() + if d == nil { + return objects.None(), nil + } + // A missing member reads back as None (Py_T_OBJECT NULL->None), so a + // lookup miss is not an error here; discard it deliberately. + v, _ := d.GetItem(objects.NewStr(field)) + if v == nil { + return objects.None(), nil + } + return v, nil +} + +// importErrorMemberSet writes an ImportError member through the +// instance attr dict, allocating it lazily. +// +// CPython: Objects/exceptions.c:1893 ImportError_members (member_set) +func importErrorMemberSet(o objects.Object, field string, v objects.Object) error { + e, ok := o.(*Exception) + if !ok { + return nil + } + if v == nil { + v = objects.None() + } + return e.EnsureAttrDict().SetItem(objects.NewStr(field), v) } // importErrorInit ports ImportError_init: it runs BaseException_init over @@ -46,6 +98,14 @@ func importErrorInit(args []objects.Object, kwargs map[string]objects.Object) (o return objects.None(), nil } + // msg is set from the lone positional argument: PyTuple_GET_SIZE(args) + // counts the exception args tuple, which here is args[1:]. + // + // CPython: Objects/exceptions.c:1836 ImportError_init (self->msg) + if len(args) == 2 { + _ = e.EnsureAttrDict().SetItem(objects.NewStr("msg"), args[1]) + } + if len(kwargs) > 0 { // PyArg_ParseTupleAndKeywords("|$OOO") with an empty positional // tuple: the surplus check counts every keyword against the three diff --git a/errors/exc_os.go b/errors/exc_os.go index af61fe3a3..8630c1d64 100644 --- a/errors/exc_os.go +++ b/errors/exc_os.go @@ -1,8 +1,6 @@ package errors import ( - "syscall" - "github.com/tamnd/gopy/objects" ) @@ -43,29 +41,35 @@ var errnomap = map[int]*objects.Type{} func init() { add := func(code int, t *objects.Type) { + // Codes a platform does not define arrive as a negative + // sentinel from the errnocodes table; skip them so they never + // collide with a real errno (errnos are always positive). + if code < 0 { + return + } if _, dup := errnomap[code]; !dup { errnomap[code] = t } } - add(int(syscall.EAGAIN), PyExc_BlockingIOError) - add(int(syscall.EALREADY), PyExc_BlockingIOError) - add(int(syscall.EINPROGRESS), PyExc_BlockingIOError) - add(int(syscall.EWOULDBLOCK), PyExc_BlockingIOError) - add(int(syscall.EPIPE), PyExc_BrokenPipeError) - add(int(syscall.ESHUTDOWN), PyExc_BrokenPipeError) - add(int(syscall.ECHILD), PyExc_ChildProcessError) - add(int(syscall.ECONNABORTED), PyExc_ConnectionAbortedError) - add(int(syscall.ECONNREFUSED), PyExc_ConnectionRefusedError) - add(int(syscall.ECONNRESET), PyExc_ConnectionResetError) - add(int(syscall.EEXIST), PyExc_FileExistsError) - add(int(syscall.ENOENT), PyExc_FileNotFoundError) - add(int(syscall.EISDIR), PyExc_IsADirectoryError) - add(int(syscall.ENOTDIR), PyExc_NotADirectoryError) - add(int(syscall.EINTR), PyExc_InterruptedError) - add(int(syscall.EACCES), PyExc_PermissionError) - add(int(syscall.EPERM), PyExc_PermissionError) - add(int(syscall.ESRCH), PyExc_ProcessLookupError) - add(int(syscall.ETIMEDOUT), PyExc_TimeoutError) + add(errEAGAIN, PyExc_BlockingIOError) + add(errEALREADY, PyExc_BlockingIOError) + add(errEINPROGRESS, PyExc_BlockingIOError) + add(errEWOULDBLOCK, PyExc_BlockingIOError) + add(errEPIPE, PyExc_BrokenPipeError) + add(errESHUTDOWN, PyExc_BrokenPipeError) + add(errECHILD, PyExc_ChildProcessError) + add(errECONNABORTED, PyExc_ConnectionAbortedError) + add(errECONNREFUSED, PyExc_ConnectionRefusedError) + add(errECONNRESET, PyExc_ConnectionResetError) + add(errEEXIST, PyExc_FileExistsError) + add(errENOENT, PyExc_FileNotFoundError) + add(errEISDIR, PyExc_IsADirectoryError) + add(errENOTDIR, PyExc_NotADirectoryError) + add(errEINTR, PyExc_InterruptedError) + add(errEACCES, PyExc_PermissionError) + add(errEPERM, PyExc_PermissionError) + add(errESRCH, PyExc_ProcessLookupError) + add(errETIMEDOUT, PyExc_TimeoutError) } // ErrnoSubclass returns the OSError subclass that CPython would pick diff --git a/errors/exc_os_internal_test.go b/errors/exc_os_internal_test.go new file mode 100644 index 000000000..dd9763f12 --- /dev/null +++ b/errors/exc_os_internal_test.go @@ -0,0 +1,40 @@ +package errors + +import ( + "testing" + + "github.com/tamnd/gopy/objects" +) + +// TestErrnoSubclass drives ErrnoSubclass off the same platform errno +// codes errnomap is built from (errEEXIST and friends), so the mapping +// is exercised with the values that actually reach it at runtime: real +// POSIX numbers on Unix, ucrt numbers on Windows. +func TestErrnoSubclass(t *testing.T) { + cases := []struct { + errno int + want *objects.Type + }{ + {errENOENT, PyExc_FileNotFoundError}, + {errEEXIST, PyExc_FileExistsError}, + {errEACCES, PyExc_PermissionError}, + {errEPERM, PyExc_PermissionError}, + {errEINTR, PyExc_InterruptedError}, + {errEPIPE, PyExc_BrokenPipeError}, + {errECHILD, PyExc_ChildProcessError}, + {errEISDIR, PyExc_IsADirectoryError}, + {errENOTDIR, PyExc_NotADirectoryError}, + {errECONNREFUSED, PyExc_ConnectionRefusedError}, + {errECONNRESET, PyExc_ConnectionResetError}, + {errECONNABORTED, PyExc_ConnectionAbortedError}, + {errESRCH, PyExc_ProcessLookupError}, + {errETIMEDOUT, PyExc_TimeoutError}, + {0, PyExc_OSError}, + {99999, PyExc_OSError}, + } + for _, c := range cases { + if got := ErrnoSubclass(c.errno); got != c.want { + t.Errorf("ErrnoSubclass(%d) = %v, want %v", c.errno, got, c.want) + } + } +} diff --git a/errors/exc_os_test.go b/errors/exc_os_test.go index b2e79c9df..6fb6a9ba4 100644 --- a/errors/exc_os_test.go +++ b/errors/exc_os_test.go @@ -1,7 +1,6 @@ package errors_test import ( - "syscall" "testing" "github.com/tamnd/gopy/errors" @@ -39,32 +38,3 @@ func TestOSErrorHierarchy(t *testing.T) { t.Fatal("BrokenPipeError must inherit from ConnectionError") } } - -func TestErrnoSubclass(t *testing.T) { - cases := []struct { - errno int - want *objects.Type - }{ - {int(syscall.ENOENT), errors.PyExc_FileNotFoundError}, - {int(syscall.EEXIST), errors.PyExc_FileExistsError}, - {int(syscall.EACCES), errors.PyExc_PermissionError}, - {int(syscall.EPERM), errors.PyExc_PermissionError}, - {int(syscall.EINTR), errors.PyExc_InterruptedError}, - {int(syscall.EPIPE), errors.PyExc_BrokenPipeError}, - {int(syscall.ECHILD), errors.PyExc_ChildProcessError}, - {int(syscall.EISDIR), errors.PyExc_IsADirectoryError}, - {int(syscall.ENOTDIR), errors.PyExc_NotADirectoryError}, - {int(syscall.ECONNREFUSED), errors.PyExc_ConnectionRefusedError}, - {int(syscall.ECONNRESET), errors.PyExc_ConnectionResetError}, - {int(syscall.ECONNABORTED), errors.PyExc_ConnectionAbortedError}, - {int(syscall.ESRCH), errors.PyExc_ProcessLookupError}, - {int(syscall.ETIMEDOUT), errors.PyExc_TimeoutError}, - {0, errors.PyExc_OSError}, - {99999, errors.PyExc_OSError}, - } - for _, c := range cases { - if got := errors.ErrnoSubclass(c.errno); got != c.want { - t.Errorf("ErrnoSubclass(%d) = %v, want %v", c.errno, got, c.want) - } - } -} diff --git a/errors/exception.go b/errors/exception.go index 32d58b111..21ec989fb 100644 --- a/errors/exception.go +++ b/errors/exception.go @@ -62,6 +62,16 @@ type Exception struct { // CPython: Objects/exceptions.c:867 PyBaseExceptionGroupObject EG *ExceptionGroupState + // SysExitCode stores SystemExit's separate `code` member per + // PySystemExitObject. SystemExit_init seeds it from the positional + // args (args[0] for one arg, the args tuple for several, None for + // none); assigning exc.code rewrites only this slot, leaving args + // untouched. Meaningful only when ExcType is SystemExit or a subclass. + // + // CPython: Objects/exceptions.c:854 PySystemExitObject + // CPython: Objects/exceptions.c:866 SystemExit_init + SysExitCode objects.Object + // NotesObj holds a __notes__ value that is not a plain list. CPython // stores __notes__ as an ordinary instance attribute that may hold any // object; add_note only requires a list when it appends. The common diff --git a/errors/systemexit.go b/errors/systemexit.go index 9915b7b0c..0448dc17b 100644 --- a/errors/systemexit.go +++ b/errors/systemexit.go @@ -8,6 +8,23 @@ import ( "github.com/tamnd/gopy/state" ) +// unhandledKeyboardInterrupt records that a KeyboardInterrupt reached +// the top-level print path. Modules/main.c reads the matching runtime +// flag after Py_RunMain and re-raises SIGINT so the process dies by +// signal (exit status -SIGINT) rather than a plain non-zero code. +// +// CPython: Python/pythonrun.c:625 unhandled_keyboard_interrupt store +var unhandledKeyboardInterrupt bool + +// UnhandledKeyboardInterrupt reports whether a KeyboardInterrupt was +// surfaced to the top-level handler since the last reset. The CLI +// entry point consults it to decide whether to exit via SIGINT. +// +// CPython: Modules/main.c:786 _PyRuntime.signals.unhandled_keyboard_interrupt +func UnhandledKeyboardInterrupt() bool { + return unhandledKeyboardInterrupt +} + // HandleSystemExit inspects the current exception. If it is a // SystemExit, the exit code is read off the args and the exception // is cleared; the caller propagates the code. KeyboardInterrupt is @@ -26,6 +43,7 @@ func HandleSystemExit(ts *state.Thread) (code int, handled bool) { return 0, false } if Match(exc, PyExc_KeyboardInterrupt) { + unhandledKeyboardInterrupt = true return 0, false } if !Match(exc, PyExc_SystemExit) { diff --git a/imp/extension.go b/imp/extension.go new file mode 100644 index 000000000..b1c2f4bc0 --- /dev/null +++ b/imp/extension.go @@ -0,0 +1,696 @@ +package imp + +import ( + "fmt" + "os" + "path/filepath" + "runtime" + "sort" + "sync" + + "github.com/tamnd/gopy/objects" +) + +// This file ports the slice of CPython's extension-module import machinery +// the standard-library test suite drives through _testmultiphase / +// _testsinglephase and the SubinterpImportTests: the PEP 489 "multiple +// interpreters" / per-interpreter-GIL compatibility check and the +// subinterpreter interpreter-state that check consults. +// +// gopy cannot dlopen a compiled C extension, so the extensions are ported +// as Go builtins and registered here keyed by module name, each carrying +// the PEP 489 slot metadata its PyModuleDef declares. _imp.create_dynamic +// dispatches to this registry, applying CheckExtSubinterpCompat exactly the +// way Objects/moduleobject.c:359 PyModule_FromDefAndSpec2 and +// Python/import.c:1555 _PyImport_CheckSubinterpIncompatibleExtensionAllowed +// do before the module body runs. + +// Multiple-interpreters support levels, the Py_mod_multiple_interpreters +// slot values an extension's PyModuleDef may carry. +// +// CPython: Include/moduleobject.h:90 Py_MOD_MULTIPLE_INTERPRETERS_* +const ( + // MultiInterpNotSupported is Py_MOD_MULTIPLE_INTERPRETERS_NOT_SUPPORTED. + MultiInterpNotSupported = iota + // MultiInterpSupported is Py_MOD_MULTIPLE_INTERPRETERS_SUPPORTED, the + // default when a multi-phase module declares no slot. + MultiInterpSupported + // MultiInterpPerInterpreterGIL is Py_MOD_PER_INTERPRETER_GIL_SUPPORTED. + MultiInterpPerInterpreterGIL +) + +// ExtModuleDef is gopy's analog of a C extension's PyModuleDef plus the +// PEP 489 slot table the loader reads. Init builds the fully populated +// module body (gopy has no separate create / exec phase for builtins, so +// the create_dynamic step runs Init and exec_dynamic is a no-op). +// +// CPython: Include/moduleobject.h:74 PyModuleDef_Slot +type ExtModuleDef struct { + Name string + // SinglePhase marks a legacy single-phase-init module. Such modules + // never support loading under multiple interpreters, so the compat + // check rejects them in any non-main interpreter that enforces it. + SinglePhase bool + // HasMultiInterpSlot records whether the def declared a + // Py_mod_multiple_interpreters slot. When false a multi-phase module + // defaults to MultiInterpSupported. + HasMultiInterpSlot bool + // MultiInterp is the Py_mod_multiple_interpreters slot value. + MultiInterp int + // MSize is the PyModuleDef.m_size of a single-phase module: -1 for a + // "basic" module with no per-module state that does not support repeated + // initialization (its __dict__ is cached in m_copy and copied on reload), + // 0 for a "reinit" module, and >0 for a module that carries its own state. + // Only -1 modules are reloaded from the cached dict; the others re-run + // their init function on every load. + // + // CPython: Python/import.c:920 single-phase init module kinds + MSize int + // DefName is the def's m_name, the module's __name__. It defaults to Name + // but differs for an "indirect" variant whose init function builds a + // module under another def's name (PyInit__testsinglephase_basic_wrapper). + DefName string + // ShareDefWith names a registered module whose def (and thus its + // modules_by_index slot and cached m_copy) this entry reuses, the gopy + // analog of one init function calling another's. + // + // CPython: Python/import.c:960 "two or more modules share a PyModuleDef" + ShareDefWith string + // CheckCacheFirst marks the *_check_cache_first variants, whose init + // returns PyState_FindModule(def) before creating a fresh module and which + // are never recorded in the extensions cache. + // + // CPython: Modules/_testsinglephase.c:690 _check_cache_first modules + CheckCacheFirst bool + // Init builds the module. A non-nil error models a PyInit function + // that raised before returning its def. + Init func() (*objects.Module, error) +} + +var ( + extMu sync.Mutex + extRegistry = map[string]*ExtModuleDef{} +) + +// RegisterExtModule records an extension module by name. Test-extension +// packages call it from their package init, the gopy stand-in for the +// inittab entry a compiled extension would expose. +func RegisterExtModule(def *ExtModuleDef) { + extMu.Lock() + extRegistry[def.Name] = def + extMu.Unlock() +} + +// FindExtModule returns the registered extension def for name, or nil. +func FindExtModule(name string) *ExtModuleDef { + extMu.Lock() + def := extRegistry[name] + extMu.Unlock() + return def +} + +// ExtModuleNames returns the registered extension-module names, sorted. +func ExtModuleNames() []string { + extMu.Lock() + names := make([]string, 0, len(extRegistry)) + for n := range extRegistry { + names = append(names, n) + } + extMu.Unlock() + sort.Strings(names) + return names +} + +// interpState models the slice of PyInterpreterState the extension compat +// check reads: whether this is the main interpreter, whether it runs with +// its own GIL, and the check_multi_interp_extensions config flag (plus the +// _imp._override_multi_interp_extensions_check override). +// +// CPython: Include/internal/pycore_interp.h PyInterpreterState (ceval.own_gil, +// feature flags) +type interpState struct { + isMain bool + ownGil bool + checkMulti bool + // override is the _imp._override_multi_interp_extensions_check value: + // <0 force-disable, 0 use config, >0 force-enable. + override int + // id is the interpreter id; the main interpreter is 0. It tags the + // extensions-cache entries a single-phase module records so a reload only + // reuses a dict the same interpreter owns. + id int64 + // modByIndex is the interpreter's modules_by_index cache: m_index -> + // module, the table PyState_FindModule / look_up_self consults. + // + // CPython: Include/internal/pycore_interp.h modules_by_index + modByIndex map[int]*objects.Module + // hiddenExt holds the registered extension-module sys.modules entries + // this subinterpreter shadowed on entry. CPython gives every interpreter + // its own sys.modules, so a subinterpreter re-imports an extension through + // import_find_extension (firing the compat gate) even when the main + // interpreter already cached it. gopy shares one sys.modules dict, so a + // push removes those entries (forcing the re-import) and the matching pop + // restores them. nil on the main interpreter. + // + // CPython: Include/internal/pycore_interp.h imports.modules + hiddenExt map[string]objects.Object +} + +var ( + interpMu sync.Mutex + interpStack = []*interpState{{isMain: true, id: 0, modByIndex: map[int]*objects.Module{}}} + nextInterpID int64 +) + +// currentInterp returns the interpreter state on top of the stack. gopy +// runs subinterpreter scripts synchronously on the calling goroutine, so a +// single push/pop stack tracks the active interpreter for the duration of a +// run_in_subinterp_with_config / _interpreters.run_string call. +func currentInterp() *interpState { + interpMu.Lock() + defer interpMu.Unlock() + return interpStack[len(interpStack)-1] +} + +// PushSubinterp pushes a fresh non-main interpreter state for the duration +// of a subinterpreter run. ownGil reflects the config gil ('own' -> true, +// 'shared'/'default' -> false); checkMulti is config.check_multi_interp_extensions. +// +// CPython: Python/pylifecycle.c:586 init_interp_create_gil (own_gil) and +// Python/interpconfig.c:262 check_multi_interp_extensions feature flag. +func PushSubinterp(ownGil, checkMulti bool) { + s := &interpState{ + ownGil: ownGil, + checkMulti: checkMulti, + modByIndex: map[int]*objects.Module{}, + hiddenExt: hideExtModules(), + } + interpMu.Lock() + nextInterpID++ + s.id = nextInterpID + interpStack = append(interpStack, s) + interpMu.Unlock() +} + +// PopSubinterp pops the interpreter state pushed by PushSubinterp. The main +// interpreter at the bottom of the stack is never popped. +func PopSubinterp() { + interpMu.Lock() + var popped *interpState + if len(interpStack) > 1 { + popped = interpStack[len(interpStack)-1] + interpStack = interpStack[:len(interpStack)-1] + } + interpMu.Unlock() + if popped != nil { + restoreExtModules(popped.hiddenExt) + } +} + +// hideExtModules removes every registered extension module's sys.modules +// entry, returning the removed entries so PopSubinterp can restore them. A +// fresh subinterpreter has an empty sys.modules, so its first `import name` +// of an extension misses and re-runs the import (firing the PEP 489 compat +// gate through import_find_extension) instead of returning the main +// interpreter's cached module. gopy shares the one sys.modules dict, so the +// removal models the per-interpreter cache for the duration of the run. +// +// CPython: Python/import.c:1964 import_find_extension +func hideExtModules() map[string]objects.Object { + hidden := map[string]objects.Object{} + for _, name := range ExtModuleNames() { + if v, ok := GetModuleRaw(name); ok { + hidden[name] = v + RemoveModule(name) + } + } + return hidden +} + +// restoreExtModules undoes hideExtModules when a subinterpreter run ends: it +// drops any extension entry the subinterpreter left behind and reinstates the +// main interpreter's originals, so the shared sys.modules looks untouched. +func restoreExtModules(hidden map[string]objects.Object) { + for _, name := range ExtModuleNames() { + RemoveModule(name) + } + for name, v := range hidden { + sysModulesMu.Lock() + _ = sysModules.SetItem(objects.NewStr(name), v) + sysModulesMu.Unlock() + } +} + +// SetMultiInterpOverride sets the current interpreter's +// check_multi_interp_extensions override and returns the previous value. +// +// CPython: Python/import.c:5052 _imp__override_multi_interp_extensions_check_impl +func SetMultiInterpOverride(override int) int { + interpMu.Lock() + defer interpMu.Unlock() + s := interpStack[len(interpStack)-1] + old := s.override + s.override = override + return old +} + +// checkMultiInterpExtensions reports whether the current interpreter +// enforces the subinterpreter-incompatible-extension check. +// +// CPython: Python/import.c:1538 check_multi_interp_extensions +func checkMultiInterpExtensions(s *interpState) bool { + if s.override < 0 { + return false + } + if s.override > 0 { + return true + } + return s.checkMulti +} + +// CheckExtSubinterpCompat applies the PEP 489 multiple-interpreters / +// per-interpreter-GIL compatibility check to def against the active +// interpreter. It returns an ImportError-tagged error when the module may +// not be loaded in the current subinterpreter, and nil otherwise. +// +// CPython: Objects/moduleobject.c:359 PyModule_FromDefAndSpec2 (slot gate) +// CPython: Python/import.c:1555 _PyImport_CheckSubinterpIncompatibleExtensionAllowed +func CheckExtSubinterpCompat(def *ExtModuleDef) error { + s := currentInterp() + if s.isMain { + return nil + } + // Single-phase-init modules never support multiple interpreters; the + // fresh-import and cached-reload paths both call the check directly. + // + // CPython: Python/import.c:1983 import_find_extension / 2198 import_run_extension + if def.SinglePhase { + if checkMultiInterpExtensions(s) { + return subinterpIncompatible(def.Name) + } + return nil + } + + multi := MultiInterpSupported + if def.HasMultiInterpSlot { + multi = def.MultiInterp + } + switch { + case multi == MultiInterpNotSupported: + if checkMultiInterpExtensions(s) { + return subinterpIncompatible(def.Name) + } + case multi != MultiInterpPerInterpreterGIL && s.ownGil: + // Supported-but-not-per-interpreter-GIL: only rejected when the + // subinterpreter runs with its own GIL. + if checkMultiInterpExtensions(s) { + return subinterpIncompatible(def.Name) + } + } + return nil +} + +// subinterpIncompatible builds the ImportError the compat check raises. The +// message matches CPython byte-for-byte so the SubinterpImportTests' +// equality assertions on str(exc) pass. +// +// CPython: Python/import.c:1560 PyErr_Format(PyExc_ImportError, ...) +func subinterpIncompatible(name string) error { + return fmt.Errorf("ImportError: module %s does not support loading in subinterpreters", name) +} + +// extDef is gopy's analog of a single PyModuleDef instance: the unit the +// extensions cache and the modules_by_index table key on. Two registry +// entries that share a def (an init function that calls another's) point at +// the same extDef, so they share an m_index (PyState_FindModule / look_up_self) +// and, for a basic module, the cached m_copy. +// +// CPython: Include/internal/pycore_moduleobject.h PyModuleDef_Base +type extDef struct { + name string // m_name; the module's __name__, may differ from the import name + mSize int // PyModuleDef.m_size + index int // m_index into modules_by_index; 0 until assigned on first load +} + +// extCacheKey keys the extensions cache by (filename, name), exactly as +// _PyRuntime.imports.extensions does. +// +// CPython: Python/import.c:1379 _extensions_cache_set +type extCacheKey struct{ path, name string } + +// extCacheValue is the cached single-phase module record: its def, a shallow +// copy of the module __dict__ after the first load (m_copy, basic modules +// only), and the interpreter that owns the copy. +// +// CPython: Python/import.c:1024 struct extensions_cache_value +type extCacheValue struct { + def *extDef + mCopy *objects.Dict + interpid int64 +} + +var ( + extCacheMu sync.Mutex + extCache = map[extCacheKey]*extCacheValue{} + extDefs = map[string]*extDef{} // def name -> shared def + nextModIdx = 0 + modToDef = map[*objects.Module]*extDef{} // built module -> its def +) + +// defFor returns the shared extDef for a registered single-phase module, +// creating it on first use. Entries that name a ShareDefWith reuse the +// referenced module's def so they land in the same modules_by_index slot. +func defFor(def *ExtModuleDef) *extDef { + name := def.DefName + if name == "" { + name = def.Name + } + if def.ShareDefWith != "" { + if shared := FindExtModule(def.ShareDefWith); shared != nil { + sn := shared.DefName + if sn == "" { + sn = shared.Name + } + name = sn + } + } + if ed, ok := extDefs[name]; ok { + return ed + } + ed := &extDef{name: name, mSize: def.MSize} + extDefs[name] = ed + return ed +} + +// CreateExtModule dispatches _imp.create_dynamic to the extension registry. +// It mirrors Python/import.c import_run_extension: a cached single-phase +// module is reloaded from the cache, otherwise the init runs fresh behind the +// PEP 489 compat gate and (for single-phase modules) its result is recorded +// in the extensions cache. path is spec.origin, the extensions-cache key +// alongside name. The caller attaches __file__ / __spec__ / __loader__. +// +// found is false when name is not a registered gopy extension, letting the +// caller fall back to the "gopy cannot dlopen" ImportError. +// +// CPython: Python/import.c:2001 import_run_extension +func CreateExtModule(name, path string) (mod *objects.Module, found bool, err error) { + def := FindExtModule(name) + if def == nil { + return nil, false, nil + } + if !def.SinglePhase { + // Multi-phase modules apply the compat gate as part of + // PyModule_FromDefAndSpec2 (the create step) before the body runs. + if cerr := CheckExtSubinterpCompat(def); cerr != nil { + return nil, true, cerr + } + mod, err = def.Init() + if err != nil { + return nil, true, err + } + return mod, true, nil + } + + ed := func() *extDef { + extCacheMu.Lock() + defer extCacheMu.Unlock() + return defFor(def) + }() + + // import_find_extension: a cached single-phase module is reloaded without + // re-running its init. The *_check_cache_first variants are never cached. + // + // CPython: Python/import.c:1964 import_find_extension + if !def.CheckCacheFirst { + extCacheMu.Lock() + cached, ok := extCache[extCacheKey{path, name}] + extCacheMu.Unlock() + if ok { + return reloadSinglephase(def, ed, cached, name) + } + } + return runSinglephase(def, ed, name, path) +} + +// runSinglephase ports the fresh-load path: it runs the init (on the "main +// interpreter", before the compat gate), applies the subinterpreter compat +// check, then records the module in modules_by_index and the extensions +// cache. A failing init inside a subinterpreter takes the gh-144601 path. +// +// CPython: Python/import.c:2078 import_run_extension +func runSinglephase(def *ExtModuleDef, ed *extDef, name, path string) (*objects.Module, bool, error) { + inSubinterp := !currentInterp().isMain + mod, initErr := def.Init() + if initErr != nil { + if inSubinterp { + // gh-144601: the exception object can't be transferred across + // interpreters. Print it as an unraisable exception, then raise + // a different exception for the calling interpreter. + // + // CPython: Python/import.c:2156 PyErr_FormatUnraisable + if objects.WriteUnraisableHook != nil { + objects.WriteUnraisableHook(nil, "Exception while importing from subinterpreter", initErr) + } + // CPython: Python/import.c:2168 PyErr_SetString(PyExc_ImportError, ...) + return nil, true, fmt.Errorf("ImportError: failed to import from subinterpreter due to exception") + } + return nil, true, initErr + } + if cerr := CheckExtSubinterpCompat(def); cerr != nil { + return nil, true, cerr + } + + s := currentInterp() + extCacheMu.Lock() + if ed.index == 0 { + nextModIdx++ + ed.index = nextModIdx + } + modToDef[mod] = ed + // update_global_state_for_extension caches the def under the main + // interpreter or for any m_size == -1 module; a basic module also stores + // a shallow copy of its dict for later reloads. The *_check_cache_first + // variants are deliberately not cached. + // + // CPython: Python/import.c:1761 update_global_state_for_extension + if !def.CheckCacheFirst && (s.isMain || ed.mSize == -1) { + var mCopy *objects.Dict + if ed.mSize == -1 { + mCopy = snapshotDict(mod.Dict()) + } + extCache[extCacheKey{path, name}] = &extCacheValue{def: ed, mCopy: mCopy, interpid: s.id} + } + extCacheMu.Unlock() + + setModuleByIndex(s, ed.index, mod) + return mod, true, nil +} + +// reloadSinglephase ports reload_singlephase_extension: a basic module +// (m_size == -1) is rebuilt by copying its cached dict into a fresh module +// without re-running init (so its global initialized_count is unchanged); a +// module with state re-runs its init function. +// +// CPython: Python/import.c:1869 reload_singlephase_extension +func reloadSinglephase(def *ExtModuleDef, ed *extDef, cached *extCacheValue, name string) (*objects.Module, bool, error) { + // It may have been imported before in an interpreter that allows legacy + // modules but is barred in the current one. + if cerr := CheckExtSubinterpCompat(def); cerr != nil { + return nil, true, cerr + } + s := currentInterp() + if ed.mSize == -1 { + // import_add_module: reuse the existing sys.modules entry so the + // reloaded module is the same object, then PyDict_Update its dict + // from the cached copy without re-running init. + // + // CPython: Python/import.c:1884 import_add_module / PyDict_Update + mod, ok := GetModule(name) + if !ok { + mod = objects.NewModule(ed.name) + AddModule(name, mod) + } + dst := mod.Dict() + for _, k := range cached.mCopy.Keys() { + v, gerr := cached.mCopy.GetItem(k) + if gerr != nil { + return nil, true, gerr + } + if serr := dst.SetItem(k, v); serr != nil { + return nil, true, serr + } + } + extCacheMu.Lock() + modToDef[mod] = ed + extCacheMu.Unlock() + setModuleByIndex(s, ed.index, mod) + return mod, true, nil + } + // m_size >= 0: re-run the init function. + mod, err := def.Init() + if err != nil { + return nil, true, err + } + extCacheMu.Lock() + modToDef[mod] = ed + extCacheMu.Unlock() + setModuleByIndex(s, ed.index, mod) + return mod, true, nil +} + +// snapshotDict returns a shallow copy of d, the gopy analog of the m_copy +// the import machinery saves after a basic module is first loaded. +// +// CPython: Python/import.c:1140 fixup_cached_def (def->m_base.m_copy) +func snapshotDict(d *objects.Dict) *objects.Dict { + out := objects.NewDict() + for _, k := range d.Keys() { + if v, err := d.GetItem(k); err == nil { + _ = out.SetItem(k, v) + } + } + return out +} + +// setModuleByIndex records mod in the interpreter's modules_by_index table, +// the slot PyState_FindModule / look_up_self reads. +// +// CPython: Python/import.c:651 _modules_by_index_set +func setModuleByIndex(s *interpState, index int, mod *objects.Module) { + if index <= 0 { + return + } + interpMu.Lock() + if s.modByIndex == nil { + s.modByIndex = map[int]*objects.Module{} + } + s.modByIndex[index] = mod + interpMu.Unlock() +} + +// ModuleSelf returns the module currently cached in modules_by_index for the +// def mod belongs to, the value PyState_FindModule(def) yields. It backs the +// test extension's look_up_self() method. +// +// CPython: Modules/_testsinglephase.c:374 common_look_up_self (PyState_FindModule) +func ModuleSelf(mod *objects.Module) objects.Object { + extCacheMu.Lock() + ed := modToDef[mod] + extCacheMu.Unlock() + if ed == nil || ed.index == 0 { + return objects.None() + } + s := currentInterp() + interpMu.Lock() + found := s.modByIndex[ed.index] + interpMu.Unlock() + if found == nil { + return objects.None() + } + return found +} + +// ClearExtension clears the internally cached data for a single-phase +// extension: its modules_by_index slot, the cached def's m_index/m_copy, and +// the extensions-cache entry. It backs _testinternalcapi.clear_extension. +// +// CPython: Python/import.c:903 _PyImport_ClearExtension +// +// (Python/import.c:2241 clear_singlephase_extension) +func ClearExtension(name, path string) error { + extCacheMu.Lock() + cached, ok := extCache[extCacheKey{path, name}] + if !ok { + extCacheMu.Unlock() + return nil + } + ed := cached.def + index := ed.index + ed.index = 0 + delete(extCache, extCacheKey{path, name}) + extCacheMu.Unlock() + + if index > 0 { + s := currentInterp() + interpMu.Lock() + delete(s.modByIndex, index) + interpMu.Unlock() + } + return nil +} + +// extensionSuffix is the file suffix gopy advertises for its +// (Go-implemented) extension modules. CPython derives it from the ABI tag +// and platform triple; gopy keeps the shape ("..so") so __file__ reads +// like a real extension path and the ExtensionFileLoader path hook matches. +// +// CPython: Lib/importlib/_bootstrap_external.py:_get_supported_file_loaders +func extensionSuffix() string { + return fmt.Sprintf(".gopy-314-%s-%s.so", runtime.GOOS, runtime.GOARCH) +} + +// ExtensionSuffixes returns the extension-module suffixes _imp.extension_suffixes +// reports. A single gopy suffix is enough for the test extensions. +// +// CPython: Python/import.c:4807 _imp_extension_suffixes_impl +func ExtensionSuffixes() []string { + return []string{extensionSuffix()} +} + +var ( + extDirMu sync.Mutex + extDirVal string +) + +// SetExtensionDir records the directory the materialized extension stub +// files live in (the gopy analog of CPython's lib-dynload). The path +// finder discovers the stubs there and ExtensionOrigin reports __file__ +// against it. +func SetExtensionDir(dir string) { + extDirMu.Lock() + extDirVal = dir + extDirMu.Unlock() +} + +func extensionDir() string { + extDirMu.Lock() + defer extDirMu.Unlock() + return extDirVal +} + +// ExtensionOrigin synthesizes the __file__ path for a Go-implemented +// extension: /, the location a compiled extension +// would occupy. When the extension dir is unset the bare filename is +// returned. +func ExtensionOrigin(name string) string { + suffix := extensionSuffix() + if dir := extensionDir(); dir != "" { + return filepath.Join(dir, name+suffix) + } + return name + suffix +} + +// MaterializeExtensions writes an empty stub file into dir +// for every registered extension module, the gopy stand-in for the +// compiled .so files CPython ships in lib-dynload. The real Python +// PathFinder -> FileFinder discovers these by suffix and hands them to +// ExtensionFileLoader, whose create_module calls _imp.create_dynamic -> +// CreateExtModule. The stub bytes are never read; the Go registry holds +// the actual module body. dir is recorded as the extension dir. +func MaterializeExtensions(dir string) error { + if err := os.MkdirAll(dir, 0o750); err != nil { + return err + } + suffix := extensionSuffix() + for _, name := range ExtModuleNames() { + p := filepath.Join(dir, name+suffix) + if _, err := os.Stat(p); err == nil { + continue + } + if err := os.WriteFile(p, nil, 0o600); err != nil { + return err + } + } + SetExtensionDir(dir) + return nil +} diff --git a/imp/frozen.go b/imp/frozen.go index 134d338f6..add356b42 100644 --- a/imp/frozen.go +++ b/imp/frozen.go @@ -9,7 +9,9 @@ package imp import ( + "errors" "sync" + "sync/atomic" "github.com/tamnd/gopy/objects" ) @@ -22,17 +24,137 @@ import ( type FrozenModule struct { // Name is the dotted module name, e.g. "importlib._bootstrap". Name string - // Code is the precompiled code object. nil for placeholder entries. + // Code is the precompiled code object. nil for placeholder entries + // and for source-backed entries (compiled lazily from Source). Code *objects.Code + // Source is the canonical .py source for entries whose bytecode is + // produced lazily by FrozenCompiler rather than pre-embedded. This + // stands in for CPython's marshaled frozen blob: gopy stores the + // source text (vendored verbatim) and compiles it on first use. + Source string + // OrigName is the name find_frozen reports for the entry. Frozen + // aliases (e.g. __phello_alias__ -> __hello__) point at a different + // source module; FrozenImporter._resolve_filename keys the on-disk + // __file__ off this. Empty means the entry is its own origin. + // + // CPython: Python/frozen.c _PyImport_FrozenAliases + OrigName string + // OrigNone marks an alias entry whose alias target is NULL, so + // find_frozen reports origname None (e.g. __hello_only__). It + // overrides OrigName/Name when reporting the origin. + // + // CPython: Python/frozen.c:123 aliases {"__hello_only__", NULL} + OrigNone bool + // Embedded marks a genuinely frozen entry that always yields a code + // object, even when Source is empty (e.g. the empty __phello__.ham + // package __init__). CPython freezes these as real, non-empty + // marshaled code; gopy compiles the (possibly empty) Source on demand. + Embedded bool // IsPackage is true when the frozen module is a package (has __path__). IsPackage bool + + compileMu sync.Mutex + compiled *objects.Code + compileErr error + didCompile bool +} + +// FrozenCompiler turns frozen module source into a code object. It is +// installed once at interpreter startup (cmd/gopy wires gopyCompile) +// so the imp package need not depend on parser/compile directly, +// mirroring the SourceCompiler indirection used for path imports. +// +// CPython: Python/pythonrun.c:1102 Py_CompileStringExFlags +var FrozenCompiler func(src []byte, filename string) (*objects.Code, error) + +// CodeObject returns the entry's code object, compiling Source on first +// use. It returns (nil, nil) for a pure placeholder (no Code, no +// Source). The compiled result is cached so repeated imports reuse one +// code object, matching CPython's single marshaled blob per entry. +func (m *FrozenModule) CodeObject() (*objects.Code, error) { + if m.Code != nil { + return m.Code, nil + } + if m.Source == "" && !m.Embedded { + return nil, nil + } + m.compileMu.Lock() + defer m.compileMu.Unlock() + if m.didCompile { + return m.compiled, m.compileErr + } + m.didCompile = true + if FrozenCompiler == nil { + m.compileErr = errors.New("imp: frozen compiler not installed") + return nil, m.compileErr + } + m.compiled, m.compileErr = FrozenCompiler([]byte(m.Source), "") + return m.compiled, m.compileErr +} + +// HasCode reports whether the entry can yield a code object, either +// pre-embedded or compilable from Source. Placeholder entries (the +// importlib bootstrap stubs, which gopy loads from disk) return false. +func (m *FrozenModule) HasCode() bool { + return m.Code != nil || m.Source != "" || m.Embedded +} + +// Origin returns the name find_frozen reports for the entry and whether +// that origin is None. CPython seeds origname with the entry's own name, +// then resolve_module_alias overrides it for alias entries (possibly to +// NULL). _imp.find_frozen reports None when the resolved origname is +// NULL or empty. +// +// CPython: Python/import.c:3052 find_frozen (origname seed + alias) +// CPython: Python/import.c:4533 _imp_find_frozen_impl (NULL/empty -> None) +func (m *FrozenModule) Origin() (string, bool) { + if m.OrigNone { + return "", true + } + if m.OrigName != "" { + return m.OrigName, false + } + return m.Name, false } var ( frozenMu sync.RWMutex frozenModules = map[string]*FrozenModule{} + + // frozenOverride mirrors PyConfig.use_frozen_modules under the test + // override: >0 forces frozen on, <0 forces it off, 0 uses the + // default. test.support.import_helper toggles it via + // _imp._override_frozen_modules_for_tests. + // + // CPython: Python/import.c:2821 use_frozen + frozenOverride atomic.Int32 ) +// SetFrozenOverride records the test override for frozen-module lookup +// and returns the previous value. +// +// CPython: Python/import.c:5034 _imp__override_frozen_modules_for_tests_impl +func SetFrozenOverride(v int) int { + return int(frozenOverride.Swap(int32(v))) +} + +// UseFrozen reports whether frozen-module lookup is currently enabled. +// gopy's default (override 0) is on, matching CPython's release-build +// PyConfig.use_frozen_modules default; entries without embedded code +// still fall through to the path finder via HasCode. +// +// CPython: Python/import.c:2821 use_frozen +func UseFrozen() bool { + switch v := frozenOverride.Load(); { + case v > 0: + return true + case v < 0: + return false + default: + return true + } +} + // RegisterFrozen adds or replaces a frozen module in the table. It is // safe to call from multiple goroutines and from init(). // diff --git a/imp/frozen_bootstrap.go b/imp/frozen_bootstrap.go index 0990133c4..a92deb468 100644 --- a/imp/frozen_bootstrap.go +++ b/imp/frozen_bootstrap.go @@ -11,19 +11,26 @@ package imp func init() { - // _frozen_importlib — Lib/importlib/_bootstrap.py - // CPython: Python/frozen.c:L56 + // _frozen_importlib — Lib/importlib/_bootstrap.py. gopy loads the + // bootstrap from disk at startup and caches it in sys.modules, so this + // frozen code is never executed; it exists so FrozenImporter.find_spec + // reports the module with origname "importlib._bootstrap", matching + // the build-time frozen alias. + // + // CPython: Python/frozen.c:70 bootstrap_modules / :116 aliases RegisterFrozen(&FrozenModule{ Name: "_frozen_importlib", - Code: nil, + Embedded: true, + OrigName: "importlib._bootstrap", IsPackage: false, }) // _frozen_importlib_external — Lib/importlib/_bootstrap_external.py - // CPython: Python/frozen.c:L63 + // CPython: Python/frozen.c:71 bootstrap_modules / :117 aliases RegisterFrozen(&FrozenModule{ Name: "_frozen_importlib_external", - Code: nil, + Embedded: true, + OrigName: "importlib._bootstrap_external", IsPackage: false, }) diff --git a/imp/frozen_test_modules.go b/imp/frozen_test_modules.go new file mode 100644 index 000000000..2f4c0daca --- /dev/null +++ b/imp/frozen_test_modules.go @@ -0,0 +1,86 @@ +// Frozen test-module registrations. CPython compiles a handful of toy +// modules (__hello__, __phello__ and friends) into the interpreter so +// the import machinery has frozen targets to exercise without touching +// the filesystem. test_frozen and the importlib frozen tests import +// them through FrozenImporter. +// +// gopy keeps the source text (vendored verbatim from CPython's Lib/) +// rather than a marshaled blob and compiles it lazily via +// FrozenCompiler. The same modules are also vendored on disk under the +// stdlib root so the "frozen disabled" code paths can load them through +// the path finder, exactly as CPython ships Lib/__hello__.py alongside +// the frozen copy. +// +// CPython: Python/frozen.c:98 _PyImport_FrozenModules test entries +package imp + +// Canonical source for the frozen test modules. These mirror +// Lib/__hello__.py and the Lib/__phello__/ package byte-for-byte. +// +// CPython: Lib/__hello__.py +const frozenHelloSource = `initialized = True + +class TestFrozenUtf8_1: + """\u00b6""" + +class TestFrozenUtf8_2: + """\u03c0""" + +class TestFrozenUtf8_4: + """\U0001f600""" + +def main(): + print("Hello world!") + +if __name__ == '__main__': + main() +` + +// CPython: Lib/__phello__/__init__.py and Lib/__phello__/spam.py (same body) +const frozenPhelloSource = `initialized = True + +def main(): + print("Hello world!") + +if __name__ == '__main__': + main() +` + +// frozenOnlySource is the body frozen as __hello_only__. CPython freezes +// it from Tools/freeze/flag.py, which has no on-disk stdlib copy, so the +// alias table records a NULL origin (loader_state.filename stays None). +// +// CPython: Tools/freeze/flag.py +const frozenOnlySource = `initialized = True +print("Hello world!") +` + +func init() { + // __hello__ and its aliases share one source module; the alias + // entries report __hello__ as their origin so FrozenImporter resolves + // the on-disk __file__ against Lib/__hello__.py. + // + // CPython: Python/frozen.c:96 test_modules / :114 aliases + RegisterFrozen(&FrozenModule{Name: "__hello__", Source: frozenHelloSource}) + RegisterFrozen(&FrozenModule{Name: "__hello_alias__", Source: frozenHelloSource, OrigName: "__hello__"}) + RegisterFrozen(&FrozenModule{Name: "__phello_alias__", Source: frozenHelloSource, OrigName: "__hello__", IsPackage: true}) + RegisterFrozen(&FrozenModule{Name: "__phello_alias__.spam", Source: frozenHelloSource, OrigName: "__hello__"}) + + // __phello__ is a real frozen package. Its __init__ alias reports + // the synthetic "<__phello__" origin (the leading "<" tells + // FrozenImporter._resolve_filename to map it to the package __init__). + // + // CPython: Python/frozen.c:100-107 test_modules / :121 aliases + RegisterFrozen(&FrozenModule{Name: "__phello__", Source: frozenPhelloSource, IsPackage: true}) + RegisterFrozen(&FrozenModule{Name: "__phello__.__init__", Source: frozenPhelloSource, OrigName: "<__phello__"}) + RegisterFrozen(&FrozenModule{Name: "__phello__.ham", Embedded: true, IsPackage: true}) + RegisterFrozen(&FrozenModule{Name: "__phello__.ham.__init__", Embedded: true, OrigName: "<__phello__.ham"}) + RegisterFrozen(&FrozenModule{Name: "__phello__.ham.eggs", Embedded: true}) + RegisterFrozen(&FrozenModule{Name: "__phello__.spam", Source: frozenPhelloSource}) + + // __hello_only__ is frozen-only (no stdlib source), so its alias + // origin is NULL and find_frozen reports origname None. + // + // CPython: Python/frozen.c:108 test_modules / :123 aliases + RegisterFrozen(&FrozenModule{Name: "__hello_only__", Source: frozenOnlySource, OrigNone: true}) +} diff --git a/imp/import.go b/imp/import.go index 27faf852c..055601fad 100644 --- a/imp/import.go +++ b/imp/import.go @@ -18,6 +18,34 @@ import ( // ErrModuleNotFound is returned when no finder can locate the named module. var ErrModuleNotFound = fmt.Errorf("imp: ModuleNotFoundError") +// ErrBlockedNone tags the case where sys.modules[name] is None, the +// sentinel test.support.import_helper.import_fresh_module installs to block +// a module. CPython's _bootstrap raises ModuleNotFoundError(f'import of +// {name} halted; None in sys.modules', name=name); the `name` member is what +// importlib/abc.py inspects (`except ImportError as exc: if exc.name != ...`), +// so the VM must synthesize a typed error carrying it rather than a bare +// ImportError. It wraps ErrModuleNotFound so existing not-found checks match. +// +// CPython: Lib/importlib/_bootstrap.py:1387 _find_and_load (None sentinel) +var ErrBlockedNone = fmt.Errorf("%w: blocked None in sys.modules", ErrModuleNotFound) + +// ImportWarnHook routes an ImportWarning through the live _warnings +// machinery so it walks the filter list and any recording context +// manager (catch_warnings / assertWarns). It is nil until module +// _warnings wires it during init; the imp package cannot import +// _warnings directly because _warnings imports imp. +// +// CPython: Lib/importlib/_bootstrap.py:1353 _warnings.warn(msg, ImportWarning) +var ImportWarnHook func(message string) error + +// ErrModuleExecFailed tags a load failure that happened while executing a +// located module's body (rather than failing to locate it). The real Python +// exception is already live on the thread state with its own traceback, so +// the import opcode must propagate it instead of synthesizing a fresh +// ModuleNotFoundError. A nested `import missing` inside the body wraps +// ErrModuleNotFound, so callers check this sentinel first. +var ErrModuleExecFailed = fmt.Errorf("imp: module body raised") + // ImportModule performs an absolute import of name. It is the // zero-level convenience wrapper around ImportModuleLevel. // @@ -26,6 +54,32 @@ func ImportModule(exec Executor, name string) (*objects.Module, error) { return ImportModuleLevel(exec, name, "", 0) } +// ImportModuleLevelObject imports name relative to pkgname at the given +// level and returns whatever sys.modules holds, which need not be a +// module: a test (or pathological code) can inject an arbitrary object +// under a name, and CPython's import returns it unchanged so the +// IMPORT_FROM / _handle_fromlist that follows operates through plain +// attribute access. Normal imports always yield a real module, in which +// case this behaves exactly like ImportModuleLevel. +// +// CPython: Python/import.c:1561 PyImport_ImportModuleLevelObject +func ImportModuleLevelObject(exec Executor, name, pkgname string, level int) (objects.Object, error) { + absName, err := resolveAbsName(name, pkgname, level) + if err != nil { + return nil, err + } + if raw, present := GetModuleRaw(absName); present { + if objects.IsNone(raw) { + return nil, fmt.Errorf("%w: %q", ErrBlockedNone, absName) + } + if _, ok := raw.(*objects.Module); !ok { + // A non-module cached entry: return it verbatim. + return raw, nil + } + } + return ImportModuleLevel(exec, name, pkgname, level) +} + // ImportModuleLevel imports name relative to pkgname at the given // level. level=0 is an absolute import; level>0 is relative. // @@ -55,13 +109,27 @@ func ImportModuleLevel(exec Executor, name, pkgname string, level int) (*objects // CPython: Python/import.c:L1613 sys_modules_get_dict if raw, present := GetModuleRaw(absName); present { if objects.IsNone(raw) { - return nil, fmt.Errorf("ImportError: import of %q halted; None in sys.modules", absName) + return nil, fmt.Errorf("%w: %q", ErrBlockedNone, absName) } if mod, ok := raw.(*objects.Module); ok { return mod, nil } } + // 1b. Custom sys.meta_path finders. CPython's _find_spec walks + // sys.meta_path in order; the BuiltinImporter, FrozenImporter and + // PathFinder entries are realized by the Go steps below, so here we + // consult only the additional finders a program (or a test) inserts. + // A finder inserted at meta_path[0] therefore wins over the built-in + // and frozen lookups, matching CPython's ordering. + // + // CPython: Lib/importlib/_bootstrap.py:912 _find_spec + if mod, found, err := metaPathFind(exec, absName); err != nil { + return nil, err + } else if found { + return mod, nil + } + // 2. Frozen module. // CPython: Python/import.c:L1632 import_find_and_load if fm, ok := FindFrozen(absName); ok && fm.Code != nil { @@ -84,9 +152,37 @@ func ImportModuleLevel(exec Executor, name, pkgname string, level int) (*objects // CPython: Objects/moduleobject.c:606 PyModule_AddFunctions mod.StampBuiltinModule() AddModule(absName, mod) + // CPython's BuiltinImporter sets __spec__/__loader__ on every + // built-in module; gopy's inittab path mirrors that so tools + // (pyclbr, runpy, inspect) that read module.__spec__ work. + // + // CPython: Lib/importlib/_bootstrap.py:736 BuiltinImporter.exec_module + AttachBuiltinSpec(exec, mod, absName) return mod, nil } + // 3b. Go-implemented C extension (the test-extension registry). CPython + // reaches these through PathFinder -> ExtensionFileLoader after a + // lib-dynload `.so` matches; gopy ports the extension as a Go builtin + // registered by name and builds it via the same create_dynamic compat + // gate, then attaches the ExtensionFileLoader spec so module.__spec__ + // reads like a real extension. + // + // CPython: Python/import.c:2001 import_run_extension + if ext := FindExtModule(absName); ext != nil { + mod, found, eerr := CreateExtModule(absName, ExtensionOrigin(absName)) + if eerr != nil { + return nil, eerr + } + if found { + AddModule(absName, mod) + AttachExtensionSpec(exec, mod, absName, ExtensionOrigin(absName)) + parent, tail := splitParent(absName) + bindOnParent(parent, tail, mod) + return mod, nil + } + } + // 4. Path-based finder (sys.path). // CPython: Lib/importlib/_bootstrap_external.py:1284 PathFinder.find_spec // @@ -107,6 +203,88 @@ func ImportModuleLevel(exec Executor, name, pkgname string, level int) (*objects return nil, fmt.Errorf("%w: No module named %q", ErrModuleNotFound, absName) } +// metaPathFind consults the custom finders on sys.meta_path for absName. +// It skips the BuiltinImporter, FrozenImporter and PathFinder entries +// (identified by their class __name__), which gopy realizes in Go, and +// calls find_spec(name, path, None) on every other finder. The first +// finder that returns a spec drives loadFromSpec; a None return means the +// finder declined and the walk continues. +// +// CPython: Lib/importlib/_bootstrap.py:912 _find_spec +func metaPathFind(exec Executor, absName string) (*objects.Module, bool, error) { + sysMod, ok := GetModule("sys") + if !ok { + return nil, false, nil + } + mpObj, _ := sysMod.Dict().GetItem(objects.NewStr("meta_path")) + mp, _ := mpObj.(*objects.List) + if mp == nil || mp.Len() == 0 { + return nil, false, nil + } + // The parent package's __path__ becomes the `path` argument for a + // submodule import, mirroring _find_and_load's parent.__path__ read. + // + // CPython: Lib/importlib/_bootstrap.py:1227 path = parent_module.__path__ + pathArg := objects.None() + if parent, _ := splitParent(absName); parent != "" { + if pm, ok := GetModule(parent); ok { + if pp, err := pm.Dict().GetItem(objects.NewStr("__path__")); err == nil && pp != nil { + pathArg = pp + } + } + } + nameObj := objects.NewStr(absName) + for i := 0; i < mp.Len(); i++ { + finder := mp.Item(i) + if isBuiltinFinder(finder) { + continue + } + findSpec, err := objects.GetAttr(finder, objects.NewStr("find_spec")) + if err != nil { + // A legacy finder without find_spec does not participate; + // CPython's _find_spec skips it the same way. + continue + } + spec, err := objects.Call(findSpec, objects.NewTuple([]objects.Object{nameObj, pathArg, objects.None()}), nil) + if err != nil { + return nil, false, err + } + if spec == nil || objects.IsNone(spec) { + continue + } + mod, err := loadFromSpec(exec, absName, spec) + if err != nil { + return nil, false, err + } + parent, tail := splitParent(absName) + bindOnParent(parent, tail, mod) + return mod, true, nil + } + return nil, false, nil +} + +// isBuiltinFinder reports whether finder is one of the three importers +// gopy realizes in Go (BuiltinImporter, FrozenImporter, PathFinder). +// Those are class objects exposing __name__; the custom finders programs +// install on meta_path are instances that do not. +// +// CPython: Lib/importlib/_bootstrap.py:736 BuiltinImporter / :976 PathFinder +func isBuiltinFinder(finder objects.Object) bool { + nameAttr, err := objects.GetAttr(finder, objects.NewStr("__name__")) + if err != nil { + return false + } + name, ok := nameAttr.(*objects.Unicode) + if !ok { + return false + } + switch name.Value() { + case "BuiltinImporter", "FrozenImporter", "PathFinder", "WindowsRegistryFinder": + return true + } + return false +} + // resolveAbsName converts a relative import (level > 0) to an // absolute module name using pkgname as the anchor. // diff --git a/imp/inittab.go b/imp/inittab.go index fc207f130..b4c57511f 100644 --- a/imp/inittab.go +++ b/imp/inittab.go @@ -64,6 +64,43 @@ func ExtendInittab(entries []InittabEntry) error { return nil } +// shadowedByStdlib lists inittab names that CPython ships as pure-Python +// stdlib modules (.py files on sys.path), so they never appear in +// CPython's PyImport_Inittab. gopy keeps a Go implementation in the +// inittab as an early-bootstrap import shortcut, but the live import +// machinery must treat them as not-built-in: BuiltinImporter declines +// them and PathFinder loads the vendored source, so e.g. +// 'fnmatch' in sys.builtin_module_names stays False as on a normal +// CPython build, and is_builtin agrees with builtin_module_names. +var shadowedByStdlib = map[string]bool{ + "os": true, + "warnings": true, + "dataclasses": true, + "difflib": true, + "fnmatch": true, +} + +// ShadowedByStdlib reports whether name is registered in the inittab only +// as a bootstrap shortcut while CPython ships it as pure-Python stdlib, +// so it must be reported as not-built-in by is_builtin and excluded from +// sys.builtin_module_names. +func ShadowedByStdlib(name string) bool { + return shadowedByStdlib[name] +} + +// IsBuiltinName reports whether name resolves to a statically linked +// built-in module, the membership test behind both _imp.is_builtin and +// sys.builtin_module_names. Names shadowed by a pure-Python stdlib module +// are excluded so they load from source the way they do on CPython. +// +// CPython: Python/import.c:4720 _imp_is_builtin_impl +func IsBuiltinName(name string) bool { + if shadowedByStdlib[name] { + return false + } + return FindInitFunc(name) != nil +} + // FindInitFunc returns the InitFunc registered for name, or nil if the // module is not in the built-in table. // diff --git a/imp/pathfinder.go b/imp/pathfinder.go index bdcc730df..2e15320a7 100644 --- a/imp/pathfinder.go +++ b/imp/pathfinder.go @@ -19,12 +19,16 @@ package imp import ( + "errors" "fmt" "os" "path/filepath" + "runtime" "strings" "sync" + pyerrors "github.com/tamnd/gopy/errors" + "github.com/tamnd/gopy/marshal" "github.com/tamnd/gopy/objects" ) @@ -105,65 +109,417 @@ func (p *PathFinder) FindModule(exec Executor, name string) (*objects.Module, er // CPython: Lib/importlib/_bootstrap.py:1227 _find_and_load pm, err := ImportModuleLevel(exec, parent, "", 0) if err != nil { + // A parent that was located but raised while executing its + // __init__ must surface that exception verbatim (CPython + // propagates it from _find_and_load), so do not relabel it + // as a finder miss. Only a genuine parent-not-found is a + // miss the child lookup can recover from. + // + // CPython: Lib/importlib/_bootstrap.py:1227 _find_and_load + if errors.Is(err, ErrModuleExecFailed) || !errors.Is(err, ErrModuleNotFound) { + return nil, err + } return nil, fmt.Errorf("%w: parent package %q: %w", errFinderMiss, parent, err) } parentMod = pm } + // Importing the parent package may have imported this child as a side + // effect (e.g. the parent's __init__ ran `from .child import ...`), + // caching it in sys.modules and possibly rebinding the parent's + // attribute to something other than the submodule. In that case CPython + // returns the already-cached child and never reloads or re-binds it, so + // the parent's rebinding survives. + // + // CPython: Lib/importlib/_bootstrap.py:1290 _find_and_load_unlocked + if cached, ok := GetModule(name); ok { + return cached, nil + } paths, err := readPackagePath(parentMod) if err != nil { return nil, err } search = paths + + // Track this child on the parent spec for the duration of the load so a + // circular import that does getattr(parent, tail) before tail finishes + // loading gets the "cannot access submodule" diagnostic. + // + // CPython: Lib/importlib/_bootstrap.py:1340 parent_spec._uninitialized_submodules.append(child) + pop := pushUninitializedSubmodule(parentMod, tail) + defer pop() } + // PEP 420: a directory matching the tail with no __init__.py and no + // flat-file match is a namespace portion. CPython's PathFinder + // accumulates portions across every path entry and, only after no + // regular module is found anywhere, builds a namespace package whose + // __path__ is the collected portions. + // + // CPython: Lib/importlib/_bootstrap_external.py:1430 FileFinder.find_spec + // (namespace portion path) / Lib/importlib/_bootstrap.py:1167 PathFinder + var namespacePortions []string for _, entry := range search { - dir := entry - if dir == "" { - dir = "." - } - // Package case: //__init__.py. - // CPython: Lib/importlib/_bootstrap_external.py:1378 cache_module in cache - pkgDir := filepath.Join(dir, tail) - pkgInit := filepath.Join(pkgDir, "__init__.py") - if isFile(pkgInit) { - mod, err := loadAsPackage(exec, p.Compiler, pkgInit, pkgDir, name) - if err != nil { - return nil, err - } - bindOnParent(parent, tail, mod) - return mod, nil + mod, err := p.scanEntry(exec, entry, name, parent, tail, &namespacePortions) + if err != nil { + return nil, err } - // Module case: /.py. - // CPython: Lib/importlib/_bootstrap_external.py:1391 suffix loop - modFile := filepath.Join(dir, tail+".py") - if isFile(modFile) { - mod, err := loadAsModule(exec, p.Compiler, modFile, name, parent) - if err != nil { - return nil, err - } - bindOnParent(parent, tail, mod) + if mod != nil { return mod, nil } } + if len(namespacePortions) > 0 { + mod := loadAsNamespace(exec, name, parent, namespacePortions) + bindOnParent(parent, tail, mod) + return mod, nil + } return nil, fmt.Errorf("%w: %s", errFinderMiss, name) } +// scanEntry searches one sys.path entry for name. It returns (mod, nil) +// when the module was found and loaded, (nil, nil) when this entry did +// not match (FindModule should keep scanning), or (nil, err) on a load +// failure that must propagate. A PEP 420 namespace portion contributed +// by this entry is appended to *namespacePortions, leaving the module +// unresolved so the caller can fall back to a namespace package. +// +// CPython: Lib/importlib/_bootstrap_external.py:1357 FileFinder.find_spec +func (p *PathFinder) scanEntry(exec Executor, entry, name, parent, tail string, namespacePortions *[]string) (*objects.Module, error) { + dir := entry + if dir == "" { + dir = "." + } + // spec_from_file_location runs the resolved location through + // _path_abspath, so every __file__, __path__ and __cached__ a + // path-based import produces is absolute even when the sys.path + // entry is relative ('', '.', or a relative directory). Absolutize + // the directory up front so the file paths joined below, the + // bytecode-cache path, and the spec origin all agree and match + // CPython's absolute strings. + // + // CPython: Lib/importlib/_bootstrap_external.py:782 spec_from_file_location (_path_abspath) + if abs, err := filepath.Abs(dir); err == nil { + dir = abs + } + // A sys.path entry that is not a directory (a zip archive, or a + // path that points inside one) is handled by a custom importer + // registered on sys.path_hooks, exactly as CPython's PathFinder + // routes such entries through zipimport.zipimporter. Only consult + // the hooks for non-directories so the directory scan below stays + // the fast path for the common case. + // + // CPython: Lib/importlib/_bootstrap_external.py:1236 _path_importer_cache + if !isDir(dir) { + spec, handled, herr := pathHookSpec(exec, entry, name) + if herr != nil { + return nil, herr + } + if !handled { + return nil, nil + } + // A namespace spec from the importer (loader None, search + // locations set) is a PEP 420 portion: collect it and keep + // scanning, exactly as CPython's PathFinder extends + // namespace_path instead of returning. A spec with a real + // loader is a concrete module, so load and return it. + // + // CPython: Lib/importlib/_bootstrap_external.py:1284 PathFinder._get_spec + if portions, isNS := namespacePortionsOf(spec); isNS { + *namespacePortions = append(*namespacePortions, portions...) + return nil, nil + } + mod, lerr := loadFromSpec(exec, name, spec) + if lerr != nil { + return nil, lerr + } + bindOnParent(parent, tail, mod) + return mod, nil + } + return p.scanDir(exec, dir, name, parent, tail, namespacePortions) +} + +// scanDir searches a single directory sys.path entry for name, trying +// the source package, sourceless package, source module, and sourceless +// module loaders in CPython's suffix order. It returns the loaded module, +// (nil, nil) for a miss, or (nil, err) on a load failure. A bare package +// directory with no loadable __init__ is recorded as a PEP 420 portion. +// +// CPython: Lib/importlib/_bootstrap_external.py:1391 FileFinder suffix loop +func (p *PathFinder) scanDir(exec Executor, dir, name, parent, tail string, namespacePortions *[]string) (*objects.Module, error) { + pkgDir := filepath.Join(dir, tail) + // (suffix file, loader) tried in CPython's order: source package, + // sourceless package, source module, sourceless module. + loaders := []struct { + file string + base string // case-sensitivity check target + load func() (*objects.Module, error) + }{ + {filepath.Join(pkgDir, "__init__.py"), pkgDir, func() (*objects.Module, error) { + return loadAsPackage(exec, p.Compiler, filepath.Join(pkgDir, "__init__.py"), pkgDir, name) + }}, + {filepath.Join(pkgDir, "__init__.pyc"), pkgDir, func() (*objects.Module, error) { + return loadAsPackageBytecode(exec, filepath.Join(pkgDir, "__init__.pyc"), pkgDir, name) + }}, + {filepath.Join(dir, tail+".py"), filepath.Join(dir, tail+".py"), func() (*objects.Module, error) { + return loadAsModule(exec, p.Compiler, filepath.Join(dir, tail+".py"), name, parent) + }}, + {filepath.Join(dir, tail+".pyc"), filepath.Join(dir, tail+".pyc"), func() (*objects.Module, error) { + return loadAsModuleBytecode(exec, filepath.Join(dir, tail+".pyc"), name, parent) + }}, + } + for _, l := range loaders { + if !isFile(l.file) || !caseOK(l.base) { + continue + } + mod, err := l.load() + if err != nil { + return nil, err + } + bindOnParent(parent, tail, mod) + return mod, nil + } + if isDir(pkgDir) && caseOK(pkgDir) { + *namespacePortions = append(*namespacePortions, pkgDir) + } + return nil, nil +} + +// pathHookSpec consults sys.path_hooks for a custom importer able to load +// modules out of entry (zipimport.zipimporter for a .zip archive) and asks +// that importer for name's spec. +// +// handled is false when no hook claims entry, or when the importer claims +// entry but has no spec for name, so FindModule keeps scanning the +// remaining path entries. herr carries a find_spec failure that must +// propagate. The spec is returned unloaded so FindModule can tell a +// concrete module apart from a PEP 420 namespace portion. +// +// CPython: Lib/importlib/_bootstrap_external.py:1284 PathFinder._get_spec +func pathHookSpec(exec Executor, entry, name string) (spec objects.Object, handled bool, herr error) { + _ = exec + importer, ok := pathHookImporter(entry) + if !ok { + return nil, false, nil + } + findSpec, err := objects.GetAttr(importer, objects.NewStr("find_spec")) + if err != nil { + return nil, false, err + } + s, err := objects.Call(findSpec, objects.NewTuple([]objects.Object{objects.NewStr(name)}), nil) + if err != nil { + return nil, true, err + } + if s == nil || objects.IsNone(s) { + // The archive exists but does not contain name: a miss, not an + // error. CPython's PathFinder moves on to the next path entry. + return nil, false, nil + } + return s, true, nil +} + +// namespacePortionsOf reports whether spec is a PEP 420 namespace spec +// (loader None) and, if so, returns its submodule_search_locations as a +// slice of strings. A spec with a real loader is a concrete module and +// returns isNS false. +// +// CPython: Lib/importlib/_bootstrap_external.py:1284 PathFinder._get_spec +// (spec.submodule_search_locations / namespace_path extension) +func namespacePortionsOf(spec objects.Object) (portions []string, isNS bool) { + loader, err := objects.GetAttr(spec, objects.NewStr("loader")) + if err != nil || !objects.IsNone(loader) { + return nil, false + } + ssl, err := objects.GetAttr(spec, objects.NewStr("submodule_search_locations")) + if err != nil || ssl == nil || objects.IsNone(ssl) { + return nil, false + } + switch v := ssl.(type) { + case *objects.List: + for i := 0; i < v.Len(); i++ { + if s, ok := v.Item(i).(*objects.Unicode); ok { + portions = append(portions, s.Value()) + } + } + case *objects.Tuple: + for i := 0; i < v.Len(); i++ { + if s, ok := v.Item(i).(*objects.Unicode); ok { + portions = append(portions, s.Value()) + } + } + } + return portions, true +} + +// pathHookImporter returns the importer object responsible for entry, +// consulting sys.path_importer_cache first and then sys.path_hooks. A +// hook that raises (ImportError) declines entry, so the next hook is +// tried; when none claim it the result is cached as None and ok is false. +// +// CPython: Lib/importlib/_bootstrap_external.py:1236 PathFinder._path_importer_cache +func pathHookImporter(entry string) (objects.Object, bool) { + sysMod, ok := GetModule("sys") + if !ok { + return nil, false + } + key := objects.NewStr(entry) + cacheObj, _ := sysMod.Dict().GetItem(objects.NewStr("path_importer_cache")) + cache, _ := cacheObj.(*objects.Dict) + if cache != nil { + if v, err := cache.GetItem(key); err == nil && v != nil { + if objects.IsNone(v) { + return nil, false + } + return v, true + } + } + hooksObj, _ := sysMod.Dict().GetItem(objects.NewStr("path_hooks")) + hooks, _ := hooksObj.(*objects.List) + if hooks == nil { + return nil, false + } + for i := 0; i < hooks.Len(); i++ { + importer, err := objects.Call(hooks.Item(i), objects.NewTuple([]objects.Object{key}), nil) + if err != nil { + // ImportError from a hook means "I do not handle this entry". + continue + } + if importer != nil && !objects.IsNone(importer) { + if cache != nil { + _ = cache.SetItem(key, importer) + } + return importer, true + } + } + if cache != nil { + _ = cache.SetItem(key, objects.None()) + } + return nil, false +} + +// loadFromSpec builds a module from spec via importlib.util.module_from_spec, +// registers it in sys.modules, and runs spec.loader.exec_module, mirroring +// the body of _bootstrap._load_unlocked. gopy cannot call _load itself +// because that path enters the import lock machinery, which needs the +// _weakref injection CPython performs in _setup and gopy does not run. +// +// CPython: Lib/importlib/_bootstrap.py:921 _load_unlocked +func loadFromSpec(exec Executor, name string, spec objects.Object) (*objects.Module, error) { + util, ok := GetModule("importlib.util") + if !ok { + util, ok = ensureImportlibUtil(exec) + if !ok { + return nil, fmt.Errorf("imp: loadFromSpec %q: importlib.util unavailable", name) + } + } + mfs, err := util.Dict().GetItem(objects.NewStr("module_from_spec")) + if err != nil { + return nil, fmt.Errorf("imp: loadFromSpec %q: module_from_spec missing: %w", name, err) + } + modObj, err := objects.Call(mfs, objects.NewTuple([]objects.Object{spec}), nil) + if err != nil { + return nil, fmt.Errorf("imp: loadFromSpec %q: module_from_spec: %w", name, err) + } + module, ok := modObj.(*objects.Module) + if !ok { + return nil, fmt.Errorf("imp: loadFromSpec %q: module_from_spec returned %T", name, modObj) + } + AddModule(name, module) + loader, err := objects.GetAttr(spec, objects.NewStr("loader")) + if err != nil { + RemoveModule(name) + return nil, fmt.Errorf("imp: loadFromSpec %q: spec.loader: %w", name, err) + } + // A namespace-package spec carries loader None: module_from_spec has + // already populated __path__ from submodule_search_locations and there + // is no body to run, exactly as _load_unlocked skips exec_module when + // the loader is None. + // + // CPython: Lib/importlib/_bootstrap.py:945 _load_unlocked (loader is None) + if objects.IsNone(loader) { + if final, ok := GetModule(name); ok { + return final, nil + } + return module, nil + } + execMod, err := objects.GetAttr(loader, objects.NewStr("exec_module")) + if err != nil { + RemoveModule(name) + return nil, fmt.Errorf("imp: loadFromSpec %q: loader.exec_module: %w", name, err) + } + if _, err := objects.Call(execMod, objects.NewTuple([]objects.Object{module}), nil); err != nil { + RemoveModule(name) + return nil, fmt.Errorf("imp: loadFromSpec %q: exec_module: %w: %w", name, err, ErrModuleExecFailed) + } + // exec_module may reassign sys.modules[name]; re-read it the way + // CPython's _load_unlocked returns sys.modules[spec.name]. + if final, ok := GetModule(name); ok { + return final, nil + } + return module, nil +} + // bindOnParent installs child as an attribute on the parent package's // module dict. Mirrors the setattr step _find_and_load_unlocked runs // after a successful submodule load so `import a.b` makes `a.b` // resolve as an attribute on `a`. Errors are swallowed to match // CPython, which also catches AttributeError around the setattr. // -// CPython: Lib/importlib/_bootstrap.py:1234 setattr(parent_module, child, module) +// CPython: Lib/importlib/_bootstrap.py:1350 setattr(parent_module, child, module) func bindOnParent(parent, tail string, child *objects.Module) { if parent == "" { return } - pm, ok := GetModule(parent) - if !ok { + // CPython reads parent_module = sys.modules[parent] verbatim, so a test + // (or pathological code) that swaps in a non-module object still receives + // the setattr; GetModuleRaw preserves that object, GetModule would drop it. + pm, ok := GetModuleRaw(parent) + if !ok || objects.IsNone(pm) { return } - _ = pm.Dict().SetItem(objects.NewStr(tail), child) + // CPython binds `module = sys.modules.pop(spec.name)`, i.e. the object the + // body left in sys.modules, not the module shell the loader created. An + // __init__ that reassigns sys.modules[__name__] to a custom object is bound + // in that swapped form, so re-read the entry by full name and fall back to + // the loader's module only when nothing replaced it. + // + // CPython: Lib/importlib/_bootstrap.py:931 module = sys.modules.pop(spec.name) + bound := objects.Object(child) + if raw, present := GetModuleRaw(parent + "." + tail); present && !objects.IsNone(raw) { + bound = raw + } + // setattr(parent_module, child, module) runs the parent's real __setattr__ + // so a custom or unwritable parent participates. An AttributeError is + // caught and reported as an ImportWarning, exactly as + // _find_and_load_unlocked does. + // + // CPython: Lib/importlib/_bootstrap.py:1350 try: setattr(...) except AttributeError + if err := objects.SetAttr(pm, objects.NewStr(tail), bound); err != nil { + if isAttributeError(err) && ImportWarnHook != nil { + // CPython: Lib/importlib/_bootstrap.py:1352 msg = f"Cannot set ..." + msg := fmt.Sprintf("Cannot set an attribute on '%s' for child module '%s'", + parent, tail) + _ = ImportWarnHook(msg) + } + } +} + +// isAttributeError reports whether a Go error raised by SetAttr carries a +// Python AttributeError. SetAttr surfaces the exception wrapped in a +// RaisedError; an entry that is not an AttributeError propagates as a +// non-match so it is not silently turned into a warning. +func isAttributeError(err error) bool { + var re *objects.RaisedError + if errors.As(err, &re) { + if exc, ok := re.Exc.(*pyerrors.Exception); ok { + return pyerrors.Match(exc, pyerrors.PyExc_AttributeError) + } + } + // SetAttr also surfaces a missing-slot failure as a plain Go error whose + // text leads with the exception name, so match that shape too. + msg := err.Error() + if rest, ok := strings.CutPrefix(msg, "vm: "); ok { + msg = rest + } + return strings.HasPrefix(msg, "AttributeError:") } // splitParent splits a dotted module name into (parent, tail). @@ -251,19 +607,36 @@ func loadAsPackage(exec Executor, compiler SourceCompiler, initFile, pkgDir, nam return nil, fmt.Errorf("imp: loadAsPackage %q: __package__: %w", name, err) } AddModule(name, mod) + // CPython attaches __spec__ before exec_module; do the same so an + // __init__.py that imports from its own package during init reads + // spec.has_location / spec.origin. + // + // CPython: Lib/importlib/_bootstrap.py:573 module_from_spec + attachSpecAttrs(exec, mod, name, initFile, []string{pkgDir}) src, err := os.ReadFile(initFile) //nolint:gosec // initFile is filepath.Join of a trusted PathFinder.Paths entry. if err != nil { return nil, fmt.Errorf("imp: loadAsPackage %q: %w", name, err) } - code, err := compiler(src, initFile) - if err != nil { - return nil, fmt.Errorf("imp: loadAsPackage %q: compile: %w", name, err) + code, ok := readBytecodeCache(initFile) + if !ok { + var cerr error + code, cerr = compiler(src, initFile) + if cerr != nil { + return nil, fmt.Errorf("imp: loadAsPackage %q: compile: %w", name, cerr) + } + writeBytecodeCache(initFile, code) } - if _, err := exec.ExecCode(code, mod); err != nil { + setSpecInitializing(mod, true) + _, execErr := exec.ExecCode(code, mod) + setSpecInitializing(mod, false) + if execErr != nil { RemoveModule(name) - return nil, fmt.Errorf("imp: loadAsPackage %q: exec: %w", name, err) + return nil, fmt.Errorf("imp: loadAsPackage %q: exec: %w: %w", name, execErr, ErrModuleExecFailed) } + // Executing this package may have completed importlib's self-bootstrap, + // which unblocks the deferred spec queue (see maybeFlushPendingSpecs). + maybeFlushPendingSpecs(exec) // CPython: Python/import.c:2715 exec_code_in_module re-reads // sys.modules so an `__init__.py` that reassigns its own entry // (rare for packages, but the same shape as decimal/_pydecimal). @@ -273,6 +646,129 @@ func loadAsPackage(exec Executor, compiler SourceCompiler, initFile, pkgDir, nam return mod, nil } +// loadAsPackageBytecode is loadAsPackage for a sourceless package: the +// code object comes from /__init__.pyc instead of compiling +// __init__.py. __path__ is set before the body runs so a package whose +// __init__ does `from .submod import x` can resolve the parent's +// __path__. +// +// CPython: Lib/importlib/_bootstrap_external.py:1215 SourcelessFileLoader +func loadAsPackageBytecode(exec Executor, initFile, pkgDir, name string) (*objects.Module, error) { + code, err := readPycCode(initFile) + if err != nil { + return nil, fmt.Errorf("imp: loadAsPackageBytecode %q: %w", name, err) + } + mod, exists := GetModule(name) + if !exists { + mod = objects.NewModule(name) + } + d := mod.Dict() + if err := d.SetItem(objects.NewStr("__file__"), objects.NewStr(initFile)); err != nil { + return nil, fmt.Errorf("imp: loadAsPackageBytecode %q: __file__: %w", name, err) + } + if err := d.SetItem(objects.NewStr("__path__"), + objects.NewList([]objects.Object{objects.NewStr(pkgDir)})); err != nil { + return nil, fmt.Errorf("imp: loadAsPackageBytecode %q: __path__: %w", name, err) + } + if err := d.SetItem(objects.NewStr("__package__"), objects.NewStr(name)); err != nil { + return nil, fmt.Errorf("imp: loadAsPackageBytecode %q: __package__: %w", name, err) + } + AddModule(name, mod) + attachSpecAttrs(exec, mod, name, initFile, []string{pkgDir}) + setSpecInitializing(mod, true) + _, execErr := exec.ExecCode(code, mod) + setSpecInitializing(mod, false) + if execErr != nil { + RemoveModule(name) + return nil, fmt.Errorf("imp: loadAsPackageBytecode %q: exec: %w: %w", name, execErr, ErrModuleExecFailed) + } + if final, ok := GetModule(name); ok { + return final, nil + } + return mod, nil +} + +// loadAsModuleBytecode is loadAsModule for a sourceless module: the code +// object comes from /.pyc instead of compiling .py. +// +// CPython: Lib/importlib/_bootstrap_external.py:1215 SourcelessFileLoader +func loadAsModuleBytecode(exec Executor, file, name, parent string) (*objects.Module, error) { + code, err := readPycCode(file) + if err != nil { + return nil, fmt.Errorf("imp: loadAsModuleBytecode %q: %w", name, err) + } + mod, exists := GetModule(name) + if !exists { + mod = objects.NewModule(name) + } + d := mod.Dict() + if err := d.SetItem(objects.NewStr("__file__"), objects.NewStr(file)); err != nil { + return nil, fmt.Errorf("imp: loadAsModuleBytecode %q: __file__: %w", name, err) + } + if err := d.SetItem(objects.NewStr("__package__"), objects.NewStr(parent)); err != nil { + return nil, fmt.Errorf("imp: loadAsModuleBytecode %q: __package__: %w", name, err) + } + AddModule(name, mod) + attachSpecAttrs(exec, mod, name, file, nil) + setSpecInitializing(mod, true) + _, execErr := exec.ExecCode(code, mod) + setSpecInitializing(mod, false) + if execErr != nil { + RemoveModule(name) + return nil, fmt.Errorf("imp: loadAsModuleBytecode %q: exec: %w: %w", name, execErr, ErrModuleExecFailed) + } + if final, ok := GetModule(name); ok { + return final, nil + } + return mod, nil +} + +// readPycCode opens a .pyc file and returns its embedded code object, +// validating the magic-number header along the way. +// +// CPython: Lib/importlib/_bootstrap_external.py:1215 SourcelessFileLoader.get_code +func readPycCode(file string) (*objects.Code, error) { + f, err := os.Open(file) //nolint:gosec // file is filepath.Join of a trusted PathFinder.Paths entry. + if err != nil { + return nil, err + } + defer f.Close() + code, _, err := marshal.ReadPyc(f) + if err != nil { + return nil, err + } + return code, nil +} + +// loadAsNamespace builds a PEP 420 namespace package: a module with no +// __file__, a __path__ spanning every contributing directory, and a +// namespace __spec__ (loader None, origin None). The body is never +// executed because a namespace package has no __init__.py. +// +// CPython: Lib/importlib/_bootstrap.py:573 module_from_spec (namespace) / +// Lib/importlib/_bootstrap_external.py:1230 NamespaceLoader +func loadAsNamespace(exec Executor, name, parent string, portions []string) *objects.Module { + mod, exists := GetModule(name) + if !exists { + mod = objects.NewModule(name) + } + d := mod.Dict() + items := make([]objects.Object, len(portions)) + for i, s := range portions { + items[i] = objects.NewStr(s) + } + _ = d.SetItem(objects.NewStr("__path__"), objects.NewList(items)) + _ = d.SetItem(objects.NewStr("__package__"), objects.NewStr(name)) + _ = d.SetItem(objects.NewStr("__file__"), objects.None()) + if _, err := d.GetItem(objects.NewStr("__doc__")); err != nil { + _ = d.SetItem(objects.NewStr("__doc__"), objects.None()) + } + _ = parent + AddModule(name, mod) + attachNamespaceSpec(exec, mod, name, portions) + return mod +} + // loadAsModule is the flat-file equivalent: load source, set // __file__ and __package__ (which is the parent dotted name, or "" // for top-level), then exec. @@ -291,19 +787,36 @@ func loadAsModule(exec Executor, compiler SourceCompiler, file, name, parent str return nil, fmt.Errorf("imp: loadAsModule %q: __package__: %w", name, err) } AddModule(name, mod) + // CPython sets __spec__ in module_from_spec before exec_module runs the + // body, so a module that imports from itself during initialization can + // read spec.has_location / spec.origin. Attach before exec. + // + // CPython: Lib/importlib/_bootstrap.py:573 module_from_spec + attachSpecAttrs(exec, mod, name, file, nil) src, err := os.ReadFile(file) //nolint:gosec // file is filepath.Join of a trusted PathFinder.Paths entry. if err != nil { return nil, fmt.Errorf("imp: loadAsModule %q: %w", name, err) } - code, err := compiler(src, file) - if err != nil { - return nil, fmt.Errorf("imp: loadAsModule %q: compile: %w", name, err) + code, ok := readBytecodeCache(file) + if !ok { + var cerr error + code, cerr = compiler(src, file) + if cerr != nil { + return nil, fmt.Errorf("imp: loadAsModule %q: compile: %w", name, cerr) + } + writeBytecodeCache(file, code) } - if _, err := exec.ExecCode(code, mod); err != nil { + setSpecInitializing(mod, true) + _, execErr := exec.ExecCode(code, mod) + setSpecInitializing(mod, false) + if execErr != nil { RemoveModule(name) - return nil, fmt.Errorf("imp: loadAsModule %q: exec: %w", name, err) + return nil, fmt.Errorf("imp: loadAsModule %q: exec: %w: %w", name, execErr, ErrModuleExecFailed) } + // A freshly executed importlib submodule may have completed the package + // bootstrap; drain any specs deferred while it was incomplete. + maybeFlushPendingSpecs(exec) // CPython: Python/import.c:2715 exec_code_in_module re-reads // sys.modules so a module body that reassigns its own entry // (`sys.modules[__name__] = other`, e.g. decimal/_pydecimal) wins. @@ -313,6 +826,480 @@ func loadAsModule(exec Executor, compiler SourceCompiler, file, name, parent str return mod, nil } +// attachSpecAttrs populates the module-namespace surface CPython's +// _init_module_attrs fills from a ModuleSpec: __spec__, __loader__, +// __cached__ and a default __doc__. gopy's import runs Go-side, so the +// spec is built by calling importlib.util.spec_from_file_location once +// the body has run (the same shape CPython's FileFinder produces). +// +// importlib.util is itself a .py module, so the modules loaded before +// (and during) its own import cannot have their spec built yet. Those +// are queued in pendingSpecs and flushed the moment util becomes +// available, so importlib and its early dependencies still end up with +// a __spec__. +// +// CPython: Lib/importlib/_bootstrap.py:516 _init_module_attrs +func attachSpecAttrs(exec Executor, mod *objects.Module, name, origin string, searchLocations []string) { + d := mod.Dict() + // __doc__ defaults to None when the body stored no docstring. + docKey := objects.NewStr("__doc__") + if _, err := d.GetItem(docKey); err != nil { + _ = d.SetItem(docKey, objects.None()) + } + p := pendingSpec{mod: mod, name: name, origin: origin, search: searchLocations} + util, ok := ensureImportlibUtil(exec) + if !ok { + pendingMu.Lock() + pendingSpecs = append(pendingSpecs, p) + pendingMu.Unlock() + return + } + applySpec(util, p) + flushPendingSpecs(util) +} + +// setSpecInitializing flips mod.__spec__._initializing. CPython's +// module_from_spec wraps exec_module in `spec._initializing = True` / +// `finally: spec._initializing = False`, so a module that imports from +// itself during its own body sees a partially-initialized spec. gopy +// mirrors that around ExecCode so the circular-import and shadowing +// hints in _Py_module_getattro_impl / _PyEval_ImportFrom fire correctly. +// +// CPython: Lib/importlib/_bootstrap.py:573 module_from_spec +func setSpecInitializing(mod *objects.Module, on bool) { + spec, err := mod.Dict().GetItem(objects.NewStr("__spec__")) + if err != nil || spec == nil || objects.IsNone(spec) { + return + } + v := objects.False() + if on { + v = objects.True() + } + _ = objects.SetAttr(spec, objects.NewStr("_initializing"), v) +} + +// pushUninitializedSubmodule appends child to parentMod.__spec__. +// _uninitialized_submodules and returns a pop function that removes the +// last entry. CPython brackets the child's _load_unlocked with this +// append/pop so a circular import that reaches getattr(parent, child) +// while child is mid-load gets the "cannot access submodule" message. +// +// CPython: Lib/importlib/_bootstrap.py:1340 parent_spec._uninitialized_submodules.append(child) +func pushUninitializedSubmodule(parentMod *objects.Module, child string) func() { + noop := func() {} + if parentMod == nil { + return noop + } + spec, err := parentMod.Dict().GetItem(objects.NewStr("__spec__")) + if err != nil || spec == nil || objects.IsNone(spec) { + return noop + } + listObj, err := objects.GetAttr(spec, objects.NewStr("_uninitialized_submodules")) + if err != nil { + return noop + } + list, ok := listObj.(*objects.List) + if !ok { + return noop + } + list.Append(objects.NewStr(child)) + return func() { + // CPython: Lib/importlib/_bootstrap.py:1345 _uninitialized_submodules.pop() + if n := list.Len(); n > 0 { + list.SetSlice(n-1, n, nil) + } + } +} + +// attachNamespaceSpec binds a PEP 420 namespace ModuleSpec (loader None, +// origin None, submodule_search_locations = the portions) onto mod. Like +// the file path it defers when importlib.util is not importable yet. +// +// CPython: Lib/importlib/_bootstrap.py:573 module_from_spec (namespace) +func attachNamespaceSpec(exec Executor, mod *objects.Module, name string, portions []string) { + p := pendingSpec{mod: mod, name: name, search: portions, namespace: true} + util, ok := ensureImportlibUtil(exec) + if !ok { + pendingMu.Lock() + pendingSpecs = append(pendingSpecs, p) + pendingMu.Unlock() + return + } + applySpec(util, p) + flushPendingSpecs(util) +} + +// AttachBuiltinSpec gives a built-in (inittab) module the __spec__ / +// __loader__ surface CPython's BuiltinImporter installs: origin +// "built-in", no source, no file. It is deferred just like the +// file-based path when importlib.util is not importable yet. +// +// CPython: Lib/importlib/_bootstrap.py:736 BuiltinImporter.exec_module +func AttachBuiltinSpec(exec Executor, mod *objects.Module, name string) { + d := mod.Dict() + docKey := objects.NewStr("__doc__") + if _, err := d.GetItem(docKey); err != nil { + _ = d.SetItem(docKey, objects.None()) + } + p := pendingSpec{mod: mod, name: name, builtin: true} + util, ok := ensureImportlibUtil(exec) + if !ok { + pendingMu.Lock() + pendingSpecs = append(pendingSpecs, p) + pendingMu.Unlock() + return + } + applySpec(util, p) + flushPendingSpecs(util) +} + +// pendingSpec records a module whose spec could not be built yet because +// importlib.util was not importable at the time. +type pendingSpec struct { + mod *objects.Module + name string + origin string + search []string + builtin bool + namespace bool + extension bool +} + +// AttachExtensionSpec gives a Go-implemented extension module the +// __spec__ / __loader__ / __file__ surface CPython's ExtensionFileLoader +// installs: an ExtensionFileLoader instance as the loader and the +// synthesized lib-dynload path as origin / __file__. test_import's +// require_extension asserts module.__spec__.loader is ExtensionFileLoader, +// so the loader type must be exactly that. +// +// CPython: Lib/importlib/_bootstrap_external.py:1032 ExtensionFileLoader +func AttachExtensionSpec(exec Executor, mod *objects.Module, name, origin string) { + d := mod.Dict() + docKey := objects.NewStr("__doc__") + if _, err := d.GetItem(docKey); err != nil { + _ = d.SetItem(docKey, objects.None()) + } + _ = d.SetItem(objects.NewStr("__file__"), objects.NewStr(origin)) + p := pendingSpec{mod: mod, name: name, origin: origin, extension: true} + util, ok := ensureImportlibUtil(exec) + if !ok { + pendingMu.Lock() + pendingSpecs = append(pendingSpecs, p) + pendingMu.Unlock() + return + } + applySpec(util, p) + flushPendingSpecs(util) +} + +var ( + pendingMu sync.Mutex + pendingSpecs []pendingSpec +) + +// maybeFlushPendingSpecs drains the deferred-spec queue if anything is +// queued and importlib.util has become usable. Modules whose specs were +// deferred during importlib's bootstrap (importlib itself, _bootstrap, +// _bootstrap_external) are picked up here the moment the package finishes +// initializing, so a follow-up `import importlib.abc` finds a __spec__ on +// its parent package. +func maybeFlushPendingSpecs(exec Executor) { + pendingMu.Lock() + n := len(pendingSpecs) + pendingMu.Unlock() + if n == 0 { + return + } + if util, ok := ensureImportlibUtil(exec); ok { + flushPendingSpecs(util) + } +} + +// flushPendingSpecs drains the deferred-spec queue, building each +// module's spec now that importlib.util is available. +func flushPendingSpecs(util *objects.Module) { + pendingMu.Lock() + queue := pendingSpecs + pendingSpecs = nil + pendingMu.Unlock() + for _, p := range queue { + applySpec(util, p) + } +} + +// applySpec builds a ModuleSpec for p via importlib.util and binds the +// resulting __spec__/__loader__/__cached__ onto the module dict. +// Built-in modules use spec_from_loader with a "built-in" origin; file +// modules use spec_from_file_location. +// +// CPython: Lib/importlib/_bootstrap.py:516 _init_module_attrs +func applySpec(util *objects.Module, p pendingSpec) { + d := p.mod.Dict() + // importlib._bootstrap._setup already walks sys.modules and gives every + // built-in module a spec whose loader is BuiltinImporter. When that has + // run before this deferred flush, a freshly-built spec here would carry + // loader=None (importlib.machinery may not be importable yet) and clobber + // the correct __loader__. Leave _setup's work in place. + // + // CPython: Lib/importlib/_bootstrap.py:1517 _setup (built-in spec set-up) + if p.builtin { + if existing, err := d.GetItem(objects.NewStr("__spec__")); err == nil && existing != nil && !objects.IsNone(existing) { + if loader, lerr := objects.GetAttr(existing, objects.NewStr("loader")); lerr == nil && loader != nil && !objects.IsNone(loader) { + return + } + } + } + spec := buildSpec(util, p) + if spec == nil { + return + } + _ = d.SetItem(objects.NewStr("__spec__"), spec) + if loader, lerr := objects.GetAttr(spec, objects.NewStr("loader")); lerr == nil { + _ = d.SetItem(objects.NewStr("__loader__"), loader) + } + // __cached__ mirrors spec.cached (None for gopy's bytecode-less load). + if cached, cerr := objects.GetAttr(spec, objects.NewStr("cached")); cerr == nil { + _ = d.SetItem(objects.NewStr("__cached__"), cached) + } else { + _ = d.SetItem(objects.NewStr("__cached__"), objects.None()) + } +} + +// buildSpec calls the appropriate importlib.util constructor for p. +func buildSpec(util *objects.Module, p pendingSpec) objects.Object { + switch { + case p.namespace: + return buildNamespaceSpec(p) + case p.builtin: + return buildBuiltinSpec(util, p) + case p.extension: + return buildExtensionSpec(util, p) + default: + return buildFileSpec(util, p) + } +} + +// buildExtensionSpec builds a spec whose loader is an ExtensionFileLoader +// instance, mirroring the spec PathFinder produces for a compiled +// extension. spec_from_file_location with an explicit loader keeps the +// loader type exactly ExtensionFileLoader and records origin as __file__. +// +// CPython: Lib/importlib/_bootstrap_external.py:1546 ExtensionFileLoader path hook +func buildExtensionSpec(util *objects.Module, p pendingSpec) objects.Object { + machinery, ok := GetModule("importlib.machinery") + if !ok { + return nil + } + loaderCls, err := machinery.Dict().GetItem(objects.NewStr("ExtensionFileLoader")) + if err != nil || loaderCls == nil { + return nil + } + loader, lerr := objects.Call(loaderCls, + objects.NewTuple([]objects.Object{objects.NewStr(p.name), objects.NewStr(p.origin)}), nil) + if lerr != nil { + return nil + } + fn, err := util.Dict().GetItem(objects.NewStr("spec_from_file_location")) + if err != nil { + return nil + } + kwargs := objects.NewDict() + _ = kwargs.SetItem(objects.NewStr("loader"), loader) + args := objects.NewTuple([]objects.Object{objects.NewStr(p.name), objects.NewStr(p.origin)}) + spec, cerr := objects.Call(fn, args, kwargs) + if cerr != nil || spec == objects.None() { + return nil + } + return spec +} + +// buildNamespaceSpec builds a PEP 420 namespace spec: loader None, origin +// None, the portions as submodule_search_locations. machinery.ModuleSpec is +// the faithful constructor; util re-exports it. +// +// CPython: Lib/importlib/_bootstrap.py:573 module_from_spec +func buildNamespaceSpec(p pendingSpec) objects.Object { + machinery, ok := GetModule("importlib.machinery") + if !ok { + return nil + } + ctor, err := machinery.Dict().GetItem(objects.NewStr("ModuleSpec")) + if err != nil { + return nil + } + kwargs := objects.NewDict() + _ = kwargs.SetItem(objects.NewStr("is_package"), objects.True()) + args := objects.NewTuple([]objects.Object{objects.NewStr(p.name), objects.None()}) + spec, cerr := objects.Call(ctor, args, kwargs) + if cerr != nil || spec == objects.None() { + return nil + } + items := make([]objects.Object, len(p.search)) + for i, s := range p.search { + items[i] = objects.NewStr(s) + } + _ = objects.SetAttr(spec, objects.NewStr("submodule_search_locations"), objects.NewList(items)) + return spec +} + +// buildBuiltinSpec builds the spec for a built-in module. CPython's +// BuiltinImporter.find_spec passes the importer class itself as the loader, +// so every built-in module's __loader__ is BuiltinImporter, not None. Mirror +// that: a None loader would fail test_importlib's test_everyone_has___loader__. +// +// CPython: Lib/importlib/_bootstrap.py:760 BuiltinImporter.find_spec +func buildBuiltinSpec(util *objects.Module, p pendingSpec) objects.Object { + fn, err := util.Dict().GetItem(objects.NewStr("spec_from_loader")) + if err != nil { + return nil + } + args := objects.NewTuple([]objects.Object{objects.NewStr(p.name), builtinImporterLoader()}) + kwargs := objects.NewDict() + _ = kwargs.SetItem(objects.NewStr("origin"), objects.NewStr("built-in")) + spec, cerr := objects.Call(fn, args, kwargs) + if cerr != nil || spec == objects.None() { + return nil + } + return spec +} + +// builtinImporterLoader returns the BuiltinImporter class to use as a +// built-in module's __loader__. importlib.machinery re-exports +// _bootstrap.BuiltinImporter, but it may not be imported yet when a built-in +// module loads early, so fall back to importlib._bootstrap, which is always +// live by this point. None is the last resort. +func builtinImporterLoader() objects.Object { + for _, modName := range []string{"importlib.machinery", "importlib._bootstrap"} { + m, ok := GetModule(modName) + if !ok { + continue + } + if bi, lerr := m.Dict().GetItem(objects.NewStr("BuiltinImporter")); lerr == nil && bi != nil { + return bi + } + } + return objects.None() +} + +// buildFileSpec builds a file-backed spec via spec_from_file_location. +func buildFileSpec(util *objects.Module, p pendingSpec) objects.Object { + fn, err := util.Dict().GetItem(objects.NewStr("spec_from_file_location")) + if err != nil { + return nil + } + kwargs := objects.NewDict() + if p.search != nil { + items := make([]objects.Object, len(p.search)) + for i, s := range p.search { + items[i] = objects.NewStr(s) + } + _ = kwargs.SetItem(objects.NewStr("submodule_search_locations"), + objects.NewList(items)) + } + args := objects.NewTuple([]objects.Object{objects.NewStr(p.name), objects.NewStr(p.origin)}) + spec, cerr := objects.Call(fn, args, kwargs) + if cerr != nil || spec == objects.None() { + return nil + } + return spec +} + +var ( + specBootstrapMu sync.Mutex + specBootstrapped bool +) + +// ensureImportlibUtil returns the importlib.util module, importing it on +// first use. The lazy import is guarded by specBootstrapped so the +// modules pulled in by importlib.util's own load (os, types, +// importlib._bootstrap_external) do not re-enter and recurse while that +// import is still in flight. +func ensureImportlibUtil(exec Executor) (*objects.Module, bool) { + if util, ok := GetModule("importlib.util"); ok { + // util is registered before its body runs, so a mid-import + // lookup sees the module without spec_from_file_location yet. + // Treat that partial state as "not ready" so the caller defers + // rather than flushing the pending queue against a stub. + if _, err := util.Dict().GetItem(objects.NewStr("spec_from_file_location")); err != nil { + return nil, false + } + // spec_from_file_location dereferences importlib._bootstrap_external's + // module-global `_bootstrap` (wired by _set_bootstrap_module). A fresh + // importlib re-import (test.support.import_helper.import_fresh_module) + // can leave util importable while that global is still None, so verify + // the builder is wired before reporting util ready. + if !specBuilderReady() { + return nil, false + } + return util, true + } + // Until importlib's package bootstrap finishes, importing importlib.util + // would pull in a fresh importlib._bootstrap_external whose module-global + // `_bootstrap` is still None (it is wired by _set_bootstrap_module at + // importlib/__init__.py:37). spec_from_file_location dereferences that + // global at _bootstrap_external.py:596, so building a spec mid-bootstrap + // crashes. Defer: the module loads without a spec now and the pending + // queue is flushed once importlib is fully initialized. This mirrors + // importlib's own rule ("Until bootstrapping is complete, DO NOT import + // any modules that attempt to import importlib._bootstrap"). + // + // CPython: Lib/importlib/__init__.py:6 (bootstrap-complete guard) + if !importlibBootstrapComplete() { + return nil, false + } + specBootstrapMu.Lock() + if specBootstrapped { + specBootstrapMu.Unlock() + return nil, false + } + specBootstrapped = true + specBootstrapMu.Unlock() + util, err := ImportModule(exec, "importlib.util") + specBootstrapMu.Lock() + specBootstrapped = false + specBootstrapMu.Unlock() + if err != nil { + return nil, false + } + return util, true +} + +// importlibBootstrapComplete reports whether the importlib package has +// finished its self-bootstrap. importlib/__init__.py defines import_module +// only after wiring _bootstrap / _bootstrap_external (lines 16-48), so the +// presence of that attribute is a reliable "bootstrap done" sentinel. When +// importlib is not loaded at all (very early startup), report complete so the +// legacy lazy-import path is preserved. +// +// CPython: Lib/importlib/__init__.py:71 def import_module +func importlibBootstrapComplete() bool { + mod, ok := GetModule("importlib") + if !ok { + return true + } + _, err := mod.Dict().GetItem(objects.NewStr("import_module")) + return err == nil +} + +// specBuilderReady reports whether importlib._bootstrap_external is wired to +// importlib._bootstrap. spec_from_file_location dereferences the module-global +// `_bootstrap` (set by _set_bootstrap_module), so a fresh re-import that has +// not run that wiring yet must not be asked to build a spec. +// +// CPython: Lib/importlib/_bootstrap_external.py:1552 _set_bootstrap_module +func specBuilderReady() bool { + be, ok := GetModule("importlib._bootstrap_external") + if !ok { + // Not yet loaded: util will pull it in wired, so treat as ready. + return true + } + v, err := be.Dict().GetItem(objects.NewStr("_bootstrap")) + if err != nil || v == nil { + return false + } + return !objects.IsNone(v) +} + // isFile reports whether path exists and is a regular file. It is the // gopy stand-in for importlib's _path_isfile helper. // @@ -325,6 +1312,62 @@ func isFile(path string) bool { return info.Mode().IsRegular() } +// isDir reports whether path exists and is a directory. It is the gopy +// stand-in for importlib's _path_isdir helper used by namespace-portion +// detection. +// +// CPython: Lib/importlib/_bootstrap_external.py:153 _path_isdir +func isDir(path string) bool { + info, err := os.Stat(path) + if err != nil { + return false + } + return info.IsDir() +} + +// caseOK reports whether the final component of an existing candidate path +// matches a real on-disk directory entry with exact case. On a +// case-insensitive but case-preserving filesystem (macOS, Windows) os.Stat +// succeeds for any case spelling, so a plain existence probe would let +// `import RAnDoM` resolve random.py. CPython's FileFinder guards against +// this by testing membership in set(os.listdir(dir)), the exact-case path +// cache, unless _relax_case() is true. caseOK reproduces that membership +// test by scanning the directory for an exact-case name match. +// +// CPython: Lib/importlib/_bootstrap_external.py:1378 cache_module in cache +func caseOK(path string) bool { + if relaxCase() { + return true + } + entries, err := os.ReadDir(filepath.Dir(path)) + if err != nil { + return false + } + base := filepath.Base(path) + for _, e := range entries { + if e.Name() == base { + return true + } + } + return false +} + +// relaxCase mirrors importlib's _relax_case: case folding is relaxed only +// on case-insensitive platforms (Windows, macOS) and only when PYTHONCASEOK +// is present in the environment. Case-sensitive platforms are always +// strict, where caseOK's directory scan is a redundant but harmless match. +// +// CPython: Lib/importlib/_bootstrap_external.py:50 _relax_case +func relaxCase() bool { + switch runtime.GOOS { + case "windows", "darwin": + _, ok := os.LookupEnv("PYTHONCASEOK") + return ok + default: + return false + } +} + var ( pathFinderMu sync.RWMutex pathFinder *PathFinder diff --git a/imp/shadowing.go b/imp/shadowing.go new file mode 100644 index 000000000..b17f59c20 --- /dev/null +++ b/imp/shadowing.go @@ -0,0 +1,314 @@ +package imp + +import ( + "fmt" + "os" + "strings" + + "github.com/tamnd/gopy/objects" +) + +// optionalAttr ports PyObject_GetOptionalAttr for the shadowing helpers: +// it returns (val, true, nil) on success and (nil, false, nil) when the +// attribute is missing (AttributeError). Any non-AttributeError failure +// propagates as the third return. +// +// CPython: Objects/object.c:1324 PyObject_GetOptionalAttr +func optionalAttr(o objects.Object, name string) (objects.Object, bool, error) { + v, err := objects.GetAttr(o, objects.NewStr(name)) + if err == nil { + return v, true, nil + } + if strings.Contains(err.Error(), "AttributeError") { + return nil, false, nil + } + return nil, false, err +} + +// SpecFileOrigin ports _PyModuleSpec_GetFileOrigin: returns the spec's +// origin string only when spec.has_location is truthy and spec.origin is +// a str. The bool reports whether a location origin was found. +// +// CPython: Objects/moduleobject.c:892 _PyModuleSpec_GetFileOrigin +func SpecFileOrigin(spec objects.Object) (string, bool, error) { + hasLoc, found, err := optionalAttr(spec, "has_location") + if err != nil || !found { + return "", false, err + } + if !objects.IsTrue(hasLoc) { + return "", false, nil + } + originObj, found, err := optionalAttr(spec, "origin") + if err != nil || !found { + return "", false, err + } + origin, ok := originObj.(*objects.Unicode) + if !ok { + return "", false, nil + } + return origin.Value(), true, nil +} + +// SpecIsInitializing ports _PyModuleSpec_IsInitializing: spec._initializing +// is truthy. +// +// CPython: Objects/moduleobject.c:858 _PyModuleSpec_IsInitializing +func SpecIsInitializing(spec objects.Object) (bool, error) { + v, found, err := optionalAttr(spec, "_initializing") + if err != nil || !found { + return false, err + } + return objects.IsTrue(v), nil +} + +// SpecIsUninitializedSubmodule ports _PyModuleSpec_IsUninitializedSubmodule: +// name is currently mid-import as a submodule, i.e. it appears in +// spec._uninitialized_submodules. A missing list reads as "not a submodule". +// +// CPython: Objects/moduleobject.c:876 _PyModuleSpec_IsUninitializedSubmodule +func SpecIsUninitializedSubmodule(spec objects.Object, name string) (bool, error) { + if spec == nil || objects.IsNone(spec) { + return false, nil + } + v, found, err := optionalAttr(spec, "_uninitialized_submodules") + if err != nil || !found { + return false, err + } + contains, err := objects.Contains(v, objects.NewStr(name)) + if err != nil { + return false, err + } + return contains, nil +} + +// ModuleIsPossiblyShadowing ports _PyModule_IsPossiblyShadowing: the +// module at origin could shadow a same-named module later on the search +// path. The check is: not sys.flags.safe_path and +// dirname(origin minus a trailing /__init__.py) == (sys.path[0] or cwd). +// +// CPython: Objects/moduleobject.c:923 _PyModule_IsPossiblyShadowing +func ModuleIsPossiblyShadowing(originFound bool, origin string) (bool, error) { + if !originFound { + return false, nil + } + if safePathEnabled() { + return false, nil + } + root := origin + sep := strings.LastIndex(root, string(os.PathSeparator)) + if sep < 0 { + return false, nil + } + // A package origin ends in __init__.py; step one directory up. + if root[sep+1:] == "__init__.py" { + root = root[:sep] + sep = strings.LastIndex(root, string(os.PathSeparator)) + if sep < 0 { + return false, nil + } + } + root = root[:sep] + + sysPath0, ok := sysPathZero() + if !ok { + return false, nil + } + if sysPath0 == "" { + cwd, err := os.Getwd() + if err != nil { + return false, err + } + sysPath0 = cwd + } + return sysPath0 == root, nil +} + +// safePathEnabled reports whether sys.flags.safe_path is truthy. +// +// CPython: Objects/moduleobject.c:937 config->safe_path +func safePathEnabled() bool { + sysMod, ok := GetModule("sys") + if !ok { + return false + } + flags, err := objects.GetAttr(sysMod, objects.NewStr("flags")) + if err != nil { + return false + } + sp, err := objects.GetAttr(flags, objects.NewStr("safe_path")) + if err != nil { + return false + } + return objects.IsTrue(sp) +} + +// configSysPath0 holds the startup-captured leading sys.path entry, the +// equivalent of CPython's config->sys_path_0. The shadowing check uses +// this snapshot, NOT live sys.path[0], so a script that mutates sys.path +// after startup does not change shadowing detection. +// +// CPython: Python/initconfig.c config->sys_path_0 +var ( + configSysPath0 string + configSysPath0Set bool +) + +// SetConfigSysPath0 records the startup leading sys.path entry. The bool +// reports whether the interpreter installed one at all (false under +// safe_path, where CPython leaves config->sys_path_0 NULL). +func SetConfigSysPath0(path string, present bool) { + configSysPath0 = path + configSysPath0Set = present +} + +// sysPathZero returns config->sys_path_0. The bool is false when no +// leading entry was captured (e.g. safe_path). +// +// CPython: Objects/moduleobject.c:967 config->sys_path_0 +func sysPathZero() (string, bool) { + if !configSysPath0Set { + return "", false + } + return configSysPath0, true +} + +// StdlibModuleNamesContains reports whether modName is in +// sys.stdlib_module_names. modName is passed as the live object (not a +// Go string) so an unhashable __name__ raises through PySet_Contains +// exactly as CPython does. The lookup is silent when stdlib_module_names +// is missing or is not a set/frozenset (PyAnySet_Check guards the call). +// +// CPython: Objects/moduleobject.c:1059 PySet_Contains(stdlib_modules, mod_name) +func StdlibModuleNamesContains(modName objects.Object) (bool, error) { + sysMod, ok := GetModule("sys") + if !ok { + return false, nil + } + namesObj, found, err := optionalAttr(sysMod, "stdlib_module_names") + if err != nil { + return false, err + } + if !found { + return false, nil + } + if !anySetCheck(namesObj) { + return false, nil + } + contains, err := objects.Contains(namesObj, modName) + if err != nil { + return false, err + } + return contains, nil +} + +// anySetCheck ports PyAnySet_Check: the object is a set or frozenset (or +// a subclass of either). +// +// CPython: Include/cpython/setobject.h PyAnySet_Check +func anySetCheck(o objects.Object) bool { + t := o.Type() + return objects.IsSubtype(t, objects.SetType) || objects.IsSubtype(t, objects.FrozensetType) +} + +// moduleGetattrError ports the error tail of _Py_module_getattro_impl: +// after a generic-attribute miss with no PEP 562 __getattr__, it builds +// the best-effort AttributeError, surfacing the stdlib-shadowing and +// circular-import hints. It returns a Go error whose message the objects +// layer synthesizes into the AttributeError. It is wired into +// objects.ModuleAttrErrorHook so module.go can reach the import system's +// spec helpers without an import cycle. +// +// CPython: Objects/moduleobject.c:1024 _Py_module_getattro_impl (error tail) +func moduleGetattrError(m *objects.Module, name string) error { + d := m.Dict() + + // __name__ must be a str (or str subclass); otherwise the generic + // "module has no attribute" message applies. CPython uses PyUnicode_Check + // here, so a str subclass passes. + modNameObj, _ := d.GetItem(objects.NewStr("__name__")) + if modNameObj == nil || !objects.IsSubtype(modNameObj.Type(), objects.StrType()) { + return fmt.Errorf("AttributeError: module has no attribute '%s'", name) + } + modName := unicodeContents(modNameObj) + nameQ := quoteU(name) + modQ := quoteU(modName) + + spec, serr := d.GetItem(objects.NewStr("__spec__")) + if serr != nil || spec == nil || objects.IsNone(spec) { + return fmt.Errorf("AttributeError: module %s has no attribute %s", modQ, nameQ) + } + + origin, originFound, oerr := SpecFileOrigin(spec) + if oerr != nil { + return oerr + } + shadowing, sherr := ModuleIsPossiblyShadowing(originFound, origin) + if sherr != nil { + return sherr + } + shadowingStdlib := false + if shadowing { + c, cerr := StdlibModuleNamesContains(modNameObj) + if cerr != nil { + return cerr + } + shadowingStdlib = c + } + + if shadowingStdlib { + return fmt.Errorf("AttributeError: module %s has no attribute %s (consider renaming %s since it has the same name as the standard library module named %s and prevents importing that standard library module)", + modQ, nameQ, quoteU(origin), modQ) + } + + initializing, ierr := SpecIsInitializing(spec) + if ierr != nil { + return ierr + } + switch { + case initializing && shadowing: + return fmt.Errorf("AttributeError: module %s has no attribute %s (consider renaming %s if it has the same name as a library you intended to import)", + modQ, nameQ, quoteU(origin)) + case initializing && originFound: + return fmt.Errorf("AttributeError: partially initialized module %s from %s has no attribute %s (most likely due to a circular import)", + modQ, quoteU(origin), nameQ) + case initializing: + return fmt.Errorf("AttributeError: partially initialized module %s has no attribute %s (most likely due to a circular import)", + modQ, nameQ) + } + + // Not initializing: the miss is a circular import only if the name is a + // submodule currently mid-load (tracked on spec._uninitialized_submodules). + // + // CPython: Objects/moduleobject.c:1116 _PyModuleSpec_IsUninitializedSubmodule + uninit, uerr := SpecIsUninitializedSubmodule(spec, name) + if uerr != nil { + return uerr + } + if uninit { + return fmt.Errorf("AttributeError: cannot access submodule %s of module %s (most likely due to a circular import)", + nameQ, modQ) + } + return fmt.Errorf("AttributeError: module %s has no attribute %s", modQ, nameQ) +} + +// unicodeContents returns the string contents of a str (or str subclass) +// object, mirroring how CPython's %U formats a PyUnicode payload. +func unicodeContents(o objects.Object) string { + if u, ok := o.(*objects.Unicode); ok { + return u.Value() + } + if s, err := objects.Str(o); err == nil { + return s + } + return "" +} + +// quoteU wraps s in single quotes, matching the literal 'quotes' the +// CPython getattro format strings put around each %U substitution. +func quoteU(s string) string { return "'" + s + "'" } + +// init wires the module-getattro error builder into the objects package +// so module attribute misses surface the import-system shadowing hints. +func init() { + objects.ModuleAttrErrorHook = moduleGetattrError +} diff --git a/imp/writepyc.go b/imp/writepyc.go new file mode 100644 index 000000000..ca5e7ff4a --- /dev/null +++ b/imp/writepyc.go @@ -0,0 +1,294 @@ +// Bytecode-cache writing for the source loaders. After a .py file is +// compiled, SourceFileLoader.exec_module writes the resulting code +// object to a PEP 3147 __pycache__/..pyc file so the next +// import skips recompilation. gopy's import runs Go-side, so the write +// path is reimplemented here against the marshal .pyc writer. +// +// CPython: Lib/importlib/_bootstrap_external.py:1129 SourceFileLoader.get_code +// CPython: Lib/importlib/_bootstrap_external.py:1185 SourceFileLoader.set_data +package imp + +import ( + "bytes" + "os" + "path/filepath" + "strconv" + "strings" + + "github.com/tamnd/gopy/marshal" + "github.com/tamnd/gopy/objects" +) + +// pycacheDir is the PEP 3147 cache subdirectory name. +// +// CPython: Lib/importlib/_bootstrap_external.py:60 _PYCACHE +const pycacheDir = "__pycache__" + +// isFrozenBootstrapSource reports whether sourcePath is one of the two +// importlib bootstrap modules CPython freezes (importlib._bootstrap and +// importlib._bootstrap_external). Those are never byte-compiled to a .pyc +// in CPython, so gopy excludes them from the bytecode cache to keep their +// "" co_filename intact. A cached +// .pyc would be rewritten to the real disk path by fixCoFilename, leaving +// the import-machinery frames un-trimmable by remove_importlib_frames. +// +// CPython: Python/pylifecycle.c:1041 init_importlib (frozen modules) +func isFrozenBootstrapSource(sourcePath string) bool { + return strings.HasSuffix(sourcePath, "importlib/_bootstrap.py") || + strings.HasSuffix(sourcePath, "importlib/_bootstrap_external.py") +} + +// dontWriteBytecode reports sys.dont_write_bytecode. When True the +// source loaders skip the cache write entirely, exactly like CPython's +// SourceFileLoader.get_code (the `not sys.dont_write_bytecode` guard). +// +// CPython: Lib/importlib/_bootstrap_external.py:1167 source_to_code cache guard +func dontWriteBytecode() bool { + sysMod, ok := GetModule("sys") + if !ok { + return true + } + v, err := objects.GetAttr(sysMod, objects.NewStr("dont_write_bytecode")) + if err != nil { + return true + } + return objects.IsTrue(v) +} + +// cacheTag returns sys.implementation.cache_tag, the per-interpreter +// bytecode-cache discriminator (e.g. "gopy-3140"). The empty string +// signals a missing tag, in which case the caller skips caching the +// same way cache_from_source raises NotImplementedError. +// +// CPython: Lib/importlib/_bootstrap_external.py:480 cache_from_source (tag read) +func cacheTag() string { + sysMod, ok := GetModule("sys") + if !ok { + return "" + } + impl, err := objects.GetAttr(sysMod, objects.NewStr("implementation")) + if err != nil { + return "" + } + tag, err := objects.GetAttr(impl, objects.NewStr("cache_tag")) + if err != nil { + return "" + } + t, ok := tag.(*objects.Unicode) + if !ok { + return "" + } + return t.Value() +} + +// pycachePrefix returns sys.pycache_prefix as (value, set). When set, +// caches live under that root directory mirroring the source's absolute +// path instead of an adjacent __pycache__. +// +// CPython: Lib/importlib/_bootstrap_external.py:490 cache_from_source (prefix branch) +func pycachePrefix() (string, bool) { + sysMod, ok := GetModule("sys") + if !ok { + return "", false + } + v, err := objects.GetAttr(sysMod, objects.NewStr("pycache_prefix")) + if err != nil || objects.IsNone(v) { + return "", false + } + p, ok := v.(*objects.Unicode) + if !ok { + return "", false + } + return p.Value(), true +} + +// cacheFromSource computes the .pyc path for a source file, matching +// importlib.util.cache_from_source so the path gopy writes is the same +// one spec_from_file_location records as __cached__ and the loader reads +// back. Only the optimization=” (sys.flags.optimize == 0) case is +// produced; gopy never runs at -O. +// +// CPython: Lib/importlib/_bootstrap_external.py:466 cache_from_source +func cacheFromSource(sourcePath string) string { + tag := cacheTag() + if tag == "" { + return "" + } + head, tail := filepath.Split(sourcePath) + base := tail + sep := "" + if dot := strings.LastIndex(tail, "."); dot >= 0 { + base, sep = tail[:dot], "." + if base == "" { + // A leading-dot name like ".pyc" keeps the whole tail as base. + base = tail + sep = "" + } + } + almost := base + sep + tag + filename := almost + ".pyc" + if prefix, ok := pycachePrefix(); ok { + // CPython rebuilds the source's absolute directory under the prefix, + // dropping the volume separator so the tree nests cleanly. + absHead, err := filepath.Abs(head) + if err != nil { + absHead = head + } + absHead = strings.TrimPrefix(absHead, string(filepath.Separator)) + return filepath.Join(prefix, absHead, filename) + } + return filepath.Join(filepath.Clean(head), pycacheDir, filename) +} + +// readBytecodeCache returns the cached code object for sourcePath when a +// fresh, valid .pyc exists under __pycache__. "Fresh" means the .pyc +// magic matches and its timestamp-mode header records exactly the +// source's current mtime and size, the same staleness test +// SourceFileLoader.get_code applies before trusting the cache. A hash- +// mode .pyc (PEP 552) is only trusted when its hash bit is unchecked; +// any other condition (missing, stale, unreadable, checked-hash) returns +// ok=false so the caller recompiles from source. +// +// CPython: Lib/importlib/_bootstrap_external.py:1129 SourceFileLoader.get_code +// CPython: Lib/importlib/_bootstrap_external.py:585 _validate_timestamp_pyc +func readBytecodeCache(sourcePath string) (*objects.Code, bool) { + if isFrozenBootstrapSource(sourcePath) { + // CPython freezes importlib._bootstrap[_external] and never loads + // them from a .pyc, so their code objects keep the synthetic + // "" co_filename for the + // life of the process. gopy loads them from source instead; reading + // a cached .pyc would route through fixCoFilename below and rewrite + // that co_filename to the real disk path, leaving the import-machinery + // frames un-trimmable by remove_importlib_frames. Skip the cache so + // the source compiler stamps the frozen name every time. + // + // CPython: Python/import.c:3500 remove_importlib_frames (frozen names) + return nil, false + } + dest := cacheFromSource(sourcePath) + if dest == "" { + return nil, false + } + info, err := os.Stat(sourcePath) + if err != nil { + return nil, false + } + f, err := os.Open(dest) //nolint:gosec // dest is cacheFromSource of a trusted source path. + if err != nil { + return nil, false + } + defer f.Close() + code, hdr, err := marshal.ReadPyc(f) + if err != nil { + return nil, false + } + if hdr.Flags&0x1 != 0 { + // Hash-based .pyc: an unchecked-hash cache is trusted unconditionally, + // a checked-hash cache would need the source hash recomputed, which + // the timestamp fast path does not do, so fall back to recompiling. + // + // CPython: Lib/importlib/_bootstrap_external.py:609 _validate_hash_pyc + if hdr.Flags&0x2 != 0 { + return code, true + } + return nil, false + } + mtime := uint32(info.ModTime().Unix()) + size := uint32(info.Size()) + if hdr.Mtime != mtime || hdr.SourceSize != size { + return nil, false + } + // The cached code object carries whatever co_filename it was compiled + // with (py_compile's dfile can differ from the real source). When the + // source still exists the loader rewrites co_filename to the actual + // path, recursing into nested code consts, exactly like _compile_bytecode + // calling _imp._fix_co_filename. + // + // CPython: Lib/importlib/_bootstrap_external.py:809 _compile_bytecode + // CPython: Python/import.c:1276 _imp__fix_co_filename_impl + fixCoFilename(code, code.Filename, sourcePath) + return code, true +} + +// fixCoFilename rewrites co_filename on code and every nested code const +// whose filename matches oldname, mirroring CPython's recursive +// update_code_filenames. Only matching consts are touched so that a code +// object compiled against a different file is left alone. +// +// CPython: Python/import.c:1243 update_code_filenames +func fixCoFilename(code *objects.Code, oldname, newname string) { + if code.Filename != oldname { + return + } + code.Filename = newname + for _, c := range code.Consts { + if nested, ok := c.(*objects.Code); ok { + fixCoFilename(nested, oldname, newname) + } + } + code.SyncConstObjs() +} + +// writeBytecodeCache writes code to the .pyc cache for sourcePath unless +// sys.dont_write_bytecode is set. The header records the source file's +// mtime and size so a stale cache is detected on the next import. A +// write failure is swallowed: CPython's set_data treats a NotADirectory +// or permission error as non-fatal (the import still succeeds from +// source), and so does gopy. +// +// CPython: Lib/importlib/_bootstrap_external.py:1167 get_code (cache write) +// CPython: Lib/importlib/_bootstrap_external.py:1185 set_data (atomic write) +func writeBytecodeCache(sourcePath string, code *objects.Code) { + if dontWriteBytecode() || isFrozenBootstrapSource(sourcePath) { + return + } + dest := cacheFromSource(sourcePath) + if dest == "" { + return + } + info, err := os.Stat(sourcePath) + if err != nil { + return + } + mtime := uint32(info.ModTime().Unix()) + size := uint32(info.Size()) + + var buf bytes.Buffer + if err := marshal.WritePyc(&buf, code, mtime, size); err != nil { + return + } + // 0o777 is CPython's makedirs mode for __pycache__; the umask narrows it. + // CPython: Lib/importlib/_bootstrap_external.py source_to_cache makedirs. + if err := os.MkdirAll(filepath.Dir(dest), 0o777); err != nil { //nolint:gosec // CPython __pycache__ mode, umask-narrowed + return + } + // The cache inherits the source's permission bits plus write access, so a + // read-only .py still yields a rewritable .pyc. + // + // CPython: Lib/importlib/_bootstrap_external.py:438 _calc_mode + mode := info.Mode().Perm() | 0o200 + + // Write atomically the way _write_atomic does: a uniquely-suffixed temp + // file in the cache directory opened O_EXCL with the computed mode, then + // rename over the target. The temp name is keyed off the pid so concurrent + // writers do not collide. + // + // CPython: Lib/importlib/_bootstrap_external.py:184 _write_atomic + tmp := dest + "." + strconv.Itoa(os.Getpid()) + ".tmp" + f, err := os.OpenFile(tmp, os.O_EXCL|os.O_CREATE|os.O_WRONLY, mode&0o666) //nolint:gosec // tmp derives from a trusted cache path. + if err != nil { + return + } + if _, err := f.Write(buf.Bytes()); err != nil { + _ = f.Close() + _ = os.Remove(tmp) + return + } + if err := f.Close(); err != nil { + _ = os.Remove(tmp) + return + } + if err := os.Rename(tmp, dest); err != nil { + _ = os.Remove(tmp) + } +} diff --git a/initconfig/config_get.go b/initconfig/config_get.go new file mode 100644 index 000000000..be0201f9e --- /dev/null +++ b/initconfig/config_get.go @@ -0,0 +1,163 @@ +package initconfig + +import "sort" + +// ConfigMemberType mirrors the PyConfigMemberType enum: the storage +// class of a PyConfig field, which decides how config_get turns the raw +// member into a Python object. +// +// CPython: Python/initconfig.c:60 PyConfigMemberType +type ConfigMemberType int + +const ( + ConfigMemberInt ConfigMemberType = iota + ConfigMemberUint + ConfigMemberBool + ConfigMemberULong + ConfigMemberWStr + ConfigMemberWStrOpt + ConfigMemberWStrList +) + +// configSpec is one row of PYCONFIG_SPEC: the option name, its member +// type, the sys attribute config_get delegates to when use_sys is set +// (empty for NO_SYS / SYS_FLAG rows), and a reader that pulls the raw +// member out of a PyConfig. gopy only lists the rows whose members the +// v0.x PyConfig subset actually models; the scoped-out fields documented +// on PyConfig (tracemalloc, dump_refs, perf_profiling, ...) are absent +// here exactly as they are absent from the struct, so config_get reports +// them as unknown names until their subsystems land. +// +// CPython: Python/initconfig.c:105 PYCONFIG_SPEC +type configSpec struct { + name string + typ ConfigMemberType + sysAttr string + get func(c *PyConfig) any +} + +// pyconfigSpec is the gopy port of PYCONFIG_SPEC. Rows preserve the +// CPython option names, member types, and SYS_ATTR delegations. +// +// CPython: Python/initconfig.c:105 PYCONFIG_SPEC +var pyconfigSpec = []configSpec{ + // --- Public options --- + {"argv", ConfigMemberWStrList, "argv", func(c *PyConfig) any { return c.Argv }}, + {"base_exec_prefix", ConfigMemberWStrOpt, "base_exec_prefix", func(c *PyConfig) any { return c.BaseExecPrefix }}, + {"base_executable", ConfigMemberWStrOpt, "_base_executable", func(c *PyConfig) any { return c.BaseExecutable }}, + {"base_prefix", ConfigMemberWStrOpt, "base_prefix", func(c *PyConfig) any { return c.BasePrefix }}, + {"bytes_warning", ConfigMemberUint, "", func(c *PyConfig) any { return c.BytesWarning }}, + {"exec_prefix", ConfigMemberWStrOpt, "exec_prefix", func(c *PyConfig) any { return c.ExecPrefix }}, + {"executable", ConfigMemberWStrOpt, "executable", func(c *PyConfig) any { return c.Executable }}, + {"inspect", ConfigMemberBool, "", func(c *PyConfig) any { return c.Inspect }}, + {"int_max_str_digits", ConfigMemberUint, "", func(c *PyConfig) any { return c.IntMaxStrDigits }}, + {"interactive", ConfigMemberBool, "", func(c *PyConfig) any { return c.Interactive }}, + {"module_search_paths", ConfigMemberWStrList, "path", func(c *PyConfig) any { return c.ModuleSearchPaths }}, + {"optimization_level", ConfigMemberUint, "", func(c *PyConfig) any { return c.OptimizationLevel }}, + {"parser_debug", ConfigMemberBool, "", func(c *PyConfig) any { return c.ParserDebug }}, + {"platlibdir", ConfigMemberWStr, "platlibdir", func(c *PyConfig) any { return c.Platlibdir }}, + {"prefix", ConfigMemberWStrOpt, "prefix", func(c *PyConfig) any { return c.Prefix }}, + {"pycache_prefix", ConfigMemberWStrOpt, "pycache_prefix", func(c *PyConfig) any { return c.PycachePrefix }}, + {"quiet", ConfigMemberBool, "", func(c *PyConfig) any { return c.Quiet }}, + {"stdlib_dir", ConfigMemberWStrOpt, "_stdlib_dir", func(c *PyConfig) any { return c.StdlibDir }}, + {"use_environment", ConfigMemberBool, "", func(c *PyConfig) any { return c.UseEnvironment }}, + {"verbose", ConfigMemberUint, "", func(c *PyConfig) any { return c.Verbose }}, + {"warnoptions", ConfigMemberWStrList, "warnoptions", func(c *PyConfig) any { return c.WarnOptions }}, + {"write_bytecode", ConfigMemberBool, "", func(c *PyConfig) any { return c.WriteBytecode }}, + {"xoptions", ConfigMemberWStrList, "_xoptions", func(c *PyConfig) any { return c.XOptions }}, + + // --- Read-only options --- + {"buffered_stdio", ConfigMemberBool, "", func(c *PyConfig) any { return c.BufferedStdio }}, + {"check_hash_pycs_mode", ConfigMemberWStr, "", func(c *PyConfig) any { return c.checkHashPycsMode }}, + {"code_debug_ranges", ConfigMemberBool, "", func(c *PyConfig) any { return c.CodeDebugRanges }}, + {"configure_c_stdio", ConfigMemberBool, "", func(c *PyConfig) any { return c.ConfigureCStdio }}, + {"dev_mode", ConfigMemberBool, "", func(c *PyConfig) any { return c.DevMode }}, + {"filesystem_encoding", ConfigMemberWStr, "", func(c *PyConfig) any { return c.FilesystemEncoding }}, + {"filesystem_errors", ConfigMemberWStr, "", func(c *PyConfig) any { return c.FilesystemErrors }}, + {"hash_seed", ConfigMemberULong, "", func(c *PyConfig) any { return c.HashSeed }}, + {"home", ConfigMemberWStrOpt, "", func(c *PyConfig) any { return c.Home }}, + {"import_time", ConfigMemberUint, "", func(c *PyConfig) any { return c.ImportTime }}, + {"install_signal_handlers", ConfigMemberBool, "", func(c *PyConfig) any { return c.InstallSignalHandlers }}, + {"isolated", ConfigMemberBool, "", func(c *PyConfig) any { return c.Isolated }}, + {"orig_argv", ConfigMemberWStrList, "orig_argv", func(c *PyConfig) any { return c.OrigArgv }}, + {"parse_argv", ConfigMemberBool, "", func(c *PyConfig) any { return c.ParseArgv }}, + {"pathconfig_warnings", ConfigMemberBool, "", func(c *PyConfig) any { return c.PathconfigWarnings }}, + {"program_name", ConfigMemberWStr, "", func(c *PyConfig) any { return c.ProgramName }}, + {"run_command", ConfigMemberWStrOpt, "", func(c *PyConfig) any { return c.RunCommand }}, + {"run_filename", ConfigMemberWStrOpt, "", func(c *PyConfig) any { return c.RunFilename }}, + {"run_module", ConfigMemberWStrOpt, "", func(c *PyConfig) any { return c.RunModule }}, + {"safe_path", ConfigMemberBool, "", func(c *PyConfig) any { return c.SafePath }}, + {"site_import", ConfigMemberBool, "", func(c *PyConfig) any { return c.SiteImport }}, + {"skip_source_first_line", ConfigMemberBool, "", func(c *PyConfig) any { return c.SkipSourceFirstLine }}, + {"stdio_encoding", ConfigMemberWStr, "", func(c *PyConfig) any { return c.StdioEncoding }}, + {"stdio_errors", ConfigMemberWStr, "", func(c *PyConfig) any { return c.StdioErrors }}, + {"use_frozen_modules", ConfigMemberBool, "", func(c *PyConfig) any { return c.UseFrozenModules }}, + {"use_hash_seed", ConfigMemberBool, "", func(c *PyConfig) any { return c.UseHashSeed }}, + {"user_site_directory", ConfigMemberBool, "", func(c *PyConfig) any { return c.UserSiteDirectory }}, + {"warn_default_encoding", ConfigMemberBool, "", func(c *PyConfig) any { return c.WarnDefaultEncoding }}, + + // --- Init-only options --- + {"_init_main", ConfigMemberBool, "", func(c *PyConfig) any { return c.InitMain }}, + {"_install_importlib", ConfigMemberBool, "", func(c *PyConfig) any { return c.InstallImportlib }}, + {"module_search_paths_set", ConfigMemberBool, "", func(c *PyConfig) any { return c.ModuleSearchPathsSet }}, + {"pythonpath_env", ConfigMemberWStrOpt, "", func(c *PyConfig) any { return c.PythonpathEnv }}, + {"sys_path_0", ConfigMemberWStrOpt, "", func(c *PyConfig) any { return c.SysPath0 }}, +} + +// configFindSpec locates the PYCONFIG_SPEC row for name. +// +// CPython: Python/initconfig.c:4360 config_find_spec +func configFindSpec(name string) *configSpec { + for i := range pyconfigSpec { + if pyconfigSpec[i].name == name { + return &pyconfigSpec[i] + } + } + return nil +} + +// ConfigMember is the raw value of a config option plus the metadata +// config_get needs to wrap it: its member type and, when the option is +// exposed through sys, the sys attribute name to read instead. +// +// CPython: Python/initconfig.c:4378 config_get +type ConfigMember struct { + Value any + Type ConfigMemberType + SysAttr string +} + +// ConfigGet looks up name in PYCONFIG_SPEC and returns its raw member +// from c. The bool reports whether the name is a known config option; +// an unknown name maps to the "unknown config option name" ValueError +// the caller raises. +// +// This is the gopy split of config_get: this half resolves the spec and +// reads the raw member (config_find_spec + config_get_spec_member); the +// _testcapi layer wraps the member into a Python object and handles the +// use_sys delegation, exactly as config_get does once it has the member. +// +// CPython: Python/initconfig.c:4458 PyConfig_Get +func (c *PyConfig) ConfigGet(name string) (ConfigMember, bool) { + spec := configFindSpec(name) + if spec == nil { + return ConfigMember{}, false + } + return ConfigMember{ + Value: spec.get(c), + Type: spec.typ, + SysAttr: spec.sysAttr, + }, true +} + +// ConfigNames returns the sorted list of every known config option name. +// +// CPython: Modules/_testcapi/config.c:74 _testcapi_config_names +func ConfigNames() []string { + names := make([]string, len(pyconfigSpec)) + for i := range pyconfigSpec { + names[i] = pyconfigSpec[i].name + } + sort.Strings(names) + return names +} diff --git a/marshal/code.go b/marshal/code.go index 899b2ac2c..c1af528da 100644 --- a/marshal/code.go +++ b/marshal/code.go @@ -13,6 +13,7 @@ import ( "encoding/binary" "fmt" + "github.com/tamnd/gopy/monitor" "github.com/tamnd/gopy/objects" "github.com/tamnd/gopy/specialize" ) @@ -68,16 +69,20 @@ func marshalCode(enc *encoder, c *objects.Code, flag byte) error { } } // Mirror CPython's _PyCode_GetCode pre-write deopt: walk every - // codeunit and rewrite specialized opcodes back to their adaptive - // parent, then zero each trailing cache cell. Without this step a - // .pyc would carry whatever specialization state the in-memory Code - // happened to warm by marshal time, which is non-deterministic - // across runs and breaks byte-equality with the cpython oracle. + // codeunit and recover the base opcode, rewriting specialized + // opcodes back to their adaptive parent AND stripping the + // INSTRUMENTED_ markers (and the INSTRUMENTED_LINE side table) + // that sys.settrace / sys.monitoring leave in the live bytecode, + // then zero each trailing cache cell. Without this a .pyc would + // carry whatever specialization or monitoring state the in-memory + // Code happened to warm by marshal time: non-deterministic across + // runs, and on reload an INSTRUMENTED_LINE with no monitoring data + // behind it would dispatch a NOP in place of the real opcode. // specialize.Enable on unmarshalCode re-runs Quicken so adaptive // counters get reseeded on load. // // CPython: Objects/codeobject.c:2310 _PyCode_GetCode (deopts before write) - if err := enc.writeCachedBytes(specialize.DeoptCode(c.Code), true); err != nil { + if err := enc.writeCachedBytes(monitor.BaseCode(c), true); err != nil { return err } consts := make([]any, len(c.Consts)) @@ -169,7 +174,16 @@ func unmarshalCode(d *decoder) (*objects.Code, error) { if !ok { return nil, fmt.Errorf("marshal: code.code expected bytes, got %T", codeObj) } - c.Code = code + // PyCode_New copies co_code into the per-code co_code_adaptive + // buffer that specialization and instrumentation mutate in place; + // the immutable co_code bytes object is never touched. gopy keeps + // one slice for both roles, so it must own a private copy here. + // Otherwise marshal's reference table, which dedups byte-identical + // co_code across sibling functions, hands two code objects the same + // backing array and an in-place rewrite on one corrupts the other. + // + // CPython: Objects/codeobject.c:117 _PyCode_New (co_code_adaptive copy) + c.Code = append([]byte(nil), code...) // consts tuple constsObj, err := d.read() @@ -391,21 +405,30 @@ func boolCount(b bool) int { } // splitLocalsplusnames reconstructs varnames/cellvars/freevars from -// the wire-format combined array. +// the wire-format combined array. The three buckets are not disjoint: +// an argument that is also closed over by a nested function carries +// both CO_FAST_LOCAL and CO_FAST_CELL, and CPython lists it in both +// co_varnames and co_cellvars. Routing it to cellvars only would drop +// it from co_varnames and shift the argument-name slice the frame uses +// to report keyword-only arguments. Match get_localsplus_names: select +// each bucket by an independent bit test. +// +// CPython: Objects/codeobject.c:424 get_localsplus_names func splitLocalsplusnames(names []any, kinds []byte) (varnames []string, cellvars []string, freevars []string) { for i, n := range names { s, _ := n.(string) if i >= len(kinds) { break } - switch { - case kinds[i]&coFastFree != 0: - freevars = append(freevars, s) - case kinds[i]&coFastCell != 0: - cellvars = append(cellvars, s) - default: + if kinds[i]&coFastLocal != 0 { varnames = append(varnames, s) } + if kinds[i]&coFastCell != 0 { + cellvars = append(cellvars, s) + } + if kinds[i]&coFastFree != 0 { + freevars = append(freevars, s) + } } return varnames, cellvars, freevars } @@ -424,6 +447,16 @@ func splitLocalsplusnames(names []any, kinds []byte) (varnames []string, cellvar // CPython: Objects/codeobject.c:203 intern_constants // CPython: Python/marshal.c:391 w_ref interned check. func wrapConstStrings(v any) any { + // A code object that round-tripped through Python (marshal.load then + // code.replace) carries co_consts as objects.Object values rather than + // the native Go consts a freshly-compiled gopy Code holds. Normalize + // those to the native marshal value set first so the rest of this + // function (and writeBody) sees ints, strings, tuples and code objects. + if obj, ok := v.(objects.Object); ok { + if n, err := fromObject(obj); err == nil { + v = n + } + } switch x := v.(type) { case string: if shouldInternString(x) { diff --git a/marshal/marshal.go b/marshal/marshal.go index 309d49361..9813d4e2e 100644 --- a/marshal/marshal.go +++ b/marshal/marshal.go @@ -19,6 +19,7 @@ import ( "math/big" "unsafe" + "github.com/tamnd/gopy/ast" "github.com/tamnd/gopy/objects" ) @@ -75,6 +76,30 @@ const flagRef = 0x80 // CPython: Python/marshal.c WFERR_UNMARSHALLABLE var ErrUnmarshallable = errors.New("marshal: object cannot be marshaled") +// The three EOF sentinels mirror the EOFError messages CPython's r_object / +// r_byte / r_string raise when the wire data runs out. The marshal module +// surface maps them to EOFError, every other decode error to ValueError. +// +// CPython: Python/marshal.c:833 r_string ("marshal data too short") +// CPython: Python/marshal.c:916 r_byte ("EOF read where not expected") +// CPython: Python/marshal.c:1172 r_object ("EOF read where object expected") +var ( + ErrEOFObjectExpected = errors.New("EOF read where object expected") + ErrEOFNotExpected = errors.New("EOF read where not expected") + ErrDataTooShort = errors.New("marshal data too short") +) + +// IsEOF reports whether err is one of the marshal EOF sentinels (or a raw +// io.EOF / io.ErrUnexpectedEOF that escaped conversion). The module surface +// uses it to choose EOFError over ValueError. +func IsEOF(err error) bool { + return errors.Is(err, ErrEOFObjectExpected) || + errors.Is(err, ErrEOFNotExpected) || + errors.Is(err, ErrDataTooShort) || + errors.Is(err, io.EOF) || + errors.Is(err, io.ErrUnexpectedEOF) +} + // Dump writes v to w in the version-5 wire format. // // CPython: Python/marshal.c PyMarshal_WriteObjectToFile @@ -330,6 +355,14 @@ func (e *encoder) write(v any) error { } return e.writeByte(typeFalse) } + // The Ellipsis singleton (the `...` const) is short-circuited before + // the FLAG_REF memo, exactly like None / True / False. gopy spells the + // const as ast.EllipsisType; the runtime ellipsis object maps here too. + // + // CPython: Python/marshal.c:476 w_object (v == Py_Ellipsis) + if isEllipsisValue(v) { + return e.writeByte(typeEllipsis) + } e.depth++ defer func() { e.depth-- }() @@ -560,7 +593,16 @@ func (b *byteReader) ReadByte() (byte, error) { } func (d *decoder) readByte() (byte, error) { - return d.r.ReadByte() + b, err := d.r.ReadByte() + if err != nil { + // CPython's r_byte raises EOFError "EOF read where not expected". + // CPython: Python/marshal.c:916 r_byte + if errors.Is(err, io.EOF) || errors.Is(err, io.ErrUnexpectedEOF) { + return 0, ErrEOFNotExpected + } + return 0, err + } + return b, nil } func (d *decoder) readN(n int) ([]byte, error) { @@ -568,6 +610,12 @@ func (d *decoder) readN(n int) ([]byte, error) { for i := 0; i < n; i++ { b, err := d.r.ReadByte() if err != nil { + // CPython reads byte strings through r_string, which raises + // EOFError "marshal data too short" when the buffer underruns. + // CPython: Python/marshal.c:833 r_string + if errors.Is(err, io.EOF) || errors.Is(err, io.ErrUnexpectedEOF) { + return nil, ErrDataTooShort + } return nil, err } out[i] = b @@ -598,6 +646,13 @@ func (d *decoder) readInt64() (int64, error) { func (d *decoder) read() (any, error) { tag, err := d.readByte() if err != nil { + // r_object reads the type code first; an EOF here is reported as + // "EOF read where object expected", distinct from r_byte's own + // "EOF read where not expected" used mid-object. + // CPython: Python/marshal.c:1172 r_object + if errors.Is(err, ErrEOFNotExpected) || errors.Is(err, io.EOF) || errors.Is(err, io.ErrUnexpectedEOF) { + return nil, ErrEOFObjectExpected + } return nil, err } @@ -632,6 +687,13 @@ func (d *decoder) decodeTag(tag byte) (any, error) { return true, nil case typeFalse: return false, nil + case typeEllipsis: + // Decode to ast.EllipsisType, the same `...` const the compiler + // emits, so a marshaled-then-loaded code object round-trips + // identically to a freshly compiled one. + // + // CPython: Python/marshal.c r_object TYPE_ELLIPSIS + return ast.Ellipsis, nil case typeInt: v, err := d.readInt32() return int64(v), err @@ -835,12 +897,31 @@ func toObject(v any) (objects.Object, error) { return objects.NewFloat(x), nil case string: return objects.NewStr(x), nil + case ast.EllipsisType: + return objects.Ellipsis(), nil case objects.Object: return x, nil } return nil, fmt.Errorf("marshal: cannot convert %T to Object", v) } +// isEllipsisValue reports whether v is the marshalable Ellipsis const, +// in either of the two spellings gopy uses: the compiler emits the +// ast.EllipsisType node for a `...` literal, while a code object built +// at runtime (e.g. via code.replace) may carry the runtime ellipsis +// singleton instead. Both serialize to TYPE_ELLIPSIS. +// +// CPython: Python/marshal.c:476 w_object (v == Py_Ellipsis) +func isEllipsisValue(v any) bool { + if _, ok := v.(ast.EllipsisType); ok { + return true + } + if obj, ok := v.(objects.Object); ok { + return obj == objects.Ellipsis() + } + return false +} + // fromObject converts an objects.Object back to a plain Go marshal // value so that set and frozenset items can pass through write(). func fromObject(obj objects.Object) (any, error) { @@ -859,6 +940,20 @@ func fromObject(obj objects.Object) (any, error) { return x, nil case *objects.Code: return x, nil + case *objects.Complex: + return x.Complex128(), nil + case *objects.Bytes: + return x.Bytes(), nil + case *objects.Tuple: + out := make([]any, x.Len()) + for i := 0; i < x.Len(); i++ { + n, err := fromObject(x.Item(i)) + if err != nil { + return nil, err + } + out[i] = n + } + return out, nil } // None and str use unexported concrete types; dispatch via type slots. if obj.Type() == objects.NoneType() { diff --git a/module/_collections/module.go b/module/_collections/module.go index f2411a2da..9eab9fd8a 100644 --- a/module/_collections/module.go +++ b/module/_collections/module.go @@ -1486,7 +1486,18 @@ func defaultDictGetItem(o, key objects.Object) (objects.Object, error) { if err == nil { return v, nil } - // Key absent: call __missing__. + // Key absent: dict_subscript looks up __missing__ at the type level, so + // a defaultdict subclass that overrides it (and declines to insert) is + // honoured instead of always running defdict_missing. + // + // CPython: Objects/dictobject.c:2229 dict_subscript + missingFn, merr := objects.LookupSpecial(o, "__missing__") + if merr != nil { + return nil, merr + } + if missingFn != nil { + return objects.CallOneArg(missingFn, key) + } res, merr := defaultDictMissing([]objects.Object{o, key}, nil) if merr != nil { return nil, merr diff --git a/module/_imp/module.go b/module/_imp/module.go index 841419443..d1fc26c1d 100644 --- a/module/_imp/module.go +++ b/module/_imp/module.go @@ -1,19 +1,27 @@ // Package _imp is the gopy port of CPython's Modules/_imp module (the -// builtin half lives in Python/import.c). Only the slice consumed by -// the vendored importlib._bootstrap_external is materialized: +// builtin half lives in Python/import.c). It materializes the surface +// the vendored importlib._bootstrap / _bootstrap_external drive: // // - source_hash(key, source) Python/import.c:4869 // - pyc_magic_number_token (int) Python/import.c:4926 // - check_hash_based_pycs (str) Python/import.c:4920 +// - extension_suffixes() Python/import.c:4807 +// - find_frozen / get_frozen_object Python/import.c:4660 / 4592 +// - is_frozen / is_frozen_package Python/import.c:4720 / 4700 +// - create_builtin / exec_builtin Python/import.c:4488 / 4540 +// - create_dynamic / exec_dynamic Python/import.c:4380 / 4440 +// - _fix_co_filename Python/import.c:4318 // -// The rest of the C module (lock_held, find_frozen, create_builtin, -// ...) is intentionally absent — gopy's own imp package already serves -// those roles and importlib does not need _imp to reach them. +// The frozen / builtin entries bridge to gopy's own imp package (the +// frozen table and the inittab), which is the real store for those +// modules. create_dynamic / exec_dynamic raise ImportError: gopy cannot +// load CPython C extension shared objects. // // CPython: Python/import.c:4943 imp_module package _imp import ( + "bytes" "encoding/binary" "fmt" @@ -70,44 +78,437 @@ func buildModule() (*objects.Module, error) { })); err != nil { return nil, err } - // is_builtin / is_frozen: gopy has no frozen/builtin import path, so - // both report negative. + // is_builtin(name): 1 when name is in the inittab, else 0. (-1 for a + // loaded-builtin-on-the-frozen-path edge case never arises here.) // - // CPython: Python/import.c:4943 imp_module + // CPython: Python/import.c:4720 _imp_is_builtin_impl if err := d.SetItem(objects.NewStr("is_builtin"), - objects.NewBuiltinFunction("is_builtin", func(_ []objects.Object, _ map[string]objects.Object) (objects.Object, error) { + objects.NewBuiltinFunction("is_builtin", isBuiltin)); err != nil { + return nil, err + } + // is_frozen(name): True when name is a frozen module with embedded + // bytecode. + // + // CPython: Python/import.c:4740 _imp_is_frozen_impl + if err := d.SetItem(objects.NewStr("is_frozen"), + objects.NewBuiltinFunction("is_frozen", isFrozen)); err != nil { + return nil, err + } + // extension_suffixes(): gopy cannot dynamically load CPython C + // extension shared objects, so the list of extension suffixes is + // empty. ExtensionFileLoader is therefore never wired to any suffix + // in _bootstrap_external._setup. + // + // CPython: Python/import.c:4807 _imp_extension_suffixes_impl + if err := d.SetItem(objects.NewStr("extension_suffixes"), + objects.NewBuiltinFunction("extension_suffixes", func(_ []objects.Object, _ map[string]objects.Object) (objects.Object, error) { + suffixes := imp.ExtensionSuffixes() + items := make([]objects.Object, len(suffixes)) + for i, s := range suffixes { + items[i] = objects.NewStr(s) + } + return objects.NewList(items), nil + })); err != nil { + return nil, err + } + // find_frozen / get_frozen_object / is_frozen_package bridge to + // gopy's frozen module table (imp/frozen.go). + // + // CPython: Python/import.c:4660 _imp_find_frozen_impl + // CPython: Python/import.c:4592 _imp_get_frozen_object_impl + // CPython: Python/import.c:4700 _imp_is_frozen_package_impl + if err := d.SetItem(objects.NewStr("find_frozen"), + objects.NewBuiltinFunction("find_frozen", findFrozen)); err != nil { + return nil, err + } + if err := d.SetItem(objects.NewStr("get_frozen_object"), + objects.NewBuiltinFunction("get_frozen_object", getFrozenObject)); err != nil { + return nil, err + } + if err := d.SetItem(objects.NewStr("is_frozen_package"), + objects.NewBuiltinFunction("is_frozen_package", isFrozenPackage)); err != nil { + return nil, err + } + // create_builtin / exec_builtin bridge to the inittab. gopy's + // initfunc builds a fully-initialized module in one step, so + // create_builtin runs it and exec_builtin is a no-op. + // + // CPython: Python/import.c:4488 _imp_create_builtin + // CPython: Python/import.c:4540 _imp_exec_builtin_impl + if err := d.SetItem(objects.NewStr("create_builtin"), + objects.NewBuiltinFunction("create_builtin", createBuiltin)); err != nil { + return nil, err + } + if err := d.SetItem(objects.NewStr("exec_builtin"), + objects.NewBuiltinFunction("exec_builtin", func(_ []objects.Object, _ map[string]objects.Object) (objects.Object, error) { return objects.NewInt(0), nil })); err != nil { return nil, err } - if err := d.SetItem(objects.NewStr("is_frozen"), - objects.NewBuiltinFunction("is_frozen", func(_ []objects.Object, _ map[string]objects.Object) (objects.Object, error) { - return objects.NewBool(false), nil + // create_dynamic / exec_dynamic: gopy cannot load CPython C + // extension shared objects. Match CPython's failure shape with an + // ImportError rather than silently succeeding. + // + // CPython: Python/import.c:4380 _imp_create_dynamic_impl + // CPython: Python/import.c:4440 _imp_exec_dynamic_impl + if err := d.SetItem(objects.NewStr("create_dynamic"), + objects.NewBuiltinFunction("create_dynamic", createDynamic)); err != nil { + return nil, err + } + if err := d.SetItem(objects.NewStr("exec_dynamic"), + objects.NewBuiltinFunction("exec_dynamic", func(_ []objects.Object, _ map[string]objects.Object) (objects.Object, error) { + // gopy's create_dynamic already ran the module's Init (both the + // PEP 489 create and exec phases), so exec_dynamic is a no-op that + // reports success with 0, matching the C impl's return value. + return objects.NewInt(0), nil })); err != nil { return nil, err } + // _fix_co_filename(code, path): rewrite co_filename on a code object + // (and its nested code consts) in place. + // + // CPython: Python/import.c:4318 _imp__fix_co_filename_impl + if err := d.SetItem(objects.NewStr("_fix_co_filename"), + objects.NewBuiltinFunction("_fix_co_filename", fixCoFilename)); err != nil { + return nil, err + } // _override_frozen_modules_for_tests / _override_multi_interp_extensions_check: - // test.support.import_helper toggles these around test runs. gopy - // keeps them as no-ops returning a sentinel int matching CPython's - // previous-value convention. + // test.support.import_helper toggles these around test runs. + // _override_frozen_modules_for_tests records the override that + // use_frozen() consults (>0 on, <0 off, 0 default) and returns the + // previous value, matching the C impl. // // CPython: Python/import.c:5034 _imp__override_frozen_modules_for_tests_impl // CPython: Python/import.c:5052 _imp__override_multi_interp_extensions_check_impl if err := d.SetItem(objects.NewStr("_override_frozen_modules_for_tests"), - objects.NewBuiltinFunction("_override_frozen_modules_for_tests", func(_ []objects.Object, _ map[string]objects.Object) (objects.Object, error) { - return objects.None(), nil + objects.NewBuiltinFunction("_override_frozen_modules_for_tests", func(args []objects.Object, _ map[string]objects.Object) (objects.Object, error) { + override, err := signedIntArg(args, "_override_frozen_modules_for_tests") + if err != nil { + return nil, err + } + return objects.NewInt(int64(imp.SetFrozenOverride(override))), nil })); err != nil { return nil, err } if err := d.SetItem(objects.NewStr("_override_multi_interp_extensions_check"), - objects.NewBuiltinFunction("_override_multi_interp_extensions_check", func(_ []objects.Object, _ map[string]objects.Object) (objects.Object, error) { - return objects.NewInt(0), nil + objects.NewBuiltinFunction("_override_multi_interp_extensions_check", func(args []objects.Object, _ map[string]objects.Object) (objects.Object, error) { + override, err := signedIntArg(args, "_override_multi_interp_extensions_check") + if err != nil { + return nil, err + } + return objects.NewInt(int64(imp.SetMultiInterpOverride(override))), nil })); err != nil { return nil, err } return m, nil } +// nameArg pulls a single str positional out of args for the frozen / +// builtin query functions, which all take exactly one module name. +func nameArg(fn string, args []objects.Object) (string, error) { + if len(args) < 1 { + return "", fmt.Errorf("TypeError: %s() missing required argument", fn) + } + u, ok := args[0].(*objects.Unicode) + if !ok { + return "", fmt.Errorf("TypeError: %s() argument must be str, not '%T'", fn, args[0]) + } + return u.Value(), nil +} + +// signedIntArg pulls a single int positional out of args for the +// override toggles, which take one C int. A missing argument defaults +// to 0 (the "use default" override state). +func signedIntArg(args []objects.Object, fn string) (int, error) { + if len(args) < 1 { + return 0, nil + } + v, ok := args[0].(*objects.Int) + if !ok { + return 0, fmt.Errorf("TypeError: %s() argument must be int, not '%T'", fn, args[0]) + } + n, _ := v.Int64() + return int(n), nil +} + +// isBuiltin implements _imp.is_builtin(name). +// +// CPython: Python/import.c:4720 _imp_is_builtin_impl +func isBuiltin(args []objects.Object, _ map[string]objects.Object) (objects.Object, error) { + name, err := nameArg("is_builtin", args) + if err != nil { + return nil, err + } + if imp.IsBuiltinName(name) { + return objects.NewInt(1), nil + } + return objects.NewInt(0), nil +} + +// isFrozen implements _imp.is_frozen(name): True only when the name has +// embedded bytecode (a placeholder entry with nil Code is not frozen). +// +// CPython: Python/import.c:4740 _imp_is_frozen_impl +func isFrozen(args []objects.Object, _ map[string]objects.Object) (objects.Object, error) { + name, err := nameArg("is_frozen", args) + if err != nil { + return nil, err + } + if !imp.UseFrozen() { + return objects.NewBool(false), nil + } + fm, ok := imp.FindFrozen(name) + return objects.NewBool(ok && fm.HasCode()), nil +} + +// isFrozenPackage implements _imp.is_frozen_package(name). +// +// CPython: Python/import.c:4700 _imp_is_frozen_package_impl +func isFrozenPackage(args []objects.Object, _ map[string]objects.Object) (objects.Object, error) { + name, err := nameArg("is_frozen_package", args) + if err != nil { + return nil, err + } + fm, ok := imp.FindFrozen(name) + if !ok || !fm.HasCode() { + return nil, fmt.Errorf("ImportError: No such frozen object named %s", name) + } + return objects.NewBool(fm.IsPackage), nil +} + +// findFrozen implements _imp.find_frozen(name, *, withdata=False). It +// returns a 3-tuple (data, is_package, origname) or None. gopy stores +// frozen modules as code objects, not marshalled blobs, so the data +// slot is always None (FrozenImporter.find_spec discards it and fetches +// the code later via get_frozen_object). +// +// CPython: Python/import.c:4660 _imp_find_frozen_impl +func findFrozen(args []objects.Object, _ map[string]objects.Object) (objects.Object, error) { + name, err := nameArg("find_frozen", args) + if err != nil { + return nil, err + } + if !imp.UseFrozen() { + return objects.None(), nil + } + fm, ok := imp.FindFrozen(name) + if !ok || !fm.HasCode() { + return objects.None(), nil + } + origname, isNone := fm.Origin() + var origObj objects.Object = objects.None() + if !isNone { + origObj = objects.NewStr(origname) + } + return objects.NewTuple([]objects.Object{ + objects.None(), + objects.NewBool(fm.IsPackage), + origObj, + }), nil +} + +// getFrozenObject implements _imp.get_frozen_object(name, data=None). It +// returns the frozen module's code object. +// +// CPython: Python/import.c:4592 _imp_get_frozen_object_impl +func getFrozenObject(args []objects.Object, _ map[string]objects.Object) (objects.Object, error) { + name, err := nameArg("get_frozen_object", args) + if err != nil { + return nil, err + } + + // When an explicit data buffer is supplied, CPython unmarshals it + // directly rather than consulting the frozen table; a buffer that does + // not decode to a code object raises ImportError "... is invalid". + if len(args) >= 2 && !objects.IsNone(args[1]) { + data, err := toBuffer(args[1]) + if err != nil { + return nil, fmt.Errorf("TypeError: get_frozen_object() argument 2 must be bytes, not '%T'", args[1]) + } + return unmarshalFrozenData(args[0], data) + } + + fm, ok := imp.FindFrozen(name) + if !ok || !fm.HasCode() { + return nil, fmt.Errorf("ImportError: No such frozen object named %s", name) + } + code, err := fm.CodeObject() + if err != nil { + return nil, err + } + if code == nil { + return nil, fmt.Errorf("ImportError: No such frozen object named %s", name) + } + return code, nil +} + +// unmarshalFrozenData ports unmarshal_frozen_code for the explicit-data +// path of get_frozen_object: an empty or non-code or undecodable buffer +// raises ImportError "Frozen object named %R is invalid" (a non-code +// object that decodes cleanly raises TypeError instead). +// +// CPython: Python/import.c unmarshal_frozen_code / set_frozen_error +func unmarshalFrozenData(nameObj objects.Object, data []byte) (objects.Object, error) { + nameRepr, rerr := objects.Repr(nameObj) + if rerr != nil { + return nil, rerr + } + if len(data) == 0 { + return nil, fmt.Errorf("ImportError: Frozen object named %s is invalid", nameRepr) + } + obj, err := marshal.Load(bytes.NewReader(data)) + if err != nil { + return nil, fmt.Errorf("ImportError: Frozen object named %s is invalid", nameRepr) + } + code, ok := obj.(*objects.Code) + if !ok { + return nil, fmt.Errorf("TypeError: frozen object %s is not a code object", nameRepr) + } + return code, nil +} + +// createDynamic implements _imp.create_dynamic(spec, file=None). gopy +// cannot load CPython C extension shared objects, so the load itself +// fails with ImportError. The spec.name / spec.origin validation that +// _Py_ext_module_loader_info_init_from_spec performs still runs first, +// so a name or origin with an embedded null raises ValueError exactly as +// CPython does before the unsupported-load failure. +// +// CPython: Python/import.c:4743 _imp_create_dynamic_impl +// CPython: Python/importdl.c:115 _Py_ext_module_loader_info_init +func createDynamic(args []objects.Object, _ map[string]objects.Object) (objects.Object, error) { + if len(args) < 1 { + return nil, fmt.Errorf("TypeError: create_dynamic() missing required argument 'spec'") + } + spec := args[0] + + nameObj, err := objects.GetAttr(spec, objects.NewStr("name")) + if err != nil { + return nil, err + } + nameStr, ok := nameObj.(*objects.Unicode) + if !ok { + return nil, fmt.Errorf("TypeError: module name must be a string") + } + if err := checkEmbeddedNull(nameStr.Value()); err != nil { + return nil, err + } + + originObj, err := objects.GetAttr(spec, objects.NewStr("origin")) + if err != nil { + return nil, err + } + origin := "" + if !objects.IsNone(originObj) { + originStr, ok := originObj.(*objects.Unicode) + if !ok { + return nil, fmt.Errorf("TypeError: module filename must be a string") + } + if err := checkEmbeddedNull(originStr.Value()); err != nil { + return nil, err + } + origin = originStr.Value() + } + + // gopy compiles its extension modules into the runtime as Go builtins + // rather than dlopening a shared object. When the spec names a + // registered extension, run its Init (the create+exec phases) behind the + // PEP 489 multiple-interpreters compat check; otherwise fall back to the + // "cannot load a C extension" ImportError. + mod, found, err := imp.CreateExtModule(nameStr.Value(), origin) + if err != nil { + return nil, err + } + if found { + return mod, nil + } + + return nil, fmt.Errorf("ImportError: gopy does not support dynamic (C extension) module loading") +} + +// checkEmbeddedNull mirrors the ValueError CPython raises when encoding a +// str that contains a NUL, the failure path the name / filename encode +// steps in _Py_ext_module_loader_info_init hit for an embedded null. +// +// CPython: Objects/unicodeobject.c PyUnicode_AsUTF8AndSize (embedded null) +func checkEmbeddedNull(s string) error { + for i := 0; i < len(s); i++ { + if s[i] == 0 { + return fmt.Errorf("ValueError: embedded null character") + } + } + return nil +} + +// createBuiltin implements _imp.create_builtin(spec). It reads spec.name +// and runs the matching inittab initializer, which builds a fully +// initialized module (gopy has no separate exec phase for builtins). +// +// CPython: Python/import.c:4488 _imp_create_builtin +func createBuiltin(args []objects.Object, _ map[string]objects.Object) (objects.Object, error) { + if len(args) < 1 { + return nil, fmt.Errorf("TypeError: create_builtin() missing required argument 'spec'") + } + nameObj, err := objects.GetAttr(args[0], objects.NewStr("name")) + if err != nil { + return nil, err + } + u, ok := nameObj.(*objects.Unicode) + if !ok { + return nil, fmt.Errorf("TypeError: spec.name must be str, not '%T'", nameObj) + } + name := u.Value() + initFn := imp.FindInitFunc(name) + if initFn == nil { + return nil, fmt.Errorf("ImportError: no built-in module named %s", name) + } + mod, err := initFn() + if err != nil { + return nil, err + } + mod.StampBuiltinModule() + return mod, nil +} + +// fixCoFilename implements _imp._fix_co_filename(code, path). It rewrites +// co_filename on the code object in place. +// +// CPython: Python/import.c:4318 _imp__fix_co_filename_impl +func fixCoFilename(args []objects.Object, _ map[string]objects.Object) (objects.Object, error) { + if len(args) < 2 { + return nil, fmt.Errorf("TypeError: _fix_co_filename() takes exactly 2 arguments") + } + code, ok := args[0].(*objects.Code) + if !ok { + return nil, fmt.Errorf("TypeError: _fix_co_filename() argument 1 must be code, not '%T'", args[0]) + } + path, ok := args[1].(*objects.Unicode) + if !ok { + return nil, fmt.Errorf("TypeError: _fix_co_filename() argument 2 must be str, not '%T'", args[1]) + } + updateCodeFilenames(code, code.Filename, path.Value()) + return objects.None(), nil +} + +// updateCodeFilenames rewrites co_filename to newname on co and on every +// nested code object reachable through co_consts that still carries the +// original oldname. A code compiled with a stale dfile (the .pyc records +// it) gets re-stamped to the real source path on import, including the +// code objects of the functions it defines. +// +// CPython: Python/import.c:4291 update_code_filenames +func updateCodeFilenames(co *objects.Code, oldname, newname string) { + if co.Filename != oldname { + return + } + co.Filename = newname + for _, c := range co.Consts { + if nested, ok := c.(*objects.Code); ok { + updateCodeFilenames(nested, oldname, newname) + } + } +} + // sourceHash mirrors _imp.source_hash(key, source). It hashes the // source buffer with SipHash-1-3 keyed by `key` and returns the result // as 8 little-endian bytes. diff --git a/module/_interpreters/module.go b/module/_interpreters/module.go index 3ec1432d1..ee6049eca 100644 --- a/module/_interpreters/module.go +++ b/module/_interpreters/module.go @@ -67,11 +67,20 @@ type interp struct { whence int refs int64 ns *objects.Dict + // ownGil and checkMulti capture the PyInterpreterConfig the interpreter + // was created with: whether it runs with its own GIL and whether it + // enforces the subinterpreter-incompatible-extension check. The default + // _PyInterpreterConfig_INIT (isolated) sets both, so a bare create() + // produces an interpreter that rejects single-phase extension imports. + // + // CPython: Include/cpython/pylifecycle.h:52 _PyInterpreterConfig_INIT + ownGil bool + checkMulti bool } var ( mu sync.Mutex - registry = map[int64]*interp{} + registry = map[int64]*interp{} nextID int64 = 1 ) @@ -114,17 +123,57 @@ func argInt(args []objects.Object, i int) (int64, error) { // create allocates a new interpreter and returns its id. // // CPython: Modules/_interpretersmodule.c:768 interp_create -func create(_ []objects.Object, _ map[string]objects.Object) (objects.Object, error) { +func create(args []objects.Object, _ map[string]objects.Object) (objects.Object, error) { + // The optional config selects the interpreter's isolation. The default + // (_PyInterpreterConfig_INIT) is fully isolated: its own GIL and the + // multi-interpreter extension check enabled. A "legacy" named config or + // an explicit object can relax that. + // + // CPython: Modules/_interpretersmodule.c:404 config_from_object + ownGil, checkMulti := true, true + if len(args) >= 1 && !objects.IsNone(args[0]) { + ownGil, checkMulti = configFromObject(args[0]) + } mu.Lock() defer mu.Unlock() id := nextID nextID++ ns := objects.NewDict() _ = ns.SetItem(objects.NewStr("__name__"), objects.NewStr("__main__")) - registry[id] = &interp{id: id, whence: whenceStdlib, refs: 0, ns: ns} + registry[id] = &interp{id: id, whence: whenceStdlib, refs: 0, ns: ns, ownGil: ownGil, checkMulti: checkMulti} return objects.NewInt(id), nil } +// configFromObject reads the (own_gil, check_multi_interp_extensions) pair +// from a create() config argument: a named-config string or an object whose +// attributes mirror PyInterpreterConfig. +// +// CPython: Python/interpconfig.c:262 _PyInterpreterConfig_InitFromDict +func configFromObject(cfg objects.Object) (ownGil, checkMulti bool) { + if name, ok := cfg.(*objects.Unicode); ok { + switch name.Value() { + case "legacy": + return false, false + case "empty": + return false, false + default: // "default", "isolated", "" + return true, true + } + } + ownGil, checkMulti = true, true + if gilObj, err := objects.GetAttr(cfg, objects.NewStr("gil")); err == nil { + if gilStr, ok := gilObj.(*objects.Unicode); ok { + ownGil = gilStr.Value() == "own" + } + } + if checkObj, err := objects.GetAttr(cfg, objects.NewStr("check_multi_interp_extensions")); err == nil { + if t, terr := objects.IsTruthy(checkObj); terr == nil { + checkMulti = t + } + } + return ownGil, checkMulti +} + // destroy finalizes and removes an interpreter. // // CPython: Modules/_interpretersmodule.c:874 interp_destroy @@ -298,6 +347,15 @@ func execCode(args []objects.Object, _ map[string]objects.Object) (objects.Objec // // CPython: Modules/_interpretersmodule.c _PyXI_excinfo func excinfoFor(err error) objects.Object { + // CPython's _run_in_interpreter consumes the script's exception into the + // excinfo snapshot and clears it from the interpreter, so the failure + // does not leak into later operations (a pending exception otherwise + // surfaces during the next generator finalization). Mirror that clear. + // + // CPython: Python/crossinterp.c:1700 _PyXI_excinfo_InitFromException + if objects.ClearCurrentExceptionHook != nil { + objects.ClearCurrentExceptionHook() + } typeName := "Exception" msg := err.Error() if re, ok := err.(*objects.RaisedError); ok { @@ -309,7 +367,15 @@ func excinfoFor(err error) objects.Object { } } ns := objects.NewNamespace() - _ = objects.SetAttr(ns, objects.NewStr("type"), objects.NewStr(typeName)) + // excinfo.type is itself a namespace carrying the exception type's + // __name__/__qualname__/__module__, the shape _PyXI_excinfo_TypeAsObject + // builds so callers can read exc.type.__name__. + // + // CPython: Python/crossinterp.c:1517 _PyXI_excinfo_TypeAsObject + typeNS := objects.NewNamespace() + _ = objects.SetAttr(typeNS, objects.NewStr("__name__"), objects.NewStr(typeName)) + _ = objects.SetAttr(typeNS, objects.NewStr("__qualname__"), objects.NewStr(typeName)) + _ = objects.SetAttr(ns, objects.NewStr("type"), typeNS) _ = objects.SetAttr(ns, objects.NewStr("msg"), objects.NewStr(msg)) formatted := fmt.Sprintf("%s: %s", typeName, msg) _ = objects.SetAttr(ns, objects.NewStr("formatted"), objects.NewStr(formatted)) @@ -317,6 +383,43 @@ func excinfoFor(err error) objects.Object { return ns } +// runString runs a source string in the interpreter's __main__ namespace. +// Like exec it returns None on success or an excinfo namespace on an +// unhandled exception; the high-level caller decides what to do with it. +// +// CPython: Modules/_interpretersmodule.c:1174 interp_run_string +func runString(args []objects.Object, _ map[string]objects.Object) (objects.Object, error) { + id, err := argInt(args, 0) + if err != nil { + return nil, err + } + if len(args) < 2 { + return nil, fmt.Errorf("TypeError: run_string() missing 'script'") + } + script, ok := args[1].(*objects.Unicode) + if !ok { + return nil, fmt.Errorf("TypeError: run_string() argument 2 must be a string, not %s", args[1].Type().Name) + } + mu.Lock() + it, err := lookup(id) + mu.Unlock() + if err != nil { + return nil, err + } + // A subinterpreter run is a fresh-namespace exec that pushes a non-main + // interpreter state, so the PEP 489 extension compat check and the + // gh-144601 single-phase failure path observe the subinterpreter the + // same way CPython's switched-to-main init does. + // + // CPython: Modules/_interpretersmodule.c:650 _run_in_interpreter + imp.PushSubinterp(it.ownGil, it.checkMulti) + defer imp.PopSubinterp() + if _, err := builtins.Exec([]objects.Object{script, it.ns}, nil); err != nil { + return excinfoFor(err), nil + } + return objects.None(), nil +} + func isShareable(args []objects.Object, _ map[string]objects.Object) (objects.Object, error) { // gopy shares object references directly, so everything is shareable. // @@ -361,6 +464,7 @@ func buildModule() (*objects.Module, error) { {"is_running", fn("is_running", isRunning)}, {"set___main___attrs", fn("set___main___attrs", setMainAttrs)}, {"exec", fn("exec", execCode)}, + {"run_string", fn("run_string", runString)}, {"is_shareable", fn("is_shareable", isShareable)}, } for _, e := range entries { diff --git a/module/_posixsubprocess/module.go b/module/_posixsubprocess/module.go index eaddc7ddc..0fc2b140e 100644 --- a/module/_posixsubprocess/module.go +++ b/module/_posixsubprocess/module.go @@ -21,7 +21,6 @@ import ( "io" "os" "os/exec" - "runtime" "github.com/tamnd/gopy/imp" "github.com/tamnd/gopy/objects" @@ -88,14 +87,17 @@ func forkExec(args []objects.Object, _ map[string]objects.Object) (objects.Objec executable = execs[0] } - // args[4]: cwd - string or None + // args[4]: cwd - PyUnicode_FSConverter accepts str, bytes, or any + // os.PathLike (pathlib.Path), so subprocess.run(cwd=Path(...)) works. + // + // CPython: Modules/_posixsubprocess.c subprocess_fork_exec ("O&" cwd_obj) cwd := "" if args[4] != nil && args[4] != objects.None() { - s, ok := args[4].(*objects.Unicode) - if !ok { - return nil, fmt.Errorf("TypeError: cwd must be str or None") + s, err := fsConvert(args[4]) + if err != nil { + return nil, err } - cwd = s.Value() + cwd = s } // args[5]: env_list - list of "KEY=VALUE" strings or None. @@ -137,11 +139,12 @@ func forkExec(args []objects.Object, _ map[string]objects.Object) (objects.Objec // owned by Python's subprocess machinery (subprocess.py closes them // explicitly after fork_exec returns). If Go's GC fires the default // finalizer before Python calls os.close(), the fd is closed out from - // under the caller and subsequent os.close() raises EBADF. - // Pattern mirrors module/os/stat_darwin.go osFstat runtime.SetFinalizer. + // under the caller and subsequent os.close() raises EBADF. The finalizer + // is armed on the inner *os.file, so objects.ClearOSFileFinalizer reaches + // it rather than the outer handle (a SetFinalizer no-op). if p2cread >= 0 { f := os.NewFile(uintptr(p2cread), "pipe:stdin") - runtime.SetFinalizer(f, nil) + objects.ClearOSFileFinalizer(f) cmd.Stdin = f } else { cmd.Stdin = io.NopCloser(os.Stdin) @@ -151,7 +154,7 @@ func forkExec(args []objects.Object, _ map[string]objects.Object) (objects.Objec // CPython: Modules/_posixsubprocess.c:730 dup2(c2pwrite, 1) if c2pwrite >= 0 { f := os.NewFile(uintptr(c2pwrite), "pipe:stdout") - runtime.SetFinalizer(f, nil) + objects.ClearOSFileFinalizer(f) cmd.Stdout = f } else { cmd.Stdout = os.Stdout @@ -161,7 +164,7 @@ func forkExec(args []objects.Object, _ map[string]objects.Object) (objects.Objec // CPython: Modules/_posixsubprocess.c:737 dup2(errwrite, 2) if errwrite >= 0 { f := os.NewFile(uintptr(errwrite), "pipe:stderr") - runtime.SetFinalizer(f, nil) + objects.ClearOSFileFinalizer(f) cmd.Stderr = f } else { cmd.Stderr = os.Stderr @@ -216,16 +219,47 @@ func toStringSlice(obj objects.Object) ([]string, error) { return out, nil } -// objectToString converts a Python str or bytes object to a Go string. +// objectToString converts a Python str, bytes, or os.PathLike object to a +// Go string. CPython runs each argv member through fsconvert_strdup, which +// is PyUnicode_FSConverter, so pathlib.Path arguments are accepted too. +// +// CPython: Modules/_posixsubprocess.c:130 fsconvert_strdup func objectToString(obj objects.Object) (string, error) { switch v := obj.(type) { case *objects.Unicode: return v.Value(), nil case *objects.Bytes: return string(v.Bytes()), nil - default: - return "", fmt.Errorf("expected str, got %s", obj.Type().Name) } + if fspath, err := objects.GetAttr(obj, objects.NewStr("__fspath__")); err == nil { + result, err := objects.CallNoArgs(fspath) + if err != nil { + return "", err + } + return objectToString(result) + } + return "", fmt.Errorf("expected str, got %s", obj.Type().Name) +} + +// fsConvert mirrors PyUnicode_FSConverter: it accepts a str, bytes, or +// any os.PathLike (pathlib.Path) by invoking __fspath__ and recursing. +// +// CPython: Modules/posixmodule.c PyUnicode_FSConverter / PyOS_FSPath +func fsConvert(obj objects.Object) (string, error) { + switch v := obj.(type) { + case *objects.Unicode: + return v.Value(), nil + case *objects.Bytes: + return string(v.Bytes()), nil + } + if fspath, err := objects.GetAttr(obj, objects.NewStr("__fspath__")); err == nil { + result, err := objects.CallNoArgs(fspath) + if err != nil { + return "", err + } + return fsConvert(result) + } + return "", fmt.Errorf("TypeError: cwd must be str or None") } // toIntFd extracts a file descriptor integer from a Python int object. diff --git a/module/_posixsubprocess/module_test.go b/module/_posixsubprocess/module_test.go index d08744f6b..3bfc753a0 100644 --- a/module/_posixsubprocess/module_test.go +++ b/module/_posixsubprocess/module_test.go @@ -59,29 +59,29 @@ func makeArgs(argv []string, executable string, cwd string) []objects.Object { // 23 arguments in CPython clinic order: return []objects.Object{ - argList, // args (process_args) - execList, // executable_list - objects.False(), // close_fds + argList, // args (process_args) + execList, // executable_list + objects.False(), // close_fds objects.NewTuple([]objects.Object{}), // pass_fds - cwdObj, // cwd - objects.None(), // env (inherit) - intObj(-1), // p2cread - intObj(-1), // p2cwrite - intObj(-1), // c2pread - intObj(-1), // c2pwrite - intObj(-1), // errread - intObj(-1), // errwrite - intObj(-1), // errpipe_read - intObj(-1), // errpipe_write - objects.True(), // restore_signals - objects.False(), // call_setsid - intObj(-1), // pgid_to_set - objects.None(), // gid - objects.None(), // extra_groups - objects.None(), // uid - intObj(-1), // child_umask - objects.None(), // preexec_fn - objects.False(), // use_vfork + cwdObj, // cwd + objects.None(), // env (inherit) + intObj(-1), // p2cread + intObj(-1), // p2cwrite + intObj(-1), // c2pread + intObj(-1), // c2pwrite + intObj(-1), // errread + intObj(-1), // errwrite + intObj(-1), // errpipe_read + intObj(-1), // errpipe_write + objects.True(), // restore_signals + objects.False(), // call_setsid + intObj(-1), // pgid_to_set + objects.None(), // gid + objects.None(), // extra_groups + objects.None(), // uid + intObj(-1), // child_umask + objects.None(), // preexec_fn + objects.False(), // use_vfork } } @@ -107,16 +107,11 @@ func TestForkExecTrue(t *testing.T) { if err != nil { t.Fatalf("fork_exec: %v", err) } - tup, ok := result.(*objects.Tuple) + // CPython returns PyLong_FromPid(pid): fork_exec yields the child PID as + // a plain int, not a tuple. subprocess.py assigns self.pid directly. + pidObj, ok := result.(*objects.Int) if !ok { - t.Fatalf("expected tuple, got %T", result) - } - if tup.Len() < 2 { - t.Fatalf("expected at least 2-tuple, got len %d", tup.Len()) - } - pidObj, ok := tup.Item(0).(*objects.Int) - if !ok { - t.Fatalf("pid is not an int: %T", tup.Item(0)) + t.Fatalf("expected int pid, got %T", result) } pid, _ := pidObj.Int64() if pid <= 0 { @@ -137,22 +132,15 @@ func TestForkExecEcho(t *testing.T) { if err != nil { t.Fatalf("fork_exec /bin/echo: %v", err) } - tup, ok := result.(*objects.Tuple) + // CPython: Modules/_posixsubprocess.c:1325 return PyLong_FromPid(pid). + pidObj, ok := result.(*objects.Int) if !ok { - t.Fatalf("expected tuple, got %T", result) - } - pidObj, ok := tup.Item(0).(*objects.Int) - if !ok { - t.Fatalf("pid is not an int: %T", tup.Item(0)) + t.Fatalf("expected int pid, got %T", result) } pid, _ := pidObj.Int64() if pid <= 0 { t.Fatalf("expected positive PID, got %d", pid) } - // Sentinel at index 1 must be None. - if tup.Item(1) != objects.None() { - t.Fatalf("expected None sentinel at index 1, got %v", tup.Item(1)) - } } // TestForkExecMissingArgs verifies that fewer than 23 arguments returns a diff --git a/module/_testcapi/config.go b/module/_testcapi/config.go new file mode 100644 index 000000000..5b737fc96 --- /dev/null +++ b/module/_testcapi/config.go @@ -0,0 +1,173 @@ +package testcapi + +import ( + "fmt" + "math/big" + "sync" + + "github.com/tamnd/gopy/imp" + "github.com/tamnd/gopy/initconfig" + "github.com/tamnd/gopy/objects" + "github.com/tamnd/gopy/state" +) + +// defaultConfig is the fallback configuration used when the lifecycle +// has not stamped a PyConfig onto the interpreter. The cmd entry point +// still resolves paths by hand rather than running initconfig end to +// end, so PyConfig_Get reads from the layered Python defaults, which +// already carry the runtime-true knobs the config tests inspect +// (code_debug_ranges on, write_bytecode on, optimization_level zero). +// +// CPython: Python/initconfig.c:1106 PyConfig_InitPythonConfig +var ( + defaultConfigOnce sync.Once + defaultConfig initconfig.PyConfig +) + +func sharedDefaultConfig() *initconfig.PyConfig { + defaultConfigOnce.Do(func() { + defaultConfig.InitPythonConfig() + }) + return &defaultConfig +} + +// activeConfig returns the *initconfig.PyConfig the lifecycle stamped on +// the main interpreter, the live configuration _Py_GetConfig hands to +// PyConfig_Get. It falls back to the layered Python defaults until the +// cmd entry point wires initconfig through to the interpreter. +// +// CPython: Python/initconfig.c:4461 PyConfig_Get (_Py_GetConfig) +func activeConfig() (*initconfig.PyConfig, error) { + interp := state.MainInterpreter() + if interp != nil { + if cfg, ok := interp.Config.(*initconfig.PyConfig); ok && cfg != nil { + return cfg, nil + } + } + return sharedDefaultConfig(), nil +} + +// configGetObject wraps a resolved config member into the Python object +// PyConfig_Get returns: ints/uints become int, bools become bool, the +// optional wide strings become None when empty, and the wide-string +// lists become tuples. SYS_ATTR members are read back from the live sys +// module instead, matching config_get's use_sys delegation. +// +// CPython: Python/initconfig.c:4378 config_get +func configGetObject(name string) (objects.Object, error) { + cfg, err := activeConfig() + if err != nil { + return nil, err + } + member, found := cfg.ConfigGet(name) + if !found { + // CPython: Python/initconfig.c:4451 config_unknown_name_error + return nil, fmt.Errorf("ValueError: unknown config option name: %s", name) + } + + // use_sys is always 1 for PyConfig_Get: a member exposed through sys + // reads the live sys attribute so command-line and runtime overrides + // are visible. + // + // CPython: Python/initconfig.c:4382 config_get (spec->sys.attr) + if member.SysAttr != "" { + return sysRequiredAttr(member.SysAttr) + } + + switch member.Type { + case initconfig.ConfigMemberInt, initconfig.ConfigMemberUint: + return objects.NewInt(int64(member.Value.(int))), nil + case initconfig.ConfigMemberBool: + return objects.NewBool(member.Value.(int) != 0), nil + case initconfig.ConfigMemberULong: + return objects.NewIntFromBig(new(big.Int).SetUint64(member.Value.(uint64))), nil + case initconfig.ConfigMemberWStr: + return objects.NewStr(member.Value.(string)), nil + case initconfig.ConfigMemberWStrOpt: + s := member.Value.(string) + if s == "" { + return objects.None(), nil + } + return objects.NewStr(s), nil + case initconfig.ConfigMemberWStrList: + items := member.Value.([]string) + objs := make([]objects.Object, len(items)) + for i, s := range items { + objs[i] = objects.NewStr(s) + } + return objects.NewTuple(objs), nil + default: + return nil, fmt.Errorf("SystemError: unreachable config member type") + } +} + +// sysRequiredAttr mirrors _PySys_GetRequiredAttrString: read the named +// attribute from the live sys module, raising RuntimeError when sys or +// the attribute is missing. +// +// CPython: Python/sysmodule.c:99 _PySys_GetRequiredAttrString +func sysRequiredAttr(attr string) (objects.Object, error) { + mod, ok := imp.GetModule("sys") + if !ok { + return nil, fmt.Errorf("RuntimeError: lost sys module") + } + v, err := mod.Dict().GetItem(objects.NewStr(attr)) + if err != nil { + return nil, err + } + if v == nil { + return nil, fmt.Errorf("RuntimeError: lost sys.%s", attr) + } + return v, nil +} + +// configGet ports _testcapi.config_get: parse the option name and return +// PyConfig_Get(name). +// +// CPython: Modules/_testcapi/config.c:4 _testcapi_config_get +func configGet(args []objects.Object, _ map[string]objects.Object) (objects.Object, error) { + if len(args) != 1 { + return nil, fmt.Errorf("TypeError: config_get expected 1 argument, got %d", len(args)) + } + name, ok := args[0].(*objects.Unicode) + if !ok { + return nil, fmt.Errorf("TypeError: argument must be str, not %s", args[0].Type().Name) + } + return configGetObject(name.Value()) +} + +// configGetint ports _testcapi.config_getint: PyConfig_GetInt(name), +// which is PyConfig_Get(name) constrained to an int result. +// +// CPython: Modules/_testcapi/config.c:16 _testcapi_config_getint +func configGetint(args []objects.Object, _ map[string]objects.Object) (objects.Object, error) { + if len(args) != 1 { + return nil, fmt.Errorf("TypeError: config_getint expected 1 argument, got %d", len(args)) + } + name, ok := args[0].(*objects.Unicode) + if !ok { + return nil, fmt.Errorf("TypeError: argument must be str, not %s", args[0].Type().Name) + } + obj, err := configGetObject(name.Value()) + if err != nil { + return nil, err + } + // CPython: Python/initconfig.c:4478 PyConfig_GetInt (PyLong_Check) + if _, ok := obj.(*objects.Int); !ok { + return nil, fmt.Errorf("TypeError: config option %s is not an int", name.Value()) + } + return obj, nil +} + +// configNames ports _testcapi.config_names: the frozenset of every known +// config option name. +// +// CPython: Modules/_testcapi/config.c:32 _testcapi_config_names +func configNames(_ []objects.Object, _ map[string]objects.Object) (objects.Object, error) { + names := initconfig.ConfigNames() + items := make([]objects.Object, len(names)) + for i, n := range names { + items[i] = objects.NewStr(n) + } + return objects.NewFrozenset(items) +} diff --git a/module/_testcapi/module.go b/module/_testcapi/module.go index d100c7dee..eb7128111 100644 --- a/module/_testcapi/module.go +++ b/module/_testcapi/module.go @@ -17,6 +17,7 @@ import ( "math" "math/big" + "github.com/tamnd/gopy/builtins" "github.com/tamnd/gopy/imp" "github.com/tamnd/gopy/objects" ) @@ -240,6 +241,10 @@ func buildModule() (*objects.Module, error) { {"bad_get", badGet}, {"set_nomemory", setNomemory}, {"remove_mem_hooks", removeMemHooks}, + {"config_get", configGet}, + {"config_getint", configGetint}, + {"config_names", configNames}, + {"run_in_subinterp", runInSubinterp}, } for _, w := range wrappers { if err := d.SetItem(objects.NewStr(w.name), objects.NewBuiltinFunction(w.name, w.fn)); err != nil { @@ -312,6 +317,34 @@ func buildModule() (*objects.Module, error) { return m, nil } +// runInSubinterp ports _testcapi.run_in_subinterp(code). CPython creates a +// fresh subinterpreter with Py_NewInterpreter, runs code through +// PyRun_SimpleStringFlags, ends the interpreter, and returns the status. +// gopy has no single-phase C extensions to isolate, so the faithful +// behaviour is a fresh-namespace exec returning the PyRun_SimpleString +// status code. +// +// CPython: Modules/_testcapimodule.c:1969 run_in_subinterp +func runInSubinterp(args []objects.Object, _ map[string]objects.Object) (objects.Object, error) { + if len(args) != 1 { + return nil, fmt.Errorf("TypeError: run_in_subinterp() takes exactly one argument (%d given)", len(args)) + } + code, ok := args[0].(*objects.Unicode) + if !ok { + return nil, fmt.Errorf("TypeError: run_in_subinterp() argument must be str, not %s", args[0].Type().Name) + } + // Py_NewInterpreter builds a legacy subinterpreter: it shares the main + // GIL and leaves check_multi_interp_extensions off, and it has its own + // sys.modules so any extension re-imports through import_find_extension. + // Push that interpreter state for the duration of the run so the script's + // "assert name not in sys.modules" holds and the re-import copies m_copy. + // + // CPython: Modules/_testcapimodule.c:1969 run_in_subinterp (Py_NewInterpreter) + imp.PushSubinterp(false, false) + defer imp.PopSubinterp() + return objects.NewInt(int64(builtins.RunInFreshNamespace(code.Value()))), nil +} + // setNomemory ports _testcapi.set_nomemory(start[, stop]). It arms the // allocation-fault injector so the allocation request at ordinal start // (counting from the call) begins failing, continuing until ordinal stop; diff --git a/module/_testinternalcapi/module.go b/module/_testinternalcapi/module.go index 993534b75..0b616fa1b 100644 --- a/module/_testinternalcapi/module.go +++ b/module/_testinternalcapi/module.go @@ -11,7 +11,9 @@ package testinternalcapi import ( "fmt" + "github.com/tamnd/gopy/builtins" "github.com/tamnd/gopy/imp" + "github.com/tamnd/gopy/module/sys" "github.com/tamnd/gopy/objects" ) @@ -31,6 +33,9 @@ func buildModule() (*objects.Module, error) { {"has_split_table", hasSplitTable}, {"get_static_builtin_types", getStaticBuiltinTypes}, {"identify_type_slot_wrappers", identifyTypeSlotWrappers}, + {"get_recursion_depth", getRecursionDepth}, + {"run_in_subinterp_with_config", runInSubinterpWithConfig}, + {"clear_extension", clearExtension}, } for _, f := range fns { if err := d.SetItem(objects.NewStr(f.name), objects.NewBuiltinFunction(f.name, f.fn)); err != nil { @@ -64,6 +69,103 @@ func buildModule() (*objects.Module, error) { return m, nil } +// runInSubinterpWithConfig ports run_in_subinterp_with_config(code, config, +// xi=False). CPython spins up a fresh PyInterpreterState configured by the +// PyInterpreterConfig the test built, runs the code with +// PyRun_SimpleStringFlags, tears the interpreter down, and returns that +// status. gopy compiles every extension into the runtime as a Go builtin +// (multi-phase by construction), so the config's isolation and +// check_multi_interp_extensions fields never reject an import: a faithful +// run is a fresh-namespace exec whose only observable output is the +// PyRun_SimpleString status code. The config object is accepted and +// ignored. +// +// CPython: Modules/_testinternalcapi.c:1816 run_in_subinterp_with_config +func runInSubinterpWithConfig(args []objects.Object, _ map[string]objects.Object) (objects.Object, error) { + if len(args) < 1 { + return nil, fmt.Errorf("TypeError: run_in_subinterp_with_config() missing required argument 'code' (pos 1)") + } + code, ok := args[0].(*objects.Unicode) + if !ok { + return nil, fmt.Errorf("TypeError: run_in_subinterp_with_config() argument 'code' must be str, not %s", args[0].Type().Name) + } + + // gopy cannot spin up a real OS-level subinterpreter, so the run is a + // fresh-namespace exec. The config's gil and check_multi_interp_extensions + // fields are honoured: they are pushed onto the interpreter-state stack + // the PEP 489 extension compat check (imp.CheckExtSubinterpCompat) reads, + // so importing an incompatible extension from the script raises the same + // ImportError CPython's subinterpreter would. own_gil follows + // config.gil == 'own' (the ISOLATED gil=2 case). + // + // CPython: Python/pylifecycle.c:586 init_interp_create_gil (own_gil) + ownGil, checkMulti := false, false + if len(args) >= 2 && !objects.IsNone(args[1]) { + config := args[1] + if gilObj, err := objects.GetAttr(config, objects.NewStr("gil")); err == nil { + if gilStr, ok := gilObj.(*objects.Unicode); ok { + ownGil = gilStr.Value() == "own" + } + } + if checkObj, err := objects.GetAttr(config, objects.NewStr("check_multi_interp_extensions")); err == nil { + if t, terr := objects.IsTruthy(checkObj); terr == nil { + checkMulti = t + } + } + } + + imp.PushSubinterp(ownGil, checkMulti) + defer imp.PopSubinterp() + return objects.NewInt(int64(builtins.RunInFreshNamespace(code.Value()))), nil +} + +// clearExtension ports clear_extension(name, filename): it clears all +// internally cached data for a single-phase extension module so the test +// suite can re-import it fresh. It delegates to _PyImport_ClearExtension. +// +// CPython: Modules/_testinternalcapi.c:893 clear_extension +// +// (Python/import.c:903 _PyImport_ClearExtension) +func clearExtension(args []objects.Object, _ map[string]objects.Object) (objects.Object, error) { + if len(args) != 2 { + return nil, fmt.Errorf("TypeError: clear_extension() takes exactly 2 arguments (%d given)", len(args)) + } + name, ok := args[0].(*objects.Unicode) + if !ok { + return nil, fmt.Errorf("TypeError: clear_extension() argument 1 must be str, not %s", args[0].Type().Name) + } + path := "" + if !objects.IsNone(args[1]) { + filename, ok := args[1].(*objects.Unicode) + if !ok { + return nil, fmt.Errorf("TypeError: clear_extension() argument 2 must be str, not %s", args[1].Type().Name) + } + path = filename.Value() + } + if err := imp.ClearExtension(name.Value(), path); err != nil { + return nil, err + } + return objects.None(), nil +} + +// getRecursionDepth returns the Python recursion depth of the caller, +// matching tstate->py_recursion_limit - tstate->py_recursion_remaining. +// gopy tracks depth by the active interpreter-frame chain, so the count +// of frames from the caller back to the root is the same quantity. The +// C probe pushes no Python frame, so the caller's frame is the base. +// +// CPython: Modules/_testinternalcapi.c:110 get_recursion_depth +func getRecursionDepth(_ []objects.Object, _ map[string]objects.Object) (objects.Object, error) { + if sys.CurrentInterpreterFrameHook == nil { + return objects.NewInt(0), nil + } + depth := int64(0) + for f := sys.CurrentInterpreterFrameHook(); f != nil; f = f.FrameBack() { + depth++ + } + return objects.NewInt(depth), nil +} + // hasInlineValues reports whether obj currently keeps its attributes in // the type's inline-values array. It mirrors the C probe: the owning type // must carry Py_TPFLAGS_INLINE_VALUES and the instance's value array must @@ -89,7 +191,8 @@ func hasInlineValues(args []objects.Object, _ map[string]objects.Object) (object // inheritance across the static type set. // // CPython: Modules/_testinternalcapi.c:2334 get_static_builtin_types -// (Objects/typeobject.c _PyStaticType_GetBuiltins) +// +// (Objects/typeobject.c _PyStaticType_GetBuiltins) func getStaticBuiltinTypes(_ []objects.Object, _ map[string]objects.Object) (objects.Object, error) { types := []*objects.Type{ objects.ObjectType(), objects.TypeType(), @@ -112,7 +215,8 @@ func getStaticBuiltinTypes(_ []objects.Object, _ map[string]objects.Object) (obj // resolve to a wrapper_descriptor on that type. // // CPython: Objects/typeobject.c:11494 _PyType_GetSlotWrapperNames -// (Objects/typeobject.c:10952 slotdefs) +// +// (Objects/typeobject.c:10952 slotdefs) var slotWrapperNames = []string{ "__getattribute__", "__getattr__", "__setattr__", "__delattr__", "__repr__", "__hash__", "__call__", "__str__", @@ -137,7 +241,8 @@ var slotWrapperNames = []string{ // slotdefs table. // // CPython: Modules/_testinternalcapi.c:2341 identify_type_slot_wrappers -// (Objects/typeobject.c:11494 _PyType_GetSlotWrapperNames) +// +// (Objects/typeobject.c:11494 _PyType_GetSlotWrapperNames) func identifyTypeSlotWrappers(_ []objects.Object, _ map[string]objects.Object) (objects.Object, error) { items := make([]objects.Object, len(slotWrapperNames)) for i, n := range slotWrapperNames { diff --git a/module/_testmultiphase/module.go b/module/_testmultiphase/module.go new file mode 100644 index 000000000..e10b3c44d --- /dev/null +++ b/module/_testmultiphase/module.go @@ -0,0 +1,277 @@ +// Package testmultiphase is the gopy port of CPython's +// Modules/_testmultiphase.c, the C extension that exercises multi-phase +// initialization of extension modules (PEP 489). The standard-library +// test suite reaches for it indirectly: test.test_importlib.util runs +// import_helper.import_module("_testmultiphase") at import time, so any +// test that pulls in that helper (test_pkgutil, test_pyclbr, the +// test_importlib extension suites) raises SkipTest when the module is +// absent. +// +// gopy cannot dlopen the compiled extension, so the main module is +// reproduced as a Go-native inittab entry: the same name, methods, types +// and constants the C execfunc installs. The many PyInit__testmultiphase_* +// variants (nonmodule, bad_slot_*, negative_size, ...) drive the +// extension-loader edge cases in test_importlib and are added when those +// suites need them. +// +// CPython: Modules/_testmultiphase.c:447 PyInit__testmultiphase +// CPython: Modules/_testmultiphase.c:392 execfunc +package testmultiphase + +import ( + "fmt" + + pyerrors "github.com/tamnd/gopy/errors" + "github.com/tamnd/gopy/imp" + "github.com/tamnd/gopy/objects" +) + +func init() { + // gopy cannot dlopen a compiled extension, so each PyInit_* entry the C + // extension exposes is registered as a gopy extension module keyed by + // name, carrying the PEP 489 Py_mod_multiple_interpreters slot value its + // PyModuleDef declares. _imp.create_dynamic dispatches here and applies + // the subinterpreter compat check before running the body. + // + // CPython: Modules/_testmultiphase.c:438 main_slots (PER_INTERPRETER_GIL) + imp.RegisterExtModule(&imp.ExtModuleDef{ + Name: "_testmultiphase", + HasMultiInterpSlot: true, + MultiInterp: imp.MultiInterpPerInterpreterGIL, + Init: func() (*objects.Module, error) { return buildModule("_testmultiphase") }, + }) + // CPython: Modules/_testmultiphase.c:943 non_isolated_slots (NOT_SUPPORTED) + imp.RegisterExtModule(&imp.ExtModuleDef{ + Name: "_test_non_isolated", + HasMultiInterpSlot: true, + MultiInterp: imp.MultiInterpNotSupported, + Init: func() (*objects.Module, error) { return buildModule("_test_non_isolated") }, + }) + // CPython: Modules/_testmultiphase.c:964 shared_gil_only_slots (SUPPORTED, explicit) + imp.RegisterExtModule(&imp.ExtModuleDef{ + Name: "_test_shared_gil_only", + HasMultiInterpSlot: true, + MultiInterp: imp.MultiInterpSupported, + Init: func() (*objects.Module, error) { return buildModule("_test_shared_gil_only") }, + }) + // CPython: Modules/_testmultiphase.c:980 no_multiple_interpreter_slot_slots (no slot) + imp.RegisterExtModule(&imp.ExtModuleDef{ + Name: "_test_no_multiple_interpreter_slot", + HasMultiInterpSlot: false, + Init: func() (*objects.Module, error) { return buildModule("_test_no_multiple_interpreter_slot") }, + }) +} + +// exampleObject backs _testimportexec.Example: a GC type whose attribute +// store is an explicit x_attr dict consulted ahead of the generic +// attribute machinery. +// +// CPython: Modules/_testmultiphase.c:25 ExampleObject +type exampleObject struct { + objects.Header + xAttr *objects.Dict +} + +// exampleType / strType / errorType are the singletons installed by +// execfunc. +// +// CPython: Modules/_testmultiphase.c:124 Example_Type_spec +// CPython: Modules/_testmultiphase.c:366 Str_Type_spec +// CPython: Modules/_testmultiphase.c:399 PyErr_NewException("_testimportexec.error") +var ( + exampleType *objects.Type + strType *objects.Type + errorType *objects.Type +) + +// exampleDemo ports Example_demo: demo(o=None) returns o when it is a +// str, otherwise None. +// +// CPython: Modules/_testmultiphase.c:57 Example_demo +func exampleDemo(args []objects.Object, _ map[string]objects.Object) (objects.Object, error) { + if len(args) < 1 { + return nil, fmt.Errorf("TypeError: demo() missing self argument") + } + rest := args[1:] + if len(rest) > 1 { + return nil, fmt.Errorf("TypeError: demo() takes at most 1 argument (%d given)", len(rest)) + } + if len(rest) == 1 { + if _, ok := rest[0].(*objects.Unicode); ok { + return rest[0], nil + } + } + return objects.None(), nil +} + +// exampleGetattro ports Example_getattro: consult x_attr first, then fall +// back to PyObject_GenericGetAttr. +// +// CPython: Modules/_testmultiphase.c:77 Example_getattro +func exampleGetattro(o objects.Object, name objects.Object) (objects.Object, error) { + self, ok := o.(*exampleObject) + if ok && self.xAttr != nil { + found, err := self.xAttr.Contains(name) + if err != nil { + return nil, err + } + if found { + v, err := self.xAttr.GetItem(name) + if err != nil { + return nil, err + } + objects.Incref(v) + return v, nil + } + } + return objects.GenericGetAttr(o, name) +} + +// exampleSetattro ports Example_setattr: store into the lazily created +// x_attr dict; a delete of a missing key raises AttributeError. +// +// CPython: Modules/_testmultiphase.c:93 Example_setattr +func exampleSetattro(o objects.Object, name objects.Object, value objects.Object) error { + self, ok := o.(*exampleObject) + if !ok { + return fmt.Errorf("TypeError: not an Example") + } + if self.xAttr == nil { + self.xAttr = objects.NewDict() + } + if value == nil { + found, err := self.xAttr.Contains(name) + if err != nil { + return err + } + if !found { + return fmt.Errorf("AttributeError: delete non-existing Example attribute") + } + return self.xAttr.DelItem(name) + } + return self.xAttr.SetItem(name, value) +} + +// exampleTraverse keeps x_attr reachable for the collector. +// +// CPython: Modules/_testmultiphase.c:42 Example_traverse +func exampleTraverse(o objects.Object, visit objects.Visitor) error { + self, ok := o.(*exampleObject) + if !ok || self.xAttr == nil { + return nil + } + return visit(self.xAttr) +} + +// exampleNew constructs a bare Example instance. +// +// CPython: Modules/_testmultiphase.c:124 Example_Type_spec (tp_new via +// PyType_GenericNew default) +func exampleNew(cls *objects.Type, _ []objects.Object, _ map[string]objects.Object) (objects.Object, error) { + o := &exampleObject{} + o.Init(cls) + return o, nil +} + +// testexportFoo ports testexport_foo: foo(i, j) returns i + j. +// +// CPython: Modules/_testmultiphase.c:308 testexport_foo +func testexportFoo(args []objects.Object, _ map[string]objects.Object) (objects.Object, error) { + if len(args) != 2 { + return nil, fmt.Errorf("TypeError: foo() takes exactly 2 arguments (%d given)", len(args)) + } + i, ok := args[0].(*objects.Int) + if !ok { + return nil, fmt.Errorf("TypeError: an integer is required (got type %s)", args[0].Type().Name) + } + j, ok := args[1].(*objects.Int) + if !ok { + return nil, fmt.Errorf("TypeError: an integer is required (got type %s)", args[1].Type().Name) + } + iv, _ := i.Int64() + jv, _ := j.Int64() + return objects.NewInt(iv + jv), nil +} + +// callStateRegistrationFunc ports call_state_registration_func. gopy has +// no per-module C state registry (PyState_FindModule / PyState_AddModule +// / PyState_RemoveModule), so the lookup case returns None and the +// add/remove cases are no-ops; the function exists only so the main +// module's surface matches the extension. +// +// CPython: Modules/_testmultiphase.c:328 call_state_registration_func +func callStateRegistrationFunc(args []objects.Object, _ map[string]objects.Object) (objects.Object, error) { + if len(args) != 1 { + return nil, fmt.Errorf("TypeError: call_state_registration_func() takes exactly 1 argument (%d given)", len(args)) + } + if _, ok := args[0].(*objects.Int); !ok { + return nil, fmt.Errorf("TypeError: an integer is required (got type %s)", args[0].Type().Name) + } + return objects.None(), nil +} + +func init() { + // CPython: Modules/_testmultiphase.c:114 Example_Type_slots + exampleType = objects.NewType("Example", []*objects.Type{objects.ObjectType()}) + exampleType.Module = "_testimportexec" + exampleType.TpFlags |= objects.TpFlagHaveGC + exampleType.TpNew = exampleNew + exampleType.Getattro = exampleGetattro + exampleType.Setattro = exampleSetattro + exampleType.TpTraverse = exampleTraverse + objects.SetTypeDescr(exampleType, "demo", objects.NewMethodDescr(exampleType, "demo", exampleDemo)) + + // CPython: Modules/_testmultiphase.c:361 Str_Type_slots (Py_tp_base + // filled with &PyUnicode_Type in execfunc). + strType = objects.NewType("Str", []*objects.Type{objects.StrType()}) + strType.Module = "_testimportexec" + strType.TpFlags |= objects.TpFlagBasetype + + // CPython: Modules/_testmultiphase.c:399 PyErr_NewException + errorType = pyerrors.NewExcType("error", []*objects.Type{pyerrors.PyExc_Exception}) + errorType.Module = "_testimportexec" +} + +// buildModule assembles the _testmultiphase main module: the exported +// methods plus the Example/error/Str types and the int_const/str_const +// constants execfunc installs. +// +// CPython: Modules/_testmultiphase.c:392 execfunc +// CPython: Modules/_testmultiphase.c:444 main_def +func buildModule(name string) (*objects.Module, error) { + m := objects.NewModule(name) + d := m.Dict() + + // CPython: Modules/_testmultiphase.c:374 testexport_methods + methods := []struct { + name string + fn func([]objects.Object, map[string]objects.Object) (objects.Object, error) + }{ + {"foo", testexportFoo}, + {"call_state_registration_func", callStateRegistrationFunc}, + } + for _, mm := range methods { + if err := d.SetItem(objects.NewStr(mm.name), objects.NewBuiltinFunction(mm.name, mm.fn)); err != nil { + return nil, err + } + } + + if err := d.SetItem(objects.NewStr("Example"), exampleType); err != nil { + return nil, err + } + if err := d.SetItem(objects.NewStr("error"), errorType); err != nil { + return nil, err + } + if err := d.SetItem(objects.NewStr("Str"), strType); err != nil { + return nil, err + } + // CPython: Modules/_testmultiphase.c:415 PyModule_AddIntConstant int_const 1969 + if err := d.SetItem(objects.NewStr("int_const"), objects.NewInt(1969)); err != nil { + return nil, err + } + // CPython: Modules/_testmultiphase.c:419 PyModule_AddStringConstant str_const + if err := d.SetItem(objects.NewStr("str_const"), objects.NewStr("something different")); err != nil { + return nil, err + } + return m, nil +} diff --git a/module/_testsinglephase/module.go b/module/_testsinglephase/module.go new file mode 100644 index 000000000..716efec0a --- /dev/null +++ b/module/_testsinglephase/module.go @@ -0,0 +1,393 @@ +// Package testsinglephase is the gopy port of CPython's +// Modules/_testsinglephase.c, the legacy single-phase-init extension the +// import test suite drives through ExtensionFileLoader. CPython ships one +// compiled .so exposing several PyInit_ entry points; gopy registers each +// one as an extension module keyed by name, carrying the single-phase +// marker and the PyModuleDef m_size the import machinery reads. +// +// The variants mirror the kinds Python/import.c documents: +// - "basic" (_testsinglephase, m_size == -1): no per-module state, a +// process-global initialized_count, cached, reloaded from m_copy. +// - the indirect (_basic_wrapper) and direct (_basic_copy) basic variants. +// - "reinit" (_with_reinit, m_size == 0): re-runs init, no state. +// - "with state" (_with_state, m_size > 0): per-module state, re-runs init. +// - the *_check_cache_first variants: return PyState_FindModule first. +// - _testsinglephase_raise_exception: PyInit raises and returns NULL. +// +// CPython: Modules/_testsinglephase.c:489 init__testsinglephase_basic +package testsinglephase + +import ( + "fmt" + "sync" + "time" + + pyerrors "github.com/tamnd/gopy/errors" + "github.com/tamnd/gopy/imp" + "github.com/tamnd/gopy/objects" +) + +// variant selects how a registered entry stores its module state. +type variant int + +const ( + vBasic variant = iota // m_size == -1, process-global state + vReinit // m_size == 0, no state + vWithState // m_size > 0, per-module state +) + +func init() { + // gopy cannot dlopen the compiled extension, so each PyInit_ entry the C + // module exposes is registered as a single-phase gopy extension module. + // _imp.create_dynamic dispatches here; the single-phase marker drives the + // subinterpreter compat gate and the m_size selects the reload behaviour. + // + // CPython: Modules/_testsinglephase.c:489 _testsinglephase_basic + imp.RegisterExtModule(&imp.ExtModuleDef{ + Name: "_testsinglephase", + SinglePhase: true, + MSize: -1, + Init: func() (*objects.Module, error) { return buildBasic("_testsinglephase") }, + }) + // PyInit__testsinglephase_basic_wrapper just calls PyInit__testsinglephase, + // so it shares the def (and modules_by_index slot) and builds a module + // named "_testsinglephase". + // + // CPython: Modules/_testsinglephase.c:537 PyInit__testsinglephase_basic_wrapper + imp.RegisterExtModule(&imp.ExtModuleDef{ + Name: "_testsinglephase_basic_wrapper", + SinglePhase: true, + MSize: -1, + DefName: "_testsinglephase", + ShareDefWith: "_testsinglephase", + Init: func() (*objects.Module, error) { return buildBasic("_testsinglephase") }, + }) + // PyInit__testsinglephase_basic_copy has its own def but shares the basic + // methods and the process-global state. + // + // CPython: Modules/_testsinglephase.c:544 PyInit__testsinglephase_basic_copy + imp.RegisterExtModule(&imp.ExtModuleDef{ + Name: "_testsinglephase_basic_copy", + SinglePhase: true, + MSize: -1, + Init: func() (*objects.Module, error) { return buildBasic("_testsinglephase_basic_copy") }, + }) + // CPython: Modules/_testsinglephase.c:582 PyInit__testsinglephase_with_reinit + imp.RegisterExtModule(&imp.ExtModuleDef{ + Name: "_testsinglephase_with_reinit", + SinglePhase: true, + MSize: 0, + Init: func() (*objects.Module, error) { return buildStateful("_testsinglephase_with_reinit", vReinit) }, + }) + // CPython: Modules/_testsinglephase.c:659 PyInit__testsinglephase_with_state + imp.RegisterExtModule(&imp.ExtModuleDef{ + Name: "_testsinglephase_with_state", + SinglePhase: true, + MSize: 42, + Init: func() (*objects.Module, error) { return buildStateful("_testsinglephase_with_state", vWithState) }, + }) + // The *_check_cache_first variants return PyState_FindModule(def) before + // creating a fresh module and are never recorded in the extensions cache. + // + // CPython: Modules/_testsinglephase.c:704 _check_cache_first modules + for _, cc := range []struct { + name string + mSize int + }{ + {"_testsinglephase_check_cache_first", -1}, + {"_testsinglephase_with_reinit_check_cache_first", 0}, + {"_testsinglephase_with_state_check_cache_first", 42}, + } { + name := cc.name + imp.RegisterExtModule(&imp.ExtModuleDef{ + Name: name, + SinglePhase: true, + MSize: cc.mSize, + CheckCacheFirst: true, + Init: func() (*objects.Module, error) { return buildCheckCacheFirst(name) }, + }) + } + // CPython: Modules/_testsinglephase.c:805 PyInit__testsinglephase_raise_exception + imp.RegisterExtModule(&imp.ExtModuleDef{ + Name: "_testsinglephase_raise_exception", + SinglePhase: true, + MSize: -1, + Init: raiseException, + }) + // _testsinglephase_circular manages its own static cache (a process-global + // pointer) and imports a helper module from PyInit before adding itself to + // sys.modules, the gh-123950 circular-import fixture. Its def leaves m_size + // unset, so it is the reinit (m_size == 0) kind. + // + // CPython: Modules/_testsinglephase.c:780 PyInit__testsinglephase_circular + imp.RegisterExtModule(&imp.ExtModuleDef{ + Name: "_testsinglephase_circular", + SinglePhase: true, + MSize: 0, + Init: buildCircular, + }) +} + +// errorType is the _testsinglephase.error exception the module installs. +// +// CPython: Modules/_testsinglephase.c:303 PyErr_NewException("_testsinglephase.error") +var errorType = pyerrors.NewExcType("_testsinglephase.error", []*objects.Type{pyerrors.PyExc_Exception}) + +// moduleState mirrors the C module_state: the time the state was +// initialized. A zero initialized time means uninitialized. +// +// CPython: Modules/_testsinglephase.c:174 module_state +type moduleState struct { + initialized time.Time +} + +// notInitialized is global_state.initialized_count's sentinel value before +// the basic module is loaded or after _clear_globals. +// +// CPython: Modules/_testsinglephase.c:229 NOT_INITIALIZED +const notInitialized = -1 + +// globalState mirrors the C global_state shared by the basic module and its +// variants across (sub)interpreters: an initialized count and a single +// module_state. +// +// CPython: Modules/_testsinglephase.c:185 global_state +var globalState = struct { + mu sync.Mutex + initializedCount int64 + module moduleState +}{initializedCount: notInitialized} + +func secondsSinceEpoch(t time.Time) float64 { + if t.IsZero() { + return 0 + } + return float64(t.UnixNano()) / 1e9 +} + +// commonSum ports common_sum: sum(i, j) returns i + j. +// +// CPython: Modules/_testsinglephase.c:396 common_sum +func commonSum(args []objects.Object, _ map[string]objects.Object) (objects.Object, error) { + if len(args) != 2 { + return nil, fmt.Errorf("TypeError: sum() takes exactly 2 arguments (%d given)", len(args)) + } + i, ok := args[0].(*objects.Int) + if !ok { + return nil, fmt.Errorf("TypeError: an integer is required (got type %s)", args[0].Type().Name) + } + j, ok := args[1].(*objects.Int) + if !ok { + return nil, fmt.Errorf("TypeError: an integer is required (got type %s)", args[1].Type().Name) + } + iv, _ := i.Int64() + jv, _ := j.Int64() + return objects.NewInt(iv + jv), nil +} + +// raiseException is PyInit__testsinglephase_raise_exception: it sets +// RuntimeError("evil") and returns NULL, the gh-144601 fixture for a +// PyInit that fails. +// +// CPython: Modules/_testsinglephase.c:805 PyInit__testsinglephase_raise_exception +func raiseException() (*objects.Module, error) { + exc := pyerrors.New(pyerrors.PyExc_RuntimeError, objects.NewTuple([]objects.Object{objects.NewStr("evil")})) + return nil, objects.NewRaisedError(exc, "") +} + +// installCommon installs the methods and constants every variant shares: +// look_up_self, sum, state_initialized, plus the error type and the +// int_const / str_const / _module_initialized attributes init_module sets. +// +// CPython: Modules/_testsinglephase.c:325 init_module +func installCommon(m *objects.Module, st *moduleState, hasState bool) error { + d := m.Dict() + methods := []struct { + name string + fn func([]objects.Object, map[string]objects.Object) (objects.Object, error) + }{ + {"look_up_self", func([]objects.Object, map[string]objects.Object) (objects.Object, error) { + return imp.ModuleSelf(m), nil + }}, + {"sum", commonSum}, + {"state_initialized", func([]objects.Object, map[string]objects.Object) (objects.Object, error) { + // common_state_initialized returns None when the module has no + // state (m_size == 0); otherwise the seconds-since-epoch the + // state was initialized (0.0 once cleared). + if !hasState { + return objects.None(), nil + } + return objects.NewFloat(secondsSinceEpoch(st.initialized)), nil + }}, + } + for _, mm := range methods { + if err := d.SetItem(objects.NewStr(mm.name), objects.NewBuiltinFunction(mm.name, mm.fn)); err != nil { + return err + } + } + // CPython: Modules/_testsinglephase.c:303 state->error + if err := d.SetItem(objects.NewStr("error"), errorType); err != nil { + return err + } + // CPython: Modules/_testsinglephase.c:308 state->int_const 1969 + if err := d.SetItem(objects.NewStr("int_const"), objects.NewInt(1969)); err != nil { + return err + } + // CPython: Modules/_testsinglephase.c:313 state->str_const + if err := d.SetItem(objects.NewStr("str_const"), objects.NewStr("something different")); err != nil { + return err + } + // CPython: Modules/_testsinglephase.c:338 _module_initialized + if err := d.SetItem(objects.NewStr("_module_initialized"), objects.NewFloat(secondsSinceEpoch(st.initialized))); err != nil { + return err + } + return nil +} + +// buildBasic ports init__testsinglephase_basic: the basic module shares the +// process-global module_state and bumps the global initialized_count. +// state_initialized reads that shared state, so it returns 0.0 (not None) +// once the globals are cleared. +// +// CPython: Modules/_testsinglephase.c:497 init__testsinglephase_basic +func buildBasic(defName string) (*objects.Module, error) { + globalState.mu.Lock() + if globalState.initializedCount == notInitialized { + globalState.initializedCount = 0 + } + // clear_state then init_state: stamp the global state's initialized time. + globalState.module.initialized = time.Now() + st := globalState.module + globalState.initializedCount++ + globalState.mu.Unlock() + + m := objects.NewModule(defName) + // state_initialized must read the live global state, not a copy, so the + // closure captures &globalState.module. + if err := installCommon(m, &globalState.module, true); err != nil { + return nil, err + } + // Re-stamp _module_initialized from the snapshot taken under the lock so + // it matches state_initialized at load time. + if err := m.Dict().SetItem(objects.NewStr("_module_initialized"), objects.NewFloat(secondsSinceEpoch(st.initialized))); err != nil { + return nil, err + } + d := m.Dict() + if err := d.SetItem(objects.NewStr("initialized_count"), objects.NewBuiltinFunction("initialized_count", basicInitializedCount)); err != nil { + return nil, err + } + if err := d.SetItem(objects.NewStr("_clear_globals"), objects.NewBuiltinFunction("_clear_globals", basicClearGlobals)); err != nil { + return nil, err + } + return m, nil +} + +// basicInitializedCount ports basic_initialized_count. +// +// CPython: Modules/_testsinglephase.c:416 basic_initialized_count +func basicInitializedCount(_ []objects.Object, _ map[string]objects.Object) (objects.Object, error) { + globalState.mu.Lock() + defer globalState.mu.Unlock() + return objects.NewInt(globalState.initializedCount), nil +} + +// basicClearGlobals ports basic__clear_globals -> clear_global_state: it +// clears the shared module_state and resets initialized_count to +// NOT_INITIALIZED (-1). +// +// CPython: Modules/_testsinglephase.c:434 basic__clear_globals +// +// (Modules/_testsinglephase.c:197 clear_global_state) +func basicClearGlobals(_ []objects.Object, _ map[string]objects.Object) (objects.Object, error) { + globalState.mu.Lock() + defer globalState.mu.Unlock() + globalState.module.initialized = time.Time{} + globalState.initializedCount = notInitialized + return objects.None(), nil +} + +// buildStateful ports the with_reinit (m_size == 0) and with_state +// (m_size > 0) variants: each load runs init fresh against a state that is +// not the process-global one. A reinit module has no readable state +// (state_initialized returns None); a with_state module reads its own. +// +// CPython: Modules/_testsinglephase.c:582 / 659 +func buildStateful(name string, v variant) (*objects.Module, error) { + st := &moduleState{initialized: time.Now()} + m := objects.NewModule(name) + hasState := v == vWithState + if err := installCommon(m, st, hasState); err != nil { + return nil, err + } + if v == vWithState { + // _clear_module_state clears the per-module state. + // + // CPython: Modules/_testsinglephase.c:452 basic__clear_module_state + if err := m.Dict().SetItem(objects.NewStr("_clear_module_state"), + objects.NewBuiltinFunction("_clear_module_state", func([]objects.Object, map[string]objects.Object) (objects.Object, error) { + st.initialized = time.Time{} + return objects.None(), nil + })); err != nil { + return nil, err + } + } + return m, nil +} + +// staticModuleCircular mirrors the C static_module_circular pointer: the +// _testsinglephase_circular module caches itself in a process-global so a +// re-entrant PyInit (the circular import) reuses the same partially built +// object. clear_static_var resets it. +// +// CPython: Modules/_testsinglephase.c:758 static_module_circular +var staticModuleCircular *objects.Module + +// circularHelperName is the module PyInit imports before returning, the +// half of the cycle that imports _testsinglephase_circular back again. +// +// CPython: Modules/_testsinglephase.c:788 helper_mod_name +const circularHelperName = "test.test_import.data.circular_imports.singlephase" + +// buildCircular ports PyInit__testsinglephase_circular: it lazily builds the +// module into a static pointer, imports the helper module (which re-imports +// this module before it is in sys.modules), then records helper_mod_name and +// returns the cached object. +// +// CPython: Modules/_testsinglephase.c:780 PyInit__testsinglephase_circular +func buildCircular() (*objects.Module, error) { + if staticModuleCircular == nil { + m := objects.NewModule("_testsinglephase_circular") + // CPython: Modules/_testsinglephase.c:761 circularmod_clear_static_var + if err := m.Dict().SetItem(objects.NewStr("clear_static_var"), + objects.NewBuiltinFunction("clear_static_var", func([]objects.Object, map[string]objects.Object) (objects.Object, error) { + result := staticModuleCircular + staticModuleCircular = nil + if result == nil { + return objects.None(), nil + } + return result, nil + })); err != nil { + return nil, err + } + staticModuleCircular = m + } + if objects.ImportModuleHook == nil { + return nil, fmt.Errorf("ImportError: import machinery unavailable") + } + if _, err := objects.ImportModuleHook(circularHelperName); err != nil { + return nil, err + } + // CPython: Modules/_testsinglephase.c:795 PyModule_AddStringConstant + if err := staticModuleCircular.Dict().SetItem(objects.NewStr("helper_mod_name"), objects.NewStr(circularHelperName)); err != nil { + return nil, err + } + return staticModuleCircular, nil +} + +// buildCheckCacheFirst ports the *_check_cache_first PyInit functions, which +// only ever load fresh: a bare module carrying its own name. +// +// CPython: Modules/_testsinglephase.c:704 _check_cache_first modules +func buildCheckCacheFirst(name string) (*objects.Module, error) { + return objects.NewModule(name), nil +} diff --git a/module/_thread/excepthook.go b/module/_thread/excepthook.go new file mode 100644 index 000000000..7f5aa9247 --- /dev/null +++ b/module/_thread/excepthook.go @@ -0,0 +1,167 @@ +// _thread._excepthook and the _ExceptHookArgs struct-sequence back +// threading.excepthook. When a Thread.run() lets an exception escape, +// threading._bootstrap_inner builds an _ExceptHookArgs(exc_type, +// exc_value, exc_traceback, thread) and hands it to threading.excepthook, +// whose C default is _thread._excepthook. The default prints +// "Exception in thread {name}:" followed by the traceback to the thread's +// stderr. +// +// CPython: Modules/_threadmodule.c:2275 thread_excepthook +package _thread + +import ( + "fmt" + + "github.com/tamnd/gopy/errors" + "github.com/tamnd/gopy/imp" + "github.com/tamnd/gopy/objects" +) + +// exceptHookArgsType is the _thread._ExceptHookArgs struct-sequence type. +// +// CPython: Modules/_threadmodule.c:2266 ExceptHookArgs_desc +var exceptHookArgsType = objects.NewStructSeqTypeDesc(objects.StructSeqDesc{ + Name: "_thread._ExceptHookArgs", + Fields: []objects.StructSeqField{ + {Name: "exc_type", Doc: "Exception type"}, + {Name: "exc_value", Doc: "Exception value"}, + {Name: "exc_traceback", Doc: "Exception traceback"}, + {Name: "thread", Doc: "Thread"}, + }, + NInSequence: 4, +}) + +// threadExceptHook is the default threading.excepthook. It expects a +// single _ExceptHookArgs and reports the uncaught thread exception. +// +// CPython: Modules/_threadmodule.c:2275 thread_excepthook +func threadExceptHook(args []objects.Object, _ map[string]objects.Object) (objects.Object, error) { + if len(args) != 1 { + return nil, fmt.Errorf("TypeError: _thread._excepthook expected 1 argument") + } + hookArgs, ok := args[0].(*objects.StructSeq) + if !ok || hookArgs.Type() != exceptHookArgsType { + return nil, fmt.Errorf( + "TypeError: _thread.excepthook argument type must be ExceptHookArgs") + } + items := hookArgs.Items() + + // Silently ignore SystemExit, matching the C default. + excType := items[0] + if t, ok := excType.(*objects.Type); ok && objects.IsSubtype(t, errors.PyExc_SystemExit) { + return objects.None(), nil + } + + excValue := items[1] + thread := items[3] + + // Resolve the destination stream: sys.stderr, else thread._stderr. + // + // CPython: Modules/_threadmodule.c:2298 _PySys_GetOptionalAttr(stderr) + file := optionalSysStderr() + if file == nil || file == objects.None() { + if thread == objects.None() { + return objects.None(), nil + } + f, err := objects.GetAttr(thread, objects.NewStr("_stderr")) + if err != nil { + return nil, err + } + if f == objects.None() { + return objects.None(), nil + } + file = f + } + + if err := threadExceptHookFile(file, excValue, thread); err != nil { + return nil, err + } + return objects.None(), nil +} + +// threadExceptHookFile writes the thread name header and the traceback to +// file, then flushes it. +// +// CPython: Modules/_threadmodule.c:2197 thread_excepthook_file +func threadExceptHookFile(file, excValue, thread objects.Object) error { + if err := fileWriteString(file, "Exception in thread "); err != nil { + return err + } + + name := objects.Object(nil) + if thread != objects.None() { + n, err := objects.GetAttr(thread, objects.NewStr("name")) + if err == nil { + name = n + } + } + if name != nil { + s, err := objects.Str(name) + if err != nil { + return err + } + if err := fileWriteString(file, s); err != nil { + return err + } + } else { + if err := fileWriteString(file, ""); err != nil { + return err + } + } + + if err := fileWriteString(file, ":\n"); err != nil { + return err + } + + // Display the traceback through the same formatter sys.excepthook uses. + // + // CPython: Modules/_threadmodule.c:2241 _PyErr_Display + text := "" + if exc, ok := excValue.(*errors.Exception); ok { + text = errors.FormatException(exc) + } else { + repr, err := objects.Str(excValue) + if err == nil { + text = excValue.Type().Name + ": " + repr + "\n" + } else { + text = excValue.Type().Name + "\n" + } + } + if err := fileWriteString(file, text); err != nil { + return err + } + + // file.flush(), best effort. + if flush, err := objects.GetAttr(file, objects.NewStr("flush")); err == nil { + _, _ = objects.Call(flush, objects.NewTuple(nil), nil) + } + return nil +} + +// fileWriteString writes s through file.write, mirroring PyFile_WriteString. +// +// CPython: Objects/fileobject.c PyFile_WriteString +func fileWriteString(file objects.Object, s string) error { + write, err := objects.GetAttr(file, objects.NewStr("write")) + if err != nil { + return err + } + _, err = objects.Call(write, objects.NewTuple([]objects.Object{objects.NewStr(s)}), nil) + return err +} + +// optionalSysStderr returns sys.stderr, or nil if sys or its stderr is +// unavailable. +// +// CPython: Python/sysmodule.c _PySys_GetOptionalAttr +func optionalSysStderr() objects.Object { + sysMod, ok := imp.GetModule("sys") + if !ok { + return nil + } + f, err := objects.GetAttr(sysMod, objects.NewStr("stderr")) + if err != nil { + return nil + } + return f +} diff --git a/module/_thread/module.go b/module/_thread/module.go index d676101f9..422efa688 100644 --- a/module/_thread/module.go +++ b/module/_thread/module.go @@ -50,6 +50,7 @@ func buildModule() (*objects.Module, error) { {"_is_main_interpreter", threadIsMainInterpreter}, {"stack_size", threadStackSize}, {"_count", threadCount}, + {"_excepthook", threadExceptHook}, } for _, e := range entries { bf := objects.NewBuiltinFunction(e.name, e.fn) @@ -97,6 +98,13 @@ func buildModule() (*objects.Module, error) { return nil, err } + // _ExceptHookArgs: struct-sequence threading.excepthook receives. + // + // CPython: Modules/_threadmodule.c:2710 PyStructSequence_NewType + if err := d.SetItem(objects.NewStr("_ExceptHookArgs"), exceptHookArgsType); err != nil { + return nil, err + } + // error is the module-level exception class. errCls := objects.NewType("_thread.error", []*objects.Type{objects.ObjectType()}) if err := d.SetItem(objects.NewStr("error"), errCls); err != nil { @@ -491,9 +499,21 @@ func threadStartNewThread(args []objects.Object, kwargs map[string]objects.Objec go func() { defer atomic.AddInt64(&activeThreadCount, -1) if enter != nil { + // The identity is already known synchronously (the spawn hook + // returned it on the parent goroutine), so hand it back before + // enter() takes the GIL. enter() blocks until the GIL is free, + // and the parent is the holder: it only releases the lock once + // it returns from start_new_thread and later blocks (on a join, + // lock, or sleep) through Py_BEGIN_ALLOW_THREADS. Sending the id + // first lets the parent get that far instead of deadlocking + // against a child that cannot publish its id until it owns a GIL + // the parent still holds. + // + // CPython: Modules/_threadmodule.c:1166 thread_PyThread_start_new_thread + // returns the ident before the bootstrap thread runs. + idCh <- ident enter() defer leave() - idCh <- ident } else { idCh <- goid() } diff --git a/module/_thread/rlock.go b/module/_thread/rlock.go index f4a162ef2..ffae1ca09 100644 --- a/module/_thread/rlock.go +++ b/module/_thread/rlock.go @@ -213,7 +213,15 @@ func rlockAcquire(r *rlockObject, args []objects.Object, kwargs map[string]objec } if timeoutSecs < 0 { - r.gate.Lock() + // Block indefinitely on the gate. Drop the GIL while parked so the + // owning thread can run far enough to release the gate; holding the + // GIL here would deadlock the owner (and any other Python thread) + // against this goroutine. lockAcquire does the same for non-reentrant + // locks. + // + // CPython: Modules/_threadmodule.c:1083 rlock_acquire releases the GIL + // (ACQUIRE_LOCK runs under Py_BEGIN_ALLOW_THREADS) + objects.AllowThreads(func() { r.gate.Lock() }) r.mu.Lock() r.owner = me r.count = 1 @@ -222,19 +230,27 @@ func rlockAcquire(r *rlockObject, args []objects.Object, kwargs map[string]objec } deadline := time.Now().Add(time.Duration(timeoutSecs * float64(time.Second))) - for { - if r.gate.TryLock() { - r.mu.Lock() - r.owner = me - r.count = 1 - r.mu.Unlock() - return objects.True(), nil - } - if time.Now().After(deadline) { - return objects.False(), nil + acquired := false + objects.AllowThreads(func() { + for { + if r.gate.TryLock() { + acquired = true + return + } + if time.Now().After(deadline) { + return + } + time.Sleep(100 * time.Microsecond) } - time.Sleep(100 * time.Microsecond) + }) + if acquired { + r.mu.Lock() + r.owner = me + r.count = 1 + r.mu.Unlock() + return objects.True(), nil } + return objects.False(), nil } // rlockRelease decrements the recursion counter; when it hits zero the diff --git a/module/_time/module.go b/module/_time/module.go index 685b82e41..498e61790 100644 --- a/module/_time/module.go +++ b/module/_time/module.go @@ -355,7 +355,11 @@ func sleep(args []objects.Object, _ map[string]objects.Object) (objects.Object, return nil, fmt.Errorf("ValueError: sleep length must be non-negative") } if secs > 0 { - gotime.Sleep(gotime.Duration(secs * float64(gotime.Second))) + // Release the GIL while parked so other Python threads run. + // CPython: Modules/timemodule.c:394 time_sleep (Py_BEGIN_ALLOW_THREADS) + objects.AllowThreads(func() { + gotime.Sleep(gotime.Duration(secs * float64(gotime.Second))) + }) } return objects.None(), nil } diff --git a/module/_warnings/lexer.go b/module/_warnings/lexer.go index 17afdec07..d08f8df81 100644 --- a/module/_warnings/lexer.go +++ b/module/_warnings/lexer.go @@ -5,9 +5,10 @@ import ( "github.com/tamnd/gopy/compile" "github.com/tamnd/gopy/errors" + "github.com/tamnd/gopy/imp" "github.com/tamnd/gopy/objects" - "github.com/tamnd/gopy/parser/lexer" parsererrors "github.com/tamnd/gopy/parser/errors" + "github.com/tamnd/gopy/parser/lexer" ) // init wires the package-level hooks in parser/lexer and compile so @@ -30,6 +31,10 @@ func init() { // CPython: Objects/typeobject.c:4667 PyErr_WarnFormat(..., 1, ...) return WarnUnicode(errors.PyExc_RuntimeWarning, msg, 1, nil) } + imp.ImportWarnHook = func(msg string) error { + // CPython: Lib/importlib/_bootstrap.py:1353 _warnings.warn(msg, ImportWarning) + return WarnUnicode(errors.PyExc_ImportWarning, msg, 1, nil) + } } // FlushLexerWarnings posts every SyntaxWarning-class diagnostic the diff --git a/module/errno/entries_windows.go b/module/errno/entries_windows.go index d498916b5..53cd0e863 100644 --- a/module/errno/entries_windows.go +++ b/module/errno/entries_windows.go @@ -1,56 +1,99 @@ -// Windows errno table. The Windows C runtime defines a smaller subset -// of POSIX E* codes than Linux; this list mirrors the ones exposed by -// Go's syscall package on windows/amd64. +// Windows errno table. CPython's errno module on Windows exposes the +// codes the Universal CRT's defines, with the CRT's small +// POSIX-style values (EEXIST == 17), NOT Go's syscall package, which +// fabricates E* constants as 1<<29+iota on Windows. Hard-code the CRT +// values so errno.EEXIST and friends match CPython exactly and line up +// with the winerror->errno translation the VM applies to OSError. // -// CPython: Modules/errnomodule.c:121 add_errcode block (MS_WINDOWS arms) +// CPython: Modules/errnomodule.c add_errcode block (values from ucrt +// ; the classic 1-42 range plus the POSIX-2008 100-140 range) package errno -import "syscall" - // errnoEntries returns every (name, code) pair the errno module exposes // on Windows. // // CPython: Modules/errnomodule.c:88 errno_exec (Windows slice) func errnoEntries() []errnoEntry { return []errnoEntry{ - {"EPERM", int(syscall.EPERM)}, - {"ENOENT", int(syscall.ENOENT)}, - {"ESRCH", int(syscall.ESRCH)}, - {"EINTR", int(syscall.EINTR)}, - {"EIO", int(syscall.EIO)}, - {"ENXIO", int(syscall.ENXIO)}, - {"E2BIG", int(syscall.E2BIG)}, - {"ENOEXEC", int(syscall.ENOEXEC)}, - {"EBADF", int(syscall.EBADF)}, - {"ECHILD", int(syscall.ECHILD)}, - {"EAGAIN", int(syscall.EAGAIN)}, - {"ENOMEM", int(syscall.ENOMEM)}, - {"EACCES", int(syscall.EACCES)}, - {"EFAULT", int(syscall.EFAULT)}, - {"EBUSY", int(syscall.EBUSY)}, - {"EEXIST", int(syscall.EEXIST)}, - {"EXDEV", int(syscall.EXDEV)}, - {"ENODEV", int(syscall.ENODEV)}, - {"ENOTDIR", int(syscall.ENOTDIR)}, - {"EISDIR", int(syscall.EISDIR)}, - {"EINVAL", int(syscall.EINVAL)}, - {"ENFILE", int(syscall.ENFILE)}, - {"EMFILE", int(syscall.EMFILE)}, - {"ENOTTY", int(syscall.ENOTTY)}, - {"EFBIG", int(syscall.EFBIG)}, - {"ENOSPC", int(syscall.ENOSPC)}, - {"ESPIPE", int(syscall.ESPIPE)}, - {"EROFS", int(syscall.EROFS)}, - {"EMLINK", int(syscall.EMLINK)}, - {"EPIPE", int(syscall.EPIPE)}, - {"EDOM", int(syscall.EDOM)}, - {"ERANGE", int(syscall.ERANGE)}, - {"EDEADLK", int(syscall.EDEADLK)}, - {"ENAMETOOLONG", int(syscall.ENAMETOOLONG)}, - {"ENOLCK", int(syscall.ENOLCK)}, - {"ENOSYS", int(syscall.ENOSYS)}, - {"ENOTEMPTY", int(syscall.ENOTEMPTY)}, - {"EILSEQ", int(syscall.EILSEQ)}, + {"EPERM", 1}, + {"ENOENT", 2}, + {"ESRCH", 3}, + {"EINTR", 4}, + {"EIO", 5}, + {"ENXIO", 6}, + {"E2BIG", 7}, + {"ENOEXEC", 8}, + {"EBADF", 9}, + {"ECHILD", 10}, + {"EAGAIN", 11}, + {"ENOMEM", 12}, + {"EACCES", 13}, + {"EFAULT", 14}, + {"EBUSY", 16}, + {"EEXIST", 17}, + {"EXDEV", 18}, + {"ENODEV", 19}, + {"ENOTDIR", 20}, + {"EISDIR", 21}, + {"EINVAL", 22}, + {"ENFILE", 23}, + {"EMFILE", 24}, + {"ENOTTY", 25}, + {"EFBIG", 27}, + {"ENOSPC", 28}, + {"ESPIPE", 29}, + {"EROFS", 30}, + {"EMLINK", 31}, + {"EPIPE", 32}, + {"EDOM", 33}, + {"ERANGE", 34}, + {"EDEADLK", 36}, + {"EDEADLOCK", 36}, + {"ENAMETOOLONG", 38}, + {"ENOLCK", 39}, + {"ENOSYS", 40}, + {"ENOTEMPTY", 41}, + {"EILSEQ", 42}, + {"EADDRINUSE", 100}, + {"EADDRNOTAVAIL", 101}, + {"EAFNOSUPPORT", 102}, + {"EALREADY", 103}, + {"EBADMSG", 104}, + {"ECANCELED", 105}, + {"ECONNABORTED", 106}, + {"ECONNREFUSED", 107}, + {"ECONNRESET", 108}, + {"EDESTADDRREQ", 109}, + {"EHOSTUNREACH", 110}, + {"EIDRM", 111}, + {"EINPROGRESS", 112}, + {"EISCONN", 113}, + {"ELOOP", 114}, + {"EMSGSIZE", 115}, + {"ENETDOWN", 116}, + {"ENETRESET", 117}, + {"ENETUNREACH", 118}, + {"ENOBUFS", 119}, + {"ENODATA", 120}, + {"ENOLINK", 121}, + {"ENOMSG", 122}, + {"ENOPROTOOPT", 123}, + {"ENOSR", 124}, + {"ENOSTR", 125}, + {"ENOTCONN", 126}, + {"ENOTRECOVERABLE", 127}, + {"ENOTSOCK", 128}, + {"ENOTSUP", 129}, + {"EOPNOTSUPP", 130}, + {"EOVERFLOW", 132}, + {"EOWNERDEAD", 133}, + {"EPROTO", 134}, + {"EPROTONOSUPPORT", 135}, + {"EPROTOTYPE", 136}, + {"ETIME", 137}, + {"ETIMEDOUT", 138}, + {"ETXTBSY", 139}, + {"EWOULDBLOCK", 140}, } } diff --git a/module/io/bufferedio.go b/module/io/bufferedio.go index 3f1817899..b00e8839b 100644 --- a/module/io/bufferedio.go +++ b/module/io/bufferedio.go @@ -346,6 +346,9 @@ func bufferedIOBaseGetattr(self objects.Object, nameObj objects.Object) (objects if !ok { return nil, fmt.Errorf("TypeError: attribute name must be string") } + if v, ok, err := ioUserInstanceAttr(self, nameObj); ok || err != nil { + return v, err + } switch name.Value() { case "detach": return objects.NewBuiltinFunction("detach", func(_ []objects.Object, _ map[string]objects.Object) (objects.Object, error) { @@ -372,7 +375,8 @@ func bufferedIOBaseGetattr(self objects.Object, nameObj objects.Object) (objects return nil, fmt.Errorf("UnsupportedOperation: write") }), nil } - return nil, fmt.Errorf("AttributeError: '_io._BufferedIOBase' object has no attribute '%s'", name.Value()) + // Dunders such as __class__/__dict__ resolve through the MRO walk. + return objects.GenericGetAttr(self, nameObj) } // bufferedIOBaseReadintoGeneric implements the shared concrete fallback for @@ -880,13 +884,8 @@ func (b *Buffered) bufferedWrite(args []objects.Object) (objects.Object, error) if len(args) < 1 { return nil, fmt.Errorf("TypeError: write() requires a data argument") } - var data []byte - switch v := args[0].(type) { - case *objects.Bytes: - data = v.Bytes() - case *objects.ByteArray: - data = v.Bytes() - default: + data, ok := objects.AsBytesLike(args[0]) + if !ok { return nil, fmt.Errorf("TypeError: a bytes-like object is required, not %s", args[0].Type().Name) } written := 0 @@ -1446,7 +1445,8 @@ func bufferedGetattr(self objects.Object, nameObj objects.Object) (objects.Objec return objects.NewStr(s), nil }), nil } - return nil, fmt.Errorf("AttributeError: '%s' object has no attribute '%s'", typeName, name.Value()) + // Dunders such as __class__/__dict__ resolve through the MRO walk. + return objects.GenericGetAttr(self, nameObj) } // --- constructors ------------------------------------------------------------ @@ -1775,7 +1775,8 @@ func rwPairGetattr(self objects.Object, nameObj objects.Object) (objects.Object, // CPython: Modules/_io/bufferedio.c:2441 bufferedrwpair_closed_get return objects.NewBool(p.writer.closed), nil } - return nil, fmt.Errorf("AttributeError: '_io.BufferedRWPair' object has no attribute '%s'", name.Value()) + // Dunders such as __class__/__dict__ resolve through the MRO walk. + return objects.GenericGetAttr(self, nameObj) } func init() { diff --git a/module/io/bytesio.go b/module/io/bytesio.go index c3e829119..e495803c8 100644 --- a/module/io/bytesio.go +++ b/module/io/bytesio.go @@ -45,6 +45,10 @@ func init() { BytesIOType.Iter = bytesIOIter BytesIOType.IterNext = bytesIOIterNext BytesIOType.Getattro = bytesIOGetattr + // BytesIO defines no __eq__, so it keeps object's identity-based hash. + // + // CPython: Modules/_io/bytesio.c:1062 PyBytesIO_Type (tp_hash inherited) + BytesIOType.Hash = objects.IdentityHash // LOAD_SPECIAL walks the type MRO for __enter__ / __exit__. // // CPython: Modules/_io/iobase.c:391 iobase_enter / :409 iobase_exit @@ -446,7 +450,10 @@ func bytesIOGetattr(o objects.Object, name objects.Object) (objects.Object, erro if fn := bytesIOMethod(b, n.Value()); fn != nil { return fn, nil } - return nil, fmt.Errorf("AttributeError: '_io.BytesIO' object has no attribute '%s'", n.Value()) + // Anything the custom dispatch above does not serve (dunders such as + // __class__, __dict__, __reduce_ex__) resolves through the normal MRO + // walk against BytesIOType's bases (object), matching stringIOGetattr. + return objects.GenericGetAttr(o, name) } // bytesIOMethod maps method names to BuiltinFunctions. diff --git a/module/io/codecs.go b/module/io/codecs.go index a99450e8e..3a9bbb48e 100644 --- a/module/io/codecs.go +++ b/module/io/codecs.go @@ -116,22 +116,6 @@ func encodeUTF32(s, variant string) []byte { // --- 8-bit code pages ------------------------------------------------------- -// charmapDecode decodes data using a 256-entry lookup table. -1 in the -// table means the byte is unmapped and should raise UnicodeDecodeError. -// -// CPython: Modules/_codecs/charmap.c PyUnicode_DecodeCharmap -func charmapDecode(data []byte, table *[256]rune, name string) (string, error) { - runes := make([]rune, len(data)) - for i, b := range data { - r := table[b] - if r < 0 { - return "", fmt.Errorf("UnicodeDecodeError: %s can't decode byte 0x%02x", name, b) - } - runes[i] = r - } - return string(runes), nil -} - // charmapEncode encodes s using the inverse of the 256-entry table. // The map is built once at table-construction time. // diff --git a/module/io/fileio.go b/module/io/fileio.go index ca9f51010..5af2be42e 100644 --- a/module/io/fileio.go +++ b/module/io/fileio.go @@ -14,10 +14,31 @@ import ( "fmt" "io" stdos "os" + "syscall" "github.com/tamnd/gopy/objects" ) +// clearGoFinalizer drops the Go runtime finalizer that os.NewFile arms on a +// borrowed descriptor. gopy owns the lifecycle of these fds through +// FileIO.Close and the closefd flag, so the descriptor is released +// deterministically when Python closes the file. Leaving Go's finalizer in +// place lets a later GC close a descriptor whose integer was already freed +// and reused by another open file, surfacing as a spurious EBADF +// ("bad file descriptor") on the unrelated file's next write. +// +// CPython: Modules/_io/fileio.c:159 _io_FileIO_close_impl owns the close; +// there is no background reclaim of the fd. +// +// os.NewFile arms the finalizer on the unexported inner *os.file, not on the +// returned *os.File, so runtime.SetFinalizer(f, nil) on the outer handle is a +// no-op and leaves the close finalizer live. os.File is struct{ file *file } +// with the inner pointer at offset 0, so read that pointer and clear the +// finalizer on the object it actually points at. +func clearGoFinalizer(f *stdos.File) { + objects.ClearOSFileFinalizer(f) +} + // SMALLCHUNK / DEFAULT_BUFFER_SIZE / LARGE_BUFFER_CUTOFF_SIZE mirror the // growth-policy constants used by readall() in CPython. // @@ -245,6 +266,7 @@ func fileIOCall(_ objects.Object, args []objects.Object, kwargs map[string]objec if f == nil { return nil, fmt.Errorf("OSError: bad file descriptor") } + clearGoFinalizer(f) fi := &FileIO{ f: f, nameIsInt: true, @@ -295,8 +317,12 @@ func fileIOCall(_ objects.Object, args []objects.Object, kwargs map[string]objec if f == nil { return nil, fmt.Errorf("OSError: bad file descriptor from opener") } + clearGoFinalizer(f) } else { f, err = stdos.OpenFile(name, flag, 0o666) + if err == nil { + clearGoFinalizer(f) + } if err != nil { // Preserve the os.PathError chain (errno + filename) with %w // so the unwind path can build a FileNotFoundError / @@ -308,6 +334,20 @@ func fileIOCall(_ objects.Object, args []objects.Object, kwargs map[string]objec return nil, fmt.Errorf("OSError: %w", err) } } + // open() succeeds on a directory on Unix, but a FileIO must never wrap + // one: fstat the descriptor and raise IsADirectoryError (EISDIR) when it + // names a directory, the way CPython rejects it at construction time + // rather than deferring the failure to the first read. + // + // CPython: Modules/_io/fileio.c:478 _io_FileIO___init___impl (S_ISDIR check) + if info, statErr := f.Stat(); statErr == nil && info.IsDir() { + _ = f.Close() + return nil, fmt.Errorf("OSError: %w", &stdos.PathError{ + Op: "open", + Path: name, + Err: syscall.EISDIR, + }) + } fi := &FileIO{ f: f, name: name, @@ -759,7 +799,8 @@ func fileIOGetattr(o objects.Object, name objects.Object) (objects.Object, error if fn := fileIOMethod(fi, n.Value()); fn != nil { return fn, nil } - return nil, fmt.Errorf("AttributeError: '_io.FileIO' object has no attribute '%s'", n.Value()) + // Dunders such as __class__/__dict__ resolve through the MRO walk. + return objects.GenericGetAttr(o, name) } // fileIOSetattr handles attribute assignment on FileIO. Only .name is diff --git a/module/io/iobase.go b/module/io/iobase.go index 4d823dc0b..1c55e2e1e 100644 --- a/module/io/iobase.go +++ b/module/io/iobase.go @@ -325,6 +325,29 @@ func IOBaseCannotPickle(self objects.Object) (objects.Object, error) { return nil, fmt.Errorf("TypeError: cannot pickle '%s' instances", name) } +// ioUserInstanceAttr resolves attribute lookups for Python subclasses of the +// io base types. Those instances are *Instance objects, so a method the +// subclass (or a mix-in) defines must win over the synthesized native method, +// exactly as PyObject_GenericGetAttr walks the MRO. It returns (value, true, +// nil) when the generic path resolves the name, (nil, false, nil) when the +// caller should fall back to the native method synthesis, and an error to +// propagate verbatim. +// +// CPython: Objects/object.c:1389 _PyObject_GenericGetAttrWithDict +func ioUserInstanceAttr(o objects.Object, name objects.Object) (objects.Object, bool, error) { + if _, ok := o.(*objects.Instance); !ok { + return nil, false, nil + } + v, err := objects.GenericGetAttr(o, name) + if err == nil { + return v, true, nil + } + if objects.IsAttributeError(err) { + return nil, false, nil + } + return nil, false, err +} + // iobaseGetattro dispatches attribute lookup for _IOBase objects. // // CPython: Modules/_io/iobase.c:860 iobase_getset + iobase_methods @@ -333,6 +356,9 @@ func iobaseGetattro(o objects.Object, name objects.Object) (objects.Object, erro if !ok { return nil, fmt.Errorf("TypeError: attribute name must be string") } + if v, ok, err := ioUserInstanceAttr(o, name); ok || err != nil { + return v, err + } return iobaseAttr(o, n.Value()) } @@ -341,6 +367,9 @@ func rawiobaseGetattro(o objects.Object, name objects.Object) (objects.Object, e if !ok { return nil, fmt.Errorf("TypeError: attribute name must be string") } + if v, ok, err := ioUserInstanceAttr(o, name); ok || err != nil { + return v, err + } // Instance dict shadows the type methods so subclasses (and tests) can // override read / readall / readinto / write via setattr, mirroring // CPython's PyObject_GenericGetAttr where the instance __dict__ wins @@ -372,7 +401,10 @@ func iobaseAttr(o objects.Object, name string) (objects.Object, error) { if fn := iobaseMethod(o, name); fn != nil { return fn, nil } - return nil, fmt.Errorf("AttributeError: '_io._IOBase' object has no attribute %q", name) + // User subclasses (*Instance) keep their attributes in a managed dict and + // resolve dunders (__class__, __dict__) through the MRO; defer to the + // generic path rather than raising the bare _IOBase message. + return objects.GenericGetAttr(o, objects.NewStr(name)) } // iobaseSetattro stores an attribute into the instance dict. @@ -383,7 +415,10 @@ func iobaseSetattro(o objects.Object, name objects.Object, value objects.Object) } d := iobaseGetDict(o) if d == nil { - return fmt.Errorf("AttributeError: cannot set attribute on _IOBase without dict") + // User subclasses of the io base types are *Instance objects carrying + // their own managed dict; route their attribute stores through the + // normal generic path instead of the native _IOBase dict. + return objects.GenericSetAttr(o, name, value) } if value == nil { return d.DelItem(objects.NewStr(n.Value())) diff --git a/module/io/module.go b/module/io/module.go index 37e9d7947..6ac77ebe5 100644 --- a/module/io/module.go +++ b/module/io/module.go @@ -15,6 +15,7 @@ import ( "fmt" "os" "strings" + "syscall" "github.com/tamnd/gopy/errors" "github.com/tamnd/gopy/imp" @@ -439,9 +440,12 @@ func ioOpen(a *ioOpenArgs) (objects.Object, error) { if f == nil { return nil, fmt.Errorf("OSError: bad file descriptor from opener") } + clearGoFinalizer(f) raw = NewFileIO(f, a.file, rawMode, readable, writable) } else { - f, err := os.OpenFile(a.file, flag, 0o600) + // 0o666 is CPython's default create mode for open(); the process + // umask narrows it. CPython: Modules/_io/fileio.c _io_FileIO___init___impl. + f, err := os.OpenFile(a.file, flag, 0o666) //nolint:gosec // CPython open() default mode, umask-narrowed if err != nil { // Preserve the os.PathError chain (errno + filename) with %w // so the unwind path builds a FileNotFoundError / @@ -452,6 +456,20 @@ func ioOpen(a *ioOpenArgs) (objects.Object, error) { // CPython: Modules/_io/fileio.c:451 _io_FileIO___init___impl return nil, fmt.Errorf("OSError: %w", err) } + // open() succeeds on a directory on Unix; reject it at construction + // with IsADirectoryError (EISDIR) so the failure surfaces here rather + // than on the first read. + // + // CPython: Modules/_io/fileio.c:478 _io_FileIO___init___impl (S_ISDIR check) + if info, statErr := f.Stat(); statErr == nil && info.IsDir() { + _ = f.Close() + return nil, fmt.Errorf("OSError: %w", &os.PathError{ + Op: "open", + Path: a.file, + Err: syscall.EISDIR, + }) + } + clearGoFinalizer(f) raw = NewFileIO(f, a.file, rawMode, readable, writable) } diff --git a/module/io/stringio.go b/module/io/stringio.go index a5db7218c..fca896e96 100644 --- a/module/io/stringio.go +++ b/module/io/stringio.go @@ -59,6 +59,10 @@ func init() { StringIOType.Iter = stringIOIter StringIOType.IterNext = stringIOIterNext StringIOType.Getattro = stringIOGetattr + // StringIO defines no __eq__, so it keeps object's identity-based hash. + // + // CPython: Modules/_io/stringio.c:1056 PyStringIO_Type (tp_hash inherited) + StringIOType.Hash = objects.IdentityHash registerStringIODescrs() objects.AddIterSlotWrappers(StringIOType) } diff --git a/module/io/textio_codec.go b/module/io/textio_codec.go index f4f70e9c0..1e19e3865 100644 --- a/module/io/textio_codec.go +++ b/module/io/textio_codec.go @@ -14,8 +14,6 @@ package io import ( "encoding/binary" "fmt" - "unicode/utf16" - "unicode/utf8" "github.com/tamnd/gopy/codecs" ) @@ -48,47 +46,51 @@ type IncrementalEncoder interface { Reset() } -// getIncrementalDecoder returns a fresh decoder for encoding. errors -// is the error-handling strategy ("strict", "replace", "ignore"); only -// "strict" is implemented today, matching the current one-shot -// `decodeBytes` behavior. Unknown encodings return a LookupError-shaped -// Go error so the caller can surface it to Python. +// getIncrementalDecoder returns a fresh decoder for encoding. errors is +// the error-handling strategy ("strict", "replace", "ignore", +// "backslashreplace", ...); the decoders carry it and hand the complete +// portion of each chunk to the shared codecs package, which applies the +// named handler exactly as bytes.decode does. Unknown encodings return a +// LookupError-shaped Go error so the caller can surface it to Python. // // CPython: Modules/_io/textio.c:912 _textiowrapper_set_decoder // (calls _PyCodecInfo_GetIncrementalDecoder). -func getIncrementalDecoder(encoding, _ string) (IncrementalDecoder, error) { +func getIncrementalDecoder(encoding, errors string) (IncrementalDecoder, error) { + if errors == "" { + errors = "strict" + } switch normalizeCodec(encoding) { case "utf-8": - return &utf8Decoder{}, nil + return &utf8Decoder{errors: errors}, nil case "ascii": - return &asciiDecoder{}, nil + return &asciiDecoder{errors: errors}, nil case "latin-1": - return &latin1Decoder{}, nil + return &latin1Decoder{errors: errors}, nil case "utf-16": - return &utf16Decoder{variant: ""}, nil + return &utf16Decoder{variant: "", errors: errors}, nil case "utf-16-le": - return &utf16Decoder{variant: "le"}, nil + return &utf16Decoder{variant: "le", errors: errors}, nil case "utf-16-be": - return &utf16Decoder{variant: "be"}, nil + return &utf16Decoder{variant: "be", errors: errors}, nil case "utf-32": - return &utf32Decoder{variant: ""}, nil + return &utf32Decoder{variant: "", errors: errors}, nil case "utf-32-le": - return &utf32Decoder{variant: "le"}, nil + return &utf32Decoder{variant: "le", errors: errors}, nil case "utf-32-be": - return &utf32Decoder{variant: "be"}, nil + return &utf32Decoder{variant: "be", errors: errors}, nil case "cp1252": - return &charmapDecoder{table: &cp1252Table.decode, name: "cp1252"}, nil + return &charmapDecoder{table: &cp1252Table.decode, name: "cp1252", errors: errors}, nil case "cp1250": - return &charmapDecoder{table: &cp1250Table.decode, name: "cp1250"}, nil + return &charmapDecoder{table: &cp1250Table.decode, name: "cp1250", errors: errors}, nil case "cp1251": - return &charmapDecoder{table: &cp1251Table.decode, name: "cp1251"}, nil + return &charmapDecoder{table: &cp1251Table.decode, name: "cp1251", errors: errors}, nil case "cp437": - return &charmapDecoder{table: &cp437Table.decode, name: "cp437"}, nil + return &charmapDecoder{table: &cp437Table.decode, name: "cp437", errors: errors}, nil case "mac-roman": - return &charmapDecoder{table: &macRomanTable.decode, name: "mac-roman"}, nil + return &charmapDecoder{table: &macRomanTable.decode, name: "mac-roman", errors: errors}, nil } if ci, err := codecs.Lookup(encoding); err == nil { - return ®istryDecoder{ci: ci}, nil + return ®istryDecoder{ci: ci, errors: errors}, nil } return nil, fmt.Errorf("LookupError: unknown encoding: %s", encoding) } @@ -143,7 +145,8 @@ func getIncrementalEncoder(encoding, _ string) (IncrementalEncoder, error) { // incomplete multi-byte sequence. CPython's utf-8 incremental decoder // keeps the same window because a code point spans at most four bytes. type utf8Decoder struct { - buf []byte + buf []byte + errors string } func (d *utf8Decoder) Decode(input []byte, final bool) (string, error) { @@ -154,28 +157,20 @@ func (d *utf8Decoder) Decode(input []byte, final bool) (string, error) { src = append(append([]byte{}, d.buf...), input...) d.buf = d.buf[:0] } - // Walk back from the end to find the longest tail that is either - // a complete utf-8 sequence or an incomplete (but valid so-far) - // prefix. RuneStart marks the first byte of a sequence. - keep := 0 - if !final && len(src) > 0 { - for i := len(src) - 1; i >= 0 && i >= len(src)-4; i-- { - if utf8.RuneStart(src[i]) { - if !utf8.FullRune(src[i:]) { - keep = len(src) - i - } - break - } - } - } - complete := src[:len(src)-keep] - if !utf8.Valid(complete) { - return "", fmt.Errorf("UnicodeDecodeError: invalid utf-8 sequence") + // DecodeUTF8Incremental holds back an incomplete trailing sequence + // when final is false and applies the configured error handler to + // the complete portion, so an invalid byte under "ignore" / + // "replace" / "backslashreplace" is repaired instead of raising. + // + // CPython: Objects/unicodeobject.c:4756 PyUnicode_DecodeUTF8Stateful + out, remaining, err := codecs.DecodeUTF8Incremental(src, d.errors, final) + if err != nil { + return "", err } - if keep > 0 { - d.buf = append(d.buf[:0], src[len(src)-keep:]...) + if len(remaining) > 0 { + d.buf = append(d.buf[:0], remaining...) } - return string(complete), nil + return out, nil } func (d *utf8Decoder) GetState() ([]byte, int64) { return append([]byte{}, d.buf...), 0 } @@ -187,15 +182,11 @@ func (d *utf8Decoder) Reset() { d.buf = d.buf[:0] } // --- ascii / latin-1 ------------------------------------------------------- -type asciiDecoder struct{} +type asciiDecoder struct{ errors string } -func (asciiDecoder) Decode(input []byte, _ bool) (string, error) { - for _, b := range input { - if b > 127 { - return "", fmt.Errorf("UnicodeDecodeError: ordinal not in range(128)") - } - } - return string(input), nil +func (d asciiDecoder) Decode(input []byte, _ bool) (string, error) { + out, _, err := codecs.Decode(input, "ascii", d.errors) + return out, err } func (asciiDecoder) GetState() ([]byte, int64) { return nil, 0 } func (asciiDecoder) SetState([]byte, int64) error { return nil } @@ -210,14 +201,13 @@ func encodeASCII(s string) ([]byte, error) { return []byte(s), nil } -type latin1Decoder struct{} +type latin1Decoder struct{ errors string } -func (latin1Decoder) Decode(input []byte, _ bool) (string, error) { - runes := make([]rune, len(input)) - for i, b := range input { - runes[i] = rune(b) - } - return string(runes), nil +func (d latin1Decoder) Decode(input []byte, _ bool) (string, error) { + // latin-1 maps every byte to a code point, so the error handler is + // never invoked, but route through codecs for uniformity. + out, _, err := codecs.Decode(input, "latin-1", d.errors) + return out, err } func (latin1Decoder) GetState() ([]byte, int64) { return nil, 0 } func (latin1Decoder) SetState([]byte, int64) error { return nil } @@ -242,6 +232,7 @@ func encodeLatin1(s string) ([]byte, error) { type utf16Decoder struct { variant string // "", "le", or "be" buf []byte + errors string // flags encodes endianness for tell/seek snapshots. // 0 = undecided (auto-variant before BOM sniff) // 1 = little-endian @@ -294,18 +285,21 @@ func (d *utf16Decoder) Decode(input []byte, final bool) (string, error) { } } keep := len(src) % 2 - if final && keep != 0 { - return "", fmt.Errorf("UnicodeDecodeError: utf-16 truncated (odd byte count)") + if final { + // A trailing half code unit on the final chunk is a truncation + // the error handler must see; hand the whole tail to codecs. + keep = 0 } body := src[:len(src)-keep] if keep > 0 { d.buf = append(d.buf, src[len(src)-keep:]...) } - units := make([]uint16, len(body)/2) - for i := range units { - units[i] = bo.Uint16(body[2*i:]) + name := "utf-16-le" + if bo == binary.BigEndian { + name = "utf-16-be" } - return string(utf16.Decode(units)), nil + out, _, err := codecs.Decode(body, name, d.errors) + return out, err } func (d *utf16Decoder) GetState() ([]byte, int64) { @@ -328,6 +322,7 @@ func (d *utf16Decoder) Reset() { type utf32Decoder struct { variant string buf []byte + errors string flags int64 } @@ -372,22 +367,21 @@ func (d *utf32Decoder) Decode(input []byte, final bool) (string, error) { } } keep := len(src) % 4 - if final && keep != 0 { - return "", fmt.Errorf("UnicodeDecodeError: utf-32 truncated (length %% 4 != 0)") + if final { + // A trailing partial code unit on the final chunk is a + // truncation the error handler must see. + keep = 0 } body := src[:len(src)-keep] if keep > 0 { d.buf = append(d.buf, src[len(src)-keep:]...) } - runes := make([]rune, 0, len(body)/4) - for i := 0; i < len(body); i += 4 { - cp := bo.Uint32(body[i:]) - if cp > 0x10FFFF || (cp >= 0xD800 && cp <= 0xDFFF) { - return "", fmt.Errorf("UnicodeDecodeError: invalid utf-32 codepoint U+%X", cp) - } - runes = append(runes, rune(cp)) + name := "utf-32-le" + if bo == binary.BigEndian { + name = "utf-32-be" } - return string(runes), nil + out, _, err := codecs.Decode(body, name, d.errors) + return out, err } func (d *utf32Decoder) GetState() ([]byte, int64) { @@ -408,12 +402,14 @@ func (d *utf32Decoder) Reset() { // --- charmap (single-byte) ------------------------------------------------- type charmapDecoder struct { - table *[256]rune - name string + table *[256]rune + name string + errors string } func (d *charmapDecoder) Decode(input []byte, _ bool) (string, error) { - return charmapDecode(input, d.table, d.name) + out, _, err := codecs.Decode(input, d.name, d.errors) + return out, err } func (d *charmapDecoder) GetState() ([]byte, int64) { return nil, 0 } func (d *charmapDecoder) SetState([]byte, int64) error { return nil } @@ -466,14 +462,15 @@ func (e *bomEncoder) Reset() { e.state = 0 } // // CPython: Python/codecs.c:570 _PyCodecInfo_GetIncrementalDecoder type registryDecoder struct { - ci *codecs.CodecInfo - buf []byte - out string + ci *codecs.CodecInfo + buf []byte + out string + errors string } func (d *registryDecoder) Decode(input []byte, final bool) (string, error) { d.buf = append(d.buf, input...) - s, _, err := d.ci.Decode(d.buf, "strict") + s, _, err := d.ci.Decode(d.buf, d.errors) if err != nil { // Allow buffering when not final: a trailing incomplete sequence // may complete on the next chunk. @@ -497,7 +494,7 @@ func (d *registryDecoder) SetState(buffer []byte, _ int64) error { if len(buffer) == 0 { return nil } - s, _, err := d.ci.Decode(d.buf, "strict") + s, _, err := d.ci.Decode(d.buf, d.errors) if err == nil { d.out = s } diff --git a/module/io/textiowrapper.go b/module/io/textiowrapper.go index b6da128f9..a8715bc81 100644 --- a/module/io/textiowrapper.go +++ b/module/io/textiowrapper.go @@ -969,7 +969,8 @@ func textIOWrapperGetattr(o objects.Object, name objects.Object) (objects.Object return v, nil } } - return nil, fmt.Errorf("AttributeError: '_io.TextIOWrapper' object has no attribute '%s'", n.Value()) + // Dunders such as __class__/__reduce_ex__ resolve through the MRO walk. + return objects.GenericGetAttr(o, name) } // textIOWrapperReadonlyAttrs are the C-level data descriptors that block @@ -1435,7 +1436,8 @@ func incrementalNLDecoderGetattr(o objects.Object, name objects.Object) (objects return objects.None(), nil }), nil } - return nil, fmt.Errorf("AttributeError: '_io.IncrementalNewlineDecoder' object has no attribute '%s'", n.Value()) + // Dunders such as __class__/__dict__ resolve through the MRO walk. + return objects.GenericGetAttr(o, name) } // translateNewlines applies universal newline tracking and (optionally) @@ -1538,11 +1540,14 @@ var TextIOBaseType = objects.NewType("_io._TextIOBase", []*objects.Type{IOBaseTy // textIOBaseGetattr dispatches attribute lookups on _TextIOBase instances. // // CPython: Modules/_io/textio.c:187 textiobase_methods + textiobase_getset -func textIOBaseGetattr(_ objects.Object, nameObj objects.Object) (objects.Object, error) { +func textIOBaseGetattr(self objects.Object, nameObj objects.Object) (objects.Object, error) { name, ok := nameObj.(*objects.Unicode) if !ok { return nil, fmt.Errorf("TypeError: attribute name must be string") } + if v, ok, err := ioUserInstanceAttr(self, nameObj); ok || err != nil { + return v, err + } switch name.Value() { case "detach": // CPython: Modules/_io/textio.c:66 _io__TextIOBase_detach_impl @@ -1574,7 +1579,8 @@ func textIOBaseGetattr(_ objects.Object, nameObj objects.Object) (objects.Object // CPython: Modules/_io/textio.c:180 _io__TextIOBase_errors_get_impl return objects.None(), nil } - return nil, fmt.Errorf("AttributeError: '_io._TextIOBase' object has no attribute '%s'", name.Value()) + // Dunders such as __class__/__dict__ resolve through the MRO walk. + return objects.GenericGetAttr(self, nameObj) } func init() { diff --git a/module/marshal/module.go b/module/marshal/module.go index fd4c46dc0..16af5ce48 100644 --- a/module/marshal/module.go +++ b/module/marshal/module.go @@ -95,6 +95,12 @@ func loads(args []objects.Object, kwargs map[string]objects.Object) (objects.Obj } val, err := marshal.Load(bytes.NewReader(src)) if err != nil { + // A truncated or empty buffer surfaces as EOFError, mirroring + // r_object/r_byte/r_string; any other decode failure is a ValueError. + // CPython: Python/marshal.c:1922 marshal_loads_impl + if marshal.IsEOF(err) { + return nil, fmt.Errorf("EOFError: %w", err) + } return nil, fmt.Errorf("ValueError: %w", err) } return wrap(val), nil @@ -158,8 +164,12 @@ func bufferOf(o objects.Object) ([]byte, error) { return v.Bytes(), nil case *objects.ByteArray: return v.Bytes(), nil + case *objects.MemoryView: + // Tobytes() serializes the exposed view (honoring offset/length), + // matching how marshal.loads consumes any bytes-like buffer. + return v.Tobytes().Bytes(), nil } - return nil, fmt.Errorf("TypeError: a bytes-like object is required, not '%T'", o) + return nil, fmt.Errorf("TypeError: a bytes-like object is required, not '%s'", o.Type().Name) } // unwrap converts a Python objects.Object into the native Go form the diff --git a/module/os/module.go b/module/os/module.go index dcbbe1973..0d3b638a6 100644 --- a/module/os/module.go +++ b/module/os/module.go @@ -102,14 +102,26 @@ func goFileModeToStMode(m goos.FileMode) int64 { return mode } -// newStatResult assembles an os.stat_result from the second-resolution -// components gathered by the platform stat helpers. The visible integer -// time slots truncate the float seconds; the hidden float and nanosecond -// timestamps and block fields follow the CPython layout. +// newStatResult assembles an os.stat_result from the nanosecond-resolution +// components gathered by the platform stat helpers. CPython derives three +// views from the same struct timespec: the visible integer slot is the +// floor-second, the hidden float slot is sec + 1e-9*nsec, and the hidden +// *_ns slot is the full nanosecond count. The block fields trail the +// timestamps in the CPython layout. atimeNs/mtimeNs/ctimeNs are full +// nanoseconds since the epoch. // // CPython: Modules/posixmodule.c:2456 _pystat_fromstructstat -func newStatResult(mode, ino, dev, nlink, uid, gid, size, atime, mtime, ctime, blksize, blocks, rdev int64) *objects.StructSeq { - ns := func(sec int64) objects.Object { return objects.NewInt(sec * 1_000_000_000) } +func newStatResult(mode, ino, dev, nlink, uid, gid, size, atimeNs, mtimeNs, ctimeNs, blksize, blocks, rdev int64) *objects.StructSeq { + // fillTime mirrors fill_time: the integer field floors toward negative + // infinity, the float field carries the fractional second. + // CPython: Modules/posixmodule.c:2417 fill_time + floorSec := func(ns int64) int64 { + sec := ns / 1_000_000_000 + if ns%1_000_000_000 != 0 && ns < 0 { + sec-- + } + return sec + } return objects.NewStructSeq(statResultType, []objects.Object{ objects.NewInt(mode), objects.NewInt(ino), @@ -118,15 +130,15 @@ func newStatResult(mode, ino, dev, nlink, uid, gid, size, atime, mtime, ctime, b objects.NewInt(uid), objects.NewInt(gid), objects.NewInt(size), - objects.NewInt(atime), // unnamed: integer st_atime - objects.NewInt(mtime), // unnamed: integer st_mtime - objects.NewInt(ctime), // unnamed: integer st_ctime - objects.NewFloat(float64(atime)), - objects.NewFloat(float64(mtime)), - objects.NewFloat(float64(ctime)), - ns(atime), - ns(mtime), - ns(ctime), + objects.NewInt(floorSec(atimeNs)), // unnamed: integer st_atime + objects.NewInt(floorSec(mtimeNs)), // unnamed: integer st_mtime + objects.NewInt(floorSec(ctimeNs)), // unnamed: integer st_ctime + objects.NewFloat(float64(atimeNs) / 1e9), + objects.NewFloat(float64(mtimeNs) / 1e9), + objects.NewFloat(float64(ctimeNs) / 1e9), + objects.NewInt(atimeNs), + objects.NewInt(mtimeNs), + objects.NewInt(ctimeNs), objects.NewInt(blksize), objects.NewInt(blocks), objects.NewInt(rdev), @@ -206,13 +218,18 @@ func osTimes(_ []objects.Object, _ map[string]objects.Object) (objects.Object, e } func init() { - _ = imp.AppendInittab("os", buildOS) - _ = imp.AppendInittab("posix", buildPosixModule) - // On Windows, Lib/os.py does `from nt import *`; register the same - // syscall surface under the "nt" name so `import nt` resolves. - // CPython: Modules/posixmodule.c posixmodule_init (registers as "nt" on Windows) + // CPython compiles posixmodule.c under a single name per platform: "nt" + // on Windows, "posix" everywhere else (Modules/posixmodule.c builds with + // MODNAME = "nt" when MS_WINDOWS). Lib/os.py then selects ntpath vs + // posixpath by testing which name is in sys.builtin_module_names, so + // registering both on Windows makes os.py pick posixpath and mangle + // drive-absolute paths. Mirror CPython: one name, gated on the platform. + // + // CPython: Modules/posixmodule.c posixmodule_init (MODNAME "nt" on Windows) if runtime.GOOS == "windows" { _ = imp.AppendInittab("nt", buildPosixModule) + } else { + _ = imp.AppendInittab("posix", buildPosixModule) } _ = imp.AppendInittab("os.path", buildOSPath) // posixpath and ntpath now load from stdlib/ via PathFinder. @@ -229,7 +246,13 @@ func buildPosixModule() (*objects.Module, error) { if err != nil { return nil, err } - posix := objects.NewModule("posix") + // The compiled module is named "nt" on Windows, "posix" elsewhere, + // matching the single MODNAME CPython's posixmodule.c builds with. + modName := "posix" + if runtime.GOOS == "windows" { + modName = "nt" + } + posix := objects.NewModule(modName) pd := posix.Dict() md := m.Dict() for _, k := range md.Keys() { @@ -280,13 +303,25 @@ func buildPath() (*objects.Module, error) { func buildOS() (*objects.Module, error) { // environ: populate from the real process environment. // CPython: Modules/posixmodule.c:1768 convertenviron + // posix.environ holds bytes keys/values on POSIX (Lib/os.py decodes + // them through fsdecode); the nt build keeps str. CPython: + // Modules/posixmodule.c convertenviron. + environBytes := runtime.GOOS != "windows" environDict := objects.NewDict() for _, kv := range goos.Environ() { k, v, ok := strings.Cut(kv, "=") if !ok { continue } - if err := environDict.SetItem(objects.NewStr(k), objects.NewStr(v)); err != nil { + var kObj, vObj objects.Object + if environBytes { + kObj = objects.NewBytes([]byte(k)) + vObj = objects.NewBytes([]byte(v)) + } else { + kObj = objects.NewStr(k) + vObj = objects.NewStr(v) + } + if err := environDict.SetItem(kObj, vObj); err != nil { return nil, err } } @@ -299,10 +334,14 @@ func buildOS() (*objects.Module, error) { linesep := "\n" pathsep := ":" osName := "posix" + // altsep is the alternate path separator: None on POSIX, '/' on Windows. + // CPython: Modules/posixmodule.c / Lib/ntpath.py:altsep + altsep := objects.None() if runtime.GOOS == "windows" { linesep = "\r\n" pathsep = ";" osName = "nt" + altsep = objects.NewStr("/") } entries := []struct { @@ -310,6 +349,7 @@ func buildOS() (*objects.Module, error) { val objects.Object }{ {"sep", objects.NewStr(sep)}, + {"altsep", altsep}, {"extsep", objects.NewStr(".")}, {"pardir", objects.NewStr("..")}, {"curdir", objects.NewStr(".")}, @@ -324,6 +364,8 @@ func buildOS() (*objects.Module, error) { {"listdir", objects.NewBuiltinFunction("listdir", listdir)}, {"stat", objects.NewBuiltinFunction("stat", stat)}, {"getenv", objects.NewBuiltinFunction("getenv", getenv)}, + {"putenv", objects.NewBuiltinFunction("putenv", putenv)}, + {"unsetenv", objects.NewBuiltinFunction("unsetenv", unsetenv)}, {"getpid", objects.NewBuiltinFunction("getpid", getpid)}, {"getuid", objects.NewBuiltinFunction("getuid", getuid)}, {"makedirs", objects.NewBuiltinFunction("makedirs", makedirs)}, @@ -431,7 +473,7 @@ func buildOS() (*objects.Module, error) { for _, group := range [][]struct { name string val objects.Object - }{entries, posixIdentityEntries()} { + }{entries, posixIdentityEntries(), winPathEntries()} { for _, e := range group { if err := d.SetItem(objects.NewStr(e.name), e.val); err != nil { return nil, err @@ -484,7 +526,12 @@ func osModuleGetattr(m *objects.Module) func([]objects.Object, map[string]object return cls, nil } if name != "path" { - return nil, fmt.Errorf("AttributeError: module 'os' has no attribute %q", name) + // Match the standard module-getattro miss message (single + // quotes). CPython's os.py has no __getattr__, so a missing + // attribute raises "module 'os' has no attribute 'X'". + // + // CPython: Objects/moduleobject.c:1024 _Py_module_getattro_impl + return nil, fmt.Errorf("AttributeError: module 'os' has no attribute '%s'", name) } pathMod := osPathModule() if err := m.Dict().SetItem(objects.NewStr("path"), pathMod); err != nil { @@ -867,12 +914,67 @@ func stat(args []objects.Object, _ map[string]objects.Object) (objects.Object, e ino, dev, nlink, uid, gid, atime, ctime := statSysFields(info) blksize, blocks, rdev := statBlockFields(info) return newStatResult(statMode(info), int64(ino), int64(dev), int64(nlink), - int64(uid), int64(gid), info.Size(), atime, info.ModTime().Unix(), ctime, + int64(uid), int64(gid), info.Size(), atime, info.ModTime().UnixNano(), ctime, blksize, blocks, rdev), nil } // getenv mirrors Lib/os.py:818 getenv: returns environ[key] or default. // CPython: Lib/os.py:818 getenv +// fsArg decodes a putenv/unsetenv argument that may arrive as str or +// bytes (Lib/os.py's posix _Environ fsencodes keys and values to bytes +// before calling putenv / unsetenv). +func fsArg(o objects.Object) (string, error) { + switch v := o.(type) { + case *objects.Bytes: + return string(v.Bytes()), nil + case *objects.ByteArray: + return string(v.Bytes()), nil + default: + return objects.Str(o) + } +} + +// putenv implements posix.putenv(key, value): set a process environment +// variable. Lib/os.py's _Environ.__setitem__ calls it before updating +// its backing dict. +// +// CPython: Modules/posixmodule.c os_putenv_impl +func putenv(args []objects.Object, _ map[string]objects.Object) (objects.Object, error) { + if len(args) < 2 { + return nil, fmt.Errorf("TypeError: putenv() takes exactly 2 arguments (%d given)", len(args)) + } + key, err := fsArg(args[0]) + if err != nil { + return nil, err + } + value, err := fsArg(args[1]) + if err != nil { + return nil, err + } + if err := goos.Setenv(key, value); err != nil { + return nil, fmt.Errorf("OSError: %s", err.Error()) + } + return objects.None(), nil +} + +// unsetenv implements posix.unsetenv(key): remove a process environment +// variable. Lib/os.py's _Environ.__delitem__ calls it. +// +// CPython: Modules/posixmodule.c os_unsetenv_impl +func unsetenv(args []objects.Object, _ map[string]objects.Object) (objects.Object, error) { + if len(args) < 1 { + return nil, fmt.Errorf("TypeError: unsetenv() takes exactly 1 argument (%d given)", len(args)) + } + key, err := fsArg(args[0]) + if err != nil { + return nil, err + } + if err := goos.Unsetenv(key); err != nil { + return nil, fmt.Errorf("OSError: %s", err.Error()) + } + return objects.None(), nil +} + func getenv(args []objects.Object, kwargs map[string]objects.Object) (objects.Object, error) { if len(args) == 0 { return nil, fmt.Errorf("TypeError: getenv() missing required argument: 'key'") @@ -1295,14 +1397,17 @@ func osLstat(args []objects.Object, _ map[string]objects.Object) (objects.Object return nil, fmt.Errorf("OSError: %w", serr) } ino, dev, nlink, uid, gid, atime, ctime := statSysFields(info) - mtime := info.ModTime().Unix() + mtime := info.ModTime().UnixNano() blksize, blocks, rdev := statBlockFields(info) return newStatResult(statMode(info), int64(ino), int64(dev), int64(nlink), int64(uid), int64(gid), info.Size(), atime, mtime, ctime, blksize, blocks, rdev), nil } -// osFstat returns the stat of an open file descriptor. -// The underlying fd is not closed; runtime.SetFinalizer is cleared on -// the temporary os.File wrapper so the GC never closes it. +// osFstat returns the stat of an open file descriptor. The work is +// delegated to the platform fstatResult helper, which calls fstat(2) +// directly through syscall rather than borrowing the fd in a temporary +// os.File. An os.File wrapper arms a finalizer on its inner file handle +// that runtime.SetFinalizer on the outer struct cannot clear, so a GC of +// the wrapper would close the live descriptor out from under its owner. // // CPython: Modules/posixmodule.c:3399 os_fstat_impl func osFstat(args []objects.Object, _ map[string]objects.Object) (objects.Object, error) { @@ -1314,16 +1419,7 @@ func osFstat(args []objects.Object, _ map[string]objects.Object) (objects.Object return nil, fmt.Errorf("TypeError: an integer is required") } fdVal, _ := fdObj.Int64() - f := goos.NewFile(uintptr(fdVal), "") - runtime.SetFinalizer(f, nil) - info, err := f.Stat() - if err != nil { - return nil, fmt.Errorf("OSError: %w", err) - } - ino, dev, nlink, uid, gid, atime, ctime := statSysFields(info) - mtime := info.ModTime().Unix() - blksize, blocks, rdev := statBlockFields(info) - return newStatResult(statMode(info), int64(ino), int64(dev), int64(nlink), int64(uid), int64(gid), info.Size(), atime, mtime, ctime, blksize, blocks, rdev), nil + return fstatResult(fdVal) } // osReplace atomically renames src to dst, replacing dst if it exists. diff --git a/module/os/module_test.go b/module/os/module_test.go index a4bccb24d..4c99b6667 100644 --- a/module/os/module_test.go +++ b/module/os/module_test.go @@ -119,13 +119,21 @@ func TestEnviron(t *testing.T) { if !ok { t.Fatalf("environ is %T, want *Dict", env) } + // On POSIX, posix.environ holds bytes keys/values (Lib/os.py decodes + // them); the nt build keeps str. Look the key up the same way. + keyObj := func(s string) objects.Object { + if runtime.GOOS == "windows" { + return objects.NewStr(s) + } + return objects.NewBytes([]byte(s)) + } pathKey := "PATH" if runtime.GOOS == "windows" { - if _, err2 := envDict.GetItem(objects.NewStr("Path")); err2 == nil { + if _, err2 := envDict.GetItem(keyObj("Path")); err2 == nil { pathKey = "Path" } } - v, err := envDict.GetItem(objects.NewStr(pathKey)) + v, err := envDict.GetItem(keyObj(pathKey)) if err != nil { t.Fatalf("environ[%q]: %v", pathKey, err) } diff --git a/module/os/posix_extra.go b/module/os/posix_extra.go index aa313c1cc..cef9e1a1e 100644 --- a/module/os/posix_extra.go +++ b/module/os/posix_extra.go @@ -25,21 +25,51 @@ func osChmod(args []objects.Object, _ map[string]objects.Object) (objects.Object if len(args) < 2 { return nil, fmt.Errorf("TypeError: chmod() missing required arguments") } - path, ok := args[0].(*objects.Unicode) - if !ok { - return nil, fmt.Errorf("TypeError: chmod() path must be str") + p, err := pathStringArg(args[0], "chmod") + if err != nil { + return nil, err } mode, ok := args[1].(*objects.Int) if !ok { return nil, fmt.Errorf("TypeError: chmod() mode must be int") } m, _ := mode.Int64() - if err := goos.Chmod(path.Value(), goos.FileMode(m)); err != nil { + if err := goos.Chmod(p, goos.FileMode(m)); err != nil { return nil, fmt.Errorf("OSError: %w", err) } return objects.None(), nil } +// pathStringArg coerces a path argument the way CPython's path_converter +// does: a str is taken verbatim, bytes are decoded, and any other object is +// run through os.fspath (__fspath__) so pathlib.Path and other PathLike +// objects are accepted. +// +// CPython: Modules/posixmodule.c:1093 path_converter +func pathStringArg(o objects.Object, fname string) (string, error) { + switch v := o.(type) { + case *objects.Unicode: + return v.Value(), nil + case *objects.Bytes: + return string(v.Bytes()), nil + } + m, err := objects.GetAttr(o, objects.NewStr("__fspath__")) + if err != nil { + return "", fmt.Errorf("TypeError: %s: path should be string, bytes or os.PathLike, not %s", fname, o.Type().Name) + } + r, err := objects.Call(m, objects.NewTuple(nil), nil) + if err != nil { + return "", err + } + switch v := r.(type) { + case *objects.Unicode: + return v.Value(), nil + case *objects.Bytes: + return string(v.Bytes()), nil + } + return "", fmt.Errorf("TypeError: expected __fspath__ to return str or bytes, not %s", r.Type().Name) +} + // osSymlink creates a symbolic link at link_name pointing at src. // // CPython: Modules/posixmodule.c os_symlink_impl @@ -47,15 +77,15 @@ func osSymlink(args []objects.Object, _ map[string]objects.Object) (objects.Obje if len(args) < 2 { return nil, fmt.Errorf("TypeError: symlink() requires src and dst") } - src, ok := args[0].(*objects.Unicode) - if !ok { - return nil, fmt.Errorf("TypeError: symlink() src must be str") + src, err := pathStringArg(args[0], "symlink") + if err != nil { + return nil, err } - dst, ok := args[1].(*objects.Unicode) - if !ok { - return nil, fmt.Errorf("TypeError: symlink() dst must be str") + dst, err := pathStringArg(args[1], "symlink") + if err != nil { + return nil, err } - if err := goos.Symlink(src.Value(), dst.Value()); err != nil { + if err := goos.Symlink(src, dst); err != nil { return nil, fmt.Errorf("OSError: %w", err) } return objects.None(), nil @@ -68,11 +98,11 @@ func osReadlink(args []objects.Object, _ map[string]objects.Object) (objects.Obj if len(args) < 1 { return nil, fmt.Errorf("TypeError: readlink() missing path") } - path, ok := args[0].(*objects.Unicode) - if !ok { - return nil, fmt.Errorf("TypeError: readlink() path must be str") + path, err := pathStringArg(args[0], "readlink") + if err != nil { + return nil, err } - target, err := goos.Readlink(path.Value()) + target, err := goos.Readlink(path) if err != nil { return nil, fmt.Errorf("OSError: %w", err) } @@ -86,15 +116,15 @@ func osLink(args []objects.Object, _ map[string]objects.Object) (objects.Object, if len(args) < 2 { return nil, fmt.Errorf("TypeError: link() requires src and dst") } - src, ok := args[0].(*objects.Unicode) - if !ok { - return nil, fmt.Errorf("TypeError: link() src must be str") + src, err := pathStringArg(args[0], "link") + if err != nil { + return nil, err } - dst, ok := args[1].(*objects.Unicode) - if !ok { - return nil, fmt.Errorf("TypeError: link() dst must be str") + dst, err := pathStringArg(args[1], "link") + if err != nil { + return nil, err } - if err := goos.Link(src.Value(), dst.Value()); err != nil { + if err := goos.Link(src, dst); err != nil { return nil, fmt.Errorf("OSError: %w", err) } return objects.None(), nil @@ -152,12 +182,17 @@ func osCPUCount(args []objects.Object, _ map[string]objects.Object) (objects.Obj return objects.NewInt(int64(n)), nil } -// osIsatty returns True if fd is a tty. The implementation Stats the -// fd through the goos package and tests the char-device bit, which -// matches what `isatty(3)` reports for the common cases _colorize -// cares about. +// osIsatty returns True if fd is a tty. It fstats the descriptor and +// tests the char-device type bit, which matches what `isatty(3)` reports +// for the common cases _colorize cares about. The stat goes through the +// platform fstatResult helper, which calls fstat(2) directly rather than +// borrowing the fd in a temporary os.File. A borrowed os.File arms a +// finalizer on its inner handle that runtime.SetFinalizer on the outer +// struct cannot clear, so a GC of the wrapper would close a descriptor we +// do not own and unrelated writes would later fail with EBADF. // -// CPython: Modules/posixmodule.c:11947 os_isatty_impl +// CPython: Modules/posixmodule.c:11947 os_isatty_impl borrows the fd +// and never closes it. func osIsatty(args []objects.Object, _ map[string]objects.Object) (objects.Object, error) { if len(args) < 1 { return nil, fmt.Errorf("TypeError: isatty() missing required argument: 'fd'") @@ -167,18 +202,18 @@ func osIsatty(args []objects.Object, _ map[string]objects.Object) (objects.Objec return nil, fmt.Errorf("TypeError: an integer is required") } fdVal, _ := fdObj.Int64() - f := goos.NewFile(uintptr(fdVal), "") - if f == nil { - return objects.NewBool(false), nil - } - info, err := f.Stat() + st, err := fstatResult(fdVal) if err != nil { // CPython os.isatty returns False on any error rather than - // raising; a Stat failure here means the fd is not a real + // raising; a stat failure here means the fd is not a real // device, which is exactly what callers want to know. return objects.NewBool(false), nil //nolint:nilerr // CPython os.isatty parity } - return objects.NewBool((info.Mode() & goos.ModeCharDevice) != 0), nil + // st_mode is the first stat_result slot. S_IFMT masks the file-type + // nibble; S_IFCHR marks a character device. + const sIFMT, sIFCHR = 0o170000, 0o020000 + mode, _ := st.Items()[0].(*objects.Int).Int64() + return objects.NewBool(mode&sIFMT == sIFCHR), nil } // osFsdecode decodes filename from the filesystem encoding (utf-8 on diff --git a/module/os/posix_unix.go b/module/os/posix_unix.go index f427a0713..d8e9a8685 100644 --- a/module/os/posix_unix.go +++ b/module/os/posix_unix.go @@ -280,3 +280,15 @@ func posixIdentityEntries() []struct { {"getgroups", objects.NewBuiltinFunction("getgroups", osGetgroups)}, } } + +// winPathEntries is empty on POSIX: posixmodule.c registers _path_splitroot +// and the listdrives family only inside its #ifdef MS_WINDOWS block, so on +// POSIX os._path_splitroot raises AttributeError just like CPython. +// +// CPython: Modules/posixmodule.c:4707 #ifdef MS_WINDOWS +func winPathEntries() []struct { + name string + val objects.Object +} { + return nil +} diff --git a/module/os/posix_windows.go b/module/os/posix_windows.go index 189fe8b5b..89185022d 100644 --- a/module/os/posix_windows.go +++ b/module/os/posix_windows.go @@ -289,3 +289,150 @@ func osUmask(args []objects.Object, _ map[string]objects.Object) (objects.Object } return objects.NewInt(0), nil } + +// winPathEntries returns the Windows-only path helpers posixmodule.c registers +// inside its #ifdef MS_WINDOWS block. Only _path_splitroot is needed by the +// stdlib bootstrap; the rest of the listdrives/_path_* family is unported. +// +// CPython: Modules/posixmodule.c:4707 #ifdef MS_WINDOWS +func winPathEntries() []struct { + name string + val objects.Object +} { + return []struct { + name string + val objects.Object + }{ + {"_path_splitroot", objects.NewBuiltinFunction("_path_splitroot", osPathSplitroot)}, + } +} + +// osPathSplitroot splits a Windows path into (root, rest), where root is +// everything up to and including the leading separator after a drive or UNC +// share. importlib._bootstrap_external uses it to reimplement os.path.join and +// os.path.isabs without importing ntpath at bootstrap time. +// +// The C accelerator runs PathCchSkipRoot over a copy with forward slashes +// folded to backslashes, then slices the original (unfolded) path at the root +// length. That is exactly the drive+root prefix ntpath.splitroot computes, so +// this port follows the ntpath.splitroot algorithm and joins its (drive, root) +// halves into the single root element the 2-tuple form returns. +// +// CPython: Modules/posixmodule.c:5230 os__path_splitroot_impl +// CPython: Lib/ntpath.py:172 splitroot +func osPathSplitroot(args []objects.Object, _ map[string]objects.Object) (objects.Object, error) { + if len(args) != 1 { + return nil, fmt.Errorf("TypeError: _path_splitroot() takes exactly one argument (%d given)", len(args)) + } + s, err := objects.Str(args[0]) + if err != nil { + return nil, err + } + root, rest := splitrootWindows(s) + return objects.NewTuple([]objects.Object{objects.NewStr(root), objects.NewStr(rest)}), nil +} + +const ( + srSep = '\\' + srAlt = '/' + srColon = ':' +) + +// srAt reads the slash-folded rune at index i (out-of-range yields 0), so the +// structural tests run over the normp = p.replace('/', '\\') view. +func srAt(r []rune, i int) rune { + if i < 0 || i >= len(r) { + return 0 + } + if r[i] == srAlt { + return srSep + } + return r[i] +} + +// srSlice returns string(r[a:b]) clamped to bounds, the Python p[a:b] slice. +func srSlice(r []rune, a, b int) string { + if a < 0 { + a = 0 + } + if b > len(r) { + b = len(r) + } + if a >= b { + return "" + } + return string(r[a:b]) +} + +// srFindSep is normp.find('\\', start) over the slash-folded view. +func srFindSep(r []rune, start int) int { + for i := start; i < len(r); i++ { + if srAt(r, i) == srSep { + return i + } + } + return -1 +} + +// srHasUNCPrefix reports normp[:8].upper() == '\\\\?\\UNC\\'. +func srHasUNCPrefix(r []rune) bool { + want := [8]rune{srSep, srSep, '?', srSep, 'U', 'N', 'C', srSep} + if len(r) < 8 { + return false + } + for i := 0; i < 8; i++ { + c := srAt(r, i) + if c >= 'a' && c <= 'z' { + c -= 'a' - 'A' + } + if c != want[i] { + return false + } + } + return true +} + +// srSplitUNC handles \\server\share or \\?\UNC\server\share roots. +func srSplitUNC(p string, r []rune) (string, string) { + start := 2 + if srHasUNCPrefix(r) { + start = 8 + } + index := srFindSep(r, start) + if index == -1 { + return p, "" + } + index2 := srFindSep(r, index+1) + if index2 == -1 { + return p, "" + } + // drive=p[:index2], root=p[index2:index2+1], tail=p[index2+1:]. + return srSlice(r, 0, index2+1), srSlice(r, index2+1, len(r)) +} + +// splitrootWindows is the ntpath.splitroot algorithm folded to the 2-tuple +// (drive+root, tail) shape os._path_splitroot returns. It indexes by rune to +// preserve Python str (code-point) slicing semantics. +// +// CPython: Lib/ntpath.py:172 splitroot +func splitrootWindows(p string) (root, tail string) { + r := []rune(p) + switch { + case srAt(r, 0) == srSep: + if srAt(r, 1) == srSep { + return srSplitUNC(p, r) + } + // Relative path with root, e.g. \Windows: drive="", root=p[:1]. + return srSlice(r, 0, 1), srSlice(r, 1, len(r)) + case srAt(r, 1) == srColon: + if srAt(r, 2) == srSep { + // Absolute drive-letter path, e.g. X:\Windows. + return srSlice(r, 0, 3), srSlice(r, 3, len(r)) + } + // Relative path with drive, e.g. X:Windows: drive=p[:2], root="". + return srSlice(r, 0, 2), srSlice(r, 2, len(r)) + default: + // Relative path, e.g. Windows. + return "", p + } +} diff --git a/module/os/scandir.go b/module/os/scandir.go index ca4823a94..eb5a86881 100644 --- a/module/os/scandir.go +++ b/module/os/scandir.go @@ -187,6 +187,8 @@ func direntryGetattr(o objects.Object, name objects.Object) (objects.Object, err return objects.NewBuiltinFunction("is_file", direntryIsFile(de)), nil case "is_symlink": return objects.NewBuiltinFunction("is_symlink", direntryIsSymlink(de)), nil + case "is_junction": + return objects.NewBuiltinFunction("is_junction", direntryIsJunction(de)), nil case "stat": return objects.NewBuiltinFunction("stat", direntryStat(de)), nil case "inode": @@ -252,6 +254,17 @@ func direntryIsSymlink(de *DirEntry) func(args []objects.Object, kwargs map[stri } } +// direntryIsJunction builds the bound is_junction() method. Junctions +// are a Windows-only concept; on every platform gopy targets here the +// answer is always False. +// +// CPython: Modules/posixmodule.c DirEntry_is_junction +func direntryIsJunction(_ *DirEntry) func(args []objects.Object, kwargs map[string]objects.Object) (objects.Object, error) { + return func(_ []objects.Object, _ map[string]objects.Object) (objects.Object, error) { + return objects.False(), nil + } +} + // direntryStat builds the bound stat(*, follow_symlinks=True) method. // // CPython: Modules/posixmodule.c:13278 DirEntry_stat diff --git a/module/os/stat_darwin.go b/module/os/stat_darwin.go index 007ddccf7..9bdcf3f27 100644 --- a/module/os/stat_darwin.go +++ b/module/os/stat_darwin.go @@ -13,10 +13,13 @@ import ( ) // statSysFields extracts platform fields from a FileInfo's syscall.Stat_t. -// Darwin/FreeBSD carry atime/ctime in Atimespec/Ctimespec. +// Darwin/FreeBSD carry atime/ctime in Atimespec/Ctimespec. The returned +// atime/ctime are full nanoseconds since the epoch so stat_result keeps +// the sub-second precision CPython's FileFinder relies on for cache +// invalidation. // CPython: Modules/posixmodule.c:3238 os_stat_impl func statSysFields(info goos.FileInfo) (ino, dev, nlink uint64, uid, gid uint32, atime, ctime int64) { - mtime := info.ModTime().Unix() + mtime := info.ModTime().UnixNano() atime = mtime ctime = mtime sys, ok := info.Sys().(*syscall.Stat_t) @@ -28,8 +31,8 @@ func statSysFields(info goos.FileInfo) (ino, dev, nlink uint64, uid, gid uint32, nlink = uint64(sys.Nlink) uid = sys.Uid gid = sys.Gid - atime = sys.Atimespec.Sec - ctime = sys.Ctimespec.Sec + atime = sys.Atimespec.Sec*1_000_000_000 + sys.Atimespec.Nsec + ctime = sys.Ctimespec.Sec*1_000_000_000 + sys.Ctimespec.Nsec return } @@ -60,6 +63,25 @@ func statBlockFields(info goos.FileInfo) (blksize, blocks, rdev int64) { return } +// fstatResult stats an open descriptor via fstat(2) and assembles the +// stat_result directly from the syscall.Stat_t. It never wraps the fd in +// an os.File, so no finalizer is armed that could close the live +// descriptor when the wrapper is garbage-collected. +// +// CPython: Modules/posixmodule.c:3399 os_fstat_impl +func fstatResult(fdVal int64) (*objects.StructSeq, error) { + var st syscall.Stat_t + if err := syscall.Fstat(int(fdVal), &st); err != nil { + return nil, fmt.Errorf("OSError: %w", err) + } + atime := st.Atimespec.Sec*1_000_000_000 + st.Atimespec.Nsec + mtime := st.Mtimespec.Sec*1_000_000_000 + st.Mtimespec.Nsec + ctime := st.Ctimespec.Sec*1_000_000_000 + st.Ctimespec.Nsec + return newStatResult(int64(st.Mode), int64(st.Ino), int64(st.Dev), int64(st.Nlink), + int64(st.Uid), int64(st.Gid), st.Size, atime, mtime, ctime, + int64(st.Blksize), st.Blocks, int64(st.Rdev)), nil +} + // getuid returns the real user ID of the calling process. // CPython: Modules/posixmodule.c:9635 os_getuid_impl func getuid(_ []objects.Object, _ map[string]objects.Object) (objects.Object, error) { diff --git a/module/os/stat_linux.go b/module/os/stat_linux.go index 95c11adbd..7c1d6c9d2 100644 --- a/module/os/stat_linux.go +++ b/module/os/stat_linux.go @@ -16,7 +16,7 @@ import ( // Linux carries atime/ctime in Atim/Ctim. // CPython: Modules/posixmodule.c:3238 os_stat_impl func statSysFields(info goos.FileInfo) (ino, dev, nlink uint64, uid, gid uint32, atime, ctime int64) { - mtime := info.ModTime().Unix() + mtime := info.ModTime().UnixNano() atime = mtime ctime = mtime sys, ok := info.Sys().(*syscall.Stat_t) @@ -28,8 +28,8 @@ func statSysFields(info goos.FileInfo) (ino, dev, nlink uint64, uid, gid uint32, nlink = uint64(sys.Nlink) //nolint:unconvert // Nlink is uint32 on linux/arm64 uid = sys.Uid gid = sys.Gid - atime = sys.Atim.Sec - ctime = sys.Ctim.Sec + atime = sys.Atim.Sec*1_000_000_000 + int64(sys.Atim.Nsec) //nolint:unconvert // Nsec is int32 on 32-bit linux + ctime = sys.Ctim.Sec*1_000_000_000 + int64(sys.Ctim.Nsec) //nolint:unconvert // Nsec is int32 on 32-bit linux return } @@ -60,6 +60,25 @@ func statBlockFields(info goos.FileInfo) (blksize, blocks, rdev int64) { return } +// fstatResult stats an open descriptor via fstat(2) and assembles the +// stat_result directly from the syscall.Stat_t. It never wraps the fd in +// an os.File, so no finalizer is armed that could close the live +// descriptor when the wrapper is garbage-collected. +// +// CPython: Modules/posixmodule.c:3399 os_fstat_impl +func fstatResult(fdVal int64) (*objects.StructSeq, error) { + var st syscall.Stat_t + if err := syscall.Fstat(int(fdVal), &st); err != nil { + return nil, fmt.Errorf("OSError: %w", err) + } + atime := st.Atim.Sec*1_000_000_000 + int64(st.Atim.Nsec) //nolint:unconvert // Nsec is int32 on 32-bit linux + mtime := st.Mtim.Sec*1_000_000_000 + int64(st.Mtim.Nsec) //nolint:unconvert // Nsec is int32 on 32-bit linux + ctime := st.Ctim.Sec*1_000_000_000 + int64(st.Ctim.Nsec) //nolint:unconvert // Nsec is int32 on 32-bit linux + return newStatResult(int64(st.Mode), int64(st.Ino), int64(st.Dev), int64(st.Nlink), + int64(st.Uid), int64(st.Gid), st.Size, atime, mtime, ctime, + int64(st.Blksize), int64(st.Blocks), int64(st.Rdev)), nil //nolint:unconvert // Blksize/Blocks are int32 on 32-bit linux +} + // getuid returns the real user ID of the calling process. // CPython: Modules/posixmodule.c:9635 os_getuid_impl func getuid(_ []objects.Object, _ map[string]objects.Object) (objects.Object, error) { diff --git a/module/os/stat_other.go b/module/os/stat_other.go index 2d45ff1d4..66934d412 100644 --- a/module/os/stat_other.go +++ b/module/os/stat_other.go @@ -7,14 +7,34 @@ package os import ( "fmt" goos "os" + "runtime" "github.com/tamnd/gopy/objects" ) +// fstatResult stats an open descriptor through a temporary os.File on +// platforms without a syscall.Stat_t. SetFinalizer is cleared on a +// best-effort basis; these fallback targets do not run the kqueue +// netpoller that makes the borrowed-fd close fatal on Darwin. +// +// CPython: Modules/posixmodule.c:3399 os_fstat_impl +func fstatResult(fdVal int64) (*objects.StructSeq, error) { + f := goos.NewFile(uintptr(fdVal), "") + runtime.SetFinalizer(f, nil) + info, err := f.Stat() + if err != nil { + return nil, fmt.Errorf("OSError: %w", err) + } + ino, dev, nlink, uid, gid, atime, ctime := statSysFields(info) + mtime := info.ModTime().UnixNano() + blksize, blocks, rdev := statBlockFields(info) + return newStatResult(statMode(info), int64(ino), int64(dev), int64(nlink), int64(uid), int64(gid), info.Size(), atime, mtime, ctime, blksize, blocks, rdev), nil +} + // statSysFields returns minimal values on unsupported platforms. // CPython: Modules/posixmodule.c:3238 os_stat_impl func statSysFields(info goos.FileInfo) (ino, dev, nlink uint64, uid, gid uint32, atime, ctime int64) { - mtime := info.ModTime().Unix() + mtime := info.ModTime().UnixNano() return 0, 0, 1, 0, 0, mtime, mtime } diff --git a/module/os/stat_windows.go b/module/os/stat_windows.go index c751cd9fb..18877ff46 100644 --- a/module/os/stat_windows.go +++ b/module/os/stat_windows.go @@ -5,12 +5,33 @@ package os import ( + "fmt" goos "os" + "runtime" "syscall" "github.com/tamnd/gopy/objects" ) +// fstatResult stats an open descriptor. Windows resolves the fd through a +// temporary os.File whose Stat goes via GetFileInformationByHandle; the +// netpoll-vs-finalizer hazard that motivates the POSIX raw-syscall path +// does not apply to Windows handles, so the wrapper is reused here. +// +// CPython: Modules/posixmodule.c:3399 os_fstat_impl +func fstatResult(fdVal int64) (*objects.StructSeq, error) { + f := goos.NewFile(uintptr(fdVal), "") + runtime.SetFinalizer(f, nil) + info, err := f.Stat() + if err != nil { + return nil, fmt.Errorf("OSError: %w", err) + } + ino, dev, nlink, uid, gid, atime, ctime := statSysFields(info) + mtime := info.ModTime().UnixNano() + blksize, blocks, rdev := statBlockFields(info) + return newStatResult(statMode(info), int64(ino), int64(dev), int64(nlink), int64(uid), int64(gid), info.Size(), atime, mtime, ctime, blksize, blocks, rdev), nil +} + // statSysFields extracts platform fields from a Windows FileInfo's // Win32FileAttributeData. Windows reports CreationTime / LastAccessTime // / LastWriteTime as FILETIME (100-ns intervals since 1601-01-01); we @@ -21,7 +42,7 @@ import ( // // CPython: Modules/posixmodule.c:1924 win32_stat func statSysFields(info goos.FileInfo) (ino, dev, nlink uint64, uid, gid uint32, atime, ctime int64) { - mtime := info.ModTime().Unix() + mtime := info.ModTime().UnixNano() atime = mtime ctime = mtime nlink = 1 @@ -29,8 +50,8 @@ func statSysFields(info goos.FileInfo) (ino, dev, nlink uint64, uid, gid uint32, if !ok || sys == nil { return } - atime = sys.LastAccessTime.Nanoseconds() / 1e9 - ctime = sys.CreationTime.Nanoseconds() / 1e9 + atime = sys.LastAccessTime.Nanoseconds() + ctime = sys.CreationTime.Nanoseconds() return } diff --git a/module/sys/config.go b/module/sys/config.go index f9552e1eb..bf5aefadc 100644 --- a/module/sys/config.go +++ b/module/sys/config.go @@ -61,6 +61,12 @@ func UpdateConfig(d *objects.Dict, cfg *initconfig.PyConfig) error { } } + // CPython: Python/sysmodule.c sets sys.dont_write_bytecode from the + // config alongside the flags structseq mirror. + if err := setItem(d, "dont_write_bytecode", objects.NewBool(cfg.WriteBytecode == 0)); err != nil { + return err + } + if cfg.PycachePrefix != "" { if err := setStr(d, "pycache_prefix", cfg.PycachePrefix); err != nil { return err diff --git a/module/sys/excepthook.go b/module/sys/excepthook.go index ea8d8df2b..88fa5873b 100644 --- a/module/sys/excepthook.go +++ b/module/sys/excepthook.go @@ -1,19 +1,19 @@ // sys.excepthook is invoked by the interpreter when an exception goes -// uncaught at the top level, and by threading.py to display exceptions -// raised in worker threads. CPython routes through PyErr_Display which -// formats a traceback. The gopy port writes a minimal "type: value" -// line to sys.stderr; full traceback formatting can plug in once the -// stack-walk hook is exposed. +// uncaught at the top level, by threading.py to display exceptions +// raised in worker threads, and by code.InteractiveConsole to render a +// traceback. CPython routes through PyErr_Display, which formats the +// full traceback (chained causes included) and writes it through the +// live sys.stderr object so a redirected or mocked stream captures it. // // CPython: Python/sysmodule.c sys_excepthook_impl // CPython: Python/pythonrun.c PyErr_Display - package sys import ( "fmt" "os" + "github.com/tamnd/gopy/errors" "github.com/tamnd/gopy/objects" ) @@ -21,15 +21,44 @@ func excepthookShim(args []objects.Object, _ map[string]objects.Object) (objects if len(args) < 3 { return objects.None(), nil } - exc := args[1] - repr, err := objects.Str(exc) - if err != nil { - // Best-effort hook: a failing repr is swallowed because the - // excepthook itself runs while an exception is already being - // reported and must not re-raise. - return objects.None(), nil //nolint:nilerr // intentional swallow + // PyErr_Display formats the value argument (args[1]); the type and + // traceback are derived from it. code.InteractiveConsole already + // stitched the traceback onto the value via with_traceback before + // calling the hook, so FormatException(value) renders the same frames. + text := excepthookText(args[1]) + + // Write through the live sys.stderr the way _PyErr_Display does, so a + // caller that swapped sys.stderr (tests mocking the stream, the REPL's + // captured stderr) sees the output instead of the process fd. + // + // CPython: Python/pythonrun.c _PyErr_Display (PySys_GetObject "stderr") + d := liveSysDict() + if d != nil { + if errf, _ := d.GetItem(objects.NewStr("stderr")); errf != nil && errf != objects.None() { + if write, err := objects.GetAttr(errf, objects.NewStr("write")); err == nil { + if _, err := objects.Call(write, objects.NewTuple([]objects.Object{objects.NewStr(text)}), nil); err == nil { + return objects.None(), nil + } + } + } } - tp := exc.Type().Name - fmt.Fprintf(os.Stderr, "%s: %s\n", tp, repr) + // Fall back to the process stderr only when sys.stderr is missing or + // unusable, mirroring CPython's last-resort write to the C-level + // stderr in _PyErr_Display. + fmt.Fprint(os.Stderr, text) return objects.None(), nil } + +// excepthookText renders the traceback string for the exception value, +// falling back to a "Type: repr" line when the object is not a gopy +// Exception (the hook must never raise while reporting an error). +func excepthookText(value objects.Object) string { + if exc, ok := value.(*errors.Exception); ok { + return errors.FormatException(exc) + } + repr, err := objects.Str(value) + if err != nil { + return value.Type().Name + "\n" + } + return value.Type().Name + ": " + repr + "\n" +} diff --git a/module/sys/module.go b/module/sys/module.go index 9906cea31..b2dcd4832 100644 --- a/module/sys/module.go +++ b/module/sys/module.go @@ -88,6 +88,50 @@ func SetPath(path []string) { } } +// pendingStdlibDir records the stdlib root the next sys-module build +// should expose as sys._stdlib_dir. FrozenImporter._resolve_filename +// reads it to locate the on-disk copy of a frozen module. SetStdlibDir +// also refreshes the live attribute when sys is already imported. +// +// CPython: Python/sysmodule.c:3951 _PySys_UpdateConfig (stdlib_dir) +var pendingStdlibDir string + +// SetStdlibDir records the stdlib root and exposes it as +// sys._stdlib_dir, refreshing the live attribute when sys is already +// imported. +// +// CPython: Python/sysmodule.c:3951 _PySys_UpdateConfig (stdlib_dir) +func SetStdlibDir(dir string) { + pendingStdlibDir = dir + if md := liveSysDict(); md != nil { + _ = md.SetItem(objects.NewStr("_stdlib_dir"), objects.NewStr(dir)) + } +} + +// pendingSafePath records the safe_path flag supplied on the command +// line (-P / -I / PYTHONSAFEPATH) before sys is built. buildModule +// reads it when stamping sys.flags; SetSafePath also refreshes the live +// flags struct-sequence when sys is already imported. +// +// CPython: Python/initconfig.c:1828 config_init_safe_path +var pendingSafePath bool + +// SetSafePath records safe_path and, when sys is already live, rebuilds +// sys.flags so sys.flags.safe_path reads True. +// +// CPython: Python/sysmodule.c:3478 set_flags_from_config (safe_path) +func SetSafePath(on bool) { + pendingSafePath = on + if md := liveSysDict(); md != nil { + cfg := &initconfig.PyConfig{} + cfg.InitPythonConfig() + if on { + cfg.SafePath = 1 + } + _ = md.SetItem(objects.NewStr("flags"), makeFlags(cfg)) + } +} + // LivePath returns the current sys.path entries as a Go slice, or nil // when sys has not been imported yet (PathFinder then falls back to // its static Paths snapshot, which is what unit tests that drive @@ -302,6 +346,9 @@ func buildModule() (*objects.Module, error) { // CPython: Python/sysmodule.c:3478 set_flags_from_config defaultCfg := &initconfig.PyConfig{} defaultCfg.InitPythonConfig() + if pendingSafePath { + defaultCfg.SafePath = 1 + } if err := setItem(md, "flags", makeFlags(defaultCfg)); err != nil { return nil, err } @@ -338,6 +385,15 @@ func buildModule() (*objects.Module, error) { return nil, err } } + // sys._stdlib_dir lets FrozenImporter._resolve_filename find the + // on-disk copy of a frozen module. + // + // CPython: Python/sysmodule.c:3951 _PySys_UpdateConfig (stdlib_dir) + if pendingStdlibDir != "" { + if err := setStr(md, "_stdlib_dir", pendingStdlibDir); err != nil { + return nil, err + } + } // sys.exc_info reads the per-thread handled-exception slot the vm // maintains across PUSH_EXC_INFO / POP_EXCEPT. unittest's // _Outcome.testPartExecutor and traceback.format_exc both call it diff --git a/module/sys/stdlib_module_names.go b/module/sys/stdlib_module_names.go new file mode 100644 index 000000000..b07c7c5f0 --- /dev/null +++ b/module/sys/stdlib_module_names.go @@ -0,0 +1,305 @@ +// Code generated from CPython Python/stdlib_module_names.h. DO NOT EDIT. +// +// CPython: Python/stdlib_module_names.h _Py_stdlib_module_names +package sys + +// stdlibModuleNames is the verbatim list backing sys.stdlib_module_names. +var stdlibModuleNames = []string{ + "__future__", + "_abc", + "_aix_support", + "_android_support", + "_apple_support", + "_ast", + "_ast_unparse", + "_asyncio", + "_bisect", + "_blake2", + "_bz2", + "_codecs", + "_codecs_cn", + "_codecs_hk", + "_codecs_iso2022", + "_codecs_jp", + "_codecs_kr", + "_codecs_tw", + "_collections", + "_collections_abc", + "_colorize", + "_compat_pickle", + "_contextvars", + "_csv", + "_ctypes", + "_curses", + "_curses_panel", + "_datetime", + "_dbm", + "_decimal", + "_elementtree", + "_frozen_importlib", + "_frozen_importlib_external", + "_functools", + "_gdbm", + "_hashlib", + "_heapq", + "_hmac", + "_imp", + "_interpchannels", + "_interpqueues", + "_interpreters", + "_io", + "_ios_support", + "_json", + "_locale", + "_lsprof", + "_lzma", + "_markupbase", + "_md5", + "_multibytecodec", + "_multiprocessing", + "_opcode", + "_opcode_metadata", + "_operator", + "_osx_support", + "_overlapped", + "_pickle", + "_posixshmem", + "_posixsubprocess", + "_py_abc", + "_py_warnings", + "_pydatetime", + "_pydecimal", + "_pyio", + "_pylong", + "_pyrepl", + "_queue", + "_random", + "_remote_debugging", + "_scproxy", + "_sha1", + "_sha2", + "_sha3", + "_signal", + "_sitebuiltins", + "_socket", + "_sqlite3", + "_sre", + "_ssl", + "_stat", + "_statistics", + "_string", + "_strptime", + "_struct", + "_suggestions", + "_symtable", + "_sysconfig", + "_thread", + "_threading_local", + "_tkinter", + "_tokenize", + "_tracemalloc", + "_types", + "_typing", + "_uuid", + "_warnings", + "_weakref", + "_weakrefset", + "_winapi", + "_wmi", + "_zoneinfo", + "_zstd", + "abc", + "annotationlib", + "antigravity", + "argparse", + "array", + "ast", + "asyncio", + "atexit", + "base64", + "bdb", + "binascii", + "bisect", + "builtins", + "bz2", + "cProfile", + "calendar", + "cmath", + "cmd", + "code", + "codecs", + "codeop", + "collections", + "colorsys", + "compileall", + "compression", + "concurrent", + "configparser", + "contextlib", + "contextvars", + "copy", + "copyreg", + "csv", + "ctypes", + "curses", + "dataclasses", + "datetime", + "dbm", + "decimal", + "difflib", + "dis", + "doctest", + "email", + "encodings", + "ensurepip", + "enum", + "errno", + "faulthandler", + "fcntl", + "filecmp", + "fileinput", + "fnmatch", + "fractions", + "ftplib", + "functools", + "gc", + "genericpath", + "getopt", + "getpass", + "gettext", + "glob", + "graphlib", + "grp", + "gzip", + "hashlib", + "heapq", + "hmac", + "html", + "http", + "idlelib", + "imaplib", + "importlib", + "inspect", + "io", + "ipaddress", + "itertools", + "json", + "keyword", + "linecache", + "locale", + "logging", + "lzma", + "mailbox", + "marshal", + "math", + "mimetypes", + "mmap", + "modulefinder", + "msvcrt", + "multiprocessing", + "netrc", + "nt", + "ntpath", + "nturl2path", + "numbers", + "opcode", + "operator", + "optparse", + "os", + "pathlib", + "pdb", + "pickle", + "pickletools", + "pkgutil", + "platform", + "plistlib", + "poplib", + "posix", + "posixpath", + "pprint", + "profile", + "pstats", + "pty", + "pwd", + "py_compile", + "pyclbr", + "pydoc", + "pydoc_data", + "pyexpat", + "queue", + "quopri", + "random", + "re", + "readline", + "reprlib", + "resource", + "rlcompleter", + "runpy", + "sched", + "secrets", + "select", + "selectors", + "shelve", + "shlex", + "shutil", + "signal", + "site", + "smtplib", + "socket", + "socketserver", + "sqlite3", + "sre_compile", + "sre_constants", + "sre_parse", + "ssl", + "stat", + "statistics", + "string", + "stringprep", + "struct", + "subprocess", + "symtable", + "sys", + "sysconfig", + "syslog", + "tabnanny", + "tarfile", + "tempfile", + "termios", + "textwrap", + "this", + "threading", + "time", + "timeit", + "tkinter", + "token", + "tokenize", + "tomllib", + "trace", + "traceback", + "tracemalloc", + "tty", + "turtle", + "turtledemo", + "types", + "typing", + "unicodedata", + "unittest", + "urllib", + "uuid", + "venv", + "warnings", + "wave", + "weakref", + "webbrowser", + "winreg", + "winsound", + "wsgiref", + "xml", + "xmlrpc", + "zipapp", + "zipfile", + "zipimport", + "zlib", + "zoneinfo", +} diff --git a/module/sys/sys.go b/module/sys/sys.go index bc68d2681..196c0e621 100644 --- a/module/sys/sys.go +++ b/module/sys/sys.go @@ -16,9 +16,13 @@ package sys import ( + "fmt" + "runtime" + "sort" "strconv" "github.com/tamnd/gopy/build" + "github.com/tamnd/gopy/imp" "github.com/tamnd/gopy/objects" ) @@ -51,6 +55,18 @@ func Init() (*objects.Dict, error) { if err := setStr(d, "float_repr_style", "short"); err != nil { return nil, err } + // sys.winver is the Windows-only DLL version string (MS_DLL_ID, the + // major.minor "3.14"). site._get_path reads it to build the per-user + // site-packages path under os.name == 'nt', so the bootstrap needs it + // before site runs. CPython sets it only on Windows. + // + // CPython: Python/sysmodule.c:3869 SET_SYS_FROM_STRING("winver", PyWin_DLLVersionString) + if runtime.GOOS == "windows" { + winver := strconv.Itoa(build.PythonMajorVersion) + "." + strconv.Itoa(build.PythonMinorVersion) + if err := setStr(d, "winver", winver); err != nil { + return nil, err + } + } if err := setInt(d, "hexversion", hexVersion()); err != nil { return nil, err @@ -74,7 +90,15 @@ func Init() (*objects.Dict, error) { if err := setItem(d, "builtin_module_names", builtinModuleNames()); err != nil { return nil, err } - if err := setItem(d, "stdlib_module_names", objects.NewTuple(nil)); err != nil { + stdlibNames := make([]objects.Object, len(stdlibModuleNames)) + for i, n := range stdlibModuleNames { + stdlibNames[i] = objects.NewStr(n) + } + stdlibSet, err := objects.NewFrozenset(stdlibNames) + if err != nil { + return nil, err + } + if err := setItem(d, "stdlib_module_names", stdlibSet); err != nil { return nil, err } if err := setItem(d, "hash_info", hashInfo()); err != nil { @@ -120,6 +144,69 @@ func Init() (*objects.Dict, error) { return nil, err } + // Private helper that strips the __dict__ and __weakref__ descriptors + // from a mutable type's dict and refreshes its caches. dataclasses + // calls it in _add_slots before rebuilding the class with __slots__, + // so the original (descriptor-bearing) class can be garbage collected + // (gh-135228). Immutable types are rejected. + // + // CPython: Python/sysmodule.c:2658 sys__clear_type_descriptors_impl + if err := setItem(d, "_clear_type_descriptors", objects.NewBuiltinFunction("_clear_type_descriptors", func(args []objects.Object, _ map[string]objects.Object) (objects.Object, error) { + if len(args) != 1 { + return nil, fmt.Errorf("TypeError: _clear_type_descriptors() takes exactly one argument (%d given)", len(args)) + } + t, ok := args[0].(*objects.Type) + if !ok { + return nil, fmt.Errorf("TypeError: _clear_type_descriptors() argument 1 must be type, not %s", args[0].Type().Name) + } + if t.TpFlags&objects.TpFlagImmutable != 0 { + return nil, fmt.Errorf("TypeError: argument is immutable") + } + objects.DelTypeDescr(t, "__dict__") + objects.DelTypeDescr(t, "__weakref__") + // Fire PyType_Modified unconditionally, matching CPython which + // calls it after the pops even when neither descriptor was present. + t.InvalidateVersionTag() + return objects.None(), nil + })); err != nil { + return nil, err + } + + // Import-system state the runtime exposes at the top level. CPython + // stamps these in PySys_Create / the import bootstrap; runpy and + // pkgutil read them directly. gopy's import is Go-side so the hooks + // list and the importer cache stay empty, but the source loaders do + // write __pycache__/..pyc files, so the default matches + // CPython: bytecode writing is on unless -B / PYTHONDONTWRITEBYTECODE. + // + // CPython: Python/sysmodule.c _PySys_AddObject path_hooks/path_importer_cache + if err := setItem(d, "dont_write_bytecode", objects.NewBool(false)); err != nil { + return nil, err + } + // pycache_prefix controls where the import machinery writes .pyc + // caches; None means alongside the source. cache_from_source reads it. + // + // CPython: Python/sysmodule.c sets sys.pycache_prefix from PyConfig + if err := setItem(d, "pycache_prefix", objects.None()); err != nil { + return nil, err + } + if err := setItem(d, "path_hooks", objects.NewList(nil)); err != nil { + return nil, err + } + // meta_path is the meta-path finder list. CPython seeds it with + // BuiltinImporter, FrozenImporter and PathFinder; gopy resolves those + // three Go-side, so the list starts empty. It still has to exist as a + // real list: import_helper saves and restores it around every test, + // and user code is free to append custom finders. + // + // CPython: Python/pylifecycle.c init_importlib (sys.meta_path) + if err := setItem(d, "meta_path", objects.NewList(nil)); err != nil { + return nil, err + } + if err := setItem(d, "path_importer_cache", objects.NewDict()); err != nil { + return nil, err + } + return d, nil } @@ -193,13 +280,28 @@ func maxsize() int64 { return 1<<31 - 1 } -// versionInfo returns sys.version_info as a five-tuple -// (major, minor, micro, releaselevel, serial). The struct-sequence -// named-tuple lands with 1651-sys-C; v0.7 uses a plain tuple. +// versionInfoType is the struct-sequence type behind sys.version_info: +// a five-field named tuple (major, minor, micro, releaselevel, serial) +// whose type repr reads sys.version_info(major=3, minor=14, ...). It +// subclasses tuple so isinstance(sys.version_info, tuple) holds and the +// values stay index-addressable, while sys.version_info.minor and the +// other named members resolve through the struct-sequence members. +// +// CPython: Python/sysmodule.c:850 version_info_type / make_version_info +var versionInfoType = objects.NewStructSeqType("sys.version_info", []objects.StructSeqField{ + {Name: "major", Doc: "Major release number"}, + {Name: "minor", Doc: "Minor release number"}, + {Name: "micro", Doc: "Patch release number"}, + {Name: "releaselevel", Doc: "'alpha', 'beta', 'candidate', or 'final'"}, + {Name: "serial", Doc: "Serial release number"}, +}) + +// versionInfo returns sys.version_info as the named struct-sequence +// (major, minor, micro, releaselevel, serial). // // CPython: Python/sysmodule.c:3884 make_version_info -func versionInfo() *objects.Tuple { - return objects.NewTuple([]objects.Object{ +func versionInfo() *objects.StructSeq { + return objects.NewStructSeq(versionInfoType, []objects.Object{ objects.NewInt(int64(build.PythonMajorVersion)), objects.NewInt(int64(build.PythonMinorVersion)), objects.NewInt(0), @@ -242,17 +344,29 @@ func implementation() *objects.Namespace { return n } -// builtinModuleNames returns the tuple of module names that are -// compiled into the interpreter. Until 1623 lands the import system -// the list contains just the modules gopy initializes statically -// (builtins, sys). The slice grows as 1651 lands more modules. +// builtinModuleNames returns the sorted tuple of module names compiled +// into the interpreter. CPython builds this directly from +// PyImport_Inittab; gopy statically links every extension module into +// the binary, so the table is the inittab snapshot minus the handful of +// pure-Python modules gopy registers there only as an import shortcut +// (imp.ShadowedByStdlib), keeping this list in lockstep with +// _imp.is_builtin. // // CPython: Python/sysmodule.c:3859 list_builtin_module_names func builtinModuleNames() *objects.Tuple { - return objects.NewTuple([]objects.Object{ - objects.NewStr("builtins"), - objects.NewStr("sys"), - }) + names := make([]string, 0, 64) + for _, e := range imp.InittabSnapshot() { + if imp.ShadowedByStdlib(e.Name) { + continue + } + names = append(names, e.Name) + } + sort.Strings(names) + items := make([]objects.Object, len(names)) + for i, n := range names { + items[i] = objects.NewStr(n) + } + return objects.NewTuple(items) } // hashInfo is sys.hash_info as a SimpleNamespace. The field order diff --git a/module/sys/sys_test.go b/module/sys/sys_test.go index f44fbe1d8..63071436d 100644 --- a/module/sys/sys_test.go +++ b/module/sys/sys_test.go @@ -4,6 +4,7 @@ import ( "strings" "testing" + "github.com/tamnd/gopy/imp" "github.com/tamnd/gopy/objects" ) @@ -42,10 +43,11 @@ func TestInitVersionInfoShape(t *testing.T) { if err != nil { t.Fatalf("GetItem(version_info): %v", err) } - tup, ok := v.(*objects.Tuple) + ss, ok := v.(*objects.StructSeq) if !ok { - t.Fatalf("version_info is %T, want *Tuple", v) + t.Fatalf("version_info is %T, want *StructSeq", v) } + tup := ss.AsTuple() if tup.Len() != 5 { t.Fatalf("version_info has %d items, want 5", tup.Len()) } @@ -136,6 +138,15 @@ func TestInitMaxsizePositive(t *testing.T) { // builtins and sys are static-init, so they are advertised here. // Once 1623 lands the import system, this list grows. func TestInitBuiltinModuleNamesIncludesSys(t *testing.T) { + // builtin_module_names mirrors the live inittab. The full binary + // links the builtins module; register a stub here so the snapshot + // advertises it without importing module/builtins (which would form + // an import cycle through builtins -> module/sys). + if !imp.IsBuiltinName("builtins") { + _ = imp.AppendInittab("builtins", func() (*objects.Module, error) { + return objects.NewModule("builtins"), nil + }) + } d, err := Init() if err != nil { t.Fatalf("Init: %v", err) diff --git a/module/winreg/module.go b/module/winreg/module.go new file mode 100644 index 000000000..048829ff7 --- /dev/null +++ b/module/winreg/module.go @@ -0,0 +1,146 @@ +// Package winreg is the gopy port of CPython's PC/winreg.c. +// CPython only registers winreg on Windows. gopy registers it on every +// platform so importlib._bootstrap_external (which does `import winreg` +// at module top level under `if sys.platform == 'win32'`) imports +// regardless; stdlib consumers gate their use behind a win32 check so the +// module is only actually loaded on Windows. The exposed surface is the +// HKEY_*/KEY_*/REG_* integer constants and the `error` alias that +// _bootstrap_external's WindowsRegistryFinder reads at find_spec time. +// The function surface (OpenKey, QueryValue, EnumKey, CreateKey, the PyHKEY +// type, etc.) is not yet ported: it requires the Windows registry syscalls +// (advapi32), which gopy has no host binding for, and the default meta_path +// never installs WindowsRegistryFinder, so those names are never reached. +// Attribute lookups for the unported names raise AttributeError; that +// surfaces at call time on Windows for the deprecated registry finder only. +// +// CPython: PC/winreg.c:1 winreg module +// CPython: PC/winreg.c:2121 exec_module (constant registration) +package winreg + +import ( + "math/big" + "runtime" + + "github.com/tamnd/gopy/errors" + "github.com/tamnd/gopy/imp" + "github.com/tamnd/gopy/objects" +) + +func init() { + // CPython only builds winreg on Windows (it lives in PC/winreg.c and is + // registered through PC/config.c's _PyImport_Inittab only for the + // Windows build). _bootstrap_external imports it solely under + // `if sys.platform == 'win32'`, so on other platforms the name must stay + // absent: test.support.import_helper.import_module('winreg') then raises + // SkipTest, exactly as it does on CPython/macOS. + if runtime.GOOS != "windows" { + return + } + _ = imp.AppendInittab("winreg", buildModule) +} + +// buildModule constructs the winreg module dict with the constants +// CPython's exec_module registers. The HKEY_* predefined handles are the +// sign-extended 64-bit pointer values PyLong_FromVoidPtr yields on a 64-bit +// build (e.g. HKEY_CLASSES_ROOT == (HKEY)0x80000000 widened to +// 0xFFFFFFFF80000000). The KEY_*/REG_* access and value-type constants are +// the documented Windows SDK (winnt.h, winreg.h) literals inskey/ADD_INT +// register. +// +// CPython: PC/winreg.c:2121 exec_module +func buildModule() (*objects.Module, error) { + m := objects.NewModule("winreg") + d := m.Dict() + + // Predefined HKEY handles. CPython: PC/winreg.c:2125-2132 inskey. + // On a 64-bit build PyLong_FromVoidPtr sign-extends the (HKEY)0x8000000N + // pointer to a 64-bit value that overflows int64, so they are built from + // big.Int. + hkeys := map[string]uint64{ + "HKEY_CLASSES_ROOT": 0xFFFFFFFF80000000, + "HKEY_CURRENT_USER": 0xFFFFFFFF80000001, + "HKEY_LOCAL_MACHINE": 0xFFFFFFFF80000002, + "HKEY_USERS": 0xFFFFFFFF80000003, + "HKEY_PERFORMANCE_DATA": 0xFFFFFFFF80000004, + "HKEY_CURRENT_CONFIG": 0xFFFFFFFF80000005, + "HKEY_DYN_DATA": 0xFFFFFFFF80000006, + } + for name, value := range hkeys { + b := new(big.Int).SetUint64(value) + if err := d.SetItem(objects.NewStr(name), objects.NewIntFromBig(b)); err != nil { + return nil, err + } + } + + // Access-right and value-type constants. CPython: PC/winreg.c:2135-2210 + // ADD_INT. Values are the winnt.h / winreg.h public literals. + consts := map[string]int64{ + // Registry access rights (winnt.h). + "KEY_QUERY_VALUE": 0x0001, + "KEY_SET_VALUE": 0x0002, + "KEY_CREATE_SUB_KEY": 0x0004, + "KEY_ENUMERATE_SUB_KEYS": 0x0008, + "KEY_NOTIFY": 0x0010, + "KEY_CREATE_LINK": 0x0020, + "KEY_WOW64_64KEY": 0x0100, + "KEY_WOW64_32KEY": 0x0200, + "KEY_READ": 0x20019, + "KEY_WRITE": 0x20006, + "KEY_EXECUTE": 0x20019, + "KEY_ALL_ACCESS": 0xF003F, + + // RegCreateKeyEx / RegOpenKeyEx options (winnt.h). + "REG_OPTION_RESERVED": 0x0000, + "REG_OPTION_NON_VOLATILE": 0x0000, + "REG_OPTION_VOLATILE": 0x0001, + "REG_OPTION_CREATE_LINK": 0x0002, + "REG_OPTION_BACKUP_RESTORE": 0x0004, + "REG_OPTION_OPEN_LINK": 0x0008, + "REG_LEGAL_OPTION": 0x000F, + + // RegCreateKeyEx disposition (winnt.h). + "REG_CREATED_NEW_KEY": 0x00000001, + "REG_OPENED_EXISTING_KEY": 0x00000002, + + // RegRestoreKey / RegReplaceKey flags (winnt.h). + "REG_WHOLE_HIVE_VOLATILE": 0x00000001, + "REG_REFRESH_HIVE": 0x00000002, + "REG_NO_LAZY_FLUSH": 0x00000004, + + // RegNotifyChangeKeyValue filter (winnt.h). + "REG_NOTIFY_CHANGE_NAME": 0x00000001, + "REG_NOTIFY_CHANGE_ATTRIBUTES": 0x00000002, + "REG_NOTIFY_CHANGE_LAST_SET": 0x00000004, + "REG_NOTIFY_CHANGE_SECURITY": 0x00000008, + "REG_LEGAL_CHANGE_FILTER": 0x0000000F, + + // Registry value types (winnt.h). + "REG_NONE": 0, + "REG_SZ": 1, + "REG_EXPAND_SZ": 2, + "REG_BINARY": 3, + "REG_DWORD": 4, + "REG_DWORD_LITTLE_ENDIAN": 4, + "REG_DWORD_BIG_ENDIAN": 5, + "REG_LINK": 6, + "REG_MULTI_SZ": 7, + "REG_RESOURCE_LIST": 8, + "REG_FULL_RESOURCE_DESCRIPTOR": 9, + "REG_RESOURCE_REQUIREMENTS_LIST": 10, + "REG_QWORD": 11, + "REG_QWORD_LITTLE_ENDIAN": 11, + } + for name, value := range consts { + if err := d.SetItem(objects.NewStr(name), objects.NewInt(value)); err != nil { + return nil, err + } + } + + // winreg.error is an alias of OSError. CPython: PC/winreg.c:2218 + // (st->PyHKEY_Type aside, the module sets error = PyExc_OSError). + if err := d.SetItem(objects.NewStr("error"), errors.PyExc_OSError); err != nil { + return nil, err + } + + return m, nil +} diff --git a/module/zlib/module.go b/module/zlib/module.go index 201955f3b..91c708067 100644 --- a/module/zlib/module.go +++ b/module/zlib/module.go @@ -151,7 +151,14 @@ func compressObjFlush(args []objects.Object, _ map[string]objects.Object) (objec return nil, fmt.Errorf("ValueError: compressor has already been flushed") } - mode := zSyncFlush + // flush()'s mode argument defaults to Z_FINISH, not Z_SYNC_FLUSH: + // the common `compressobj().compress(x) + flush()` idiom must emit a + // complete stream (final deflate block) so a one-shot decompressor can + // read it back. zipfile relies on this when it stores compressed + // members, and zipimport then decompresses them with raw inflate. + // + // CPython: Modules/zlibmodule.c:478 zlib_Compress_flush_impl (mode=Z_FINISH) + mode := zFinish if len(args) >= 2 { m, err := intFromObj(args[1]) if err != nil { @@ -512,8 +519,11 @@ func zlibCRC32(args []objects.Object, kwargs map[string]objects.Object) (objects } result := crc32.Update(prev, crc32.IEEETable, data) - // CPython returns a signed 32-bit integer widened to Python int. - return objects.NewInt(int64(int32(result))), nil + // CPython returns the checksum as an unsigned 32-bit value widened to + // a Python int (PyLong_FromUnsignedLong(value & 0xffffffffU)). + // + // CPython: Modules/zlibmodule.c:1901 zlib_crc32_impl + return objects.NewInt(int64(uint64(result))), nil } // zlibAdler32 computes the Adler-32 checksum, optionally updating a previous value. @@ -545,7 +555,11 @@ func zlibAdler32(args []objects.Object, kwargs map[string]objects.Object) (objec } result := adler32Update(prev, data) - return objects.NewInt(int64(int32(result))), nil + // CPython returns the checksum as an unsigned 32-bit value widened to + // a Python int (PyLong_FromUnsignedLong(value & 0xffffffffU)). + // + // CPython: Modules/zlibmodule.c:1901 zlib_adler32_impl + return objects.NewInt(int64(uint64(result))), nil } // zlibCompressobj returns a streaming Compress object. @@ -752,6 +766,8 @@ func toBytes(o objects.Object) ([]byte, error) { switch v := o.(type) { case *objects.Bytes: return v.Bytes(), nil + case *objects.ByteArray: + return v.Bytes(), nil case *objects.Unicode: s, err := objects.Str(o) if err != nil { diff --git a/monitor/basecode.go b/monitor/basecode.go new file mode 100644 index 000000000..d8c669e17 --- /dev/null +++ b/monitor/basecode.go @@ -0,0 +1,70 @@ +// Recovering the underlying bytecode from a live code object. The +// interpreter rewrites bytecode in place for both specialization +// (quickening) and instrumentation (the INSTRUMENTED_ markers plus +// the INSTRUMENTED_LINE side table). marshal needs the original, +// un-instrumented, un-specialized bytes so a .pyc never captures the +// transient monitoring state of the process that wrote it. +// +// CPython: Python/instrumentation.c:637 _Py_GetBaseCodeUnit +// CPython: Objects/codeobject.c:2293 deopt_code + +package monitor + +import ( + "github.com/tamnd/gopy/compile" + "github.com/tamnd/gopy/objects" + "github.com/tamnd/gopy/specialize" +) + +// GetBaseCodeUnit returns the underlying opcode at codeunit i, stripping +// both specialization and instrumentation. The oparg byte is returned +// unchanged for the common case; instrumentation markers carry no oparg +// rewrite (ENTER_EXECUTOR, the only opcode that does, is JIT-only and +// never appears in gopy bytecode). +// +// CPython: Python/instrumentation.c:637 _Py_GetBaseCodeUnit +func GetBaseCodeUnit(code *objects.Code, i int) compile.Opcode { + opcode := compile.Opcode(code.Code[2*i]) + + // Below the instrumented range it is purely a specialized opcode: + // deopt to the adaptive parent and we are done. + if !IsInstrumented(opcode) { + return specialize.Deopt(opcode) + } + + data := CoMonitoring(code) + if opcode == compile.INSTRUMENTED_LINE && data != nil && data.Lines != nil { + opcode = compile.Opcode(getOriginalOpcode(data.Lines, i)) + } + if opcode == compile.INSTRUMENTED_INSTRUCTION && data != nil && i < len(data.PerInstructionOpcodes) { + opcode = compile.Opcode(data.PerInstructionOpcodes[i]) + } + if base := deinstrument[opcode]; base != 0 { + return base + } + return specialize.Deopt(opcode) +} + +// BaseCode returns a fresh slice holding code's bytecode with all +// specialization and instrumentation removed and every inline cache +// cell zeroed. This is the byte sequence marshal must write so .pyc +// output is independent of the warming/monitoring state of the +// process. On load specialize.Enable re-quickens the adaptive form. +// +// CPython: Objects/codeobject.c:2293 deopt_code (via _PyCode_GetCode) +func BaseCode(code *objects.Code) []byte { + out := make([]byte, len(code.Code)) + copy(out, code.Code) + size := len(out) / 2 + for i := 0; i < size; i++ { + base := GetBaseCodeUnit(code, i) + out[2*i] = byte(base) + caches := specialize.CacheCount(base) + for j := 1; j <= caches && i+j < size; j++ { + out[2*(i+j)] = 0 + out[2*(i+j)+1] = 0 + } + i += caches + } + return out +} diff --git a/monitor/install.go b/monitor/install.go index 74787f60d..b76fcfaa1 100644 --- a/monitor/install.go +++ b/monitor/install.go @@ -62,8 +62,7 @@ func forceInstrument(code *objects.Code, interp *InterpState) error { } for instr := 0; instr < len(data.Tools); { - op := compile.Opcode(byteAt(code.Code, instr)) - base := DeInstrument(specialize.Deopt(op)) + base := GetBaseCodeUnit(code, instr) if !OpcodeHasEvent(base) { instr += 1 + cacheCount(base, code) continue diff --git a/monitor/line.go b/monitor/line.go index 57994462d..df85b88df 100644 --- a/monitor/line.go +++ b/monitor/line.go @@ -136,8 +136,7 @@ func initializeLines(code *objects.Code, line *LineInstrumentationData) { codeLen := instructionCount(code) currentLine := -1 for i := 0; i < codeLen; { - op := compile.Opcode(byteAt(code.Code, i)) - base := DeInstrument(specialize.Deopt(op)) + base := GetBaseCodeUnit(code, i) line2 := LineForOffset(code, i) setLineDelta(line, i, computeLineDelta(code, line2)) length := 1 + cacheCount(base, code) diff --git a/objects/classmethod_descr.go b/objects/classmethod_descr.go index 8e007c788..af4b9eb37 100644 --- a/objects/classmethod_descr.go +++ b/objects/classmethod_descr.go @@ -105,11 +105,12 @@ func classMethodDescrGet2(descr Object, obj Object, ownerType *Type) (Object, er } Incref(t) bf := &BuiltinFunction{ - Name: d.def.Name, - Conv: MethVarargs | MethKeywords, - Self: t, - ownsSelf: true, - Doc: d.def.Doc, + Name: d.def.Name, + Conv: MethVarargs | MethKeywords, + Self: t, + ownsSelf: true, + methOrigin: d, + Doc: d.def.Doc, Fn: func(args []Object, kwargs map[string]Object) (Object, error) { return cfunctionCall(cf, args, kwargs) }, diff --git a/objects/copyreg_hook.go b/objects/copyreg_hook.go index 3cd45eb0d..8f138b0ab 100644 --- a/objects/copyreg_hook.go +++ b/objects/copyreg_hook.go @@ -38,3 +38,12 @@ var CurrentBuiltinsHook func() Object // // CPython: Python/import.c:1450 PyImport_ImportModule var ImportModuleHook func(name string) (Object, error) + +// ModuleReprHook formats a module's repr by delegating to the vendored +// importlib._bootstrap._module_repr, exactly as CPython's C +// module_repr forwards to _PyImport_ImportlibModuleRepr. Wired by +// vm.init(); nil during early bootstrap, where moduleRepr falls back to +// a minimal Go rendering. +// +// CPython: Python/import.c:3346 _PyImport_ImportlibModuleRepr +var ModuleReprHook func(m Object) (string, error) diff --git a/objects/dict_mutate.go b/objects/dict_mutate.go index 31d8d9cbf..2ae7bf5c7 100644 --- a/objects/dict_mutate.go +++ b/objects/dict_mutate.go @@ -303,6 +303,18 @@ func dictResize(d *Dict, minNew int) error { return err } } + // A resize rebuilds the table, so a key's slot index changes even + // though no key was added or removed. CPython hands the resized dict + // a freshly allocated PyDictKeysObject whose dk_version starts at 0, + // which invalidates every inline cache stamped against the old keys. + // gopy reuses the same *Dict, so reset the version here; otherwise a + // value-replacement that triggers a resize (the load check runs + // before the replace-vs-insert branch in dictInsert) would leave the + // stale dk_version in place and LOAD_ATTR_INSTANCE_VALUE would read + // the wrong slot. + // + // CPython: Objects/dictobject.c:2065 dictresize (new_keys->dk_version = 0) + d.invalidateKeysVersion() return nil } diff --git a/objects/file.go b/objects/file.go index 0ddf1c93d..b2a35cea9 100644 --- a/objects/file.go +++ b/objects/file.go @@ -22,8 +22,29 @@ import ( "fmt" "io" "os" + "runtime" + "unsafe" ) +// ClearOSFileFinalizer disarms the close finalizer that os.NewFile / +// os.OpenFile arm on a borrowed descriptor. The finalizer is set on the +// unexported inner *os.file, not on the returned *os.File, so +// runtime.SetFinalizer(f, nil) on the outer handle is a no-op and leaves the +// close live: a later GC then closes a descriptor whose integer was already +// freed and reused by another open file, surfacing as a spurious EBADF +// ("bad file descriptor") on the unrelated file's next write. os.File is +// struct{ file *file } with the inner pointer at offset 0, so read that +// pointer and clear the finalizer on the object it actually points at. +func ClearOSFileFinalizer(f *os.File) { + if f == nil { + return + } + inner := *(*unsafe.Pointer)(unsafe.Pointer(f)) + if inner != nil { + runtime.SetFinalizer((*byte)(inner), nil) + } +} + // File mirrors the union of FileIO + the buffer + TextIOWrapper. The // read/write side is decided at open time and does not change; mixing // '+' modes wires both rd and wr at construction. wr is io.Writer @@ -51,6 +72,16 @@ type File struct { f *os.File rd *bufio.Reader wr io.Writer + + // noCloseFd marks a borrowed descriptor (the standard streams wrap + // os.Stdout/os.Stderr or an inherited pipe). fileno() still reports + // f's fd, but Close() must not close it: tearing down the sys.stdout + // wrapper, or letting a transient wrapper be collected, must leave the + // process's real fd 1/2 open. Mirrors CPython opening the std streams + // with closefd=False. + // + // CPython: Modules/_io/fileio.c:399 _io_FileIO___init___impl (closefd) + noCloseFd bool } // FileType is the type singleton for File. CPython exposes three or @@ -115,6 +146,21 @@ func NewWriterFile(w io.Writer, name, mode string) *File { errors: "strict", wr: w, } + // A caller-supplied writer that is really an *os.File (the normal CLI + // case, where sys.stdout/stderr wrap os.Stdout/os.Stderr, and the + // subprocess case, where they wrap an inherited pipe fd) keeps a live + // descriptor. Record it so fileno() returns the real fd, matching + // CPython's fd-backed standard streams. Writes still pass straight + // through w (no bufio layer) so output ordering is unchanged; only a + // non-fd writer such as a test bytes.Buffer leaves f nil and makes + // fileno() raise io.UnsupportedOperation, as CPython does for a + // stream with no underlying descriptor. + // + // CPython: Python/sysmodule.c:3795 sys_init_streams (fd-backed FileIO) + if osf, ok := w.(*os.File); ok { + fi.f = osf + fi.noCloseFd = true + } fi.init(FileType) return fi } @@ -277,7 +323,7 @@ func (fi *File) Close() error { firstErr = ioErr(err) } } - if fi.f != nil { + if fi.f != nil && !fi.noCloseFd { if err := fi.f.Close(); err != nil && firstErr == nil { firstErr = ioErr(err) } @@ -397,7 +443,14 @@ func fileGetattr(o Object, name Object) (Object, error) { if fn := fileMethod(fi, n.v); fn != nil { return fn, nil } - return nil, fmt.Errorf("AttributeError: '%s' object has no attribute '%s'", FileType.Name, n.v) + // Anything the custom table does not handle (dunders such as + // __class__, __hash__, __eq__, the rich-compare set) resolves through + // the type the way PyObject_GenericGetAttr walks the MRO. Without this + // fallback, isinstance()/abc.__instancecheck__ probes against a file + // object raise spuriously because __class__ is missing. + // + // CPython: Objects/object.c:1430 _PyObject_GenericGetAttrWithDict + return GenericGetAttr(o, name) } // fileSetattr supports a single mutable attribute today: the mode diff --git a/objects/function.go b/objects/function.go index 1ee2e71c3..3ba671a12 100644 --- a/objects/function.go +++ b/objects/function.go @@ -738,6 +738,16 @@ func funcGetAttr(o Object, name Object) (Object, error) { if fn.Dict != nil { v, err := fn.Dict.GetItem(name) if err == nil && v != nil { + // func_getattro reads the attribute out of the function's + // __dict__ through PyDict_GetItemWithError and then Py_XINCREFs + // it before returning, so the caller owns the reference. Without + // the Incref the caller's arg-drop decrefs a value still held by + // __dict__ toward zero; a stored list then gets emptied by + // list_dealloc, so a second read of the same attribute sees an + // empty list (this is what drained mock's patched.patchings). + // + // CPython: Objects/funcobject.c:705 func_getattro (Py_XINCREF) + Incref(v) return v, nil } } diff --git a/objects/function_builtin.go b/objects/function_builtin.go index 0b0ce57f2..2e411e7bc 100644 --- a/objects/function_builtin.go +++ b/objects/function_builtin.go @@ -73,6 +73,16 @@ type BuiltinFunction struct { // CPython: Objects/descrobject.c:230 method_get (PyCMethod_New) boundDescr *MethodDescr + // methOrigin is the stable descriptor a builtin method was minted from + // when no *MethodDescr drives its call path: classmethod_get binds a + // classmethod_descriptor (PyCMethod_New) into a builtin_function_or_method + // whose m_ml is the descriptor's PyMethodDef, shared across every binding. + // methFuncIdentical / builtinFunctionHash use it as the m_ml proxy so + // int.from_bytes == int.from_bytes even though the bindings are distinct. + // + // CPython: Objects/descrobject.c:95 classmethod_get (a->m_ml == b->m_ml) + methOrigin Object + // kwParams, when non-nil, names every keyword the Argument Clinic // signature accepts. builtinFunctionVectorcall runs the AC // extraneous-keyword scan over the original kwnames objects before @@ -338,6 +348,9 @@ func methFuncIdentical(a, b *BuiltinFunction) bool { if a.boundDescr != nil || b.boundDescr != nil { return a.boundDescr == b.boundDescr } + if a.methOrigin != nil || b.methOrigin != nil { + return a.methOrigin == b.methOrigin + } return a == b } @@ -382,9 +395,12 @@ func builtinFunctionHash(o Object) (int64, error) { } var y int64 var err error - if bf.boundDescr != nil { + switch { + case bf.boundDescr != nil: y, err = identityHash(bf.boundDescr) - } else { + case bf.methOrigin != nil: + y, err = identityHash(bf.methOrigin) + default: y, err = identityHash(bf) } if err != nil { diff --git a/objects/module.go b/objects/module.go index b2323ae70..8869dfd59 100644 --- a/objects/module.go +++ b/objects/module.go @@ -41,6 +41,16 @@ func init() { // // CPython: Objects/moduleobject.c:1416 PyModule_Type (tp_dictoffset set) ModuleType.HasDict = true + // PyModule_Type ships a __dict__ getset in module_getset. A bare module + // answers __dict__ through moduleGetattr, but a ModuleType subclass that + // reaches __dict__ via the generic path (importlib.util._LazyModule does + // `object.__getattribute__(self, '__dict__')`) needs the descriptor in + // the MRO. type_new_descriptors skips installing one on the subclass + // because the dict slot is inherited, so the descriptor must live on + // ModuleType itself. + // + // CPython: Objects/moduleobject.c:728 module_getset (__dict__ getset) + installInstanceDictDescr(ModuleType) // A module also carries md_weaklist (a non-zero tp_weaklistoffset), so // a subclass inherits the weakref slot rather than adding its own. This // keeps a ModuleType subclass layout-compatible with module, which @@ -50,6 +60,15 @@ func init() { ModuleType.HasWeakref = true ModuleType.Repr = moduleRepr ModuleType.Str = moduleRepr + // A module owns its md_dict, so the cycle collector must follow that + // edge: a module whose __dict__ holds functions whose __globals__ is + // that same dict forms a reference cycle (the common case for any + // executed module). Without tp_traverse the collector treats md_dict + // as externally rooted and never reclaims the cycle, so __del__ of a + // cyclic object defined in the module body would never run. + // + // CPython: Objects/moduleobject.c:739 module_traverse + ModuleType.TpTraverse = moduleTraverse // Modules are hashable by identity in CPython (tp_hash = PyObject_GenericHash). // CPython: Objects/moduleobject.c:766 PyModule_Type (tp_hash not overridden → id-based) ModuleType.Hash = IdentityHash @@ -62,6 +81,9 @@ func init() { ModuleType.TpNew = func(cls *Type, args []Object, kwargs map[string]Object) (Object, error) { m := &Module{dict: NewDict()} m.init(cls) + if h := GCTrackHook; h != nil { + h(m) + } return m, nil } @@ -162,6 +184,9 @@ func NewModule(name string) *Module { m := &Module{dict: NewDict()} m.init(ModuleType) _ = m.dict.SetItem(NewStr("__name__"), NewStr(name)) + if h := GCTrackHook; h != nil { + h(m) + } return m } @@ -180,9 +205,27 @@ func NewModuleWithDict(name string, d *Dict) *Module { if has, _ := d.Contains(NewStr("__name__")); !has { _ = d.SetItem(NewStr("__name__"), NewStr(name)) } + if h := GCTrackHook; h != nil { + h(m) + } return m } +// moduleTraverse visits the module's __dict__ (md_dict) and per-module +// state so the cycle collector can account for the references a module +// holds. CPython's module_traverse also visits md_dict. +// +// CPython: Objects/moduleobject.c:739 module_traverse +func moduleTraverse(o Object, visit Visitor) error { + m := o.(*Module) + if m.dict != nil { + if err := visit(m.dict); err != nil { + return err + } + } + return nil +} + // Dict returns the module's attribute dict (__dict__). // // CPython: Objects/moduleobject.c:459 PyModule_GetDict @@ -253,6 +296,16 @@ func (m *Module) State() any { return m.state } // CPython: Objects/moduleobject.c:486 PyModule_SetState (gopy analog) func (m *Module) SetState(s any) { m.state = s } +// ModuleAttrErrorHook, when set, builds the AttributeError raised for a +// module attribute miss. The import system (package imp) installs it so +// the message can surface the stdlib-shadowing and circular-import hints +// from _Py_module_getattro_impl, which depend on sys state the objects +// package cannot reach directly. Nil in unit tests that exercise objects +// in isolation; module.go then falls back to the plain message. +// +// CPython: Objects/moduleobject.c:1024 _Py_module_getattro_impl +var ModuleAttrErrorHook func(m *Module, name string) error + // moduleGetattr implements __getattr__ for module objects. It checks // __dict__ first, then falls back to the PEP 562 __getattr__ callable // stored in __dict__ under "__getattr__". @@ -335,9 +388,16 @@ func moduleGetattr(o Object, name Object) (Object, error) { if gaErr == nil { return callOneArg(gaObj, name) } - // Best-effort error message mirroring module_getattro's tail. + // Best-effort error message mirroring module_getattro's tail. The + // import system registers ModuleAttrErrorHook to surface the + // stdlib-shadowing and circular-import hints (_Py_module_getattro_impl), + // which need sys.path / sys.flags / sys.stdlib_module_names access the + // objects package cannot reach without an import cycle. // - // CPython: Objects/moduleobject.c:1042 PyErr_Format module has no attribute + // CPython: Objects/moduleobject.c:1024 _Py_module_getattro_impl (error tail) + if ModuleAttrErrorHook != nil { + return nil, ModuleAttrErrorHook(m, key) + } if modName := moduleStrAttr(m, "__name__"); modName != "" { return nil, fmt.Errorf("AttributeError: module '%s' has no attribute '%s'", modName, key) } @@ -418,31 +478,42 @@ func moduleSetattr(o Object, name, value Object) error { return m.dict.SetItem(name, value) } -// moduleRepr returns the canonical module repr. -// Four forms mirror CPython: -// - when __file__ is set -// - when __spec__.origin == 'built-in' -// - when __spec__.origin == 'frozen' -// - otherwise +// moduleRepr returns a module's repr by forwarding to the vendored +// importlib._bootstrap._module_repr, exactly as CPython's C module_repr +// delegates through _PyImport_ImportlibModuleRepr. The Python +// implementation handles the __spec__, __loader__ and __file__ variants +// (including namespace packages and the '?' name fallback) so the +// rendering matches CPython byte-for-byte. +// +// During early bootstrap (before vm.init wires the hook) the importlib +// machinery is not yet usable, so fall back to a minimal Go rendering +// that mirrors the catch-all branch of _module_repr. // -// CPython: Objects/moduleobject.c:228 module_repr +// CPython: Objects/moduleobject.c:848 module_repr +// CPython: Python/import.c:3346 _PyImport_ImportlibModuleRepr func moduleRepr(o Object) (string, error) { + if ModuleReprHook != nil { + return ModuleReprHook(o) + } + return ModuleReprFallback(o) +} + +// ModuleReprFallback renders the catch-all branch of +// importlib._bootstrap._module_repr without importing anything, for use +// before the import machinery is available. +// +// CPython: Lib/importlib/_bootstrap.py:544 _module_repr +func ModuleReprFallback(o Object) (string, error) { m := o.(*Module) - name := moduleStrAttr(m, "__name__") + name := "?" + if n, err := m.dict.GetItem(NewStr("__name__")); err == nil && n != nil { + if s, ok := n.(*Unicode); ok { + name = s.v + } + } if file := moduleStrAttr(m, "__file__"); file != "" { return fmt.Sprintf("", name, file), nil } - if spec, err := m.dict.GetItem(NewStr("__spec__")); err == nil && spec != nil { - if sm, ok := spec.(*Module); ok { - origin := moduleStrAttr(sm, "origin") - if origin == "built-in" { - return fmt.Sprintf("", name), nil - } - if origin == "frozen" { - return fmt.Sprintf("", name), nil - } - } - } return fmt.Sprintf("", name), nil } diff --git a/objects/object.go b/objects/object.go index 7e75b664f..f62d61c34 100644 --- a/objects/object.go +++ b/objects/object.go @@ -38,6 +38,17 @@ func init() { objectType.Repr = objectRepr objectType.Str = objectStr objectType.Hash = identityHash + // tp_getattro / tp_setattro. PyBaseObject_Type wires both to the + // generic implementations, so every type that does not override them + // inherits GenericGetAttr / GenericSetAttr. A plain object() therefore + // raises AttributeError ("'object' object has no attribute %r and no + // __dict__ for setting new attributes") on attribute assignment, not a + // bare TypeError. + // + // CPython: Objects/typeobject.c:7970 PyBaseObject_Type (tp_getattro = + // PyObject_GenericGetAttr, tp_setattro = PyObject_GenericSetAttr) + objectType.Getattro = GenericGetAttr + objectType.Setattro = GenericSetAttr // object_methods table. // @@ -986,6 +997,16 @@ func objectGetWeakref(o Object) (Object, error) { func objectGetDict(o Object) (Object, error) { switch v := o.(type) { + case *Module: + // A module always carries md_dict, even a user subclass of + // ModuleType that never sets tp_dictoffset (HasDict false). The + // generic object.__getattribute__ path reaches here for + // `object.__getattribute__(mod, '__dict__')` (importlib's + // _LazyModule does exactly this), so return md_dict directly + // rather than gating on HasDict like the AttrDictHolder arm below. + // + // CPython: Objects/moduleobject.c module_dict getset (md_dict) + return v.Dict(), nil case *Instance: if v.dict == nil { if !v.Type().HasDict { @@ -1005,6 +1026,18 @@ func objectGetDict(o Object) (Object, error) { // managed dict over the inline values, leaving them to be detached // in _PyObject_FreeInstanceAttributes at dealloc. v.dictExposed = true + // Handing the dict to Python code drops the inline-values fast + // path: code can now store straight into the mapping (e.g. + // vars(self).update(...)) without routing through instanceSetAttr, + // so gopy can no longer keep the type's cached keys in sync. CPython + // materializes a combined dict here and clears values->valid, which + // deopts the LOAD_ATTR_*_WITH_VALUES arms; mirror that by flipping + // inlineValid so a class attribute can no longer be served from the + // cache while a direct instance store shadows it. + // + // CPython: Objects/dictobject.c:6857 make_dict_from_instance_attributes + // (PyDictValues stops being valid once the dict is built) + v.inlineValid = false return v.dict, nil case *Int: // The builtin int type has no tp_dictoffset, so (42).__dict__ diff --git a/objects/type_specialize.go b/objects/type_specialize.go index 65f76d73e..789a96a88 100644 --- a/objects/type_specialize.go +++ b/objects/type_specialize.go @@ -6,11 +6,24 @@ package objects // 32-bit version on first call. Returns 0 when the global counter // has wrapped (the specializer treats this as "give up"). // -// CPython: Python/typeobject.c:L312 _PyType_AssignVersionTag +// To respect the invariant that a type carries a valid version tag +// only when every one of its bases does, the tag is first assigned to +// all super classes. If any base cannot be assigned one (counter +// wrapped), this type gives up too. The invariant is what lets +// InvalidateVersionTag early-return on a zero tag: a base with tag 0 +// can have no subclass holding a live tag, so there is nothing cached +// to clear. +// +// CPython: Objects/typeobject.c:1344 assign_version_tag func (t *Type) VersionTag() uint32 { if t.versionTag != 0 { return t.versionTag } + for _, b := range t.Bases { + if b != nil && b.VersionTag() == 0 { + return 0 + } + } v := allocTypeVersionTag() if v == 0 { return 0 diff --git a/stdlib/__hello__.py b/stdlib/__hello__.py new file mode 100644 index 000000000..c09d6a4f5 --- /dev/null +++ b/stdlib/__hello__.py @@ -0,0 +1,16 @@ +initialized = True + +class TestFrozenUtf8_1: + """\u00b6""" + +class TestFrozenUtf8_2: + """\u03c0""" + +class TestFrozenUtf8_4: + """\U0001f600""" + +def main(): + print("Hello world!") + +if __name__ == '__main__': + main() diff --git a/stdlib/__phello__/__init__.py b/stdlib/__phello__/__init__.py new file mode 100644 index 000000000..d37bd2766 --- /dev/null +++ b/stdlib/__phello__/__init__.py @@ -0,0 +1,7 @@ +initialized = True + +def main(): + print("Hello world!") + +if __name__ == '__main__': + main() diff --git a/stdlib/__phello__/ham/__init__.py b/stdlib/__phello__/ham/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/stdlib/__phello__/ham/eggs.py b/stdlib/__phello__/ham/eggs.py new file mode 100644 index 000000000..e69de29bb diff --git a/stdlib/__phello__/spam.py b/stdlib/__phello__/spam.py new file mode 100644 index 000000000..d37bd2766 --- /dev/null +++ b/stdlib/__phello__/spam.py @@ -0,0 +1,7 @@ +initialized = True + +def main(): + print("Hello world!") + +if __name__ == '__main__': + main() diff --git a/stdlib/_pyio.py b/stdlib/_pyio.py new file mode 100644 index 000000000..116ce4f37 --- /dev/null +++ b/stdlib/_pyio.py @@ -0,0 +1,2754 @@ +""" +Python implementation of the io module. +""" + +import os +import abc +import codecs +import errno +import stat +import sys +# Import _thread instead of threading to reduce startup cost +from _thread import allocate_lock as Lock +if sys.platform in {'win32', 'cygwin'}: + from msvcrt import setmode as _setmode +else: + _setmode = None + +import io +from io import (__all__, SEEK_SET, SEEK_CUR, SEEK_END, Reader, Writer) # noqa: F401 + +valid_seek_flags = {0, 1, 2} # Hardwired values +if hasattr(os, 'SEEK_HOLE') : + valid_seek_flags.add(os.SEEK_HOLE) + valid_seek_flags.add(os.SEEK_DATA) + +# open() uses max(min(blocksize, 8 MiB), DEFAULT_BUFFER_SIZE) +# when the device block size is available. +DEFAULT_BUFFER_SIZE = 128 * 1024 # bytes + +# NOTE: Base classes defined here are registered with the "official" ABCs +# defined in io.py. We don't use real inheritance though, because we don't want +# to inherit the C implementations. + +# Rebind for compatibility +BlockingIOError = BlockingIOError + +# Does open() check its 'errors' argument? +_CHECK_ERRORS = (hasattr(sys, "gettotalrefcount") or sys.flags.dev_mode) + + +def text_encoding(encoding, stacklevel=2): + """ + A helper function to choose the text encoding. + + When encoding is not None, this function returns it. + Otherwise, this function returns the default text encoding + (i.e. "locale" or "utf-8" depends on UTF-8 mode). + + This function emits an EncodingWarning if *encoding* is None and + sys.flags.warn_default_encoding is true. + + This can be used in APIs with an encoding=None parameter + that pass it to TextIOWrapper or open. + However, please consider using encoding="utf-8" for new APIs. + """ + if encoding is None: + if sys.flags.utf8_mode: + encoding = "utf-8" + else: + encoding = "locale" + if sys.flags.warn_default_encoding: + import warnings + warnings.warn("'encoding' argument not specified.", + EncodingWarning, stacklevel + 1) + return encoding + + +# Wrapper for builtins.open +# +# Trick so that open() won't become a bound method when stored +# as a class variable (as dbm.dumb does). +# +# See init_set_builtins_open() in Python/pylifecycle.c. +@staticmethod +def open(file, mode="r", buffering=-1, encoding=None, errors=None, + newline=None, closefd=True, opener=None): + + r"""Open file and return a stream. Raise OSError upon failure. + + file is either a text or byte string giving the name (and the path + if the file isn't in the current working directory) of the file to + be opened or an integer file descriptor of the file to be + wrapped. (If a file descriptor is given, it is closed when the + returned I/O object is closed, unless closefd is set to False.) + + mode is an optional string that specifies the mode in which the file is + opened. It defaults to 'r' which means open for reading in text mode. Other + common values are 'w' for writing (truncating the file if it already + exists), 'x' for exclusive creation of a new file, and 'a' for appending + (which on some Unix systems, means that all writes append to the end of the + file regardless of the current seek position). In text mode, if encoding is + not specified the encoding used is platform dependent. (For reading and + writing raw bytes use binary mode and leave encoding unspecified.) The + available modes are: + + ========= =============================================================== + Character Meaning + --------- --------------------------------------------------------------- + 'r' open for reading (default) + 'w' open for writing, truncating the file first + 'x' create a new file and open it for writing + 'a' open for writing, appending to the end of the file if it exists + 'b' binary mode + 't' text mode (default) + '+' open a disk file for updating (reading and writing) + ========= =============================================================== + + The default mode is 'rt' (open for reading text). For binary random + access, the mode 'w+b' opens and truncates the file to 0 bytes, while + 'r+b' opens the file without truncation. The 'x' mode implies 'w' and + raises an `FileExistsError` if the file already exists. + + Python distinguishes between files opened in binary and text modes, + even when the underlying operating system doesn't. Files opened in + binary mode (appending 'b' to the mode argument) return contents as + bytes objects without any decoding. In text mode (the default, or when + 't' is appended to the mode argument), the contents of the file are + returned as strings, the bytes having been first decoded using a + platform-dependent encoding or using the specified encoding if given. + + buffering is an optional integer used to set the buffering policy. + Pass 0 to switch buffering off (only allowed in binary mode), 1 to select + line buffering (only usable in text mode), and an integer > 1 to indicate + the size of a fixed-size chunk buffer. When no buffering argument is + given, the default buffering policy works as follows: + + * Binary files are buffered in fixed-size chunks; the size of the buffer + is max(min(blocksize, 8 MiB), DEFAULT_BUFFER_SIZE) + when the device block size is available. + On most systems, the buffer will typically be 128 kilobytes long. + + * "Interactive" text files (files for which isatty() returns True) + use line buffering. Other text files use the policy described above + for binary files. + + encoding is the str name of the encoding used to decode or encode the + file. This should only be used in text mode. The default encoding is + platform dependent, but any encoding supported by Python can be + passed. See the codecs module for the list of supported encodings. + + errors is an optional string that specifies how encoding errors are to + be handled---this argument should not be used in binary mode. Pass + 'strict' to raise a ValueError exception if there is an encoding error + (the default of None has the same effect), or pass 'ignore' to ignore + errors. (Note that ignoring encoding errors can lead to data loss.) + See the documentation for codecs.register for a list of the permitted + encoding error strings. + + newline is a string controlling how universal newlines works (it only + applies to text mode). It can be None, '', '\n', '\r', and '\r\n'. It works + as follows: + + * On input, if newline is None, universal newlines mode is + enabled. Lines in the input can end in '\n', '\r', or '\r\n', and + these are translated into '\n' before being returned to the + caller. If it is '', universal newline mode is enabled, but line + endings are returned to the caller untranslated. If it has any of + the other legal values, input lines are only terminated by the given + string, and the line ending is returned to the caller untranslated. + + * On output, if newline is None, any '\n' characters written are + translated to the system default line separator, os.linesep. If + newline is '', no translation takes place. If newline is any of the + other legal values, any '\n' characters written are translated to + the given string. + + closedfd is a bool. If closefd is False, the underlying file descriptor will + be kept open when the file is closed. This does not work when a file name is + given and must be True in that case. + + The newly created file is non-inheritable. + + A custom opener can be used by passing a callable as *opener*. The + underlying file descriptor for the file object is then obtained by calling + *opener* with (*file*, *flags*). *opener* must return an open file + descriptor (passing os.open as *opener* results in functionality similar to + passing None). + + open() returns a file object whose type depends on the mode, and + through which the standard file operations such as reading and writing + are performed. When open() is used to open a file in a text mode ('w', + 'r', 'wt', 'rt', etc.), it returns a TextIOWrapper. When used to open + a file in a binary mode, the returned class varies: in read binary + mode, it returns a BufferedReader; in write binary and append binary + modes, it returns a BufferedWriter, and in read/write mode, it returns + a BufferedRandom. + + It is also possible to use a string or bytearray as a file for both + reading and writing. For strings StringIO can be used like a file + opened in a text mode, and for bytes a BytesIO can be used like a file + opened in a binary mode. + """ + if not isinstance(file, int): + file = os.fspath(file) + if not isinstance(file, (str, bytes, int)): + raise TypeError("invalid file: %r" % file) + if not isinstance(mode, str): + raise TypeError("invalid mode: %r" % mode) + if not isinstance(buffering, int): + raise TypeError("invalid buffering: %r" % buffering) + if encoding is not None and not isinstance(encoding, str): + raise TypeError("invalid encoding: %r" % encoding) + if errors is not None and not isinstance(errors, str): + raise TypeError("invalid errors: %r" % errors) + modes = set(mode) + if modes - set("axrwb+t") or len(mode) > len(modes): + raise ValueError("invalid mode: %r" % mode) + creating = "x" in modes + reading = "r" in modes + writing = "w" in modes + appending = "a" in modes + updating = "+" in modes + text = "t" in modes + binary = "b" in modes + if text and binary: + raise ValueError("can't have text and binary mode at once") + if creating + reading + writing + appending > 1: + raise ValueError("can't have read/write/append mode at once") + if not (creating or reading or writing or appending): + raise ValueError("must have exactly one of read/write/append mode") + if binary and encoding is not None: + raise ValueError("binary mode doesn't take an encoding argument") + if binary and errors is not None: + raise ValueError("binary mode doesn't take an errors argument") + if binary and newline is not None: + raise ValueError("binary mode doesn't take a newline argument") + if binary and buffering == 1: + import warnings + warnings.warn("line buffering (buffering=1) isn't supported in binary " + "mode, the default buffer size will be used", + RuntimeWarning, 2) + raw = FileIO(file, + (creating and "x" or "") + + (reading and "r" or "") + + (writing and "w" or "") + + (appending and "a" or "") + + (updating and "+" or ""), + closefd, opener=opener) + result = raw + try: + line_buffering = False + if buffering == 1 or buffering < 0 and raw._isatty_open_only(): + buffering = -1 + line_buffering = True + if buffering < 0: + buffering = max(min(raw._blksize, 8192 * 1024), DEFAULT_BUFFER_SIZE) + if buffering < 0: + raise ValueError("invalid buffering size") + if buffering == 0: + if binary: + return result + raise ValueError("can't have unbuffered text I/O") + if updating: + buffer = BufferedRandom(raw, buffering) + elif creating or writing or appending: + buffer = BufferedWriter(raw, buffering) + elif reading: + buffer = BufferedReader(raw, buffering) + else: + raise ValueError("unknown mode: %r" % mode) + result = buffer + if binary: + return result + encoding = text_encoding(encoding) + text = TextIOWrapper(buffer, encoding, errors, newline, line_buffering) + result = text + text.mode = mode + return result + except: + result.close() + raise + +# Define a default pure-Python implementation for open_code() +# that does not allow hooks. Warn on first use. Defined for tests. +def _open_code_with_warning(path): + """Opens the provided file with mode ``'rb'``. This function + should be used when the intent is to treat the contents as + executable code. + + ``path`` should be an absolute path. + + When supported by the runtime, this function can be hooked + in order to allow embedders more control over code files. + This functionality is not supported on the current runtime. + """ + import warnings + warnings.warn("_pyio.open_code() may not be using hooks", + RuntimeWarning, 2) + return open(path, "rb") + +try: + open_code = io.open_code +except AttributeError: + open_code = _open_code_with_warning + + +# In normal operation, both `UnsupportedOperation`s should be bound to the +# same object. +try: + UnsupportedOperation = io.UnsupportedOperation +except AttributeError: + class UnsupportedOperation(OSError, ValueError): + pass + + +class IOBase(metaclass=abc.ABCMeta): + + """The abstract base class for all I/O classes. + + This class provides dummy implementations for many methods that + derived classes can override selectively; the default implementations + represent a file that cannot be read, written or seeked. + + Even though IOBase does not declare read or write because + their signatures will vary, implementations and clients should + consider those methods part of the interface. Also, implementations + may raise UnsupportedOperation when operations they do not support are + called. + + The basic type used for binary data read from or written to a file is + bytes. Other bytes-like objects are accepted as method arguments too. + Text I/O classes work with str data. + + Note that calling any method (even inquiries) on a closed stream is + undefined. Implementations may raise OSError in this case. + + IOBase (and its subclasses) support the iterator protocol, meaning + that an IOBase object can be iterated over yielding the lines in a + stream. + + IOBase also supports the :keyword:`with` statement. In this example, + fp is closed after the suite of the with statement is complete: + + with open('spam.txt', 'r') as fp: + fp.write('Spam and eggs!') + """ + + ### Internal ### + + def _unsupported(self, name): + """Internal: raise an OSError exception for unsupported operations.""" + raise UnsupportedOperation("%s.%s() not supported" % + (self.__class__.__name__, name)) + + ### Positioning ### + + def seek(self, pos, whence=0): + """Change stream position. + + Change the stream position to byte offset pos. Argument pos is + interpreted relative to the position indicated by whence. Values + for whence are ints: + + * 0 -- start of stream (the default); offset should be zero or positive + * 1 -- current stream position; offset may be negative + * 2 -- end of stream; offset is usually negative + Some operating systems / file systems could provide additional values. + + Return an int indicating the new absolute position. + """ + self._unsupported("seek") + + def tell(self): + """Return an int indicating the current stream position.""" + return self.seek(0, 1) + + def truncate(self, pos=None): + """Truncate file to size bytes. + + Size defaults to the current IO position as reported by tell(). Return + the new size. + """ + self._unsupported("truncate") + + ### Flush and close ### + + def flush(self): + """Flush write buffers, if applicable. + + This is not implemented for read-only and non-blocking streams. + """ + self._checkClosed() + # XXX Should this return the number of bytes written??? + + __closed = False + + def close(self): + """Flush and close the IO object. + + This method has no effect if the file is already closed. + """ + if not self.__closed: + try: + self.flush() + finally: + self.__closed = True + + def __del__(self): + """Destructor. Calls close().""" + try: + closed = self.closed + except AttributeError: + # If getting closed fails, then the object is probably + # in an unusable state, so ignore. + return + + if closed: + return + + if dealloc_warn := getattr(self, "_dealloc_warn", None): + dealloc_warn(self) + + # If close() fails, the caller logs the exception with + # sys.unraisablehook. close() must be called at the end at __del__(). + self.close() + + ### Inquiries ### + + def seekable(self): + """Return a bool indicating whether object supports random access. + + If False, seek(), tell() and truncate() will raise OSError. + This method may need to do a test seek(). + """ + return False + + def _checkSeekable(self, msg=None): + """Internal: raise UnsupportedOperation if file is not seekable + """ + if not self.seekable(): + raise UnsupportedOperation("File or stream is not seekable." + if msg is None else msg) + + def readable(self): + """Return a bool indicating whether object was opened for reading. + + If False, read() will raise OSError. + """ + return False + + def _checkReadable(self, msg=None): + """Internal: raise UnsupportedOperation if file is not readable + """ + if not self.readable(): + raise UnsupportedOperation("File or stream is not readable." + if msg is None else msg) + + def writable(self): + """Return a bool indicating whether object was opened for writing. + + If False, write() and truncate() will raise OSError. + """ + return False + + def _checkWritable(self, msg=None): + """Internal: raise UnsupportedOperation if file is not writable + """ + if not self.writable(): + raise UnsupportedOperation("File or stream is not writable." + if msg is None else msg) + + @property + def closed(self): + """closed: bool. True iff the file has been closed. + + For backwards compatibility, this is a property, not a predicate. + """ + return self.__closed + + def _checkClosed(self, msg=None): + """Internal: raise a ValueError if file is closed + """ + if self.closed: + raise ValueError("I/O operation on closed file." + if msg is None else msg) + + ### Context manager ### + + def __enter__(self): # That's a forward reference + """Context management protocol. Returns self (an instance of IOBase).""" + self._checkClosed() + return self + + def __exit__(self, *args): + """Context management protocol. Calls close()""" + self.close() + + ### Lower-level APIs ### + + # XXX Should these be present even if unimplemented? + + def fileno(self): + """Returns underlying file descriptor (an int) if one exists. + + An OSError is raised if the IO object does not use a file descriptor. + """ + self._unsupported("fileno") + + def isatty(self): + """Return a bool indicating whether this is an 'interactive' stream. + + Return False if it can't be determined. + """ + self._checkClosed() + return False + + ### Readline[s] and writelines ### + + def readline(self, size=-1): + r"""Read and return a line of bytes from the stream. + + If size is specified, at most size bytes will be read. + Size should be an int. + + The line terminator is always b'\n' for binary files; for text + files, the newlines argument to open can be used to select the line + terminator(s) recognized. + """ + # For backwards compatibility, a (slowish) readline(). + if hasattr(self, "peek"): + def nreadahead(): + readahead = self.peek(1) + if not readahead: + return 1 + n = (readahead.find(b"\n") + 1) or len(readahead) + if size >= 0: + n = min(n, size) + return n + else: + def nreadahead(): + return 1 + if size is None: + size = -1 + else: + try: + size_index = size.__index__ + except AttributeError: + raise TypeError(f"{size!r} is not an integer") + else: + size = size_index() + res = bytearray() + while size < 0 or len(res) < size: + b = self.read(nreadahead()) + if not b: + break + res += b + if res.endswith(b"\n"): + break + return bytes(res) + + def __iter__(self): + self._checkClosed() + return self + + def __next__(self): + line = self.readline() + if not line: + raise StopIteration + return line + + def readlines(self, hint=None): + """Return a list of lines from the stream. + + hint can be specified to control the number of lines read: no more + lines will be read if the total size (in bytes/characters) of all + lines so far exceeds hint. + """ + if hint is None or hint <= 0: + return list(self) + n = 0 + lines = [] + for line in self: + lines.append(line) + n += len(line) + if n >= hint: + break + return lines + + def writelines(self, lines): + """Write a list of lines to the stream. + + Line separators are not added, so it is usual for each of the lines + provided to have a line separator at the end. + """ + self._checkClosed() + for line in lines: + self.write(line) + +io.IOBase.register(IOBase) + + +class RawIOBase(IOBase): + + """Base class for raw binary I/O.""" + + # The read() method is implemented by calling readinto(); derived + # classes that want to support read() only need to implement + # readinto() as a primitive operation. In general, readinto() can be + # more efficient than read(). + + # (It would be tempting to also provide an implementation of + # readinto() in terms of read(), in case the latter is a more suitable + # primitive operation, but that would lead to nasty recursion in case + # a subclass doesn't implement either.) + + def read(self, size=-1): + """Read and return up to size bytes, where size is an int. + + Returns an empty bytes object on EOF, or None if the object is + set not to block and has no data to read. + """ + if size is None: + size = -1 + if size < 0: + return self.readall() + b = bytearray(size.__index__()) + n = self.readinto(b) + if n is None: + return None + if n < 0 or n > len(b): + raise ValueError(f"readinto returned {n} outside buffer size {len(b)}") + del b[n:] + return bytes(b) + + def readall(self): + """Read until EOF, using multiple read() call.""" + res = bytearray() + while data := self.read(DEFAULT_BUFFER_SIZE): + res += data + if res: + return bytes(res) + else: + # b'' or None + return data + + def readinto(self, b): + """Read bytes into a pre-allocated bytes-like object b. + + Returns an int representing the number of bytes read (0 for EOF), or + None if the object is set not to block and has no data to read. + """ + self._unsupported("readinto") + + def write(self, b): + """Write the given buffer to the IO stream. + + Returns the number of bytes written, which may be less than the + length of b in bytes. + """ + self._unsupported("write") + +io.RawIOBase.register(RawIOBase) + + +class BufferedIOBase(IOBase): + + """Base class for buffered IO objects. + + The main difference with RawIOBase is that the read() method + supports omitting the size argument, and does not have a default + implementation that defers to readinto(). + + In addition, read(), readinto() and write() may raise + BlockingIOError if the underlying raw stream is in non-blocking + mode and not ready; unlike their raw counterparts, they will never + return None. + + A typical implementation should not inherit from a RawIOBase + implementation, but wrap one. + """ + + def read(self, size=-1): + """Read and return up to size bytes, where size is an int. + + If the argument is omitted, None, or negative, reads and + returns all data until EOF. + + If the argument is positive, and the underlying raw stream is + not 'interactive', multiple raw reads may be issued to satisfy + the byte count (unless EOF is reached first). But for + interactive raw streams (XXX and for pipes?), at most one raw + read will be issued, and a short result does not imply that + EOF is imminent. + + Returns an empty bytes array on EOF. + + Raises BlockingIOError if the underlying raw stream has no + data at the moment. + """ + self._unsupported("read") + + def read1(self, size=-1): + """Read up to size bytes with at most one read() system call, + where size is an int. + """ + self._unsupported("read1") + + def readinto(self, b): + """Read bytes into a pre-allocated bytes-like object b. + + Like read(), this may issue multiple reads to the underlying raw + stream, unless the latter is 'interactive'. + + Returns an int representing the number of bytes read (0 for EOF). + + Raises BlockingIOError if the underlying raw stream has no + data at the moment. + """ + + return self._readinto(b, read1=False) + + def readinto1(self, b): + """Read bytes into buffer *b*, using at most one system call + + Returns an int representing the number of bytes read (0 for EOF). + + Raises BlockingIOError if the underlying raw stream has no + data at the moment. + """ + + return self._readinto(b, read1=True) + + def _readinto(self, b, read1): + if not isinstance(b, memoryview): + b = memoryview(b) + b = b.cast('B') + + if read1: + data = self.read1(len(b)) + else: + data = self.read(len(b)) + n = len(data) + + b[:n] = data + + return n + + def write(self, b): + """Write the given bytes buffer to the IO stream. + + Return the number of bytes written, which is always the length of b + in bytes. + + Raises BlockingIOError if the buffer is full and the + underlying raw stream cannot accept more data at the moment. + """ + self._unsupported("write") + + def detach(self): + """ + Separate the underlying raw stream from the buffer and return it. + + After the raw stream has been detached, the buffer is in an unusable + state. + """ + self._unsupported("detach") + +io.BufferedIOBase.register(BufferedIOBase) + + +class _BufferedIOMixin(BufferedIOBase): + + """A mixin implementation of BufferedIOBase with an underlying raw stream. + + This passes most requests on to the underlying raw stream. It + does *not* provide implementations of read(), readinto() or + write(). + """ + + def __init__(self, raw): + self._raw = raw + + ### Positioning ### + + def seek(self, pos, whence=0): + new_position = self.raw.seek(pos, whence) + if new_position < 0: + raise OSError("seek() returned an invalid position") + return new_position + + def tell(self): + pos = self.raw.tell() + if pos < 0: + raise OSError("tell() returned an invalid position") + return pos + + def truncate(self, pos=None): + self._checkClosed() + self._checkWritable() + + # Flush the stream. We're mixing buffered I/O with lower-level I/O, + # and a flush may be necessary to synch both views of the current + # file state. + self.flush() + + if pos is None: + pos = self.tell() + # XXX: Should seek() be used, instead of passing the position + # XXX directly to truncate? + return self.raw.truncate(pos) + + ### Flush and close ### + + def flush(self): + if self.closed: + raise ValueError("flush on closed file") + self.raw.flush() + + def close(self): + if self.raw is not None and not self.closed: + try: + # may raise BlockingIOError or BrokenPipeError etc + self.flush() + finally: + self.raw.close() + + def detach(self): + if self.raw is None: + raise ValueError("raw stream already detached") + self.flush() + raw = self._raw + self._raw = None + return raw + + ### Inquiries ### + + def seekable(self): + return self.raw.seekable() + + @property + def raw(self): + return self._raw + + @property + def closed(self): + return self.raw.closed + + @property + def name(self): + return self.raw.name + + @property + def mode(self): + return self.raw.mode + + def __getstate__(self): + raise TypeError(f"cannot pickle {self.__class__.__name__!r} object") + + def __repr__(self): + modname = self.__class__.__module__ + clsname = self.__class__.__qualname__ + try: + name = self.name + except AttributeError: + return "<{}.{}>".format(modname, clsname) + else: + return "<{}.{} name={!r}>".format(modname, clsname, name) + + def _dealloc_warn(self, source): + if dealloc_warn := getattr(self.raw, "_dealloc_warn", None): + dealloc_warn(source) + + ### Lower-level APIs ### + + def fileno(self): + return self.raw.fileno() + + def isatty(self): + return self.raw.isatty() + + +class BytesIO(BufferedIOBase): + + """Buffered I/O implementation using an in-memory bytes buffer.""" + + # Initialize _buffer as soon as possible since it's used by __del__() + # which calls close() + _buffer = None + + def __init__(self, initial_bytes=None): + buf = bytearray() + if initial_bytes is not None: + buf += initial_bytes + self._buffer = buf + self._pos = 0 + + def __getstate__(self): + if self.closed: + raise ValueError("__getstate__ on closed file") + return self.__dict__.copy() + + def getvalue(self): + """Return the bytes value (contents) of the buffer + """ + if self.closed: + raise ValueError("getvalue on closed file") + return bytes(self._buffer) + + def getbuffer(self): + """Return a readable and writable view of the buffer. + """ + if self.closed: + raise ValueError("getbuffer on closed file") + return memoryview(self._buffer) + + def close(self): + if self._buffer is not None: + self._buffer.clear() + super().close() + + def read(self, size=-1): + if self.closed: + raise ValueError("read from closed file") + if size is None: + size = -1 + else: + try: + size_index = size.__index__ + except AttributeError: + raise TypeError(f"{size!r} is not an integer") + else: + size = size_index() + if size < 0: + size = len(self._buffer) + if len(self._buffer) <= self._pos: + return b"" + newpos = min(len(self._buffer), self._pos + size) + b = self._buffer[self._pos : newpos] + self._pos = newpos + return bytes(b) + + def read1(self, size=-1): + """This is the same as read. + """ + return self.read(size) + + def write(self, b): + if isinstance(b, str): + raise TypeError("can't write str to binary stream") + with memoryview(b) as view: + if self.closed: + raise ValueError("write to closed file") + + n = view.nbytes # Size of any bytes-like object + if n == 0: + return 0 + + pos = self._pos + if pos > len(self._buffer): + # Pad buffer to pos with null bytes. + self._buffer.resize(pos) + self._buffer[pos:pos + n] = view + self._pos += n + return n + + def seek(self, pos, whence=0): + if self.closed: + raise ValueError("seek on closed file") + try: + pos_index = pos.__index__ + except AttributeError: + raise TypeError(f"{pos!r} is not an integer") + else: + pos = pos_index() + if whence == 0: + if pos < 0: + raise ValueError("negative seek position %r" % (pos,)) + self._pos = pos + elif whence == 1: + self._pos = max(0, self._pos + pos) + elif whence == 2: + self._pos = max(0, len(self._buffer) + pos) + else: + raise ValueError("unsupported whence value") + return self._pos + + def tell(self): + if self.closed: + raise ValueError("tell on closed file") + return self._pos + + def truncate(self, pos=None): + if self.closed: + raise ValueError("truncate on closed file") + if pos is None: + pos = self._pos + else: + try: + pos_index = pos.__index__ + except AttributeError: + raise TypeError(f"{pos!r} is not an integer") + else: + pos = pos_index() + if pos < 0: + raise ValueError("negative truncate position %r" % (pos,)) + del self._buffer[pos:] + return pos + + def readable(self): + if self.closed: + raise ValueError("I/O operation on closed file.") + return True + + def writable(self): + if self.closed: + raise ValueError("I/O operation on closed file.") + return True + + def seekable(self): + if self.closed: + raise ValueError("I/O operation on closed file.") + return True + + +class BufferedReader(_BufferedIOMixin): + + """BufferedReader(raw[, buffer_size]) + + A buffer for a readable, sequential BaseRawIO object. + + The constructor creates a BufferedReader for the given readable raw + stream and buffer_size. If buffer_size is omitted, DEFAULT_BUFFER_SIZE + is used. + """ + + def __init__(self, raw, buffer_size=DEFAULT_BUFFER_SIZE): + """Create a new buffered reader using the given readable raw IO object. + """ + if not raw.readable(): + raise OSError('"raw" argument must be readable.') + + _BufferedIOMixin.__init__(self, raw) + if buffer_size <= 0: + raise ValueError("invalid buffer size") + self.buffer_size = buffer_size + self._reset_read_buf() + self._read_lock = Lock() + + def readable(self): + return self.raw.readable() + + def _reset_read_buf(self): + self._read_buf = b"" + self._read_pos = 0 + + def read(self, size=None): + """Read size bytes. + + Returns exactly size bytes of data unless the underlying raw IO + stream reaches EOF or if the call would block in non-blocking + mode. If size is negative, read until EOF or until read() would + block. + """ + if size is not None and size < -1: + raise ValueError("invalid number of bytes to read") + with self._read_lock: + return self._read_unlocked(size) + + def _read_unlocked(self, n=None): + nodata_val = b"" + empty_values = (b"", None) + buf = self._read_buf + pos = self._read_pos + + # Special case for when the number of bytes to read is unspecified. + if n is None or n == -1: + self._reset_read_buf() + if hasattr(self.raw, 'readall'): + chunk = self.raw.readall() + if chunk is None: + return buf[pos:] or None + else: + return buf[pos:] + chunk + chunks = [buf[pos:]] # Strip the consumed bytes. + current_size = 0 + while True: + # Read until EOF or until read() would block. + chunk = self.raw.read() + if chunk in empty_values: + nodata_val = chunk + break + current_size += len(chunk) + chunks.append(chunk) + return b"".join(chunks) or nodata_val + + # The number of bytes to read is specified, return at most n bytes. + avail = len(buf) - pos # Length of the available buffered data. + if n <= avail: + # Fast path: the data to read is fully buffered. + self._read_pos += n + return buf[pos:pos+n] + # Slow path: read from the stream until enough bytes are read, + # or until an EOF occurs or until read() would block. + chunks = [buf[pos:]] + wanted = max(self.buffer_size, n) + while avail < n: + chunk = self.raw.read(wanted) + if chunk in empty_values: + nodata_val = chunk + break + avail += len(chunk) + chunks.append(chunk) + # n is more than avail only when an EOF occurred or when + # read() would have blocked. + n = min(n, avail) + out = b"".join(chunks) + self._read_buf = out[n:] # Save the extra data in the buffer. + self._read_pos = 0 + return out[:n] if out else nodata_val + + def peek(self, size=0): + """Returns buffered bytes without advancing the position. + + The argument indicates a desired minimal number of bytes; we + do at most one raw read to satisfy it. We never return more + than self.buffer_size. + """ + self._checkClosed("peek of closed file") + with self._read_lock: + return self._peek_unlocked(size) + + def _peek_unlocked(self, n=0): + want = min(n, self.buffer_size) + have = len(self._read_buf) - self._read_pos + if have < want or have <= 0: + to_read = self.buffer_size - have + current = self.raw.read(to_read) + if current: + self._read_buf = self._read_buf[self._read_pos:] + current + self._read_pos = 0 + return self._read_buf[self._read_pos:] + + def read1(self, size=-1): + """Reads up to size bytes, with at most one read() system call.""" + # Returns up to size bytes. If at least one byte is buffered, we + # only return buffered bytes. Otherwise, we do one raw read. + self._checkClosed("read of closed file") + if size < 0: + size = self.buffer_size + if size == 0: + return b"" + with self._read_lock: + self._peek_unlocked(1) + return self._read_unlocked( + min(size, len(self._read_buf) - self._read_pos)) + + # Implementing readinto() and readinto1() is not strictly necessary (we + # could rely on the base class that provides an implementation in terms of + # read() and read1()). We do it anyway to keep the _pyio implementation + # similar to the io implementation (which implements the methods for + # performance reasons). + def _readinto(self, buf, read1): + """Read data into *buf* with at most one system call.""" + + self._checkClosed("readinto of closed file") + + # Need to create a memoryview object of type 'b', otherwise + # we may not be able to assign bytes to it, and slicing it + # would create a new object. + if not isinstance(buf, memoryview): + buf = memoryview(buf) + if buf.nbytes == 0: + return 0 + buf = buf.cast('B') + + written = 0 + with self._read_lock: + while written < len(buf): + + # First try to read from internal buffer + avail = min(len(self._read_buf) - self._read_pos, len(buf)) + if avail: + buf[written:written+avail] = \ + self._read_buf[self._read_pos:self._read_pos+avail] + self._read_pos += avail + written += avail + if written == len(buf): + break + + # If remaining space in callers buffer is larger than + # internal buffer, read directly into callers buffer + if len(buf) - written > self.buffer_size: + n = self.raw.readinto(buf[written:]) + if not n: + break # eof + written += n + + # Otherwise refill internal buffer - unless we're + # in read1 mode and already got some data + elif not (read1 and written): + if not self._peek_unlocked(1): + break # eof + + # In readinto1 mode, return as soon as we have some data + if read1 and written: + break + + return written + + def tell(self): + # GH-95782: Keep return value non-negative + return max(_BufferedIOMixin.tell(self) - len(self._read_buf) + self._read_pos, 0) + + def seek(self, pos, whence=0): + if whence not in valid_seek_flags: + raise ValueError("invalid whence value") + self._checkClosed("seek of closed file") + with self._read_lock: + if whence == 1: + pos -= len(self._read_buf) - self._read_pos + pos = _BufferedIOMixin.seek(self, pos, whence) + self._reset_read_buf() + return pos + +class BufferedWriter(_BufferedIOMixin): + + """A buffer for a writeable sequential RawIO object. + + The constructor creates a BufferedWriter for the given writeable raw + stream. If the buffer_size is not given, it defaults to + DEFAULT_BUFFER_SIZE. + """ + + def __init__(self, raw, buffer_size=DEFAULT_BUFFER_SIZE): + if not raw.writable(): + raise OSError('"raw" argument must be writable.') + + _BufferedIOMixin.__init__(self, raw) + if buffer_size <= 0: + raise ValueError("invalid buffer size") + self.buffer_size = buffer_size + self._write_buf = bytearray() + self._write_lock = Lock() + + def writable(self): + return self.raw.writable() + + def write(self, b): + if isinstance(b, str): + raise TypeError("can't write str to binary stream") + with self._write_lock: + if self.closed: + raise ValueError("write to closed file") + # XXX we can implement some more tricks to try and avoid + # partial writes + if len(self._write_buf) > self.buffer_size: + # We're full, so let's pre-flush the buffer. (This may + # raise BlockingIOError with characters_written == 0.) + self._flush_unlocked() + before = len(self._write_buf) + self._write_buf.extend(b) + written = len(self._write_buf) - before + if len(self._write_buf) > self.buffer_size: + try: + self._flush_unlocked() + except BlockingIOError as e: + if len(self._write_buf) > self.buffer_size: + # We've hit the buffer_size. We have to accept a partial + # write and cut back our buffer. + overage = len(self._write_buf) - self.buffer_size + written -= overage + self._write_buf = self._write_buf[:self.buffer_size] + raise BlockingIOError(e.errno, e.strerror, written) + return written + + def truncate(self, pos=None): + with self._write_lock: + self._flush_unlocked() + if pos is None: + pos = self.raw.tell() + return self.raw.truncate(pos) + + def flush(self): + with self._write_lock: + self._flush_unlocked() + + def _flush_unlocked(self): + if self.closed: + raise ValueError("flush on closed file") + while self._write_buf: + try: + n = self.raw.write(self._write_buf) + except BlockingIOError: + raise RuntimeError("self.raw should implement RawIOBase: it " + "should not raise BlockingIOError") + if n is None: + raise BlockingIOError( + errno.EAGAIN, + "write could not complete without blocking", 0) + if n > len(self._write_buf) or n < 0: + raise OSError("write() returned incorrect number of bytes") + del self._write_buf[:n] + + def tell(self): + return _BufferedIOMixin.tell(self) + len(self._write_buf) + + def seek(self, pos, whence=0): + if whence not in valid_seek_flags: + raise ValueError("invalid whence value") + with self._write_lock: + self._flush_unlocked() + return _BufferedIOMixin.seek(self, pos, whence) + + def close(self): + with self._write_lock: + if self.raw is None or self.closed: + return + # We have to release the lock and call self.flush() (which will + # probably just re-take the lock) in case flush has been overridden in + # a subclass or the user set self.flush to something. This is the same + # behavior as the C implementation. + try: + # may raise BlockingIOError or BrokenPipeError etc + self.flush() + finally: + with self._write_lock: + self.raw.close() + + +class BufferedRWPair(BufferedIOBase): + + """A buffered reader and writer object together. + + A buffered reader object and buffered writer object put together to + form a sequential IO object that can read and write. This is typically + used with a socket or two-way pipe. + + reader and writer are RawIOBase objects that are readable and + writeable respectively. If the buffer_size is omitted it defaults to + DEFAULT_BUFFER_SIZE. + """ + + # XXX The usefulness of this (compared to having two separate IO + # objects) is questionable. + + def __init__(self, reader, writer, buffer_size=DEFAULT_BUFFER_SIZE): + """Constructor. + + The arguments are two RawIO instances. + """ + if not reader.readable(): + raise OSError('"reader" argument must be readable.') + + if not writer.writable(): + raise OSError('"writer" argument must be writable.') + + self.reader = BufferedReader(reader, buffer_size) + self.writer = BufferedWriter(writer, buffer_size) + + def read(self, size=-1): + if size is None: + size = -1 + return self.reader.read(size) + + def readinto(self, b): + return self.reader.readinto(b) + + def write(self, b): + return self.writer.write(b) + + def peek(self, size=0): + return self.reader.peek(size) + + def read1(self, size=-1): + return self.reader.read1(size) + + def readinto1(self, b): + return self.reader.readinto1(b) + + def readable(self): + return self.reader.readable() + + def writable(self): + return self.writer.writable() + + def flush(self): + return self.writer.flush() + + def close(self): + try: + self.writer.close() + finally: + self.reader.close() + + def isatty(self): + return self.reader.isatty() or self.writer.isatty() + + @property + def closed(self): + return self.writer.closed + + +class BufferedRandom(BufferedWriter, BufferedReader): + + """A buffered interface to random access streams. + + The constructor creates a reader and writer for a seekable stream, + raw, given in the first argument. If the buffer_size is omitted it + defaults to DEFAULT_BUFFER_SIZE. + """ + + def __init__(self, raw, buffer_size=DEFAULT_BUFFER_SIZE): + raw._checkSeekable() + BufferedReader.__init__(self, raw, buffer_size) + BufferedWriter.__init__(self, raw, buffer_size) + + def seek(self, pos, whence=0): + if whence not in valid_seek_flags: + raise ValueError("invalid whence value") + self.flush() + if self._read_buf: + # Undo read ahead. + with self._read_lock: + self.raw.seek(self._read_pos - len(self._read_buf), 1) + # First do the raw seek, then empty the read buffer, so that + # if the raw seek fails, we don't lose buffered data forever. + pos = self.raw.seek(pos, whence) + with self._read_lock: + self._reset_read_buf() + if pos < 0: + raise OSError("seek() returned invalid position") + return pos + + def tell(self): + if self._write_buf: + return BufferedWriter.tell(self) + else: + return BufferedReader.tell(self) + + def truncate(self, pos=None): + if pos is None: + pos = self.tell() + # Use seek to flush the read buffer. + return BufferedWriter.truncate(self, pos) + + def read(self, size=None): + if size is None: + size = -1 + self.flush() + return BufferedReader.read(self, size) + + def readinto(self, b): + self.flush() + return BufferedReader.readinto(self, b) + + def peek(self, size=0): + self.flush() + return BufferedReader.peek(self, size) + + def read1(self, size=-1): + self.flush() + return BufferedReader.read1(self, size) + + def readinto1(self, b): + self.flush() + return BufferedReader.readinto1(self, b) + + def write(self, b): + if self._read_buf: + # Undo readahead + with self._read_lock: + self.raw.seek(self._read_pos - len(self._read_buf), 1) + self._reset_read_buf() + return BufferedWriter.write(self, b) + + +def _new_buffersize(bytes_read): + # Parallels _io/fileio.c new_buffersize + if bytes_read > 65536: + addend = bytes_read >> 3 + else: + addend = 256 + bytes_read + if addend < DEFAULT_BUFFER_SIZE: + addend = DEFAULT_BUFFER_SIZE + return bytes_read + addend + + +class FileIO(RawIOBase): + _fd = -1 + _created = False + _readable = False + _writable = False + _appending = False + _seekable = None + _closefd = True + + def __init__(self, file, mode='r', closefd=True, opener=None): + """Open a file. The mode can be 'r' (default), 'w', 'x' or 'a' for reading, + writing, exclusive creation or appending. The file will be created if it + doesn't exist when opened for writing or appending; it will be truncated + when opened for writing. A FileExistsError will be raised if it already + exists when opened for creating. Opening a file for creating implies + writing so this mode behaves in a similar way to 'w'. Add a '+' to the mode + to allow simultaneous reading and writing. A custom opener can be used by + passing a callable as *opener*. The underlying file descriptor for the file + object is then obtained by calling opener with (*name*, *flags*). + *opener* must return an open file descriptor (passing os.open as *opener* + results in functionality similar to passing None). + """ + if self._fd >= 0: + # Have to close the existing file first. + self._stat_atopen = None + try: + if self._closefd: + os.close(self._fd) + finally: + self._fd = -1 + + if isinstance(file, float): + raise TypeError('integer argument expected, got float') + if isinstance(file, int): + if isinstance(file, bool): + import warnings + warnings.warn("bool is used as a file descriptor", + RuntimeWarning, stacklevel=2) + file = int(file) + fd = file + if fd < 0: + raise ValueError('negative file descriptor') + else: + fd = -1 + + if not isinstance(mode, str): + raise TypeError('invalid mode: %s' % (mode,)) + if not set(mode) <= set('xrwab+'): + raise ValueError('invalid mode: %s' % (mode,)) + if sum(c in 'rwax' for c in mode) != 1 or mode.count('+') > 1: + raise ValueError('Must have exactly one of create/read/write/append ' + 'mode and at most one plus') + + if 'x' in mode: + self._created = True + self._writable = True + flags = os.O_EXCL | os.O_CREAT + elif 'r' in mode: + self._readable = True + flags = 0 + elif 'w' in mode: + self._writable = True + flags = os.O_CREAT | os.O_TRUNC + elif 'a' in mode: + self._writable = True + self._appending = True + flags = os.O_APPEND | os.O_CREAT + + if '+' in mode: + self._readable = True + self._writable = True + + if self._readable and self._writable: + flags |= os.O_RDWR + elif self._readable: + flags |= os.O_RDONLY + else: + flags |= os.O_WRONLY + + flags |= getattr(os, 'O_BINARY', 0) + + noinherit_flag = (getattr(os, 'O_NOINHERIT', 0) or + getattr(os, 'O_CLOEXEC', 0)) + flags |= noinherit_flag + + owned_fd = None + try: + if fd < 0: + if not closefd: + raise ValueError('Cannot use closefd=False with file name') + if opener is None: + fd = os.open(file, flags, 0o666) + else: + fd = opener(file, flags) + if not isinstance(fd, int): + raise TypeError('expected integer from opener') + if fd < 0: + # bpo-27066: Raise a ValueError for bad value. + raise ValueError(f'opener returned {fd}') + owned_fd = fd + if not noinherit_flag: + os.set_inheritable(fd, False) + + self._closefd = closefd + self._stat_atopen = os.fstat(fd) + try: + if stat.S_ISDIR(self._stat_atopen.st_mode): + raise IsADirectoryError(errno.EISDIR, + os.strerror(errno.EISDIR), file) + except AttributeError: + # Ignore the AttributeError if stat.S_ISDIR or errno.EISDIR + # don't exist. + pass + + if _setmode: + # don't translate newlines (\r\n <=> \n) + _setmode(fd, os.O_BINARY) + + self.name = file + if self._appending: + # For consistent behaviour, we explicitly seek to the + # end of file (otherwise, it might be done only on the + # first write()). + try: + os.lseek(fd, 0, SEEK_END) + except OSError as e: + if e.errno != errno.ESPIPE: + raise + except: + self._stat_atopen = None + if owned_fd is not None: + os.close(owned_fd) + raise + self._fd = fd + + def _dealloc_warn(self, source): + if self._fd >= 0 and self._closefd and not self.closed: + import warnings + warnings.warn(f'unclosed file {source!r}', ResourceWarning, + stacklevel=2, source=self) + + def __getstate__(self): + raise TypeError(f"cannot pickle {self.__class__.__name__!r} object") + + def __repr__(self): + class_name = '%s.%s' % (self.__class__.__module__, + self.__class__.__qualname__) + if self.closed: + return '<%s [closed]>' % class_name + try: + name = self.name + except AttributeError: + return ('<%s fd=%d mode=%r closefd=%r>' % + (class_name, self._fd, self.mode, self._closefd)) + else: + return ('<%s name=%r mode=%r closefd=%r>' % + (class_name, name, self.mode, self._closefd)) + + @property + def _blksize(self): + if self._stat_atopen is None: + return DEFAULT_BUFFER_SIZE + + blksize = getattr(self._stat_atopen, "st_blksize", 0) + # WASI sets blsize to 0 + if not blksize: + return DEFAULT_BUFFER_SIZE + return blksize + + def _checkReadable(self): + if not self._readable: + raise UnsupportedOperation('File not open for reading') + + def _checkWritable(self, msg=None): + if not self._writable: + raise UnsupportedOperation('File not open for writing') + + def read(self, size=None): + """Read at most size bytes, returned as bytes. + + If size is less than 0, read all bytes in the file making + multiple read calls. See ``FileIO.readall``. + + Attempts to make only one system call, retrying only per + PEP 475 (EINTR). This means less data may be returned than + requested. + + In non-blocking mode, returns None if no data is available. + Return an empty bytes object at EOF. + """ + self._checkClosed() + self._checkReadable() + if size is None or size < 0: + return self.readall() + try: + return os.read(self._fd, size) + except BlockingIOError: + return None + + def readall(self): + """Read all data from the file, returned as bytes. + + Reads until either there is an error or read() returns size 0 + (indicates EOF). If the file is already at EOF, returns an + empty bytes object. + + In non-blocking mode, returns as much data as could be read + before EAGAIN. If no data is available (EAGAIN is returned + before bytes are read) returns None. + """ + self._checkClosed() + self._checkReadable() + if self._stat_atopen is None or self._stat_atopen.st_size <= 0: + bufsize = DEFAULT_BUFFER_SIZE + else: + # In order to detect end of file, need a read() of at least 1 + # byte which returns size 0. Oversize the buffer by 1 byte so the + # I/O can be completed with two read() calls (one for all data, one + # for EOF) without needing to resize the buffer. + bufsize = self._stat_atopen.st_size + 1 + + if self._stat_atopen.st_size > 65536: + try: + pos = os.lseek(self._fd, 0, SEEK_CUR) + if self._stat_atopen.st_size >= pos: + bufsize = self._stat_atopen.st_size - pos + 1 + except OSError: + pass + + result = bytearray(bufsize) + bytes_read = 0 + try: + while n := os.readinto(self._fd, memoryview(result)[bytes_read:]): + bytes_read += n + if bytes_read >= len(result): + result.resize(_new_buffersize(bytes_read)) + except BlockingIOError: + if not bytes_read: + return None + + assert len(result) - bytes_read >= 1, \ + "os.readinto buffer size 0 will result in erroneous EOF / returns 0" + result.resize(bytes_read) + return bytes(result) + + def readinto(self, buffer): + """Same as RawIOBase.readinto().""" + self._checkClosed() + self._checkReadable() + try: + return os.readinto(self._fd, buffer) + except BlockingIOError: + return None + + def write(self, b): + """Write bytes b to file, return number written. + + Only makes one system call, so not all of the data may be written. + The number of bytes actually written is returned. In non-blocking mode, + returns None if the write would block. + """ + self._checkClosed() + self._checkWritable() + try: + return os.write(self._fd, b) + except BlockingIOError: + return None + + def seek(self, pos, whence=SEEK_SET): + """Move to new file position. + + Argument offset is a byte count. Optional argument whence defaults to + SEEK_SET or 0 (offset from start of file, offset should be >= 0); other values + are SEEK_CUR or 1 (move relative to current position, positive or negative), + and SEEK_END or 2 (move relative to end of file, usually negative, although + many platforms allow seeking beyond the end of a file). + + Note that not all file objects are seekable. + """ + if isinstance(pos, float): + raise TypeError('an integer is required') + self._checkClosed() + return os.lseek(self._fd, pos, whence) + + def tell(self): + """tell() -> int. Current file position. + + Can raise OSError for non seekable files.""" + self._checkClosed() + return os.lseek(self._fd, 0, SEEK_CUR) + + def truncate(self, size=None): + """Truncate the file to at most size bytes. + + Size defaults to the current file position, as returned by tell(). + The current file position is changed to the value of size. + """ + self._checkClosed() + self._checkWritable() + if size is None: + size = self.tell() + os.ftruncate(self._fd, size) + self._stat_atopen = None + return size + + def close(self): + """Close the file. + + A closed file cannot be used for further I/O operations. close() may be + called more than once without error. + """ + if not self.closed: + self._stat_atopen = None + try: + if self._closefd and self._fd >= 0: + os.close(self._fd) + finally: + super().close() + + def seekable(self): + """True if file supports random-access.""" + self._checkClosed() + if self._seekable is None: + try: + self.tell() + except OSError: + self._seekable = False + else: + self._seekable = True + return self._seekable + + def readable(self): + """True if file was opened in a read mode.""" + self._checkClosed() + return self._readable + + def writable(self): + """True if file was opened in a write mode.""" + self._checkClosed() + return self._writable + + def fileno(self): + """Return the underlying file descriptor (an integer).""" + self._checkClosed() + return self._fd + + def isatty(self): + """True if the file is connected to a TTY device.""" + self._checkClosed() + return os.isatty(self._fd) + + def _isatty_open_only(self): + """Checks whether the file is a TTY using an open-only optimization. + + TTYs are always character devices. If the interpreter knows a file is + not a character device when it would call ``isatty``, can skip that + call. Inside ``open()`` there is a fresh stat result that contains that + information. Use the stat result to skip a system call. Outside of that + context TOCTOU issues (the fd could be arbitrarily modified by + surrounding code). + """ + if (self._stat_atopen is not None + and not stat.S_ISCHR(self._stat_atopen.st_mode)): + return False + return os.isatty(self._fd) + + @property + def closefd(self): + """True if the file descriptor will be closed by close().""" + return self._closefd + + @property + def mode(self): + """String giving the file mode""" + if self._created: + if self._readable: + return 'xb+' + else: + return 'xb' + elif self._appending: + if self._readable: + return 'ab+' + else: + return 'ab' + elif self._readable: + if self._writable: + return 'rb+' + else: + return 'rb' + else: + return 'wb' + + +class TextIOBase(IOBase): + + """Base class for text I/O. + + This class provides a character and line based interface to stream + I/O. + """ + + def read(self, size=-1): + """Read at most size characters from stream, where size is an int. + + Read from underlying buffer until we have size characters or we hit EOF. + If size is negative or omitted, read until EOF. + + Returns a string. + """ + self._unsupported("read") + + def write(self, s): + """Write string s to stream and returning an int.""" + self._unsupported("write") + + def truncate(self, pos=None): + """Truncate size to pos, where pos is an int.""" + self._unsupported("truncate") + + def readline(self): + """Read until newline or EOF. + + Returns an empty string if EOF is hit immediately. + """ + self._unsupported("readline") + + def detach(self): + """ + Separate the underlying buffer from the TextIOBase and return it. + + After the underlying buffer has been detached, the TextIO is in an + unusable state. + """ + self._unsupported("detach") + + @property + def encoding(self): + """Subclasses should override.""" + return None + + @property + def newlines(self): + """Line endings translated so far. + + Only line endings translated during reading are considered. + + Subclasses should override. + """ + return None + + @property + def errors(self): + """Error setting of the decoder or encoder. + + Subclasses should override.""" + return None + +io.TextIOBase.register(TextIOBase) + + +class IncrementalNewlineDecoder(codecs.IncrementalDecoder): + r"""Codec used when reading a file in universal newlines mode. It wraps + another incremental decoder, translating \r\n and \r into \n. It also + records the types of newlines encountered. When used with + translate=False, it ensures that the newline sequence is returned in + one piece. + """ + def __init__(self, decoder, translate, errors='strict'): + codecs.IncrementalDecoder.__init__(self, errors=errors) + self.translate = translate + self.decoder = decoder + self.seennl = 0 + self.pendingcr = False + + def decode(self, input, final=False): + # decode input (with the eventual \r from a previous pass) + if self.decoder is None: + output = input + else: + output = self.decoder.decode(input, final=final) + if self.pendingcr and (output or final): + output = "\r" + output + self.pendingcr = False + + # retain last \r even when not translating data: + # then readline() is sure to get \r\n in one pass + if output.endswith("\r") and not final: + output = output[:-1] + self.pendingcr = True + + # Record which newlines are read + crlf = output.count('\r\n') + cr = output.count('\r') - crlf + lf = output.count('\n') - crlf + self.seennl |= (lf and self._LF) | (cr and self._CR) \ + | (crlf and self._CRLF) + + if self.translate: + if crlf: + output = output.replace("\r\n", "\n") + if cr: + output = output.replace("\r", "\n") + + return output + + def getstate(self): + if self.decoder is None: + buf = b"" + flag = 0 + else: + buf, flag = self.decoder.getstate() + flag <<= 1 + if self.pendingcr: + flag |= 1 + return buf, flag + + def setstate(self, state): + buf, flag = state + self.pendingcr = bool(flag & 1) + if self.decoder is not None: + self.decoder.setstate((buf, flag >> 1)) + + def reset(self): + self.seennl = 0 + self.pendingcr = False + if self.decoder is not None: + self.decoder.reset() + + _LF = 1 + _CR = 2 + _CRLF = 4 + + @property + def newlines(self): + return (None, + "\n", + "\r", + ("\r", "\n"), + "\r\n", + ("\n", "\r\n"), + ("\r", "\r\n"), + ("\r", "\n", "\r\n") + )[self.seennl] + + +class TextIOWrapper(TextIOBase): + + r"""Character and line based layer over a BufferedIOBase object, buffer. + + encoding gives the name of the encoding that the stream will be + decoded or encoded with. It defaults to locale.getencoding(). + + errors determines the strictness of encoding and decoding (see the + codecs.register) and defaults to "strict". + + newline can be None, '', '\n', '\r', or '\r\n'. It controls the + handling of line endings. If it is None, universal newlines is + enabled. With this enabled, on input, the lines endings '\n', '\r', + or '\r\n' are translated to '\n' before being returned to the + caller. Conversely, on output, '\n' is translated to the system + default line separator, os.linesep. If newline is any other of its + legal values, that newline becomes the newline when the file is read + and it is returned untranslated. On output, '\n' is converted to the + newline. + + If line_buffering is True, a call to flush is implied when a call to + write contains a newline character. + """ + + _CHUNK_SIZE = 2048 + + # Initialize _buffer as soon as possible since it's used by __del__() + # which calls close() + _buffer = None + + # The write_through argument has no effect here since this + # implementation always writes through. The argument is present only + # so that the signature can match the signature of the C version. + def __init__(self, buffer, encoding=None, errors=None, newline=None, + line_buffering=False, write_through=False): + self._check_newline(newline) + encoding = text_encoding(encoding) + + if encoding == "locale": + encoding = self._get_locale_encoding() + + if not isinstance(encoding, str): + raise ValueError("invalid encoding: %r" % encoding) + + if not codecs.lookup(encoding)._is_text_encoding: + msg = "%r is not a text encoding" + raise LookupError(msg % encoding) + + if errors is None: + errors = "strict" + else: + if not isinstance(errors, str): + raise ValueError("invalid errors: %r" % errors) + if _CHECK_ERRORS: + codecs.lookup_error(errors) + + self._buffer = buffer + self._decoded_chars = '' # buffer for text returned from decoder + self._decoded_chars_used = 0 # offset into _decoded_chars for read() + self._snapshot = None # info for reconstructing decoder state + self._seekable = self._telling = self.buffer.seekable() + self._has_read1 = hasattr(self.buffer, 'read1') + self._configure(encoding, errors, newline, + line_buffering, write_through) + + def _check_newline(self, newline): + if newline is not None and not isinstance(newline, str): + raise TypeError("illegal newline type: %r" % (type(newline),)) + if newline not in (None, "", "\n", "\r", "\r\n"): + raise ValueError("illegal newline value: %r" % (newline,)) + + def _configure(self, encoding=None, errors=None, newline=None, + line_buffering=False, write_through=False): + self._encoding = encoding + self._errors = errors + self._encoder = None + self._decoder = None + self._b2cratio = 0.0 + + self._readuniversal = not newline + self._readtranslate = newline is None + self._readnl = newline + self._writetranslate = newline != '' + self._writenl = newline or os.linesep + + self._line_buffering = line_buffering + self._write_through = write_through + + # don't write a BOM in the middle of a file + if self._seekable and self.writable(): + position = self.buffer.tell() + if position != 0: + try: + self._get_encoder().setstate(0) + except LookupError: + # Sometimes the encoder doesn't exist + pass + + # self._snapshot is either None, or a tuple (dec_flags, next_input) + # where dec_flags is the second (integer) item of the decoder state + # and next_input is the chunk of input bytes that comes next after the + # snapshot point. We use this to reconstruct decoder states in tell(). + + # Naming convention: + # - "bytes_..." for integer variables that count input bytes + # - "chars_..." for integer variables that count decoded characters + + def __repr__(self): + result = "<{}.{}".format(self.__class__.__module__, + self.__class__.__qualname__) + try: + name = self.name + except AttributeError: + pass + else: + result += " name={0!r}".format(name) + try: + mode = self.mode + except AttributeError: + pass + else: + result += " mode={0!r}".format(mode) + return result + " encoding={0!r}>".format(self.encoding) + + @property + def encoding(self): + return self._encoding + + @property + def errors(self): + return self._errors + + @property + def line_buffering(self): + return self._line_buffering + + @property + def write_through(self): + return self._write_through + + @property + def buffer(self): + return self._buffer + + def reconfigure(self, *, + encoding=None, errors=None, newline=Ellipsis, + line_buffering=None, write_through=None): + """Reconfigure the text stream with new parameters. + + This also flushes the stream. + """ + if (self._decoder is not None + and (encoding is not None or errors is not None + or newline is not Ellipsis)): + raise UnsupportedOperation( + "It is not possible to set the encoding or newline of stream " + "after the first read") + + if errors is None: + if encoding is None: + errors = self._errors + else: + errors = 'strict' + elif not isinstance(errors, str): + raise TypeError("invalid errors: %r" % errors) + + if encoding is None: + encoding = self._encoding + else: + if not isinstance(encoding, str): + raise TypeError("invalid encoding: %r" % encoding) + if encoding == "locale": + encoding = self._get_locale_encoding() + + if newline is Ellipsis: + newline = self._readnl + self._check_newline(newline) + + if line_buffering is None: + line_buffering = self.line_buffering + if write_through is None: + write_through = self.write_through + + self.flush() + self._configure(encoding, errors, newline, + line_buffering, write_through) + + def seekable(self): + if self.closed: + raise ValueError("I/O operation on closed file.") + return self._seekable + + def readable(self): + return self.buffer.readable() + + def writable(self): + return self.buffer.writable() + + def flush(self): + self.buffer.flush() + self._telling = self._seekable + + def close(self): + if self.buffer is not None and not self.closed: + try: + self.flush() + finally: + self.buffer.close() + + @property + def closed(self): + return self.buffer.closed + + @property + def name(self): + return self.buffer.name + + def fileno(self): + return self.buffer.fileno() + + def isatty(self): + return self.buffer.isatty() + + def write(self, s): + 'Write data, where s is a str' + if self.closed: + raise ValueError("write to closed file") + if not isinstance(s, str): + raise TypeError("can't write %s to text stream" % + s.__class__.__name__) + length = len(s) + haslf = (self._writetranslate or self._line_buffering) and "\n" in s + if haslf and self._writetranslate and self._writenl != "\n": + s = s.replace("\n", self._writenl) + encoder = self._encoder or self._get_encoder() + # XXX What if we were just reading? + b = encoder.encode(s) + self.buffer.write(b) + if self._line_buffering and (haslf or "\r" in s): + self.flush() + if self._snapshot is not None: + self._set_decoded_chars('') + self._snapshot = None + if self._decoder: + self._decoder.reset() + return length + + def _get_encoder(self): + make_encoder = codecs.getincrementalencoder(self._encoding) + self._encoder = make_encoder(self._errors) + return self._encoder + + def _get_decoder(self): + make_decoder = codecs.getincrementaldecoder(self._encoding) + decoder = make_decoder(self._errors) + if self._readuniversal: + decoder = IncrementalNewlineDecoder(decoder, self._readtranslate) + self._decoder = decoder + return decoder + + # The following three methods implement an ADT for _decoded_chars. + # Text returned from the decoder is buffered here until the client + # requests it by calling our read() or readline() method. + def _set_decoded_chars(self, chars): + """Set the _decoded_chars buffer.""" + self._decoded_chars = chars + self._decoded_chars_used = 0 + + def _get_decoded_chars(self, n=None): + """Advance into the _decoded_chars buffer.""" + offset = self._decoded_chars_used + if n is None: + chars = self._decoded_chars[offset:] + else: + chars = self._decoded_chars[offset:offset + n] + self._decoded_chars_used += len(chars) + return chars + + def _get_locale_encoding(self): + try: + import locale + except ImportError: + # Importing locale may fail if Python is being built + return "utf-8" + else: + return locale.getencoding() + + def _rewind_decoded_chars(self, n): + """Rewind the _decoded_chars buffer.""" + if self._decoded_chars_used < n: + raise AssertionError("rewind decoded_chars out of bounds") + self._decoded_chars_used -= n + + def _read_chunk(self): + """ + Read and decode the next chunk of data from the BufferedReader. + """ + + # The return value is True unless EOF was reached. The decoded + # string is placed in self._decoded_chars (replacing its previous + # value). The entire input chunk is sent to the decoder, though + # some of it may remain buffered in the decoder, yet to be + # converted. + + if self._decoder is None: + raise ValueError("no decoder") + + if self._telling: + # To prepare for tell(), we need to snapshot a point in the + # file where the decoder's input buffer is empty. + + dec_buffer, dec_flags = self._decoder.getstate() + # Given this, we know there was a valid snapshot point + # len(dec_buffer) bytes ago with decoder state (b'', dec_flags). + + # Read a chunk, decode it, and put the result in self._decoded_chars. + if self._has_read1: + input_chunk = self.buffer.read1(self._CHUNK_SIZE) + else: + input_chunk = self.buffer.read(self._CHUNK_SIZE) + eof = not input_chunk + decoded_chars = self._decoder.decode(input_chunk, eof) + self._set_decoded_chars(decoded_chars) + if decoded_chars: + self._b2cratio = len(input_chunk) / len(self._decoded_chars) + else: + self._b2cratio = 0.0 + + if self._telling: + # At the snapshot point, len(dec_buffer) bytes before the read, + # the next input to be decoded is dec_buffer + input_chunk. + self._snapshot = (dec_flags, dec_buffer + input_chunk) + + return not eof + + def _pack_cookie(self, position, dec_flags=0, + bytes_to_feed=0, need_eof=False, chars_to_skip=0): + # The meaning of a tell() cookie is: seek to position, set the + # decoder flags to dec_flags, read bytes_to_feed bytes, feed them + # into the decoder with need_eof as the EOF flag, then skip + # chars_to_skip characters of the decoded result. For most simple + # decoders, tell() will often just give a byte offset in the file. + return (position | (dec_flags<<64) | (bytes_to_feed<<128) | + (chars_to_skip<<192) | bool(need_eof)<<256) + + def _unpack_cookie(self, bigint): + rest, position = divmod(bigint, 1<<64) + rest, dec_flags = divmod(rest, 1<<64) + rest, bytes_to_feed = divmod(rest, 1<<64) + need_eof, chars_to_skip = divmod(rest, 1<<64) + return position, dec_flags, bytes_to_feed, bool(need_eof), chars_to_skip + + def tell(self): + if not self._seekable: + raise UnsupportedOperation("underlying stream is not seekable") + if not self._telling: + raise OSError("telling position disabled by next() call") + self.flush() + position = self.buffer.tell() + decoder = self._decoder + if decoder is None or self._snapshot is None: + if self._decoded_chars: + # This should never happen. + raise AssertionError("pending decoded text") + return position + + # Skip backward to the snapshot point (see _read_chunk). + dec_flags, next_input = self._snapshot + position -= len(next_input) + + # How many decoded characters have been used up since the snapshot? + chars_to_skip = self._decoded_chars_used + if chars_to_skip == 0: + # We haven't moved from the snapshot point. + return self._pack_cookie(position, dec_flags) + + # Starting from the snapshot position, we will walk the decoder + # forward until it gives us enough decoded characters. + saved_state = decoder.getstate() + try: + # Fast search for an acceptable start point, close to our + # current pos. + # Rationale: calling decoder.decode() has a large overhead + # regardless of chunk size; we want the number of such calls to + # be O(1) in most situations (common decoders, sensible input). + # Actually, it will be exactly 1 for fixed-size codecs (all + # 8-bit codecs, also UTF-16 and UTF-32). + skip_bytes = int(self._b2cratio * chars_to_skip) + skip_back = 1 + assert skip_bytes <= len(next_input) + while skip_bytes > 0: + decoder.setstate((b'', dec_flags)) + # Decode up to temptative start point + n = len(decoder.decode(next_input[:skip_bytes])) + if n <= chars_to_skip: + b, d = decoder.getstate() + if not b: + # Before pos and no bytes buffered in decoder => OK + dec_flags = d + chars_to_skip -= n + break + # Skip back by buffered amount and reset heuristic + skip_bytes -= len(b) + skip_back = 1 + else: + # We're too far ahead, skip back a bit + skip_bytes -= skip_back + skip_back = skip_back * 2 + else: + skip_bytes = 0 + decoder.setstate((b'', dec_flags)) + + # Note our initial start point. + start_pos = position + skip_bytes + start_flags = dec_flags + if chars_to_skip == 0: + # We haven't moved from the start point. + return self._pack_cookie(start_pos, start_flags) + + # Feed the decoder one byte at a time. As we go, note the + # nearest "safe start point" before the current location + # (a point where the decoder has nothing buffered, so seek() + # can safely start from there and advance to this location). + bytes_fed = 0 + need_eof = False + # Chars decoded since `start_pos` + chars_decoded = 0 + for i in range(skip_bytes, len(next_input)): + bytes_fed += 1 + chars_decoded += len(decoder.decode(next_input[i:i+1])) + dec_buffer, dec_flags = decoder.getstate() + if not dec_buffer and chars_decoded <= chars_to_skip: + # Decoder buffer is empty, so this is a safe start point. + start_pos += bytes_fed + chars_to_skip -= chars_decoded + start_flags, bytes_fed, chars_decoded = dec_flags, 0, 0 + if chars_decoded >= chars_to_skip: + break + else: + # We didn't get enough decoded data; signal EOF to get more. + chars_decoded += len(decoder.decode(b'', final=True)) + need_eof = True + if chars_decoded < chars_to_skip: + raise OSError("can't reconstruct logical file position") + + # The returned cookie corresponds to the last safe start point. + return self._pack_cookie( + start_pos, start_flags, bytes_fed, need_eof, chars_to_skip) + finally: + decoder.setstate(saved_state) + + def truncate(self, pos=None): + self.flush() + if pos is None: + pos = self.tell() + return self.buffer.truncate(pos) + + def detach(self): + if self.buffer is None: + raise ValueError("buffer is already detached") + self.flush() + buffer = self._buffer + self._buffer = None + return buffer + + def seek(self, cookie, whence=0): + def _reset_encoder(position): + """Reset the encoder (merely useful for proper BOM handling)""" + try: + encoder = self._encoder or self._get_encoder() + except LookupError: + # Sometimes the encoder doesn't exist + pass + else: + if position != 0: + encoder.setstate(0) + else: + encoder.reset() + + if self.closed: + raise ValueError("tell on closed file") + if not self._seekable: + raise UnsupportedOperation("underlying stream is not seekable") + if whence == SEEK_CUR: + if cookie != 0: + raise UnsupportedOperation("can't do nonzero cur-relative seeks") + # Seeking to the current position should attempt to + # sync the underlying buffer with the current position. + whence = 0 + cookie = self.tell() + elif whence == SEEK_END: + if cookie != 0: + raise UnsupportedOperation("can't do nonzero end-relative seeks") + self.flush() + position = self.buffer.seek(0, whence) + self._set_decoded_chars('') + self._snapshot = None + if self._decoder: + self._decoder.reset() + _reset_encoder(position) + return position + if whence != 0: + raise ValueError("unsupported whence (%r)" % (whence,)) + if cookie < 0: + raise ValueError("negative seek position %r" % (cookie,)) + self.flush() + + # The strategy of seek() is to go back to the safe start point + # and replay the effect of read(chars_to_skip) from there. + start_pos, dec_flags, bytes_to_feed, need_eof, chars_to_skip = \ + self._unpack_cookie(cookie) + + # Seek back to the safe start point. + self.buffer.seek(start_pos) + self._set_decoded_chars('') + self._snapshot = None + + # Restore the decoder to its state from the safe start point. + if cookie == 0 and self._decoder: + self._decoder.reset() + elif self._decoder or dec_flags or chars_to_skip: + self._decoder = self._decoder or self._get_decoder() + self._decoder.setstate((b'', dec_flags)) + self._snapshot = (dec_flags, b'') + + if chars_to_skip: + # Just like _read_chunk, feed the decoder and save a snapshot. + input_chunk = self.buffer.read(bytes_to_feed) + self._set_decoded_chars( + self._decoder.decode(input_chunk, need_eof)) + self._snapshot = (dec_flags, input_chunk) + + # Skip chars_to_skip of the decoded characters. + if len(self._decoded_chars) < chars_to_skip: + raise OSError("can't restore logical file position") + self._decoded_chars_used = chars_to_skip + + _reset_encoder(cookie) + return cookie + + def read(self, size=None): + self._checkReadable() + if size is None: + size = -1 + else: + try: + size_index = size.__index__ + except AttributeError: + raise TypeError(f"{size!r} is not an integer") + else: + size = size_index() + decoder = self._decoder or self._get_decoder() + if size < 0: + chunk = self.buffer.read() + if chunk is None: + raise BlockingIOError("Read returned None.") + # Read everything. + result = (self._get_decoded_chars() + + decoder.decode(chunk, final=True)) + if self._snapshot is not None: + self._set_decoded_chars('') + self._snapshot = None + return result + else: + # Keep reading chunks until we have size characters to return. + eof = False + result = self._get_decoded_chars(size) + while len(result) < size and not eof: + eof = not self._read_chunk() + result += self._get_decoded_chars(size - len(result)) + return result + + def __next__(self): + self._telling = False + line = self.readline() + if not line: + self._snapshot = None + self._telling = self._seekable + raise StopIteration + return line + + def readline(self, size=None): + if self.closed: + raise ValueError("read from closed file") + if size is None: + size = -1 + else: + try: + size_index = size.__index__ + except AttributeError: + raise TypeError(f"{size!r} is not an integer") + else: + size = size_index() + + # Grab all the decoded text (we will rewind any extra bits later). + line = self._get_decoded_chars() + + start = 0 + # Make the decoder if it doesn't already exist. + if not self._decoder: + self._get_decoder() + + pos = endpos = None + while True: + if self._readtranslate: + # Newlines are already translated, only search for \n + pos = line.find('\n', start) + if pos >= 0: + endpos = pos + 1 + break + else: + start = len(line) + + elif self._readuniversal: + # Universal newline search. Find any of \r, \r\n, \n + # The decoder ensures that \r\n are not split in two pieces + + # In C we'd look for these in parallel of course. + nlpos = line.find("\n", start) + crpos = line.find("\r", start) + if crpos == -1: + if nlpos == -1: + # Nothing found + start = len(line) + else: + # Found \n + endpos = nlpos + 1 + break + elif nlpos == -1: + # Found lone \r + endpos = crpos + 1 + break + elif nlpos < crpos: + # Found \n + endpos = nlpos + 1 + break + elif nlpos == crpos + 1: + # Found \r\n + endpos = crpos + 2 + break + else: + # Found \r + endpos = crpos + 1 + break + else: + # non-universal + pos = line.find(self._readnl) + if pos >= 0: + endpos = pos + len(self._readnl) + break + + if size >= 0 and len(line) >= size: + endpos = size # reached length size + break + + # No line ending seen yet - get more data' + while self._read_chunk(): + if self._decoded_chars: + break + if self._decoded_chars: + line += self._get_decoded_chars() + else: + # end of file + self._set_decoded_chars('') + self._snapshot = None + return line + + if size >= 0 and endpos > size: + endpos = size # don't exceed size + + # Rewind _decoded_chars to just after the line ending we found. + self._rewind_decoded_chars(len(line) - endpos) + return line[:endpos] + + @property + def newlines(self): + return self._decoder.newlines if self._decoder else None + + def _dealloc_warn(self, source): + if dealloc_warn := getattr(self.buffer, "_dealloc_warn", None): + dealloc_warn(source) + + +class StringIO(TextIOWrapper): + """Text I/O implementation using an in-memory buffer. + + The initial_value argument sets the value of object. The newline + argument is like the one of TextIOWrapper's constructor. + """ + + def __init__(self, initial_value="", newline="\n"): + super(StringIO, self).__init__(BytesIO(), + encoding="utf-8", + errors="surrogatepass", + newline=newline) + # Issue #5645: make universal newlines semantics the same as in the + # C version, even under Windows. + if newline is None: + self._writetranslate = False + if initial_value is not None: + if not isinstance(initial_value, str): + raise TypeError("initial_value must be str or None, not {0}" + .format(type(initial_value).__name__)) + self.write(initial_value) + self.seek(0) + + def getvalue(self): + self.flush() + decoder = self._decoder or self._get_decoder() + old_state = decoder.getstate() + decoder.reset() + try: + return decoder.decode(self.buffer.getvalue(), final=True) + finally: + decoder.setstate(old_state) + + def __repr__(self): + # TextIOWrapper tells the encoding in its repr. In StringIO, + # that's an implementation detail. + return object.__repr__(self) + + @property + def errors(self): + return None + + @property + def encoding(self): + return None + + def detach(self): + # This doesn't make sense on StringIO. + self._unsupported("detach") diff --git a/stdlib/_sitebuiltins.py b/stdlib/_sitebuiltins.py new file mode 100644 index 000000000..81b36efc6 --- /dev/null +++ b/stdlib/_sitebuiltins.py @@ -0,0 +1,91 @@ +""" +The objects used by the site module to add custom builtins. +""" + +# Those objects are almost immortal and they keep a reference to their module +# globals. Defining them in the site module would keep too many references +# alive. +# Note this means this module should also avoid keep things alive in its +# globals. + +import sys + +class Quitter(object): + def __init__(self, name, eof): + self.name = name + self.eof = eof + def __repr__(self): + return 'Use %s() or %s to exit' % (self.name, self.eof) + def __call__(self, code=None): + # Shells like IDLE catch the SystemExit, but listen when their + # stdin wrapper is closed. + try: + sys.stdin.close() + except: + pass + raise SystemExit(code) + + +class _Printer(object): + """interactive prompt objects for printing the license text, a list of + contributors and the copyright notice.""" + + MAXLINES = 23 + + def __init__(self, name, data, files=(), dirs=()): + import os + self.__name = name + self.__data = data + self.__lines = [] + self.__filenames = [os.path.join(dir, filename) + for dir in dirs + for filename in files] + + def __setup(self): + if self.__lines: + return + data = None + for filename in self.__filenames: + try: + with open(filename, encoding='utf-8') as fp: + data = fp.read() + break + except OSError: + pass + if not data: + data = self.__data + self.__lines = data.split('\n') + self.__linecnt = len(self.__lines) + + def __repr__(self): + self.__setup() + if len(self.__lines) <= self.MAXLINES: + return "\n".join(self.__lines) + else: + return "Type %s() to see the full %s text" % ((self.__name,)*2) + + def __call__(self): + from _pyrepl.pager import get_pager + self.__setup() + + pager = get_pager() + text = "\n".join(self.__lines) + pager(text, title=self.__name) + + +class _Helper(object): + """Define the builtin 'help'. + + This is a wrapper around pydoc.help that provides a helpful message + when 'help' is typed at the Python interactive prompt. + + Calling help() at the Python prompt starts an interactive help session. + Calling help(thing) prints help for the python object 'thing'. + """ + + def __repr__(self): + return "Type help() for interactive help, " \ + "or help(object) for help about object." + def __call__(self, *args, **kwds): + import pydoc + return pydoc.help(*args, **kwds) diff --git a/stdlib/importlib/__init__.py b/stdlib/importlib/__init__.py index 6b3e6d195..a7d57561e 100644 --- a/stdlib/importlib/__init__.py +++ b/stdlib/importlib/__init__.py @@ -1,49 +1,136 @@ -"""importlib: gopy-side stub. +"""A pure Python implementation of import.""" +__all__ = ['__import__', 'import_module', 'invalidate_caches', 'reload'] -CPython ships a multi-module package whose top-level __init__.py -re-exports a handful of names from ._bootstrap and ._bootstrap_external. -gopy's import system is the Go side of the runtime so the bootstrap -plumbing has no analogue here. Until the full importlib port lands, -this module exists so `import importlib` and `import importlib.machinery` -resolve for downstream consumers (notably inspect.py, which only reads -the SUFFIXES constants and all_suffixes()). +# Bootstrap help ##################################################### -CPython: Lib/importlib/__init__.py -""" +# Until bootstrapping is complete, DO NOT import any modules that attempt +# to import importlib._bootstrap (directly or indirectly). Since this +# partially initialised package would be present in sys.modules, those +# modules would get an uninitialised copy of the source version, instead +# of a fully initialised version (either the frozen one or the one +# initialised below if the frozen one is not available). +import _imp # Just the builtin component, NOT the full Python module +import sys -from . import machinery # bind importlib.machinery attribute eagerly +try: + import _frozen_importlib as _bootstrap +except ImportError: + from . import _bootstrap + _bootstrap._setup(sys, _imp) +else: + # importlib._bootstrap is the built-in import, ensure we don't create + # a second copy of the module. + _bootstrap.__name__ = 'importlib._bootstrap' + _bootstrap.__package__ = 'importlib' + try: + _bootstrap.__file__ = __file__.replace('__init__.py', '_bootstrap.py') + except NameError: + # __file__ is not guaranteed to be defined, e.g. if this code gets + # frozen by a tool like cx_Freeze. + pass + sys.modules['importlib._bootstrap'] = _bootstrap -__all__ = ['import_module', 'invalidate_caches', 'machinery', 'reload'] +try: + import _frozen_importlib_external as _bootstrap_external +except ImportError: + from . import _bootstrap_external + _bootstrap_external._set_bootstrap_module(_bootstrap) + _bootstrap._bootstrap_external = _bootstrap_external +else: + _bootstrap_external.__name__ = 'importlib._bootstrap_external' + _bootstrap_external.__package__ = 'importlib' + try: + _bootstrap_external.__file__ = __file__.replace('__init__.py', '_bootstrap_external.py') + except NameError: + # __file__ is not guaranteed to be defined, e.g. if this code gets + # frozen by a tool like cx_Freeze. + pass + sys.modules['importlib._bootstrap_external'] = _bootstrap_external + +# To simplify imports in test code +_pack_uint32 = _bootstrap_external._pack_uint32 +_unpack_uint32 = _bootstrap_external._unpack_uint32 + +# Fully bootstrapped at this point, import whatever you like, circular +# dependencies and startup overhead minimisation permitting :) + + +# Public API ######################################################### + +from ._bootstrap import __import__ + + +def invalidate_caches(): + """Call the invalidate_caches() method on all meta path finders stored in + sys.meta_path (where implemented).""" + for finder in sys.meta_path: + if hasattr(finder, 'invalidate_caches'): + finder.invalidate_caches() def import_module(name, package=None): - """Import a module by name. Mirrors importlib.import_module.""" - if package is not None and name.startswith('.'): - name = _resolve_name(name, package) - __import__(name) - import sys - return sys.modules[name] + """Import a module. + The 'package' argument is required when performing a relative import. It + specifies the package to use as the anchor point from which to resolve the + relative import to an absolute import. -def _resolve_name(name, package): + """ level = 0 - while level < len(name) and name[level] == '.': - level += 1 - if level == 0: - return name - bits = package.rsplit('.', level - 1) - if len(bits) < level: - raise ImportError("attempted relative import beyond top-level package") - base = bits[0] - return '{}.{}'.format(base, name[level:]) if name[level:] else base + if name.startswith('.'): + if not package: + raise TypeError("the 'package' argument is required to perform a " + f"relative import for {name!r}") + for character in name: + if character != '.': + break + level += 1 + return _bootstrap._gcd_import(name[level:], package, level) -def invalidate_caches(): - """No-op: gopy's import system has no path-importer cache to flush.""" - return None +_RELOADING = {} def reload(module): - """Reload a module. The actual machinery is wired up Go-side.""" - import _imp - return _imp.reload(module) + """Reload the module and return it. + + The module must have been successfully imported before. + + """ + try: + name = module.__spec__.name + except AttributeError: + try: + name = module.__name__ + except AttributeError: + raise TypeError("reload() argument must be a module") from None + + if sys.modules.get(name) is not module: + raise ImportError(f"module {name} not in sys.modules", name=name) + if name in _RELOADING: + return _RELOADING[name] + _RELOADING[name] = module + try: + parent_name = name.rpartition('.')[0] + if parent_name: + try: + parent = sys.modules[parent_name] + except KeyError: + raise ImportError(f"parent {parent_name!r} not in sys.modules", + name=parent_name) from None + else: + pkgpath = parent.__path__ + else: + pkgpath = None + target = module + spec = module.__spec__ = _bootstrap._find_spec(name, pkgpath, target) + if spec is None: + raise ModuleNotFoundError(f"spec not found for the module {name!r}", name=name) + _bootstrap._exec(spec, module) + # The module may have replaced itself in sys.modules! + return sys.modules[name] + finally: + try: + del _RELOADING[name] + except KeyError: + pass diff --git a/stdlib/importlib/_abc.py b/stdlib/importlib/_abc.py new file mode 100644 index 000000000..693b46611 --- /dev/null +++ b/stdlib/importlib/_abc.py @@ -0,0 +1,39 @@ +"""Subset of importlib.abc used to reduce importlib.util imports.""" +from . import _bootstrap +import abc + + +class Loader(metaclass=abc.ABCMeta): + + """Abstract base class for import loaders.""" + + def create_module(self, spec): + """Return a module to initialize and into which to load. + + This method should raise ImportError if anything prevents it + from creating a new module. It may return None to indicate + that the spec should create the new module. + """ + # By default, defer to default semantics for the new module. + return None + + # We don't define exec_module() here since that would break + # hasattr checks we do to support backward compatibility. + + def load_module(self, fullname): + """Return the loaded module. + + The module must be added to sys.modules and have import-related + attributes set properly. The fullname is a str. + + ImportError is raised on failure. + + This method is deprecated in favor of loader.exec_module(). If + exec_module() exists then it is used to provide a backwards-compatible + functionality for this method. + + """ + if not hasattr(self, 'exec_module'): + raise ImportError + # Warning implemented in _load_module_shim(). + return _bootstrap._load_module_shim(self, fullname) diff --git a/stdlib/importlib/_bootstrap_external.py b/stdlib/importlib/_bootstrap_external.py index 6b6686eac..6a828ae75 100644 --- a/stdlib/importlib/_bootstrap_external.py +++ b/stdlib/importlib/_bootstrap_external.py @@ -1,21 +1,43 @@ -"""Pyc-writer slice of CPython's Lib/importlib/_bootstrap_external.py. +"""Core implementation of path-based import. -The vendored file ships only the parts py_compile needs: MAGIC_NUMBER, -_pack_uint32 / _unpack_uint*, _calc_mode, _write_atomic, _classify_pyc -plus the two pyc-data builders _code_to_timestamp_pyc / -_code_to_hash_pyc. The path / loader / spec scaffolding lives in the -companion util.py stub until spec 1711 wires the full module. +This module is NOT meant to be directly imported! It has been designed such +that it can be bootstrapped into Python as the implementation of import. As +such it requires the injection of specific modules and attributes in order to +work. One should use importlib as the public-facing version of this module. -CPython: Lib/importlib/_bootstrap_external.py """ +# IMPORTANT: Whenever making changes to this module, be sure to run a top-level +# `make regen-importlib` followed by `make` in order to get the frozen version +# of the module updated. Not doing so will result in the Makefile to fail for +# all others who don't have a ./python around to freeze the module in the early +# stages of compilation. +# + +# See importlib._setup() for what is injected into the global namespace. + +# When editing this code be aware that code executed at import time CANNOT +# reference any injected objects! This includes not only global code but also +# anything specified at the class level. + +# Module injected manually by _set_bootstrap_module() +_bootstrap = None +# Import builtin modules import _imp -import marshal -import os as _os +import _io import sys +import _warnings +import marshal _MS_WINDOWS = (sys.platform == 'win32') +if _MS_WINDOWS: + import nt as _os + import winreg +else: + import posix as _os + + if _MS_WINDOWS: path_separators = ['\\', '/'] else: @@ -25,45 +47,168 @@ path_sep = path_separators[0] path_sep_tuple = tuple(path_separators) path_separators = ''.join(path_separators) +_pathseps_with_colon = {f':{s}' for s in path_separators} + + +# Bootstrap-related code ###################################################### +_CASE_INSENSITIVE_PLATFORMS_STR_KEY = 'win', +_CASE_INSENSITIVE_PLATFORMS_BYTES_KEY = 'cygwin', 'darwin', 'ios', 'tvos', 'watchos' +_CASE_INSENSITIVE_PLATFORMS = (_CASE_INSENSITIVE_PLATFORMS_BYTES_KEY + + _CASE_INSENSITIVE_PLATFORMS_STR_KEY) + + +def _make_relax_case(): + if sys.platform.startswith(_CASE_INSENSITIVE_PLATFORMS): + if sys.platform.startswith(_CASE_INSENSITIVE_PLATFORMS_STR_KEY): + key = 'PYTHONCASEOK' + else: + key = b'PYTHONCASEOK' + + def _relax_case(): + """True if filenames must be checked case-insensitively and ignore environment flags are not set.""" + return not sys.flags.ignore_environment and key in _os.environ + else: + def _relax_case(): + """True if filenames must be checked case-insensitively.""" + return False + return _relax_case + +_relax_case = _make_relax_case() -# CPython: Lib/importlib/_bootstrap_external.py:79 _pack_uint32 def _pack_uint32(x): """Convert a 32-bit integer to little-endian.""" return (int(x) & 0xFFFFFFFF).to_bytes(4, 'little') -# CPython: Lib/importlib/_bootstrap_external.py:84 _unpack_uint64 def _unpack_uint64(data): """Convert 8 bytes in little-endian to an integer.""" assert len(data) == 8 return int.from_bytes(data, 'little') - -# CPython: Lib/importlib/_bootstrap_external.py:89 _unpack_uint32 def _unpack_uint32(data): """Convert 4 bytes in little-endian to an integer.""" assert len(data) == 4 return int.from_bytes(data, 'little') - -# CPython: Lib/importlib/_bootstrap_external.py:94 _unpack_uint16 def _unpack_uint16(data): """Convert 2 bytes in little-endian to an integer.""" assert len(data) == 2 return int.from_bytes(data, 'little') -# CPython: Lib/importlib/_bootstrap_external.py:200 _write_atomic +if _MS_WINDOWS: + def _path_join(*path_parts): + """Replacement for os.path.join().""" + if not path_parts: + return "" + if len(path_parts) == 1: + return path_parts[0] + root = "" + path = [] + for new_root, tail in map(_os._path_splitroot, path_parts): + if new_root.startswith(path_sep_tuple) or new_root.endswith(path_sep_tuple): + root = new_root.rstrip(path_separators) or root + path = [path_sep + tail] + elif new_root.endswith(':'): + if root.casefold() != new_root.casefold(): + # Drive relative paths have to be resolved by the OS, so we reset the + # tail but do not add a path_sep prefix. + root = new_root + path = [tail] + else: + path.append(tail) + else: + root = new_root or root + path.append(tail) + path = [p.rstrip(path_separators) for p in path if p] + if len(path) == 1 and not path[0]: + # Avoid losing the root's trailing separator when joining with nothing + return root + path_sep + return root + path_sep.join(path) + +else: + def _path_join(*path_parts): + """Replacement for os.path.join().""" + return path_sep.join([part.rstrip(path_separators) + for part in path_parts if part]) + + +def _path_split(path): + """Replacement for os.path.split().""" + i = max(path.rfind(p) for p in path_separators) + if i < 0: + return '', path + return path[:i], path[i + 1:] + + +def _path_stat(path): + """Stat the path. + + Made a separate function to make it easier to override in experiments + (e.g. cache stat results). + + """ + return _os.stat(path) + + +def _path_is_mode_type(path, mode): + """Test whether the path is the specified mode type.""" + try: + stat_info = _path_stat(path) + except OSError: + return False + return (stat_info.st_mode & 0o170000) == mode + + +def _path_isfile(path): + """Replacement for os.path.isfile.""" + return _path_is_mode_type(path, 0o100000) + + +def _path_isdir(path): + """Replacement for os.path.isdir.""" + if not path: + path = _os.getcwd() + return _path_is_mode_type(path, 0o040000) + + +if _MS_WINDOWS: + def _path_isabs(path): + """Replacement for os.path.isabs.""" + if not path: + return False + root = _os._path_splitroot(path)[0].replace('/', '\\') + return len(root) > 1 and (root.startswith('\\\\') or root.endswith('\\')) + +else: + def _path_isabs(path): + """Replacement for os.path.isabs.""" + return path.startswith(path_separators) + + +def _path_abspath(path): + """Replacement for os.path.abspath.""" + if not _path_isabs(path): + for sep in path_separators: + path = path.removeprefix(f".{sep}") + return _path_join(_os.getcwd(), path) + else: + return path + + def _write_atomic(path, data, mode=0o666): """Best-effort function to write data to a path atomically. Be prepared to handle a FileExistsError if concurrent writing of the temporary file is attempted.""" + # id() is used to generate a pseudo-random filename. path_tmp = f'{path}.{id(path)}' fd = _os.open(path_tmp, _os.O_EXCL | _os.O_CREAT | _os.O_WRONLY, mode & 0o666) try: - with open(fd, 'wb') as file: + # We first write data to a temporary file, and then use os.replace() to + # perform an atomic rename. + with _io.open(fd, 'wb') as file: file.write(data) _os.replace(path_tmp, path) except OSError: @@ -74,21 +219,169 @@ def _write_atomic(path, data, mode=0o666): raise -# CPython: Lib/importlib/_bootstrap_external.py:224 MAGIC_NUMBER +_code_type = type(_write_atomic.__code__) + MAGIC_NUMBER = _imp.pyc_magic_number_token.to_bytes(4, 'little') _PYCACHE = '__pycache__' _OPT = 'opt-' SOURCE_SUFFIXES = ['.py'] +if _MS_WINDOWS: + SOURCE_SUFFIXES.append('.pyw') + +EXTENSION_SUFFIXES = _imp.extension_suffixes() + BYTECODE_SUFFIXES = ['.pyc'] +# Deprecated. +DEBUG_BYTECODE_SUFFIXES = OPTIMIZED_BYTECODE_SUFFIXES = BYTECODE_SUFFIXES + +def cache_from_source(path, debug_override=None, *, optimization=None): + """Given the path to a .py file, return the path to its .pyc file. + + The .py file does not need to exist; this simply returns the path to the + .pyc file calculated as if the .py file were imported. + + The 'optimization' parameter controls the presumed optimization level of + the bytecode file. If 'optimization' is not None, the string representation + of the argument is taken and verified to be alphanumeric (else ValueError + is raised). + + The debug_override parameter is deprecated. If debug_override is not None, + a True value is the same as setting 'optimization' to the empty string + while a False value is equivalent to setting 'optimization' to '1'. + + If sys.implementation.cache_tag is None then NotImplementedError is raised. + + """ + if debug_override is not None: + _warnings.warn('the debug_override parameter is deprecated; use ' + "'optimization' instead", DeprecationWarning) + if optimization is not None: + message = 'debug_override or optimization must be set to None' + raise TypeError(message) + optimization = '' if debug_override else 1 + path = _os.fspath(path) + head, tail = _path_split(path) + base, sep, rest = tail.rpartition('.') + tag = sys.implementation.cache_tag + if tag is None: + raise NotImplementedError('sys.implementation.cache_tag is None') + almost_filename = ''.join([(base if base else rest), sep, tag]) + if optimization is None: + if sys.flags.optimize == 0: + optimization = '' + else: + optimization = sys.flags.optimize + optimization = str(optimization) + if optimization != '': + if not optimization.isalnum(): + raise ValueError(f'{optimization!r} is not alphanumeric') + almost_filename = f'{almost_filename}.{_OPT}{optimization}' + filename = almost_filename + BYTECODE_SUFFIXES[0] + if sys.pycache_prefix is not None: + # We need an absolute path to the py file to avoid the possibility of + # collisions within sys.pycache_prefix, if someone has two different + # `foo/bar.py` on their system and they import both of them using the + # same sys.pycache_prefix. Let's say sys.pycache_prefix is + # `C:\Bytecode`; the idea here is that if we get `Foo\Bar`, we first + # make it absolute (`C:\Somewhere\Foo\Bar`), then make it root-relative + # (`Somewhere\Foo\Bar`), so we end up placing the bytecode file in an + # unambiguous `C:\Bytecode\Somewhere\Foo\Bar\`. + head = _path_abspath(head) + + # Strip initial drive from a Windows path. We know we have an absolute + # path here, so the second part of the check rules out a POSIX path that + # happens to contain a colon at the second character. + # Slicing avoids issues with an empty (or short) `head`. + if head[1:2] == ':' and head[0:1] not in path_separators: + head = head[2:] + + # Strip initial path separator from `head` to complete the conversion + # back to a root-relative path before joining. + return _path_join( + sys.pycache_prefix, + head.lstrip(path_separators), + filename, + ) + return _path_join(head, _PYCACHE, filename) + + +def source_from_cache(path): + """Given the path to a .pyc. file, return the path to its .py file. + + The .pyc file does not need to exist; this simply returns the path to + the .py file calculated to correspond to the .pyc file. If path does + not conform to PEP 3147/488 format, ValueError will be raised. If + sys.implementation.cache_tag is None then NotImplementedError is raised. + + """ + if sys.implementation.cache_tag is None: + raise NotImplementedError('sys.implementation.cache_tag is None') + path = _os.fspath(path) + head, pycache_filename = _path_split(path) + found_in_pycache_prefix = False + if sys.pycache_prefix is not None: + stripped_path = sys.pycache_prefix.rstrip(path_separators) + if head.startswith(stripped_path + path_sep): + head = head[len(stripped_path):] + found_in_pycache_prefix = True + if not found_in_pycache_prefix: + head, pycache = _path_split(head) + if pycache != _PYCACHE: + raise ValueError(f'{_PYCACHE} not bottom-level directory in ' + f'{path!r}') + dot_count = pycache_filename.count('.') + if dot_count not in {2, 3}: + raise ValueError(f'expected only 2 or 3 dots in {pycache_filename!r}') + elif dot_count == 3: + optimization = pycache_filename.rsplit('.', 2)[-2] + if not optimization.startswith(_OPT): + raise ValueError("optimization portion of filename does not start " + f"with {_OPT!r}") + opt_level = optimization[len(_OPT):] + if not opt_level.isalnum(): + raise ValueError(f"optimization level {optimization!r} is not an " + "alphanumeric value") + base_filename = pycache_filename.partition('.')[0] + return _path_join(head, base_filename + SOURCE_SUFFIXES[0]) + + +def _get_sourcefile(bytecode_path): + """Convert a bytecode file path to a source path (if possible). + + This function exists purely for backwards-compatibility for + PyImport_ExecCodeModuleWithFilenames() in the C API. + + """ + if len(bytecode_path) == 0: + return None + rest, _, extension = bytecode_path.rpartition('.') + if not rest or extension.lower()[-3:-1] != 'py': + return bytecode_path + try: + source_path = source_from_cache(bytecode_path) + except (NotImplementedError, ValueError): + source_path = bytecode_path[:-1] + return source_path if _path_isfile(source_path) else bytecode_path + + +def _get_cached(filename): + if filename.endswith(tuple(SOURCE_SUFFIXES)): + try: + return cache_from_source(filename) + except NotImplementedError: + pass + elif filename.endswith(tuple(BYTECODE_SUFFIXES)): + return filename + else: + return None -# CPython: Lib/importlib/_bootstrap_external.py:381 _calc_mode def _calc_mode(path): """Calculate the mode permissions for a bytecode file.""" try: - mode = _os.stat(path).st_mode + mode = _path_stat(path).st_mode except OSError: mode = 0o666 # We always ensure write access so we can update cached files @@ -97,7 +390,37 @@ def _calc_mode(path): return mode -# CPython: Lib/importlib/_bootstrap_external.py:424 _classify_pyc +def _check_name(method): + """Decorator to verify that the module being requested matches the one the + loader can handle. + + The first argument (self) must define _name which the second argument is + compared against. If the comparison fails then ImportError is raised. + + """ + def _check_name_wrapper(self, name=None, *args, **kwargs): + if name is None: + name = self.name + elif self.name != name: + raise ImportError('loader for %s cannot handle %s' % + (self.name, name), name=name) + return method(self, name, *args, **kwargs) + + # FIXME: @_check_name is used to define class methods before the + # _bootstrap module is set by _set_bootstrap_module(). + if _bootstrap is not None: + _wrap = _bootstrap._wrap + else: + def _wrap(new, old): + for replace in ['__module__', '__name__', '__qualname__', '__doc__']: + if hasattr(old, replace): + setattr(new, replace, getattr(old, replace)) + new.__dict__.update(old.__dict__) + + _wrap(_check_name_wrapper, method) + return _check_name_wrapper + + def _classify_pyc(data, name, exc_details): """Perform basic validity checking of a pyc header and return the flags field, which determines how the pyc should be further validated against the source. @@ -117,9 +440,11 @@ def _classify_pyc(data, name, exc_details): magic = data[:4] if magic != MAGIC_NUMBER: message = f'bad magic number in {name!r}: {magic!r}' + _bootstrap._verbose_message('{}', message) raise ImportError(message, **exc_details) if len(data) < 16: message = f'reached EOF while reading pyc header of {name!r}' + _bootstrap._verbose_message('{}', message) raise EOFError(message) flags = _unpack_uint32(data[4:8]) # Only the first two flags are defined. @@ -129,22 +454,50 @@ def _classify_pyc(data, name, exc_details): return flags -# CPython: Lib/importlib/_bootstrap_external.py:457 _validate_timestamp_pyc def _validate_timestamp_pyc(data, source_mtime, source_size, name, exc_details): - """Validate a pyc against the source last-modified time.""" + """Validate a pyc against the source last-modified time. + + *data* is the contents of the pyc file. (Only the first 16 bytes are + required.) + + *source_mtime* is the last modified timestamp of the source file. + + *source_size* is None or the size of the source file in bytes. + + *name* is the name of the module being imported. It is used for logging. + + *exc_details* is a dictionary passed to ImportError if it raised for + improved debugging. + + An ImportError is raised if the bytecode is stale. + + """ if _unpack_uint32(data[8:12]) != (source_mtime & 0xFFFFFFFF): message = f'bytecode is stale for {name!r}' + _bootstrap._verbose_message('{}', message) raise ImportError(message, **exc_details) if (source_size is not None and _unpack_uint32(data[12:16]) != (source_size & 0xFFFFFFFF)): raise ImportError(f'bytecode is stale for {name!r}', **exc_details) -# CPython: Lib/importlib/_bootstrap_external.py:485 _validate_hash_pyc def _validate_hash_pyc(data, source_hash, name, exc_details): """Validate a hash-based pyc by checking the real source hash against the one in the pyc header. + + *data* is the contents of the pyc file. (Only the first 16 bytes are + required.) + + *source_hash* is the importlib.util.source_hash() of the source file. + + *name* is the name of the module being imported. It is used for logging. + + *exc_details* is a dictionary passed to ImportError if it raised for + improved debugging. + + An ImportError is raised if the bytecode is stale. + """ if data[8:16] != source_hash: raise ImportError( @@ -153,7 +506,19 @@ def _validate_hash_pyc(data, source_hash, name, exc_details): ) -# CPython: Lib/importlib/_bootstrap_external.py:522 _code_to_timestamp_pyc +def _compile_bytecode(data, name=None, bytecode_path=None, source_path=None): + """Compile bytecode as found in a pyc.""" + code = marshal.loads(data) + if isinstance(code, _code_type): + _bootstrap._verbose_message('code object from {!r}', bytecode_path) + if source_path is not None: + _imp._fix_co_filename(code, source_path) + return code + else: + raise ImportError(f'Non-code object in {bytecode_path!r}', + name=name, path=bytecode_path) + + def _code_to_timestamp_pyc(code, mtime=0, source_size=0): "Produce the data for a timestamp-based pyc." data = bytearray(MAGIC_NUMBER) @@ -164,7 +529,6 @@ def _code_to_timestamp_pyc(code, mtime=0, source_size=0): return data -# CPython: Lib/importlib/_bootstrap_external.py:532 _code_to_hash_pyc def _code_to_hash_pyc(code, source_hash, checked=True): "Produce the data for a hash-based pyc." data = bytearray(MAGIC_NUMBER) @@ -176,216 +540,1023 @@ def _code_to_hash_pyc(code, source_hash, checked=True): return data -# CPython exposes this as importlib.util.source_hash. We thread it -# through the same _imp builtin _bootstrap_external relies on. -# -# CPython: Lib/importlib/util.py source_hash (re-export of _imp.source_hash) -def source_hash(source_bytes): - """Return the hash of *source_bytes* as bytes.""" - return _imp.source_hash(_RAW_MAGIC_NUMBER, source_bytes) +def decode_source(source_bytes): + """Decode bytes representing source code and return the string. + Universal newline support is used in the decoding. + """ + import tokenize # To avoid bootstrap issues. + source_bytes_readline = _io.BytesIO(source_bytes).readline + encoding = tokenize.detect_encoding(source_bytes_readline) + newline_decoder = _io.IncrementalNewlineDecoder(None, True) + return newline_decoder.decode(source_bytes.decode(encoding[0])) -# _RAW_MAGIC_NUMBER mirrors CPython: the integer form of MAGIC_NUMBER is -# fed straight back into _imp.source_hash as the SipHash key. Keeping -# the conversion in one place avoids endian-swap mistakes at call sites. -# -# CPython: Lib/importlib/_bootstrap_external.py:223 _RAW_MAGIC_NUMBER -_RAW_MAGIC_NUMBER = _imp.pyc_magic_number_token +# Module specifications ####################################################### -# CPython: Lib/importlib/_bootstrap_external.py:543 decode_source -def decode_source(source_bytes): - """Decode bytes representing source code and return the string. +_POPULATE = object() + + +def spec_from_file_location(name, location=None, *, loader=None, + submodule_search_locations=_POPULATE): + """Return a module spec based on a file location. + + To indicate that the module is a package, set + submodule_search_locations to a list of directory paths. An + empty list is sufficient, though its not otherwise useful to the + import system. + + The loader must take a spec as its only __init__() arg. - Universal newline support is used in the decoding. """ - # gopy doesn't have tokenize.detect_encoding wired through this path - # yet, so fall back to utf-8 (matching what test.support feeds in). - if isinstance(source_bytes, str): - return source_bytes - return source_bytes.decode('utf-8') + if location is None: + # The caller may simply want a partially populated location- + # oriented spec. So we set the location to a bogus value and + # fill in as much as we can. + location = '' + if hasattr(loader, 'get_filename'): + # ExecutionLoader + try: + location = loader.get_filename(name) + except ImportError: + pass + else: + location = _os.fspath(location) + try: + location = _path_abspath(location) + except OSError: + pass + + # If the location is on the filesystem, but doesn't actually exist, + # we could return None here, indicating that the location is not + # valid. However, we don't have a good way of testing since an + # indirect location (e.g. a zip file or URL) will look like a + # non-existent file relative to the filesystem. + + spec = _bootstrap.ModuleSpec(name, loader, origin=location) + spec._set_fileattr = True + + # Pick a loader if one wasn't provided. + if loader is None: + for loader_class, suffixes in _get_supported_file_loaders(): + if location.endswith(tuple(suffixes)): + loader = loader_class(name, location) + spec.loader = loader + break + else: + return None + + # Set submodule_search_paths appropriately. + if submodule_search_locations is _POPULATE: + # Check the loader. + if hasattr(loader, 'is_package'): + try: + is_package = loader.is_package(name) + except ImportError: + pass + else: + if is_package: + spec.submodule_search_locations = [] + else: + spec.submodule_search_locations = submodule_search_locations + if spec.submodule_search_locations == []: + if location: + dirname = _path_split(location)[0] + spec.submodule_search_locations.append(dirname) + return spec -# CPython: Lib/importlib/_bootstrap_external.py:912 FileLoader -class FileLoader: - """Base file loader class. - The gopy port skips the readers/finders machinery and keeps only the - file-access shape py_compile needs. +def _bless_my_loader(module_globals): + """Helper function for _warnings.c + + See GH#97850 for details. """ + # 2022-10-06(warsaw): For now, this helper is only used in _warnings.c and + # that use case only has the module globals. This function could be + # extended to accept either that or a module object. However, in the + # latter case, it would be better to raise certain exceptions when looking + # at a module, which should have either a __loader__ or __spec__.loader. + # For backward compatibility, it is possible that we'll get an empty + # dictionary for the module globals, and that cannot raise an exception. + if not isinstance(module_globals, dict): + return None + + missing = object() + loader = module_globals.get('__loader__', None) + spec = module_globals.get('__spec__', missing) + + if loader is None: + if spec is missing: + # If working with a module: + # raise AttributeError('Module globals is missing a __spec__') + return None + elif spec is None: + raise ValueError('Module globals is missing a __spec__.loader') + + spec_loader = getattr(spec, 'loader', missing) + + if spec_loader in (missing, None): + if loader is None: + exc = AttributeError if spec_loader is missing else ValueError + raise exc('Module globals is missing a __spec__.loader') + _warnings.warn( + 'Module globals is missing a __spec__.loader', + DeprecationWarning) + spec_loader = loader + + assert spec_loader is not None + if loader is not None and loader != spec_loader: + _warnings.warn( + 'Module globals; __loader__ != __spec__.loader', + DeprecationWarning) + return loader + + return spec_loader + + +# Loaders ##################################################################### + +class WindowsRegistryFinder: + + """Meta path finder for modules declared in the Windows registry.""" + + REGISTRY_KEY = ( + 'Software\\Python\\PythonCore\\{sys_version}' + '\\Modules\\{fullname}') + REGISTRY_KEY_DEBUG = ( + 'Software\\Python\\PythonCore\\{sys_version}' + '\\Modules\\{fullname}\\Debug') + DEBUG_BUILD = (_MS_WINDOWS and '_d.pyd' in EXTENSION_SUFFIXES) + + @staticmethod + def _open_registry(key): + try: + return winreg.OpenKey(winreg.HKEY_CURRENT_USER, key) + except OSError: + return winreg.OpenKey(winreg.HKEY_LOCAL_MACHINE, key) + + @classmethod + def _search_registry(cls, fullname): + if cls.DEBUG_BUILD: + registry_key = cls.REGISTRY_KEY_DEBUG + else: + registry_key = cls.REGISTRY_KEY + key = registry_key.format(fullname=fullname, + sys_version='%d.%d' % sys.version_info[:2]) + try: + with cls._open_registry(key) as hkey: + filepath = winreg.QueryValue(hkey, '') + except OSError: + return None + return filepath + + @classmethod + def find_spec(cls, fullname, path=None, target=None): + _warnings.warn('importlib.machinery.WindowsRegistryFinder is ' + 'deprecated; use site configuration instead. ' + 'Future versions of Python may not enable this ' + 'finder by default.', + DeprecationWarning, stacklevel=2) + + filepath = cls._search_registry(fullname) + if filepath is None: + return None + try: + _path_stat(filepath) + except OSError: + return None + for loader, suffixes in _get_supported_file_loaders(): + if filepath.endswith(tuple(suffixes)): + spec = _bootstrap.spec_from_loader(fullname, + loader(fullname, filepath), + origin=filepath) + return spec + + +class _LoaderBasics: + + """Base class of common code needed by both SourceLoader and + SourcelessFileLoader.""" + + def is_package(self, fullname): + """Concrete implementation of InspectLoader.is_package by checking if + the path returned by get_filename has a filename of '__init__.py'.""" + filename = _path_split(self.get_filename(fullname))[1] + filename_base = filename.rsplit('.', 1)[0] + tail_name = fullname.rpartition('.')[2] + return filename_base == '__init__' and tail_name != '__init__' + + def create_module(self, spec): + """Use default semantics for module creation.""" + + def exec_module(self, module): + """Execute the module.""" + code = self.get_code(module.__name__) + if code is None: + raise ImportError(f'cannot load module {module.__name__!r} when ' + 'get_code() returns None') + _bootstrap._call_with_frames_removed(exec, code, module.__dict__) + + def load_module(self, fullname): + """This method is deprecated.""" + # Warning implemented in _load_module_shim(). + return _bootstrap._load_module_shim(self, fullname) + + +class SourceLoader(_LoaderBasics): + + def path_mtime(self, path): + """Optional method that returns the modification time (an int) for the + specified path (a str). + + Raises OSError when the path cannot be handled. + """ + raise OSError + + def path_stats(self, path): + """Optional method returning a metadata dict for the specified + path (a str). + + Possible keys: + - 'mtime' (mandatory) is the numeric timestamp of last source + code modification; + - 'size' (optional) is the size in bytes of the source code. + + Implementing this method allows the loader to read bytecode files. + Raises OSError when the path cannot be handled. + """ + return {'mtime': self.path_mtime(path)} + + def _cache_bytecode(self, source_path, cache_path, data): + """Optional method which writes data (bytes) to a file path (a str). + + Implementing this method allows for the writing of bytecode files. + + The source path is needed in order to correctly transfer permissions + """ + # For backwards compatibility, we delegate to set_data() + return self.set_data(cache_path, data) + + def set_data(self, path, data): + """Optional method which writes data (bytes) to a file path (a str). + + Implementing this method allows for the writing of bytecode files. + """ + + + def get_source(self, fullname): + """Concrete implementation of InspectLoader.get_source.""" + path = self.get_filename(fullname) + try: + source_bytes = self.get_data(path) + except OSError as exc: + raise ImportError('source not available through get_data()', + name=fullname) from exc + return decode_source(source_bytes) + + def source_to_code(self, data, path, *, _optimize=-1): + """Return the code object compiled from source. + + The 'data' argument can be any object type that compile() supports. + """ + return _bootstrap._call_with_frames_removed(compile, data, path, 'exec', + dont_inherit=True, optimize=_optimize) + + def get_code(self, fullname): + """Concrete implementation of InspectLoader.get_code. + + Reading of bytecode requires path_stats to be implemented. To write + bytecode, set_data must also be implemented. + + """ + source_path = self.get_filename(fullname) + source_mtime = None + source_bytes = None + source_hash = None + hash_based = False + check_source = True + try: + bytecode_path = cache_from_source(source_path) + except NotImplementedError: + bytecode_path = None + else: + try: + st = self.path_stats(source_path) + except OSError: + pass + else: + source_mtime = int(st['mtime']) + try: + data = self.get_data(bytecode_path) + except OSError: + pass + else: + exc_details = { + 'name': fullname, + 'path': bytecode_path, + } + try: + flags = _classify_pyc(data, fullname, exc_details) + bytes_data = memoryview(data)[16:] + hash_based = flags & 0b1 != 0 + if hash_based: + check_source = flags & 0b10 != 0 + if (_imp.check_hash_based_pycs != 'never' and + (check_source or + _imp.check_hash_based_pycs == 'always')): + source_bytes = self.get_data(source_path) + source_hash = _imp.source_hash( + _imp.pyc_magic_number_token, + source_bytes, + ) + _validate_hash_pyc(data, source_hash, fullname, + exc_details) + else: + _validate_timestamp_pyc( + data, + source_mtime, + st['size'], + fullname, + exc_details, + ) + except (ImportError, EOFError): + pass + else: + _bootstrap._verbose_message('{} matches {}', bytecode_path, + source_path) + return _compile_bytecode(bytes_data, name=fullname, + bytecode_path=bytecode_path, + source_path=source_path) + if source_bytes is None: + source_bytes = self.get_data(source_path) + code_object = self.source_to_code(source_bytes, source_path) + _bootstrap._verbose_message('code object from {}', source_path) + if (not sys.dont_write_bytecode and bytecode_path is not None and + source_mtime is not None): + if hash_based: + if source_hash is None: + source_hash = _imp.source_hash(_imp.pyc_magic_number_token, + source_bytes) + data = _code_to_hash_pyc(code_object, source_hash, check_source) + else: + data = _code_to_timestamp_pyc(code_object, source_mtime, + len(source_bytes)) + try: + self._cache_bytecode(source_path, bytecode_path, data) + except NotImplementedError: + pass + return code_object + + +class FileLoader: + + """Base file loader class which implements the loader protocol methods that + require file system usage.""" def __init__(self, fullname, path): + """Cache the module name and the path to the file found by the + finder.""" self.name = fullname self.path = path - def get_filename(self, fullname=None): - if fullname is not None and fullname != self.name: - raise ImportError( - f'loader for {self.name} cannot handle {fullname}', - name=fullname, - ) + def __eq__(self, other): + return (self.__class__ == other.__class__ and + self.__dict__ == other.__dict__) + + def __hash__(self): + return hash(self.name) ^ hash(self.path) + + @_check_name + def load_module(self, fullname): + """Load a module from a file. + + This method is deprecated. Use exec_module() instead. + + """ + # The only reason for this method is for the name check. + # Issue #14857: Avoid the zero-argument form of super so the implementation + # of that form can be updated without breaking the frozen module. + return super(FileLoader, self).load_module(fullname) + + @_check_name + def get_filename(self, fullname): + """Return the path to the source file as found by the finder.""" return self.path def get_data(self, path): """Return the data from path as raw bytes.""" - with open(path, 'rb') as file: - return file.read() + if isinstance(self, (SourceLoader, SourcelessFileLoader, ExtensionFileLoader)): + with _io.open_code(str(path)) as file: + return file.read() + else: + with _io.FileIO(path, 'r') as file: + return file.read() + @_check_name + def get_resource_reader(self, module): + from importlib.readers import FileReader + return FileReader(self) -# CPython: Lib/importlib/_bootstrap_external.py:962 SourceFileLoader -class SourceFileLoader(FileLoader): - """Concrete loader for source files. Implements the slice of the - SourceLoader / FileLoader contract py_compile.compile() drives: - get_data, get_filename, source_to_code, path_stats. - """ - # CPython: Lib/importlib/_bootstrap_external.py:818 source_to_code - def source_to_code(self, data, path, *, _optimize=-1): - """Return the code object compiled from source.""" - return compile(data, path, 'exec', - dont_inherit=True, optimize=_optimize) +class SourceFileLoader(FileLoader, SourceLoader): + + """Concrete implementation of SourceLoader using the file system.""" - # CPython: Lib/importlib/_bootstrap_external.py:966 path_stats def path_stats(self, path): - st = _os.stat(path) + """Return the metadata for the path.""" + st = _path_stat(path) return {'mtime': st.st_mtime, 'size': st.st_size} - # CPython: Lib/importlib/_bootstrap_external.py:977 SourceFileLoader.get_code - def get_code(self, fullname=None): - if fullname is None: - fullname = self.name - source = self.get_data(self.get_filename(fullname)) - return self.source_to_code(source, self.path) + def _cache_bytecode(self, source_path, bytecode_path, data): + # Adapt between the two APIs + mode = _calc_mode(source_path) + return self.set_data(bytecode_path, data, _mode=mode) + + def set_data(self, path, data, *, _mode=0o666): + """Write bytes data to a file.""" + parent, filename = _path_split(path) + path_parts = [] + # Figure out what directories are missing. + while parent and not _path_isdir(parent): + parent, part = _path_split(parent) + path_parts.append(part) + # Create needed directories. + for part in reversed(path_parts): + parent = _path_join(parent, part) + try: + _os.mkdir(parent) + except FileExistsError: + # Probably another Python process already created the dir. + continue + except OSError as exc: + # Could be a permission error, read-only filesystem: just forget + # about writing the data. + _bootstrap._verbose_message('could not create {!r}: {!r}', + parent, exc) + return + try: + _write_atomic(path, data, _mode) + _bootstrap._verbose_message('created {!r}', path) + except OSError as exc: + # Same as above: just don't write the bytecode. + _bootstrap._verbose_message('could not create {!r}: {!r}', path, + exc) + + +class SourcelessFileLoader(FileLoader, _LoaderBasics): + + """Loader which handles sourceless file imports.""" + + def get_code(self, fullname): + path = self.get_filename(fullname) + data = self.get_data(path) + # Call _classify_pyc to do basic validation of the pyc but ignore the + # result. There's no source to check against. + exc_details = { + 'name': fullname, + 'path': path, + } + _classify_pyc(data, fullname, exc_details) + return _compile_bytecode( + memoryview(data)[16:], + name=fullname, + bytecode_path=path, + ) - # CPython: Lib/importlib/_bootstrap_external.py:886 SourceLoader.exec_module - def exec_module(self, module): - code = self.get_code(module.__name__) - exec(code, module.__dict__) + def get_source(self, fullname): + """Return None as there is no source code.""" + return None -# CPython: Lib/importlib/_bootstrap_external.py:101 _path_join -def _path_join(*path_parts): - """Replacement for os.path.join().""" - return path_sep.join([part.rstrip(path_separators) - for part in path_parts if part]) +class ExtensionFileLoader(FileLoader, _LoaderBasics): + """Loader for extension modules. -# CPython: Lib/importlib/_bootstrap_external.py:107 _path_split -def _path_split(path): - """Replacement for os.path.split().""" - i = max(path.rfind(p) for p in path_separators) - if i < 0: - return '', path - return path[:i], path[i + 1:] + The constructor is designed to work with FileFinder. + """ -# CPython: Lib/importlib/_bootstrap_external.py:202 _path_isabs -def _path_isabs(path): - """Replacement for os.path.isabs.""" - if not path: - return False - return path[0] in path_separators + def __init__(self, name, path): + self.name = name + self.path = path + def __eq__(self, other): + return (self.__class__ == other.__class__ and + self.__dict__ == other.__dict__) -# CPython: Lib/importlib/_bootstrap_external.py:217 _path_abspath -def _path_abspath(path): - """Replacement for os.path.abspath.""" - if not _path_isabs(path): - for sep in path_separators: - path = path.removeprefix(f".{sep}") - return _path_join(_os.getcwd(), path) - else: - return path + def __hash__(self): + return hash(self.name) ^ hash(self.path) + def create_module(self, spec): + """Create an uninitialized extension module""" + module = _bootstrap._call_with_frames_removed( + _imp.create_dynamic, spec) + _bootstrap._verbose_message('extension module {!r} loaded from {!r}', + spec.name, self.path) + return module -# CPython: Lib/importlib/_bootstrap_external.py:239 cache_from_source -def cache_from_source(path, debug_override=None, *, optimization=None): - """Given the path to a .py file, return the path to its .pyc file. + def exec_module(self, module): + """Initialize an extension module""" + _bootstrap._call_with_frames_removed(_imp.exec_dynamic, module) + _bootstrap._verbose_message('extension module {!r} executed from {!r}', + self.name, self.path) + + def is_package(self, fullname): + """Return True if the extension module is a package.""" + file_name = _path_split(self.path)[1] + return any(file_name == '__init__' + suffix + for suffix in EXTENSION_SUFFIXES) + + def get_code(self, fullname): + """Return None as an extension module cannot create a code object.""" + return None + + def get_source(self, fullname): + """Return None as extension modules have no source code.""" + return None + + @_check_name + def get_filename(self, fullname): + """Return the path to the source file as found by the finder.""" + return self.path - The .py file does not need to exist; this simply returns the path to the - .pyc file calculated as if the .py file were imported. - The 'optimization' parameter controls the presumed optimization level of - the bytecode file. If 'optimization' is not None, the string representation - of the argument is taken and verified to be alphanumeric (else ValueError - is raised). +class _NamespacePath: + """Represents a namespace package's path. It uses the module name + to find its parent module, and from there it looks up the parent's + __path__. When this changes, the module's own path is recomputed, + using path_finder. For top-level modules, the parent module's path + is sys.path.""" + + # When invalidate_caches() is called, this epoch is incremented + # https://bugs.python.org/issue45703 + _epoch = 0 + + def __init__(self, name, path, path_finder): + self._name = name + self._path = path + self._last_parent_path = tuple(self._get_parent_path()) + self._last_epoch = self._epoch + self._path_finder = path_finder + + def _find_parent_path_names(self): + """Returns a tuple of (parent-module-name, parent-path-attr-name)""" + parent, dot, me = self._name.rpartition('.') + if dot == '': + # This is a top-level module. sys.path contains the parent path. + return 'sys', 'path' + # Not a top-level module. parent-module.__path__ contains the + # parent path. + return parent, '__path__' + + def _get_parent_path(self): + parent_module_name, path_attr_name = self._find_parent_path_names() + return getattr(sys.modules[parent_module_name], path_attr_name) + + def _recalculate(self): + # If the parent's path has changed, recalculate _path + parent_path = tuple(self._get_parent_path()) # Make a copy + if parent_path != self._last_parent_path or self._epoch != self._last_epoch: + spec = self._path_finder(self._name, parent_path) + # Note that no changes are made if a loader is returned, but we + # do remember the new parent path + if spec is not None and spec.loader is None: + if spec.submodule_search_locations: + self._path = spec.submodule_search_locations + self._last_parent_path = parent_path # Save the copy + self._last_epoch = self._epoch + return self._path + + def __iter__(self): + return iter(self._recalculate()) + + def __getitem__(self, index): + return self._recalculate()[index] + + def __setitem__(self, index, path): + self._path[index] = path + + def __len__(self): + return len(self._recalculate()) + + def __repr__(self): + return f'_NamespacePath({self._path!r})' + + def __contains__(self, item): + return item in self._recalculate() + + def append(self, item): + self._path.append(item) + + +# This class is actually exposed publicly in a namespace package's __loader__ +# attribute, so it should be available through a non-private name. +# https://github.com/python/cpython/issues/92054 +class NamespaceLoader: + def __init__(self, name, path, path_finder): + self._path = _NamespacePath(name, path, path_finder) + + def is_package(self, fullname): + return True + + def get_source(self, fullname): + return '' + + def get_code(self, fullname): + return compile('', '', 'exec', dont_inherit=True) + + def create_module(self, spec): + """Use default semantics for module creation.""" - The debug_override parameter is deprecated. If debug_override is not None, - a True value is the same as setting 'optimization' to the empty string - while a False value is equivalent to setting 'optimization' to '1'. + def exec_module(self, module): + pass + + def load_module(self, fullname): + """Load a namespace module. + + This method is deprecated. Use exec_module() instead. + + """ + # The import system never calls this method. + _bootstrap._verbose_message('namespace module loaded with path {!r}', + self._path) + # Warning implemented in _load_module_shim(). + return _bootstrap._load_module_shim(self, fullname) + + def get_resource_reader(self, module): + from importlib.readers import NamespaceReader + return NamespaceReader(self._path) + + +# We use this exclusively in module_from_spec() for backward-compatibility. +_NamespaceLoader = NamespaceLoader + + +# Finders ##################################################################### + +class PathFinder: + + """Meta path finder for sys.path and package __path__ attributes.""" + + @staticmethod + def invalidate_caches(): + """Call the invalidate_caches() method on all path entry finders + stored in sys.path_importer_cache (where implemented).""" + for name, finder in list(sys.path_importer_cache.items()): + # Drop entry if finder name is a relative path. The current + # working directory may have changed. + if finder is None or not _path_isabs(name): + del sys.path_importer_cache[name] + elif hasattr(finder, 'invalidate_caches'): + finder.invalidate_caches() + # Also invalidate the caches of _NamespacePaths + # https://bugs.python.org/issue45703 + _NamespacePath._epoch += 1 + + from importlib.metadata import MetadataPathFinder + MetadataPathFinder.invalidate_caches() + + @staticmethod + def _path_hooks(path): + """Search sys.path_hooks for a finder for 'path'.""" + if sys.path_hooks is not None and not sys.path_hooks: + _warnings.warn('sys.path_hooks is empty', ImportWarning) + for hook in sys.path_hooks: + try: + return hook(path) + except ImportError: + continue + else: + return None + + @classmethod + def _path_importer_cache(cls, path): + """Get the finder for the path entry from sys.path_importer_cache. + + If the path entry is not in the cache, find the appropriate finder + and cache it. If no finder is available, store None. + + """ + if path == '': + try: + path = _os.getcwd() + except (FileNotFoundError, PermissionError): + # Don't cache the failure as the cwd can easily change to + # a valid directory later on. + return None + try: + finder = sys.path_importer_cache[path] + except KeyError: + finder = cls._path_hooks(path) + sys.path_importer_cache[path] = finder + return finder + + @classmethod + def _get_spec(cls, fullname, path, target=None): + """Find the loader or namespace_path for this module/package name.""" + # If this ends up being a namespace package, namespace_path is + # the list of paths that will become its __path__ + namespace_path = [] + for entry in path: + if not isinstance(entry, str): + continue + finder = cls._path_importer_cache(entry) + if finder is not None: + spec = finder.find_spec(fullname, target) + if spec is None: + continue + if spec.loader is not None: + return spec + portions = spec.submodule_search_locations + if portions is None: + raise ImportError('spec missing loader') + # This is possibly part of a namespace package. + # Remember these path entries (if any) for when we + # create a namespace package, and continue iterating + # on path. + namespace_path.extend(portions) + else: + spec = _bootstrap.ModuleSpec(fullname, None) + spec.submodule_search_locations = namespace_path + return spec + + @classmethod + def find_spec(cls, fullname, path=None, target=None): + """Try to find a spec for 'fullname' on sys.path or 'path'. + + The search is based on sys.path_hooks and sys.path_importer_cache. + """ + if path is None: + path = sys.path + spec = cls._get_spec(fullname, path, target) + if spec is None: + return None + elif spec.loader is None: + namespace_path = spec.submodule_search_locations + if namespace_path: + # We found at least one namespace path. Return a spec which + # can create the namespace package. + spec.origin = None + spec.submodule_search_locations = _NamespacePath(fullname, namespace_path, cls._get_spec) + return spec + else: + return None + else: + return spec + + @staticmethod + def find_distributions(*args, **kwargs): + """ + Find distributions. + + Return an iterable of all Distribution instances capable of + loading the metadata for packages matching ``context.name`` + (or all names if ``None`` indicated) along the paths in the list + of directories ``context.path``. + """ + from importlib.metadata import MetadataPathFinder + return MetadataPathFinder.find_distributions(*args, **kwargs) + + +class FileFinder: + + """File-based finder. + + Interactions with the file system are cached for performance, being + refreshed when the directory the finder is handling has been modified. - If sys.implementation.cache_tag is None then NotImplementedError is raised. """ - if debug_override is not None: - if optimization is not None: - message = 'debug_override or optimization must be set to None' - raise TypeError(message) - optimization = '' if debug_override else 1 - path = _os.fspath(path) - head, tail = _path_split(path) - base, sep, rest = tail.rpartition('.') - tag = sys.implementation.cache_tag - if tag is None: - raise NotImplementedError('sys.implementation.cache_tag is None') - almost_filename = ''.join([(base if base else rest), sep, tag]) - if optimization is None: - if sys.flags.optimize == 0: - optimization = '' + + def __init__(self, path, *loader_details): + """Initialize with the path to search on and a variable number of + 2-tuples containing the loader and the file suffixes the loader + recognizes.""" + loaders = [] + for loader, suffixes in loader_details: + loaders.extend((suffix, loader) for suffix in suffixes) + self._loaders = loaders + # Base (directory) path + if not path or path == '.': + self.path = _os.getcwd() else: - optimization = sys.flags.optimize - optimization = str(optimization) - if optimization != '': - if not optimization.isalnum(): - raise ValueError(f'{optimization!r} is not alphanumeric') - almost_filename = f'{almost_filename}.{_OPT}{optimization}' - filename = almost_filename + BYTECODE_SUFFIXES[0] - if getattr(sys, 'pycache_prefix', None) is not None: - head = _path_abspath(head) - if head[1:2] == ':' and head[0:1] not in path_separators: - head = head[2:] - return _path_join( - sys.pycache_prefix, - head.lstrip(path_separators), - filename, + self.path = _path_abspath(path) + self._path_mtime = -1 + self._path_cache = set() + self._relaxed_path_cache = set() + + def invalidate_caches(self): + """Invalidate the directory mtime.""" + self._path_mtime = -1 + + def _get_spec(self, loader_class, fullname, path, smsl, target): + loader = loader_class(fullname, path) + return spec_from_file_location(fullname, path, loader=loader, + submodule_search_locations=smsl) + + def find_spec(self, fullname, target=None): + """Try to find a spec for the specified module. + + Returns the matching spec, or None if not found. + """ + is_namespace = False + tail_module = fullname.rpartition('.')[2] + try: + mtime = _path_stat(self.path or _os.getcwd()).st_mtime + except OSError: + mtime = -1 + if mtime != self._path_mtime: + self._fill_cache() + self._path_mtime = mtime + # tail_module keeps the original casing, for __file__ and friends + if _relax_case(): + cache = self._relaxed_path_cache + cache_module = tail_module.lower() + else: + cache = self._path_cache + cache_module = tail_module + # Check if the module is the name of a directory (and thus a package). + if cache_module in cache: + base_path = _path_join(self.path, tail_module) + for suffix, loader_class in self._loaders: + init_filename = '__init__' + suffix + full_path = _path_join(base_path, init_filename) + if _path_isfile(full_path): + return self._get_spec(loader_class, fullname, full_path, [base_path], target) + else: + # If a namespace package, return the path if we don't + # find a module in the next section. + is_namespace = _path_isdir(base_path) + # Check for a file w/ a proper suffix exists. + for suffix, loader_class in self._loaders: + try: + full_path = _path_join(self.path, tail_module + suffix) + except ValueError: + return None + _bootstrap._verbose_message('trying {}', full_path, verbosity=2) + if cache_module + suffix in cache: + if _path_isfile(full_path): + return self._get_spec(loader_class, fullname, full_path, + None, target) + if is_namespace: + _bootstrap._verbose_message('possible namespace for {}', base_path) + spec = _bootstrap.ModuleSpec(fullname, None) + spec.submodule_search_locations = [base_path] + return spec + return None + + def _fill_cache(self): + """Fill the cache of potential modules and packages for this directory.""" + path = self.path + try: + contents = _os.listdir(path or _os.getcwd()) + except (FileNotFoundError, PermissionError, NotADirectoryError): + # Directory has either been removed, turned into a file, or made + # unreadable. + contents = [] + # We store two cached versions, to handle runtime changes of the + # PYTHONCASEOK environment variable. + if not sys.platform.startswith('win'): + self._path_cache = set(contents) + else: + # Windows users can import modules with case-insensitive file + # suffixes (for legacy reasons). Make the suffix lowercase here + # so it's done once instead of for every import. This is safe as + # the specified suffixes to check against are always specified in a + # case-sensitive manner. + lower_suffix_contents = set() + for item in contents: + name, dot, suffix = item.partition('.') + if dot: + new_name = f'{name}.{suffix.lower()}' + else: + new_name = name + lower_suffix_contents.add(new_name) + self._path_cache = lower_suffix_contents + if sys.platform.startswith(_CASE_INSENSITIVE_PLATFORMS): + self._relaxed_path_cache = {fn.lower() for fn in contents} + + @classmethod + def path_hook(cls, *loader_details): + """A class method which returns a closure to use on sys.path_hook + which will return an instance using the specified loaders and the path + called on the closure. + + If the path called on the closure is not a directory, ImportError is + raised. + + """ + def path_hook_for_FileFinder(path): + """Path hook for importlib.machinery.FileFinder.""" + if not _path_isdir(path): + raise ImportError('only directories are supported', path=path) + return cls(path, *loader_details) + + return path_hook_for_FileFinder + + def __repr__(self): + return f'FileFinder({self.path!r})' + + +class AppleFrameworkLoader(ExtensionFileLoader): + """A loader for modules that have been packaged as frameworks for + compatibility with Apple's iOS App Store policies. + """ + def create_module(self, spec): + # If the ModuleSpec has been created by the FileFinder, it will have + # been created with an origin pointing to the .fwork file. We need to + # redirect this to the location in the Frameworks folder, using the + # content of the .fwork file. + if spec.origin.endswith(".fwork"): + with _io.FileIO(spec.origin, 'r') as file: + framework_binary = file.read().decode().strip() + bundle_path = _path_split(sys.executable)[0] + spec.origin = _path_join(bundle_path, framework_binary) + + # If the loader is created based on the spec for a loaded module, the + # path will be pointing at the Framework location. If this occurs, + # get the original .fwork location to use as the module's __file__. + if self.path.endswith(".fwork"): + path = self.path + else: + with _io.FileIO(self.path + ".origin", 'r') as file: + origin = file.read().decode().strip() + bundle_path = _path_split(sys.executable)[0] + path = _path_join(bundle_path, origin) + + module = _bootstrap._call_with_frames_removed(_imp.create_dynamic, spec) + + _bootstrap._verbose_message( + "Apple framework extension module {!r} loaded from {!r} (path {!r})", + spec.name, + spec.origin, + path, ) - return _path_join(head, _PYCACHE, filename) + # Ensure that the __file__ points at the .fwork location + try: + module.__file__ = path + except AttributeError: + # Not important enough to report. + # (The error is also ignored in _bootstrap._init_module_attrs or + # import_run_extension in import.c) + pass -# CPython: Lib/importlib/_bootstrap_external.py:310 source_from_cache -def source_from_cache(path): - """Given the path to a .pyc. file, return the path to its .py file. + return module - The .pyc file does not need to exist; this simply returns the path to - the .py file calculated to correspond to the .pyc file. If path does - not conform to PEP 3147/488 format, ValueError will be raised. If - sys.implementation.cache_tag is None then NotImplementedError is raised. +# Import setup ############################################################### + +def _fix_up_module(ns, name, pathname, cpathname=None): + # This function is used by PyImport_ExecCodeModuleObject(). + loader = ns.get('__loader__') + spec = ns.get('__spec__') + if not loader: + if spec: + loader = spec.loader + elif pathname == cpathname: + loader = SourcelessFileLoader(name, pathname) + else: + loader = SourceFileLoader(name, pathname) + if not spec: + spec = spec_from_file_location(name, pathname, loader=loader) + if cpathname: + spec.cached = _path_abspath(cpathname) + try: + ns['__spec__'] = spec + ns['__loader__'] = loader + ns['__file__'] = pathname + ns['__cached__'] = cpathname + except Exception: + # Not important enough to report. + pass + + +def _get_supported_file_loaders(): + """Returns a list of file-based module loaders. + + Each item is a tuple (loader, suffixes). """ - if sys.implementation.cache_tag is None: - raise NotImplementedError('sys.implementation.cache_tag is None') - path = _os.fspath(path) - head, pycache_filename = _path_split(path) - found_in_pycache_prefix = False - if getattr(sys, 'pycache_prefix', None) is not None: - stripped_path = sys.pycache_prefix.rstrip(path_separators) - if head.startswith(stripped_path + path_sep): - head = head[len(stripped_path):] - found_in_pycache_prefix = True - if not found_in_pycache_prefix: - head, pycache = _path_split(head) - if pycache != _PYCACHE: - raise ValueError(f'{_PYCACHE} not bottom-level directory in ' - f'{path!r}') - dot_count = pycache_filename.count('.') - if dot_count not in {2, 3}: - raise ValueError(f'expected only 2 or 3 dots in {pycache_filename!r}') - elif dot_count == 3: - optimization = pycache_filename.rsplit('.', 2)[-2] - if not optimization.startswith(_OPT): - raise ValueError("optimization portion of filename does not start " - f"with {_OPT!r}") - opt_level = optimization[len(_OPT):] - if not opt_level.isalnum(): - raise ValueError(f"optimization level {opt_level!r} is not an " - "alphanumeric value") - base_filename = pycache_filename.partition('.')[0] - return _path_join(head, base_filename + SOURCE_SUFFIXES[0]) + extension_loaders = [] + if hasattr(_imp, 'create_dynamic'): + if sys.platform in {"ios", "tvos", "watchos"}: + extension_loaders = [(AppleFrameworkLoader, [ + suffix.replace(".so", ".fwork") + for suffix in _imp.extension_suffixes() + ])] + extension_loaders.append((ExtensionFileLoader, _imp.extension_suffixes())) + source = SourceFileLoader, SOURCE_SUFFIXES + bytecode = SourcelessFileLoader, BYTECODE_SUFFIXES + return extension_loaders + [source, bytecode] + + +def _set_bootstrap_module(_bootstrap_module): + global _bootstrap + _bootstrap = _bootstrap_module + + +def _install(_bootstrap_module): + """Install the path-based import components.""" + _set_bootstrap_module(_bootstrap_module) + supported_loaders = _get_supported_file_loaders() + sys.path_hooks.extend([FileFinder.path_hook(*supported_loaders)]) + sys.meta_path.append(PathFinder) diff --git a/stdlib/importlib/abc.py b/stdlib/importlib/abc.py new file mode 100644 index 000000000..1e47495f6 --- /dev/null +++ b/stdlib/importlib/abc.py @@ -0,0 +1,234 @@ +"""Abstract base classes related to import.""" +from . import _bootstrap_external +from . import machinery +try: + import _frozen_importlib +except ImportError as exc: + if exc.name != '_frozen_importlib': + raise + _frozen_importlib = None +try: + import _frozen_importlib_external +except ImportError: + _frozen_importlib_external = _bootstrap_external +from ._abc import Loader +import abc + + +__all__ = [ + 'Loader', 'MetaPathFinder', 'PathEntryFinder', + 'ResourceLoader', 'InspectLoader', 'ExecutionLoader', + 'FileLoader', 'SourceLoader', +] + + +def _register(abstract_cls, *classes): + for cls in classes: + abstract_cls.register(cls) + if _frozen_importlib is not None: + try: + frozen_cls = getattr(_frozen_importlib, cls.__name__) + except AttributeError: + frozen_cls = getattr(_frozen_importlib_external, cls.__name__) + abstract_cls.register(frozen_cls) + + +class MetaPathFinder(metaclass=abc.ABCMeta): + + """Abstract base class for import finders on sys.meta_path.""" + + # We don't define find_spec() here since that would break + # hasattr checks we do to support backward compatibility. + + def invalidate_caches(self): + """An optional method for clearing the finder's cache, if any. + This method is used by importlib.invalidate_caches(). + """ + +_register(MetaPathFinder, machinery.BuiltinImporter, machinery.FrozenImporter, + machinery.PathFinder, machinery.WindowsRegistryFinder) + + +class PathEntryFinder(metaclass=abc.ABCMeta): + + """Abstract base class for path entry finders used by PathFinder.""" + + def invalidate_caches(self): + """An optional method for clearing the finder's cache, if any. + This method is used by PathFinder.invalidate_caches(). + """ + +_register(PathEntryFinder, machinery.FileFinder) + + +class ResourceLoader(Loader): + + """Abstract base class for loaders which can return data from their + back-end storage to facilitate reading data to perform an import. + + This ABC represents one of the optional protocols specified by PEP 302. + + For directly loading resources, use TraversableResources instead. This class + primarily exists for backwards compatibility with other ABCs in this module. + + """ + + @abc.abstractmethod + def get_data(self, path): + """Abstract method which when implemented should return the bytes for + the specified path. The path must be a str.""" + raise OSError + + +class InspectLoader(Loader): + + """Abstract base class for loaders which support inspection about the + modules they can load. + + This ABC represents one of the optional protocols specified by PEP 302. + + """ + + def is_package(self, fullname): + """Optional method which when implemented should return whether the + module is a package. The fullname is a str. Returns a bool. + + Raises ImportError if the module cannot be found. + """ + raise ImportError + + def get_code(self, fullname): + """Method which returns the code object for the module. + + The fullname is a str. Returns a types.CodeType if possible, else + returns None if a code object does not make sense + (e.g. built-in module). Raises ImportError if the module cannot be + found. + """ + source = self.get_source(fullname) + if source is None: + return None + return self.source_to_code(source) + + @abc.abstractmethod + def get_source(self, fullname): + """Abstract method which should return the source code for the + module. The fullname is a str. Returns a str. + + Raises ImportError if the module cannot be found. + """ + raise ImportError + + @staticmethod + def source_to_code(data, path=''): + """Compile 'data' into a code object. + + The 'data' argument can be anything that compile() can handle. The'path' + argument should be where the data was retrieved (when applicable).""" + return compile(data, path, 'exec', dont_inherit=True) + + exec_module = _bootstrap_external._LoaderBasics.exec_module + load_module = _bootstrap_external._LoaderBasics.load_module + +_register(InspectLoader, machinery.BuiltinImporter, machinery.FrozenImporter, machinery.NamespaceLoader) + + +class ExecutionLoader(InspectLoader): + + """Abstract base class for loaders that wish to support the execution of + modules as scripts. + + This ABC represents one of the optional protocols specified in PEP 302. + + """ + + @abc.abstractmethod + def get_filename(self, fullname): + """Abstract method which should return the value that __file__ is to be + set to. + + Raises ImportError if the module cannot be found. + """ + raise ImportError + + def get_code(self, fullname): + """Method to return the code object for fullname. + + Should return None if not applicable (e.g. built-in module). + Raise ImportError if the module cannot be found. + """ + source = self.get_source(fullname) + if source is None: + return None + try: + path = self.get_filename(fullname) + except ImportError: + return self.source_to_code(source) + else: + return self.source_to_code(source, path) + +_register( + ExecutionLoader, + machinery.ExtensionFileLoader, + machinery.AppleFrameworkLoader, +) + + +class FileLoader(_bootstrap_external.FileLoader, ResourceLoader, ExecutionLoader): + + """Abstract base class partially implementing the ResourceLoader and + ExecutionLoader ABCs.""" + +_register(FileLoader, machinery.SourceFileLoader, + machinery.SourcelessFileLoader) + + +class SourceLoader(_bootstrap_external.SourceLoader, ResourceLoader, ExecutionLoader): + + """Abstract base class for loading source code (and optionally any + corresponding bytecode). + + To support loading from source code, the abstractmethods inherited from + ResourceLoader and ExecutionLoader need to be implemented. To also support + loading from bytecode, the optional methods specified directly by this ABC + is required. + + Inherited abstractmethods not implemented in this ABC: + + * ResourceLoader.get_data + * ExecutionLoader.get_filename + + """ + + def path_mtime(self, path): + """Return the (int) modification time for the path (str).""" + import warnings + warnings.warn('SourceLoader.path_mtime is deprecated in favour of ' + 'SourceLoader.path_stats().', + DeprecationWarning, stacklevel=2) + if self.path_stats.__func__ is SourceLoader.path_stats: + raise OSError + return int(self.path_stats(path)['mtime']) + + def path_stats(self, path): + """Return a metadata dict for the source pointed to by the path (str). + Possible keys: + - 'mtime' (mandatory) is the numeric timestamp of last source + code modification; + - 'size' (optional) is the size in bytes of the source code. + """ + if self.path_mtime.__func__ is SourceLoader.path_mtime: + raise OSError + return {'mtime': self.path_mtime(path)} + + def set_data(self, path, data): + """Write the bytes to the path (if possible). + + Accepts a str path and data as bytes. + + Any needed intermediary directories are to be created. If for some + reason the file cannot be written because of permissions, fail + silently. + """ + +_register(SourceLoader, machinery.SourceFileLoader) diff --git a/stdlib/importlib/machinery.py b/stdlib/importlib/machinery.py index e593f2bb9..63d726445 100644 --- a/stdlib/importlib/machinery.py +++ b/stdlib/importlib/machinery.py @@ -1,67 +1,50 @@ -"""importlib.machinery: gopy-side stub. - -The CPython module re-exports loader / finder classes plus suffix -constants from ._bootstrap and ._bootstrap_external. gopy's import -system is implemented Go-side, so most loaders and finders aren't -needed at the Python boundary; the SourceFileLoader re-export is -necessary because py_compile.compile() drives it directly. - -When a future spec lands the full importlib bootstrap port, this file -becomes the byte-equal vendor of Lib/importlib/machinery.py. - -CPython: Lib/importlib/machinery.py -""" - -from importlib._bootstrap_external import ( - FileLoader, - SourceFileLoader, +"""The machinery of importlib: finders, loaders, hooks, etc.""" + +from ._bootstrap import ModuleSpec +from ._bootstrap import BuiltinImporter +from ._bootstrap import FrozenImporter +from ._bootstrap_external import ( + SOURCE_SUFFIXES, BYTECODE_SUFFIXES, EXTENSION_SUFFIXES, + DEBUG_BYTECODE_SUFFIXES as _DEBUG_BYTECODE_SUFFIXES, + OPTIMIZED_BYTECODE_SUFFIXES as _OPTIMIZED_BYTECODE_SUFFIXES ) - -SOURCE_SUFFIXES = ['.py'] -DEBUG_BYTECODE_SUFFIXES = ['.pyc'] -OPTIMIZED_BYTECODE_SUFFIXES = ['.pyc'] -BYTECODE_SUFFIXES = DEBUG_BYTECODE_SUFFIXES -EXTENSION_SUFFIXES = [] +from ._bootstrap_external import WindowsRegistryFinder +from ._bootstrap_external import PathFinder +from ._bootstrap_external import FileFinder +from ._bootstrap_external import SourceFileLoader +from ._bootstrap_external import SourcelessFileLoader +from ._bootstrap_external import ExtensionFileLoader +from ._bootstrap_external import AppleFrameworkLoader +from ._bootstrap_external import NamespaceLoader def all_suffixes(): - """Returns a list of all recognized module suffixes for this process.""" + """Returns a list of all recognized module suffixes for this process""" return SOURCE_SUFFIXES + BYTECODE_SUFFIXES + EXTENSION_SUFFIXES -class FileFinder: - """Stub: gopy's import system is Go-side; pkgutil registers an - iterator against FileFinder but it's only consulted when the - user walks a package, which the spec 1711 test path doesn't. - """ - - def __init__(self, path, *loader_details): - self.path = path - self._loaders = loader_details - +__all__ = ['AppleFrameworkLoader', 'BYTECODE_SUFFIXES', 'BuiltinImporter', + 'DEBUG_BYTECODE_SUFFIXES', 'EXTENSION_SUFFIXES', + 'ExtensionFileLoader', 'FileFinder', 'FrozenImporter', 'ModuleSpec', + 'NamespaceLoader', 'OPTIMIZED_BYTECODE_SUFFIXES', 'PathFinder', + 'SOURCE_SUFFIXES', 'SourceFileLoader', 'SourcelessFileLoader', + 'WindowsRegistryFinder', 'all_suffixes'] -class ModuleSpec: - """Minimal stand-in for importlib.machinery.ModuleSpec.""" - def __init__(self, name, loader, *, origin=None, loader_state=None, - is_package=None): - self.name = name - self.loader = loader - self.origin = origin - self.loader_state = loader_state - self.submodule_search_locations = [] if is_package else None - self.has_location = origin is not None - self.cached = None +def __getattr__(name): + import warnings + if name == 'DEBUG_BYTECODE_SUFFIXES': + warnings.warn('importlib.machinery.DEBUG_BYTECODE_SUFFIXES is ' + 'deprecated; use importlib.machinery.BYTECODE_SUFFIXES ' + 'instead.', + DeprecationWarning, stacklevel=2) + return _DEBUG_BYTECODE_SUFFIXES + elif name == 'OPTIMIZED_BYTECODE_SUFFIXES': + warnings.warn('importlib.machinery.OPTIMIZED_BYTECODE_SUFFIXES is ' + 'deprecated; use importlib.machinery.BYTECODE_SUFFIXES ' + 'instead.', + DeprecationWarning, stacklevel=2) + return _OPTIMIZED_BYTECODE_SUFFIXES -__all__ = [ - 'BYTECODE_SUFFIXES', - 'DEBUG_BYTECODE_SUFFIXES', - 'EXTENSION_SUFFIXES', - 'FileLoader', - 'ModuleSpec', - 'OPTIMIZED_BYTECODE_SUFFIXES', - 'SOURCE_SUFFIXES', - 'SourceFileLoader', - 'all_suffixes', -] + raise AttributeError(f'module {__name__!r} has no attribute {name!r}') diff --git a/stdlib/importlib/metadata/__init__.py b/stdlib/importlib/metadata/__init__.py new file mode 100644 index 000000000..8ce62dd86 --- /dev/null +++ b/stdlib/importlib/metadata/__init__.py @@ -0,0 +1,1093 @@ +from __future__ import annotations + +import os +import re +import abc +import sys +import json +import email +import types +import inspect +import pathlib +import zipfile +import operator +import textwrap +import warnings +import functools +import itertools +import posixpath +import collections + +from . import _meta +from ._collections import FreezableDefaultDict, Pair +from ._functools import method_cache, pass_none +from ._itertools import always_iterable, unique_everseen +from ._meta import PackageMetadata, SimplePath + +from contextlib import suppress +from importlib import import_module +from importlib.abc import MetaPathFinder +from itertools import starmap +from typing import Any, Iterable, List, Mapping, Match, Optional, Set, cast + +__all__ = [ + 'Distribution', + 'DistributionFinder', + 'PackageMetadata', + 'PackageNotFoundError', + 'distribution', + 'distributions', + 'entry_points', + 'files', + 'metadata', + 'packages_distributions', + 'requires', + 'version', +] + + +class PackageNotFoundError(ModuleNotFoundError): + """The package was not found.""" + + def __str__(self) -> str: + return f"No package metadata was found for {self.name}" + + @property + def name(self) -> str: # type: ignore[override] + (name,) = self.args + return name + + +class Sectioned: + """ + A simple entry point config parser for performance + + >>> for item in Sectioned.read(Sectioned._sample): + ... print(item) + Pair(name='sec1', value='# comments ignored') + Pair(name='sec1', value='a = 1') + Pair(name='sec1', value='b = 2') + Pair(name='sec2', value='a = 2') + + >>> res = Sectioned.section_pairs(Sectioned._sample) + >>> item = next(res) + >>> item.name + 'sec1' + >>> item.value + Pair(name='a', value='1') + >>> item = next(res) + >>> item.value + Pair(name='b', value='2') + >>> item = next(res) + >>> item.name + 'sec2' + >>> item.value + Pair(name='a', value='2') + >>> list(res) + [] + """ + + _sample = textwrap.dedent( + """ + [sec1] + # comments ignored + a = 1 + b = 2 + + [sec2] + a = 2 + """ + ).lstrip() + + @classmethod + def section_pairs(cls, text): + return ( + section._replace(value=Pair.parse(section.value)) + for section in cls.read(text, filter_=cls.valid) + if section.name is not None + ) + + @staticmethod + def read(text, filter_=None): + lines = filter(filter_, map(str.strip, text.splitlines())) + name = None + for value in lines: + section_match = value.startswith('[') and value.endswith(']') + if section_match: + name = value.strip('[]') + continue + yield Pair(name, value) + + @staticmethod + def valid(line: str): + return line and not line.startswith('#') + + +class EntryPoint: + """An entry point as defined by Python packaging conventions. + + See `the packaging docs on entry points + `_ + for more information. + + >>> ep = EntryPoint( + ... name=None, group=None, value='package.module:attr [extra1, extra2]') + >>> ep.module + 'package.module' + >>> ep.attr + 'attr' + >>> ep.extras + ['extra1', 'extra2'] + """ + + pattern = re.compile( + r'(?P[\w.]+)\s*' + r'(:\s*(?P[\w.]+)\s*)?' + r'((?P\[.*\])\s*)?$' + ) + """ + A regular expression describing the syntax for an entry point, + which might look like: + + - module + - package.module + - package.module:attribute + - package.module:object.attribute + - package.module:attr [extra1, extra2] + + Other combinations are possible as well. + + The expression is lenient about whitespace around the ':', + following the attr, and following any extras. + """ + + name: str + value: str + group: str + + dist: Optional[Distribution] = None + + def __init__(self, name: str, value: str, group: str) -> None: + vars(self).update(name=name, value=value, group=group) + + def load(self) -> Any: + """Load the entry point from its definition. If only a module + is indicated by the value, return that module. Otherwise, + return the named object. + """ + match = cast(Match, self.pattern.match(self.value)) + module = import_module(match.group('module')) + attrs = filter(None, (match.group('attr') or '').split('.')) + return functools.reduce(getattr, attrs, module) + + @property + def module(self) -> str: + match = self.pattern.match(self.value) + assert match is not None + return match.group('module') + + @property + def attr(self) -> str: + match = self.pattern.match(self.value) + assert match is not None + return match.group('attr') + + @property + def extras(self) -> List[str]: + match = self.pattern.match(self.value) + assert match is not None + return re.findall(r'\w+', match.group('extras') or '') + + def _for(self, dist): + vars(self).update(dist=dist) + return self + + def matches(self, **params): + """ + EntryPoint matches the given parameters. + + >>> ep = EntryPoint(group='foo', name='bar', value='bing:bong [extra1, extra2]') + >>> ep.matches(group='foo') + True + >>> ep.matches(name='bar', value='bing:bong [extra1, extra2]') + True + >>> ep.matches(group='foo', name='other') + False + >>> ep.matches() + True + >>> ep.matches(extras=['extra1', 'extra2']) + True + >>> ep.matches(module='bing') + True + >>> ep.matches(attr='bong') + True + """ + attrs = (getattr(self, param) for param in params) + return all(map(operator.eq, params.values(), attrs)) + + def _key(self): + return self.name, self.value, self.group + + def __lt__(self, other): + return self._key() < other._key() + + def __eq__(self, other): + return self._key() == other._key() + + def __setattr__(self, name, value): + raise AttributeError("EntryPoint objects are immutable.") + + def __repr__(self): + return ( + f'EntryPoint(name={self.name!r}, value={self.value!r}, ' + f'group={self.group!r})' + ) + + def __hash__(self) -> int: + return hash(self._key()) + + +class EntryPoints(tuple): + """ + An immutable collection of selectable EntryPoint objects. + """ + + __slots__ = () + + def __getitem__(self, name: str) -> EntryPoint: # type: ignore[override] + """ + Get the EntryPoint in self matching name. + """ + try: + return next(iter(self.select(name=name))) + except StopIteration: + raise KeyError(name) + + def __repr__(self): + """ + Repr with classname and tuple constructor to + signal that we deviate from regular tuple behavior. + """ + return '%s(%r)' % (self.__class__.__name__, tuple(self)) + + def select(self, **params) -> EntryPoints: + """ + Select entry points from self that match the + given parameters (typically group and/or name). + """ + return EntryPoints(ep for ep in self if ep.matches(**params)) + + @property + def names(self) -> Set[str]: + """ + Return the set of all names of all entry points. + """ + return {ep.name for ep in self} + + @property + def groups(self) -> Set[str]: + """ + Return the set of all groups of all entry points. + """ + return {ep.group for ep in self} + + @classmethod + def _from_text_for(cls, text, dist): + return cls(ep._for(dist) for ep in cls._from_text(text)) + + @staticmethod + def _from_text(text): + return ( + EntryPoint(name=item.value.name, value=item.value.value, group=item.name) + for item in Sectioned.section_pairs(text or '') + ) + + +class PackagePath(pathlib.PurePosixPath): + """A reference to a path in a package""" + + hash: Optional[FileHash] + size: int + dist: Distribution + + def read_text(self, encoding: str = 'utf-8') -> str: # type: ignore[override] + return self.locate().read_text(encoding=encoding) + + def read_binary(self) -> bytes: + return self.locate().read_bytes() + + def locate(self) -> SimplePath: + """Return a path-like object for this path""" + return self.dist.locate_file(self) + + +class FileHash: + def __init__(self, spec: str) -> None: + self.mode, _, self.value = spec.partition('=') + + def __repr__(self) -> str: + return f'' + + +class DeprecatedNonAbstract: + # Required until Python 3.14 + def __new__(cls, *args, **kwargs): + all_names = { + name for subclass in inspect.getmro(cls) for name in vars(subclass) + } + abstract = { + name + for name in all_names + if getattr(getattr(cls, name), '__isabstractmethod__', False) + } + if abstract: + warnings.warn( + f"Unimplemented abstract methods {abstract}", + DeprecationWarning, + stacklevel=2, + ) + return super().__new__(cls) + + +class Distribution(DeprecatedNonAbstract): + """ + An abstract Python distribution package. + + Custom providers may derive from this class and define + the abstract methods to provide a concrete implementation + for their environment. Some providers may opt to override + the default implementation of some properties to bypass + the file-reading mechanism. + """ + + @abc.abstractmethod + def read_text(self, filename) -> Optional[str]: + """Attempt to load metadata file given by the name. + + Python distribution metadata is organized by blobs of text + typically represented as "files" in the metadata directory + (e.g. package-1.0.dist-info). These files include things + like: + + - METADATA: The distribution metadata including fields + like Name and Version and Description. + - entry_points.txt: A series of entry points as defined in + `the entry points spec `_. + - RECORD: A record of files according to + `this recording spec `_. + + A package may provide any set of files, including those + not listed here or none at all. + + :param filename: The name of the file in the distribution info. + :return: The text if found, otherwise None. + """ + + @abc.abstractmethod + def locate_file(self, path: str | os.PathLike[str]) -> SimplePath: + """ + Given a path to a file in this distribution, return a SimplePath + to it. + """ + + @classmethod + def from_name(cls, name: str) -> Distribution: + """Return the Distribution for the given package name. + + :param name: The name of the distribution package to search for. + :return: The Distribution instance (or subclass thereof) for the named + package, if found. + :raises PackageNotFoundError: When the named package's distribution + metadata cannot be found. + :raises ValueError: When an invalid value is supplied for name. + """ + if not name: + raise ValueError("A distribution name is required.") + try: + return next(iter(cls.discover(name=name))) + except StopIteration: + raise PackageNotFoundError(name) + + @classmethod + def discover( + cls, *, context: Optional[DistributionFinder.Context] = None, **kwargs + ) -> Iterable[Distribution]: + """Return an iterable of Distribution objects for all packages. + + Pass a ``context`` or pass keyword arguments for constructing + a context. + + :context: A ``DistributionFinder.Context`` object. + :return: Iterable of Distribution objects for packages matching + the context. + """ + if context and kwargs: + raise ValueError("cannot accept context and kwargs") + context = context or DistributionFinder.Context(**kwargs) + return itertools.chain.from_iterable( + resolver(context) for resolver in cls._discover_resolvers() + ) + + @staticmethod + def at(path: str | os.PathLike[str]) -> Distribution: + """Return a Distribution for the indicated metadata path. + + :param path: a string or path-like object + :return: a concrete Distribution instance for the path + """ + return PathDistribution(pathlib.Path(path)) + + @staticmethod + def _discover_resolvers(): + """Search the meta_path for resolvers (MetadataPathFinders).""" + declared = ( + getattr(finder, 'find_distributions', None) for finder in sys.meta_path + ) + return filter(None, declared) + + @property + def metadata(self) -> _meta.PackageMetadata: + """Return the parsed metadata for this Distribution. + + The returned object will have keys that name the various bits of + metadata per the + `Core metadata specifications `_. + + Custom providers may provide the METADATA file or override this + property. + """ + # deferred for performance (python/cpython#109829) + from . import _adapters + + opt_text = ( + self.read_text('METADATA') + or self.read_text('PKG-INFO') + # This last clause is here to support old egg-info files. Its + # effect is to just end up using the PathDistribution's self._path + # (which points to the egg-info file) attribute unchanged. + or self.read_text('') + ) + text = cast(str, opt_text) + return _adapters.Message(email.message_from_string(text)) + + @property + def name(self) -> str: + """Return the 'Name' metadata for the distribution package.""" + return self.metadata['Name'] + + @property + def _normalized_name(self): + """Return a normalized version of the name.""" + return Prepared.normalize(self.name) + + @property + def version(self) -> str: + """Return the 'Version' metadata for the distribution package.""" + return self.metadata['Version'] + + @property + def entry_points(self) -> EntryPoints: + """ + Return EntryPoints for this distribution. + + Custom providers may provide the ``entry_points.txt`` file + or override this property. + """ + return EntryPoints._from_text_for(self.read_text('entry_points.txt'), self) + + @property + def files(self) -> Optional[List[PackagePath]]: + """Files in this distribution. + + :return: List of PackagePath for this distribution or None + + Result is `None` if the metadata file that enumerates files + (i.e. RECORD for dist-info, or installed-files.txt or + SOURCES.txt for egg-info) is missing. + Result may be empty if the metadata exists but is empty. + + Custom providers are recommended to provide a "RECORD" file (in + ``read_text``) or override this property to allow for callers to be + able to resolve filenames provided by the package. + """ + + def make_file(name, hash=None, size_str=None): + result = PackagePath(name) + result.hash = FileHash(hash) if hash else None + result.size = int(size_str) if size_str else None + result.dist = self + return result + + @pass_none + def make_files(lines): + # Delay csv import, since Distribution.files is not as widely used + # as other parts of importlib.metadata + import csv + + return starmap(make_file, csv.reader(lines)) + + @pass_none + def skip_missing_files(package_paths): + return list(filter(lambda path: path.locate().exists(), package_paths)) + + return skip_missing_files( + make_files( + self._read_files_distinfo() + or self._read_files_egginfo_installed() + or self._read_files_egginfo_sources() + ) + ) + + def _read_files_distinfo(self): + """ + Read the lines of RECORD. + """ + text = self.read_text('RECORD') + return text and text.splitlines() + + def _read_files_egginfo_installed(self): + """ + Read installed-files.txt and return lines in a similar + CSV-parsable format as RECORD: each file must be placed + relative to the site-packages directory and must also be + quoted (since file names can contain literal commas). + + This file is written when the package is installed by pip, + but it might not be written for other installation methods. + Assume the file is accurate if it exists. + """ + text = self.read_text('installed-files.txt') + # Prepend the .egg-info/ subdir to the lines in this file. + # But this subdir is only available from PathDistribution's + # self._path. + subdir = getattr(self, '_path', None) + if not text or not subdir: + return + + paths = ( + (subdir / name) + .resolve() + .relative_to(self.locate_file('').resolve(), walk_up=True) + .as_posix() + for name in text.splitlines() + ) + return map('"{}"'.format, paths) + + def _read_files_egginfo_sources(self): + """ + Read SOURCES.txt and return lines in a similar CSV-parsable + format as RECORD: each file name must be quoted (since it + might contain literal commas). + + Note that SOURCES.txt is not a reliable source for what + files are installed by a package. This file is generated + for a source archive, and the files that are present + there (e.g. setup.py) may not correctly reflect the files + that are present after the package has been installed. + """ + text = self.read_text('SOURCES.txt') + return text and map('"{}"'.format, text.splitlines()) + + @property + def requires(self) -> Optional[List[str]]: + """Generated requirements specified for this Distribution""" + reqs = self._read_dist_info_reqs() or self._read_egg_info_reqs() + return reqs and list(reqs) + + def _read_dist_info_reqs(self): + return self.metadata.get_all('Requires-Dist') + + def _read_egg_info_reqs(self): + source = self.read_text('requires.txt') + return pass_none(self._deps_from_requires_text)(source) + + @classmethod + def _deps_from_requires_text(cls, source): + return cls._convert_egg_info_reqs_to_simple_reqs(Sectioned.read(source)) + + @staticmethod + def _convert_egg_info_reqs_to_simple_reqs(sections): + """ + Historically, setuptools would solicit and store 'extra' + requirements, including those with environment markers, + in separate sections. More modern tools expect each + dependency to be defined separately, with any relevant + extras and environment markers attached directly to that + requirement. This method converts the former to the + latter. See _test_deps_from_requires_text for an example. + """ + + def make_condition(name): + return name and f'extra == "{name}"' + + def quoted_marker(section): + section = section or '' + extra, sep, markers = section.partition(':') + if extra and markers: + markers = f'({markers})' + conditions = list(filter(None, [markers, make_condition(extra)])) + return '; ' + ' and '.join(conditions) if conditions else '' + + def url_req_space(req): + """ + PEP 508 requires a space between the url_spec and the quoted_marker. + Ref python/importlib_metadata#357. + """ + # '@' is uniquely indicative of a url_req. + return ' ' * ('@' in req) + + for section in sections: + space = url_req_space(section.value) + yield section.value + space + quoted_marker(section.name) + + @property + def origin(self): + return self._load_json('direct_url.json') + + def _load_json(self, filename): + return pass_none(json.loads)( + self.read_text(filename), + object_hook=lambda data: types.SimpleNamespace(**data), + ) + + +class DistributionFinder(MetaPathFinder): + """ + A MetaPathFinder capable of discovering installed distributions. + + Custom providers should implement this interface in order to + supply metadata. + """ + + class Context: + """ + Keyword arguments presented by the caller to + ``distributions()`` or ``Distribution.discover()`` + to narrow the scope of a search for distributions + in all DistributionFinders. + + Each DistributionFinder may expect any parameters + and should attempt to honor the canonical + parameters defined below when appropriate. + + This mechanism gives a custom provider a means to + solicit additional details from the caller beyond + "name" and "path" when searching distributions. + For example, imagine a provider that exposes suites + of packages in either a "public" or "private" ``realm``. + A caller may wish to query only for distributions in + a particular realm and could call + ``distributions(realm="private")`` to signal to the + custom provider to only include distributions from that + realm. + """ + + name = None + """ + Specific name for which a distribution finder should match. + A name of ``None`` matches all distributions. + """ + + def __init__(self, **kwargs): + vars(self).update(kwargs) + + @property + def path(self) -> List[str]: + """ + The sequence of directory path that a distribution finder + should search. + + Typically refers to Python installed package paths such as + "site-packages" directories and defaults to ``sys.path``. + """ + return vars(self).get('path', sys.path) + + @abc.abstractmethod + def find_distributions(self, context=Context()) -> Iterable[Distribution]: + """ + Find distributions. + + Return an iterable of all Distribution instances capable of + loading the metadata for packages matching the ``context``, + a DistributionFinder.Context instance. + """ + + +class FastPath: + """ + Micro-optimized class for searching a root for children. + + Root is a path on the file system that may contain metadata + directories either as natural directories or within a zip file. + + >>> FastPath('').children() + ['...'] + + FastPath objects are cached and recycled for any given root. + + >>> FastPath('foobar') is FastPath('foobar') + True + """ + + @functools.lru_cache() # type: ignore + def __new__(cls, root): + return super().__new__(cls) + + def __init__(self, root): + self.root = root + + def joinpath(self, child): + return pathlib.Path(self.root, child) + + def children(self): + with suppress(Exception): + return os.listdir(self.root or '.') + with suppress(Exception): + return self.zip_children() + return [] + + def zip_children(self): + zip_path = zipfile.Path(self.root) + names = zip_path.root.namelist() + self.joinpath = zip_path.joinpath + + return dict.fromkeys(child.split(posixpath.sep, 1)[0] for child in names) + + def search(self, name): + return self.lookup(self.mtime).search(name) + + @property + def mtime(self): + with suppress(OSError): + return os.stat(self.root).st_mtime + self.lookup.cache_clear() + + @method_cache + def lookup(self, mtime): + return Lookup(self) + + +class Lookup: + """ + A micro-optimized class for searching a (fast) path for metadata. + """ + + def __init__(self, path: FastPath): + """ + Calculate all of the children representing metadata. + + From the children in the path, calculate early all of the + children that appear to represent metadata (infos) or legacy + metadata (eggs). + """ + + base = os.path.basename(path.root).lower() + base_is_egg = base.endswith(".egg") + self.infos = FreezableDefaultDict(list) + self.eggs = FreezableDefaultDict(list) + + for child in path.children(): + low = child.lower() + if low.endswith((".dist-info", ".egg-info")): + # rpartition is faster than splitext and suitable for this purpose. + name = low.rpartition(".")[0].partition("-")[0] + normalized = Prepared.normalize(name) + self.infos[normalized].append(path.joinpath(child)) + elif base_is_egg and low == "egg-info": + name = base.rpartition(".")[0].partition("-")[0] + legacy_normalized = Prepared.legacy_normalize(name) + self.eggs[legacy_normalized].append(path.joinpath(child)) + + self.infos.freeze() + self.eggs.freeze() + + def search(self, prepared: Prepared): + """ + Yield all infos and eggs matching the Prepared query. + """ + infos = ( + self.infos[prepared.normalized] + if prepared + else itertools.chain.from_iterable(self.infos.values()) + ) + eggs = ( + self.eggs[prepared.legacy_normalized] + if prepared + else itertools.chain.from_iterable(self.eggs.values()) + ) + return itertools.chain(infos, eggs) + + +class Prepared: + """ + A prepared search query for metadata on a possibly-named package. + + Pre-calculates the normalization to prevent repeated operations. + + >>> none = Prepared(None) + >>> none.normalized + >>> none.legacy_normalized + >>> bool(none) + False + >>> sample = Prepared('Sample__Pkg-name.foo') + >>> sample.normalized + 'sample_pkg_name_foo' + >>> sample.legacy_normalized + 'sample__pkg_name.foo' + >>> bool(sample) + True + """ + + normalized = None + legacy_normalized = None + + def __init__(self, name: Optional[str]): + self.name = name + if name is None: + return + self.normalized = self.normalize(name) + self.legacy_normalized = self.legacy_normalize(name) + + @staticmethod + def normalize(name): + """ + PEP 503 normalization plus dashes as underscores. + """ + return re.sub(r"[-_.]+", "-", name).lower().replace('-', '_') + + @staticmethod + def legacy_normalize(name): + """ + Normalize the package name as found in the convention in + older packaging tools versions and specs. + """ + return name.lower().replace('-', '_') + + def __bool__(self): + return bool(self.name) + + +class MetadataPathFinder(DistributionFinder): + @classmethod + def find_distributions( + cls, context=DistributionFinder.Context() + ) -> Iterable[PathDistribution]: + """ + Find distributions. + + Return an iterable of all Distribution instances capable of + loading the metadata for packages matching ``context.name`` + (or all names if ``None`` indicated) along the paths in the list + of directories ``context.path``. + """ + found = cls._search_paths(context.name, context.path) + return map(PathDistribution, found) + + @classmethod + def _search_paths(cls, name, paths): + """Find metadata directories in paths heuristically.""" + prepared = Prepared(name) + return itertools.chain.from_iterable( + path.search(prepared) for path in map(FastPath, paths) + ) + + @classmethod + def invalidate_caches(cls) -> None: + FastPath.__new__.cache_clear() + + +class PathDistribution(Distribution): + def __init__(self, path: SimplePath) -> None: + """Construct a distribution. + + :param path: SimplePath indicating the metadata directory. + """ + self._path = path + + def read_text(self, filename: str | os.PathLike[str]) -> Optional[str]: + with suppress( + FileNotFoundError, + IsADirectoryError, + KeyError, + NotADirectoryError, + PermissionError, + ): + return self._path.joinpath(filename).read_text(encoding='utf-8') + + return None + + read_text.__doc__ = Distribution.read_text.__doc__ + + def locate_file(self, path: str | os.PathLike[str]) -> SimplePath: + return self._path.parent / path + + @property + def _normalized_name(self): + """ + Performance optimization: where possible, resolve the + normalized name from the file system path. + """ + stem = os.path.basename(str(self._path)) + return ( + pass_none(Prepared.normalize)(self._name_from_stem(stem)) + or super()._normalized_name + ) + + @staticmethod + def _name_from_stem(stem): + """ + >>> PathDistribution._name_from_stem('foo-3.0.egg-info') + 'foo' + >>> PathDistribution._name_from_stem('CherryPy-3.0.dist-info') + 'CherryPy' + >>> PathDistribution._name_from_stem('face.egg-info') + 'face' + >>> PathDistribution._name_from_stem('foo.bar') + """ + filename, ext = os.path.splitext(stem) + if ext not in ('.dist-info', '.egg-info'): + return + name, sep, rest = filename.partition('-') + return name + + +def distribution(distribution_name: str) -> Distribution: + """Get the ``Distribution`` instance for the named package. + + :param distribution_name: The name of the distribution package as a string. + :return: A ``Distribution`` instance (or subclass thereof). + """ + return Distribution.from_name(distribution_name) + + +def distributions(**kwargs) -> Iterable[Distribution]: + """Get all ``Distribution`` instances in the current environment. + + :return: An iterable of ``Distribution`` instances. + """ + return Distribution.discover(**kwargs) + + +def metadata(distribution_name: str) -> _meta.PackageMetadata: + """Get the metadata for the named package. + + :param distribution_name: The name of the distribution package to query. + :return: A PackageMetadata containing the parsed metadata. + """ + return Distribution.from_name(distribution_name).metadata + + +def version(distribution_name: str) -> str: + """Get the version string for the named package. + + :param distribution_name: The name of the distribution package to query. + :return: The version string for the package as defined in the package's + "Version" metadata key. + """ + return distribution(distribution_name).version + + +_unique = functools.partial( + unique_everseen, + key=operator.attrgetter('_normalized_name'), +) +""" +Wrapper for ``distributions`` to return unique distributions by name. +""" + + +def entry_points(**params) -> EntryPoints: + """Return EntryPoint objects for all installed packages. + + Pass selection parameters (group or name) to filter the + result to entry points matching those properties (see + EntryPoints.select()). + + :return: EntryPoints for all installed packages. + """ + eps = itertools.chain.from_iterable( + dist.entry_points for dist in _unique(distributions()) + ) + return EntryPoints(eps).select(**params) + + +def files(distribution_name: str) -> Optional[List[PackagePath]]: + """Return a list of files for the named package. + + :param distribution_name: The name of the distribution package to query. + :return: List of files composing the distribution. + """ + return distribution(distribution_name).files + + +def requires(distribution_name: str) -> Optional[List[str]]: + """ + Return a list of requirements for the named package. + + :return: An iterable of requirements, suitable for + packaging.requirement.Requirement. + """ + return distribution(distribution_name).requires + + +def packages_distributions() -> Mapping[str, List[str]]: + """ + Return a mapping of top-level packages to their + distributions. + + >>> import collections.abc + >>> pkgs = packages_distributions() + >>> all(isinstance(dist, collections.abc.Sequence) for dist in pkgs.values()) + True + """ + pkg_to_dist = collections.defaultdict(list) + for dist in distributions(): + for pkg in _top_level_declared(dist) or _top_level_inferred(dist): + pkg_to_dist[pkg].append(dist.metadata['Name']) + return dict(pkg_to_dist) + + +def _top_level_declared(dist): + return (dist.read_text('top_level.txt') or '').split() + + +def _topmost(name: PackagePath) -> Optional[str]: + """ + Return the top-most parent as long as there is a parent. + """ + top, *rest = name.parts + return top if rest else None + + +def _get_toplevel_name(name: PackagePath) -> str: + """ + Infer a possibly importable module name from a name presumed on + sys.path. + + >>> _get_toplevel_name(PackagePath('foo.py')) + 'foo' + >>> _get_toplevel_name(PackagePath('foo')) + 'foo' + >>> _get_toplevel_name(PackagePath('foo.pyc')) + 'foo' + >>> _get_toplevel_name(PackagePath('foo/__init__.py')) + 'foo' + >>> _get_toplevel_name(PackagePath('foo.pth')) + 'foo.pth' + >>> _get_toplevel_name(PackagePath('foo.dist-info')) + 'foo.dist-info' + """ + return _topmost(name) or ( + # python/typeshed#10328 + inspect.getmodulename(name) # type: ignore + or str(name) + ) + + +def _top_level_inferred(dist): + opt_names = set(map(_get_toplevel_name, always_iterable(dist.files))) + + def importable_name(name): + return '.' not in name + + return filter(importable_name, opt_names) diff --git a/stdlib/importlib/metadata/_adapters.py b/stdlib/importlib/metadata/_adapters.py new file mode 100644 index 000000000..591168808 --- /dev/null +++ b/stdlib/importlib/metadata/_adapters.py @@ -0,0 +1,89 @@ +import functools +import warnings +import re +import textwrap +import email.message + +from ._text import FoldedCase + + +# Do not remove prior to 2024-01-01 or Python 3.14 +_warn = functools.partial( + warnings.warn, + "Implicit None on return values is deprecated and will raise KeyErrors.", + DeprecationWarning, + stacklevel=2, +) + + +class Message(email.message.Message): + multiple_use_keys = set( + map( + FoldedCase, + [ + 'Classifier', + 'Obsoletes-Dist', + 'Platform', + 'Project-URL', + 'Provides-Dist', + 'Provides-Extra', + 'Requires-Dist', + 'Requires-External', + 'Supported-Platform', + 'Dynamic', + ], + ) + ) + """ + Keys that may be indicated multiple times per PEP 566. + """ + + def __new__(cls, orig: email.message.Message): + res = super().__new__(cls) + vars(res).update(vars(orig)) + return res + + def __init__(self, *args, **kwargs): + self._headers = self._repair_headers() + + # suppress spurious error from mypy + def __iter__(self): + return super().__iter__() + + def __getitem__(self, item): + """ + Warn users that a ``KeyError`` can be expected when a + missing key is supplied. Ref python/importlib_metadata#371. + """ + res = super().__getitem__(item) + if res is None: + _warn() + return res + + def _repair_headers(self): + def redent(value): + "Correct for RFC822 indentation" + if not value or '\n' not in value: + return value + return textwrap.dedent(' ' * 8 + value) + + headers = [(key, redent(value)) for key, value in vars(self)['_headers']] + if self._payload: + headers.append(('Description', self.get_payload())) + return headers + + @property + def json(self): + """ + Convert PackageMetadata to a JSON-compatible format + per PEP 0566. + """ + + def transform(key): + value = self.get_all(key) if key in self.multiple_use_keys else self[key] + if key == 'Keywords': + value = re.split(r'\s+', value) + tk = key.lower().replace('-', '_') + return tk, value + + return dict(map(transform, map(FoldedCase, self))) diff --git a/stdlib/importlib/metadata/_collections.py b/stdlib/importlib/metadata/_collections.py new file mode 100644 index 000000000..cf0954e1a --- /dev/null +++ b/stdlib/importlib/metadata/_collections.py @@ -0,0 +1,30 @@ +import collections + + +# from jaraco.collections 3.3 +class FreezableDefaultDict(collections.defaultdict): + """ + Often it is desirable to prevent the mutation of + a default dict after its initial construction, such + as to prevent mutation during iteration. + + >>> dd = FreezableDefaultDict(list) + >>> dd[0].append('1') + >>> dd.freeze() + >>> dd[1] + [] + >>> len(dd) + 1 + """ + + def __missing__(self, key): + return getattr(self, '_frozen', super().__missing__)(key) + + def freeze(self): + self._frozen = lambda key: self.default_factory() + + +class Pair(collections.namedtuple('Pair', 'name value')): + @classmethod + def parse(cls, text): + return cls(*map(str.strip, text.split("=", 1))) diff --git a/stdlib/importlib/metadata/_functools.py b/stdlib/importlib/metadata/_functools.py new file mode 100644 index 000000000..71f66bd03 --- /dev/null +++ b/stdlib/importlib/metadata/_functools.py @@ -0,0 +1,104 @@ +import types +import functools + + +# from jaraco.functools 3.3 +def method_cache(method, cache_wrapper=None): + """ + Wrap lru_cache to support storing the cache data in the object instances. + + Abstracts the common paradigm where the method explicitly saves an + underscore-prefixed protected property on first call and returns that + subsequently. + + >>> class MyClass: + ... calls = 0 + ... + ... @method_cache + ... def method(self, value): + ... self.calls += 1 + ... return value + + >>> a = MyClass() + >>> a.method(3) + 3 + >>> for x in range(75): + ... res = a.method(x) + >>> a.calls + 75 + + Note that the apparent behavior will be exactly like that of lru_cache + except that the cache is stored on each instance, so values in one + instance will not flush values from another, and when an instance is + deleted, so are the cached values for that instance. + + >>> b = MyClass() + >>> for x in range(35): + ... res = b.method(x) + >>> b.calls + 35 + >>> a.method(0) + 0 + >>> a.calls + 75 + + Note that if method had been decorated with ``functools.lru_cache()``, + a.calls would have been 76 (due to the cached value of 0 having been + flushed by the 'b' instance). + + Clear the cache with ``.cache_clear()`` + + >>> a.method.cache_clear() + + Same for a method that hasn't yet been called. + + >>> c = MyClass() + >>> c.method.cache_clear() + + Another cache wrapper may be supplied: + + >>> cache = functools.lru_cache(maxsize=2) + >>> MyClass.method2 = method_cache(lambda self: 3, cache_wrapper=cache) + >>> a = MyClass() + >>> a.method2() + 3 + + Caution - do not subsequently wrap the method with another decorator, such + as ``@property``, which changes the semantics of the function. + + See also + http://code.activestate.com/recipes/577452-a-memoize-decorator-for-instance-methods/ + for another implementation and additional justification. + """ + cache_wrapper = cache_wrapper or functools.lru_cache() + + def wrapper(self, *args, **kwargs): + # it's the first call, replace the method with a cached, bound method + bound_method = types.MethodType(method, self) + cached_method = cache_wrapper(bound_method) + setattr(self, method.__name__, cached_method) + return cached_method(*args, **kwargs) + + # Support cache clear even before cache has been created. + wrapper.cache_clear = lambda: None + + return wrapper + + +# From jaraco.functools 3.3 +def pass_none(func): + """ + Wrap func so it's not called if its first param is None + + >>> print_text = pass_none(print) + >>> print_text('text') + text + >>> print_text(None) + """ + + @functools.wraps(func) + def wrapper(param, *args, **kwargs): + if param is not None: + return func(param, *args, **kwargs) + + return wrapper diff --git a/stdlib/importlib/metadata/_itertools.py b/stdlib/importlib/metadata/_itertools.py new file mode 100644 index 000000000..d4ca9b914 --- /dev/null +++ b/stdlib/importlib/metadata/_itertools.py @@ -0,0 +1,73 @@ +from itertools import filterfalse + + +def unique_everseen(iterable, key=None): + "List unique elements, preserving order. Remember all elements ever seen." + # unique_everseen('AAAABBBCCDAABBB') --> A B C D + # unique_everseen('ABBCcAD', str.lower) --> A B C D + seen = set() + seen_add = seen.add + if key is None: + for element in filterfalse(seen.__contains__, iterable): + seen_add(element) + yield element + else: + for element in iterable: + k = key(element) + if k not in seen: + seen_add(k) + yield element + + +# copied from more_itertools 8.8 +def always_iterable(obj, base_type=(str, bytes)): + """If *obj* is iterable, return an iterator over its items:: + + >>> obj = (1, 2, 3) + >>> list(always_iterable(obj)) + [1, 2, 3] + + If *obj* is not iterable, return a one-item iterable containing *obj*:: + + >>> obj = 1 + >>> list(always_iterable(obj)) + [1] + + If *obj* is ``None``, return an empty iterable: + + >>> obj = None + >>> list(always_iterable(None)) + [] + + By default, binary and text strings are not considered iterable:: + + >>> obj = 'foo' + >>> list(always_iterable(obj)) + ['foo'] + + If *base_type* is set, objects for which ``isinstance(obj, base_type)`` + returns ``True`` won't be considered iterable. + + >>> obj = {'a': 1} + >>> list(always_iterable(obj)) # Iterate over the dict's keys + ['a'] + >>> list(always_iterable(obj, base_type=dict)) # Treat dicts as a unit + [{'a': 1}] + + Set *base_type* to ``None`` to avoid any special handling and treat objects + Python considers iterable as iterable: + + >>> obj = 'foo' + >>> list(always_iterable(obj, base_type=None)) + ['f', 'o', 'o'] + """ + if obj is None: + return iter(()) + + if (base_type is not None) and isinstance(obj, base_type): + return iter((obj,)) + + try: + return iter(obj) + except TypeError: + return iter((obj,)) diff --git a/stdlib/importlib/metadata/_meta.py b/stdlib/importlib/metadata/_meta.py new file mode 100644 index 000000000..1927d0f62 --- /dev/null +++ b/stdlib/importlib/metadata/_meta.py @@ -0,0 +1,67 @@ +from __future__ import annotations + +import os +from typing import Protocol +from typing import Any, Dict, Iterator, List, Optional, TypeVar, Union, overload + + +_T = TypeVar("_T") + + +class PackageMetadata(Protocol): + def __len__(self) -> int: ... # pragma: no cover + + def __contains__(self, item: str) -> bool: ... # pragma: no cover + + def __getitem__(self, key: str) -> str: ... # pragma: no cover + + def __iter__(self) -> Iterator[str]: ... # pragma: no cover + + @overload + def get( + self, name: str, failobj: None = None + ) -> Optional[str]: ... # pragma: no cover + + @overload + def get(self, name: str, failobj: _T) -> Union[str, _T]: ... # pragma: no cover + + # overload per python/importlib_metadata#435 + @overload + def get_all( + self, name: str, failobj: None = None + ) -> Optional[List[Any]]: ... # pragma: no cover + + @overload + def get_all(self, name: str, failobj: _T) -> Union[List[Any], _T]: + """ + Return all values associated with a possibly multi-valued key. + """ + + @property + def json(self) -> Dict[str, Union[str, List[str]]]: + """ + A JSON-compatible form of the metadata. + """ + + +class SimplePath(Protocol): + """ + A minimal subset of pathlib.Path required by Distribution. + """ + + def joinpath( + self, other: Union[str, os.PathLike[str]] + ) -> SimplePath: ... # pragma: no cover + + def __truediv__( + self, other: Union[str, os.PathLike[str]] + ) -> SimplePath: ... # pragma: no cover + + @property + def parent(self) -> SimplePath: ... # pragma: no cover + + def read_text(self, encoding=None) -> str: ... # pragma: no cover + + def read_bytes(self) -> bytes: ... # pragma: no cover + + def exists(self) -> bool: ... # pragma: no cover diff --git a/stdlib/importlib/metadata/_text.py b/stdlib/importlib/metadata/_text.py new file mode 100644 index 000000000..c88cfbb23 --- /dev/null +++ b/stdlib/importlib/metadata/_text.py @@ -0,0 +1,99 @@ +import re + +from ._functools import method_cache + + +# from jaraco.text 3.5 +class FoldedCase(str): + """ + A case insensitive string class; behaves just like str + except compares equal when the only variation is case. + + >>> s = FoldedCase('hello world') + + >>> s == 'Hello World' + True + + >>> 'Hello World' == s + True + + >>> s != 'Hello World' + False + + >>> s.index('O') + 4 + + >>> s.split('O') + ['hell', ' w', 'rld'] + + >>> sorted(map(FoldedCase, ['GAMMA', 'alpha', 'Beta'])) + ['alpha', 'Beta', 'GAMMA'] + + Sequence membership is straightforward. + + >>> "Hello World" in [s] + True + >>> s in ["Hello World"] + True + + You may test for set inclusion, but candidate and elements + must both be folded. + + >>> FoldedCase("Hello World") in {s} + True + >>> s in {FoldedCase("Hello World")} + True + + String inclusion works as long as the FoldedCase object + is on the right. + + >>> "hello" in FoldedCase("Hello World") + True + + But not if the FoldedCase object is on the left: + + >>> FoldedCase('hello') in 'Hello World' + False + + In that case, use in_: + + >>> FoldedCase('hello').in_('Hello World') + True + + >>> FoldedCase('hello') > FoldedCase('Hello') + False + """ + + def __lt__(self, other): + return self.lower() < other.lower() + + def __gt__(self, other): + return self.lower() > other.lower() + + def __eq__(self, other): + return self.lower() == other.lower() + + def __ne__(self, other): + return self.lower() != other.lower() + + def __hash__(self): + return hash(self.lower()) + + def __contains__(self, other): + return super().lower().__contains__(other.lower()) + + def in_(self, other): + "Does self appear in other?" + return self in FoldedCase(other) + + # cache lower since it's likely to be called frequently. + @method_cache + def lower(self): + return super().lower() + + def index(self, sub): + return self.lower().index(sub.lower()) + + def split(self, splitter=' ', maxsplit=0): + pattern = re.compile(re.escape(splitter), re.I) + return pattern.split(self, maxsplit) diff --git a/stdlib/importlib/metadata/diagnose.py b/stdlib/importlib/metadata/diagnose.py new file mode 100644 index 000000000..e405471ac --- /dev/null +++ b/stdlib/importlib/metadata/diagnose.py @@ -0,0 +1,21 @@ +import sys + +from . import Distribution + + +def inspect(path): + print("Inspecting", path) + dists = list(Distribution.discover(path=[path])) + if not dists: + return + print("Found", len(dists), "packages:", end=' ') + print(', '.join(dist.name for dist in dists)) + + +def run(): + for path in sys.path: + inspect(path) + + +if __name__ == '__main__': + run() diff --git a/stdlib/importlib/readers.py b/stdlib/importlib/readers.py new file mode 100644 index 000000000..df7fb92e5 --- /dev/null +++ b/stdlib/importlib/readers.py @@ -0,0 +1,12 @@ +""" +Compatibility shim for .resources.readers as found on Python 3.10. + +Consumers that can rely on Python 3.11 should use the other +module directly. +""" + +from .resources.readers import ( + FileReader, ZipReader, MultiplexedPath, NamespaceReader, +) + +__all__ = ['FileReader', 'ZipReader', 'MultiplexedPath', 'NamespaceReader'] diff --git a/stdlib/importlib/resources/__init__.py b/stdlib/importlib/resources/__init__.py new file mode 100644 index 000000000..723c9f9eb --- /dev/null +++ b/stdlib/importlib/resources/__init__.py @@ -0,0 +1,43 @@ +""" +Read resources contained within a package. + +This codebase is shared between importlib.resources in the stdlib +and importlib_resources in PyPI. See +https://github.com/python/importlib_metadata/wiki/Development-Methodology +for more detail. +""" + +from ._common import ( + as_file, + files, + Package, + Anchor, +) + +from ._functional import ( + contents, + is_resource, + open_binary, + open_text, + path, + read_binary, + read_text, +) + +from .abc import ResourceReader + + +__all__ = [ + 'Package', + 'Anchor', + 'ResourceReader', + 'as_file', + 'files', + 'contents', + 'is_resource', + 'open_binary', + 'open_text', + 'path', + 'read_binary', + 'read_text', +] diff --git a/stdlib/importlib/resources/_adapters.py b/stdlib/importlib/resources/_adapters.py new file mode 100644 index 000000000..50688fbb6 --- /dev/null +++ b/stdlib/importlib/resources/_adapters.py @@ -0,0 +1,168 @@ +from contextlib import suppress +from io import TextIOWrapper + +from . import abc + + +class SpecLoaderAdapter: + """ + Adapt a package spec to adapt the underlying loader. + """ + + def __init__(self, spec, adapter=lambda spec: spec.loader): + self.spec = spec + self.loader = adapter(spec) + + def __getattr__(self, name): + return getattr(self.spec, name) + + +class TraversableResourcesLoader: + """ + Adapt a loader to provide TraversableResources. + """ + + def __init__(self, spec): + self.spec = spec + + def get_resource_reader(self, name): + return CompatibilityFiles(self.spec)._native() + + +def _io_wrapper(file, mode='r', *args, **kwargs): + if mode == 'r': + return TextIOWrapper(file, *args, **kwargs) + elif mode == 'rb': + return file + raise ValueError(f"Invalid mode value '{mode}', only 'r' and 'rb' are supported") + + +class CompatibilityFiles: + """ + Adapter for an existing or non-existent resource reader + to provide a compatibility .files(). + """ + + class SpecPath(abc.Traversable): + """ + Path tied to a module spec. + Can be read and exposes the resource reader children. + """ + + def __init__(self, spec, reader): + self._spec = spec + self._reader = reader + + def iterdir(self): + if not self._reader: + return iter(()) + return iter( + CompatibilityFiles.ChildPath(self._reader, path) + for path in self._reader.contents() + ) + + def is_file(self): + return False + + is_dir = is_file + + def joinpath(self, other): + if not self._reader: + return CompatibilityFiles.OrphanPath(other) + return CompatibilityFiles.ChildPath(self._reader, other) + + @property + def name(self): + return self._spec.name + + def open(self, mode='r', *args, **kwargs): + return _io_wrapper(self._reader.open_resource(None), mode, *args, **kwargs) + + class ChildPath(abc.Traversable): + """ + Path tied to a resource reader child. + Can be read but doesn't expose any meaningful children. + """ + + def __init__(self, reader, name): + self._reader = reader + self._name = name + + def iterdir(self): + return iter(()) + + def is_file(self): + return self._reader.is_resource(self.name) + + def is_dir(self): + return not self.is_file() + + def joinpath(self, other): + return CompatibilityFiles.OrphanPath(self.name, other) + + @property + def name(self): + return self._name + + def open(self, mode='r', *args, **kwargs): + return _io_wrapper( + self._reader.open_resource(self.name), mode, *args, **kwargs + ) + + class OrphanPath(abc.Traversable): + """ + Orphan path, not tied to a module spec or resource reader. + Can't be read and doesn't expose any meaningful children. + """ + + def __init__(self, *path_parts): + if len(path_parts) < 1: + raise ValueError('Need at least one path part to construct a path') + self._path = path_parts + + def iterdir(self): + return iter(()) + + def is_file(self): + return False + + is_dir = is_file + + def joinpath(self, other): + return CompatibilityFiles.OrphanPath(*self._path, other) + + @property + def name(self): + return self._path[-1] + + def open(self, mode='r', *args, **kwargs): + raise FileNotFoundError("Can't open orphan path") + + def __init__(self, spec): + self.spec = spec + + @property + def _reader(self): + with suppress(AttributeError): + return self.spec.loader.get_resource_reader(self.spec.name) + + def _native(self): + """ + Return the native reader if it supports files(). + """ + reader = self._reader + return reader if hasattr(reader, 'files') else self + + def __getattr__(self, attr): + return getattr(self._reader, attr) + + def files(self): + return CompatibilityFiles.SpecPath(self.spec, self._reader) + + +def wrap_spec(package): + """ + Construct a package spec with traversable compatibility + on the spec/loader/reader. + """ + return SpecLoaderAdapter(package.__spec__, TraversableResourcesLoader) diff --git a/stdlib/importlib/resources/_common.py b/stdlib/importlib/resources/_common.py new file mode 100644 index 000000000..4e9014c45 --- /dev/null +++ b/stdlib/importlib/resources/_common.py @@ -0,0 +1,211 @@ +import os +import pathlib +import tempfile +import functools +import contextlib +import types +import importlib +import inspect +import warnings +import itertools + +from typing import Union, Optional, cast +from .abc import ResourceReader, Traversable + +Package = Union[types.ModuleType, str] +Anchor = Package + + +def package_to_anchor(func): + """ + Replace 'package' parameter as 'anchor' and warn about the change. + + Other errors should fall through. + + >>> files('a', 'b') + Traceback (most recent call last): + TypeError: files() takes from 0 to 1 positional arguments but 2 were given + + Remove this compatibility in Python 3.14. + """ + undefined = object() + + @functools.wraps(func) + def wrapper(anchor=undefined, package=undefined): + if package is not undefined: + if anchor is not undefined: + return func(anchor, package) + warnings.warn( + "First parameter to files is renamed to 'anchor'", + DeprecationWarning, + stacklevel=2, + ) + return func(package) + elif anchor is undefined: + return func() + return func(anchor) + + return wrapper + + +@package_to_anchor +def files(anchor: Optional[Anchor] = None) -> Traversable: + """ + Get a Traversable resource for an anchor. + """ + return from_package(resolve(anchor)) + + +def get_resource_reader(package: types.ModuleType) -> Optional[ResourceReader]: + """ + Return the package's loader if it's a ResourceReader. + """ + # We can't use + # a issubclass() check here because apparently abc.'s __subclasscheck__() + # hook wants to create a weak reference to the object, but + # zipimport.zipimporter does not support weak references, resulting in a + # TypeError. That seems terrible. + spec = package.__spec__ + reader = getattr(spec.loader, 'get_resource_reader', None) # type: ignore[union-attr] + if reader is None: + return None + return reader(spec.name) # type: ignore[union-attr] + + +@functools.singledispatch +def resolve(cand: Optional[Anchor]) -> types.ModuleType: + return cast(types.ModuleType, cand) + + +@resolve.register +def _(cand: str) -> types.ModuleType: + return importlib.import_module(cand) + + +@resolve.register +def _(cand: None) -> types.ModuleType: + return resolve(_infer_caller().f_globals['__name__']) + + +def _infer_caller(): + """ + Walk the stack and find the frame of the first caller not in this module. + """ + + def is_this_file(frame_info): + return frame_info.filename == stack[0].filename + + def is_wrapper(frame_info): + return frame_info.function == 'wrapper' + + stack = inspect.stack() + not_this_file = itertools.filterfalse(is_this_file, stack) + # also exclude 'wrapper' due to singledispatch in the call stack + callers = itertools.filterfalse(is_wrapper, not_this_file) + return next(callers).frame + + +def from_package(package: types.ModuleType): + """ + Return a Traversable object for the given package. + + """ + # deferred for performance (python/cpython#109829) + from ._adapters import wrap_spec + + spec = wrap_spec(package) + reader = spec.loader.get_resource_reader(spec.name) + return reader.files() + + +@contextlib.contextmanager +def _tempfile( + reader, + suffix='', + # gh-93353: Keep a reference to call os.remove() in late Python + # finalization. + *, + _os_remove=os.remove, +): + # Not using tempfile.NamedTemporaryFile as it leads to deeper 'try' + # blocks due to the need to close the temporary file to work on Windows + # properly. + fd, raw_path = tempfile.mkstemp(suffix=suffix) + try: + try: + os.write(fd, reader()) + finally: + os.close(fd) + del reader + yield pathlib.Path(raw_path) + finally: + try: + _os_remove(raw_path) + except FileNotFoundError: + pass + + +def _temp_file(path): + return _tempfile(path.read_bytes, suffix=path.name) + + +def _is_present_dir(path: Traversable) -> bool: + """ + Some Traversables implement ``is_dir()`` to raise an + exception (i.e. ``FileNotFoundError``) when the + directory doesn't exist. This function wraps that call + to always return a boolean and only return True + if there's a dir and it exists. + """ + with contextlib.suppress(FileNotFoundError): + return path.is_dir() + return False + + +@functools.singledispatch +def as_file(path): + """ + Given a Traversable object, return that object as a + path on the local file system in a context manager. + """ + return _temp_dir(path) if _is_present_dir(path) else _temp_file(path) + + +@as_file.register(pathlib.Path) +@contextlib.contextmanager +def _(path): + """ + Degenerate behavior for pathlib.Path objects. + """ + yield path + + +@contextlib.contextmanager +def _temp_path(dir: tempfile.TemporaryDirectory): + """ + Wrap tempfile.TemporaryDirectory to return a pathlib object. + """ + with dir as result: + yield pathlib.Path(result) + + +@contextlib.contextmanager +def _temp_dir(path): + """ + Given a traversable dir, recursively replicate the whole tree + to the file system in a context manager. + """ + assert path.is_dir() + with _temp_path(tempfile.TemporaryDirectory()) as temp_dir: + yield _write_contents(temp_dir, path) + + +def _write_contents(target, source): + child = target.joinpath(source.name) + if source.is_dir(): + child.mkdir() + for item in source.iterdir(): + _write_contents(child, item) + else: + child.write_bytes(source.read_bytes()) + return child diff --git a/stdlib/importlib/resources/_functional.py b/stdlib/importlib/resources/_functional.py new file mode 100644 index 000000000..f59416f2d --- /dev/null +++ b/stdlib/importlib/resources/_functional.py @@ -0,0 +1,81 @@ +"""Simplified function-based API for importlib.resources""" + +import warnings + +from ._common import files, as_file + + +_MISSING = object() + + +def open_binary(anchor, *path_names): + """Open for binary reading the *resource* within *package*.""" + return _get_resource(anchor, path_names).open('rb') + + +def open_text(anchor, *path_names, encoding=_MISSING, errors='strict'): + """Open for text reading the *resource* within *package*.""" + encoding = _get_encoding_arg(path_names, encoding) + resource = _get_resource(anchor, path_names) + return resource.open('r', encoding=encoding, errors=errors) + + +def read_binary(anchor, *path_names): + """Read and return contents of *resource* within *package* as bytes.""" + return _get_resource(anchor, path_names).read_bytes() + + +def read_text(anchor, *path_names, encoding=_MISSING, errors='strict'): + """Read and return contents of *resource* within *package* as str.""" + encoding = _get_encoding_arg(path_names, encoding) + resource = _get_resource(anchor, path_names) + return resource.read_text(encoding=encoding, errors=errors) + + +def path(anchor, *path_names): + """Return the path to the *resource* as an actual file system path.""" + return as_file(_get_resource(anchor, path_names)) + + +def is_resource(anchor, *path_names): + """Return ``True`` if there is a resource named *name* in the package, + + Otherwise returns ``False``. + """ + return _get_resource(anchor, path_names).is_file() + + +def contents(anchor, *path_names): + """Return an iterable over the named resources within the package. + + The iterable returns :class:`str` resources (e.g. files). + The iterable does not recurse into subdirectories. + """ + warnings.warn( + "importlib.resources.contents is deprecated. " + "Use files(anchor).iterdir() instead.", + DeprecationWarning, + stacklevel=1, + ) + return (resource.name for resource in _get_resource(anchor, path_names).iterdir()) + + +def _get_encoding_arg(path_names, encoding): + # For compatibility with versions where *encoding* was a positional + # argument, it needs to be given explicitly when there are multiple + # *path_names*. + # This limitation can be removed in Python 3.15. + if encoding is _MISSING: + if len(path_names) > 1: + raise TypeError( + "'encoding' argument required with multiple path names", + ) + else: + return 'utf-8' + return encoding + + +def _get_resource(anchor, path_names): + if anchor is None: + raise TypeError("anchor must be module or string, got None") + return files(anchor).joinpath(*path_names) diff --git a/stdlib/importlib/resources/_itertools.py b/stdlib/importlib/resources/_itertools.py new file mode 100644 index 000000000..7b775ef5a --- /dev/null +++ b/stdlib/importlib/resources/_itertools.py @@ -0,0 +1,38 @@ +# from more_itertools 9.0 +def only(iterable, default=None, too_long=None): + """If *iterable* has only one item, return it. + If it has zero items, return *default*. + If it has more than one item, raise the exception given by *too_long*, + which is ``ValueError`` by default. + >>> only([], default='missing') + 'missing' + >>> only([1]) + 1 + >>> only([1, 2]) # doctest: +IGNORE_EXCEPTION_DETAIL + Traceback (most recent call last): + ... + ValueError: Expected exactly one item in iterable, but got 1, 2, + and perhaps more.' + >>> only([1, 2], too_long=TypeError) # doctest: +IGNORE_EXCEPTION_DETAIL + Traceback (most recent call last): + ... + TypeError + Note that :func:`only` attempts to advance *iterable* twice to ensure there + is only one item. See :func:`spy` or :func:`peekable` to check + iterable contents less destructively. + """ + it = iter(iterable) + first_value = next(it, default) + + try: + second_value = next(it) + except StopIteration: + pass + else: + msg = ( + 'Expected exactly one item in iterable, but got {!r}, {!r}, ' + 'and perhaps more.'.format(first_value, second_value) + ) + raise too_long or ValueError(msg) + + return first_value diff --git a/stdlib/importlib/resources/abc.py b/stdlib/importlib/resources/abc.py new file mode 100644 index 000000000..6750a7aaf --- /dev/null +++ b/stdlib/importlib/resources/abc.py @@ -0,0 +1,173 @@ +import abc +import io +import itertools +import os +import pathlib +from typing import Any, BinaryIO, Iterable, Iterator, NoReturn, Text, Optional +from typing import runtime_checkable, Protocol +from typing import Union + + +StrPath = Union[str, os.PathLike[str]] + +__all__ = ["ResourceReader", "Traversable", "TraversableResources"] + + +class ResourceReader(metaclass=abc.ABCMeta): + """Abstract base class for loaders to provide resource reading support.""" + + @abc.abstractmethod + def open_resource(self, resource: Text) -> BinaryIO: + """Return an opened, file-like object for binary reading. + + The 'resource' argument is expected to represent only a file name. + If the resource cannot be found, FileNotFoundError is raised. + """ + # This deliberately raises FileNotFoundError instead of + # NotImplementedError so that if this method is accidentally called, + # it'll still do the right thing. + raise FileNotFoundError + + @abc.abstractmethod + def resource_path(self, resource: Text) -> Text: + """Return the file system path to the specified resource. + + The 'resource' argument is expected to represent only a file name. + If the resource does not exist on the file system, raise + FileNotFoundError. + """ + # This deliberately raises FileNotFoundError instead of + # NotImplementedError so that if this method is accidentally called, + # it'll still do the right thing. + raise FileNotFoundError + + @abc.abstractmethod + def is_resource(self, path: Text) -> bool: + """Return True if the named 'path' is a resource. + + Files are resources, directories are not. + """ + raise FileNotFoundError + + @abc.abstractmethod + def contents(self) -> Iterable[str]: + """Return an iterable of entries in `package`.""" + raise FileNotFoundError + + +class TraversalError(Exception): + pass + + +@runtime_checkable +class Traversable(Protocol): + """ + An object with a subset of pathlib.Path methods suitable for + traversing directories and opening files. + + Any exceptions that occur when accessing the backing resource + may propagate unaltered. + """ + + @abc.abstractmethod + def iterdir(self) -> Iterator["Traversable"]: + """ + Yield Traversable objects in self + """ + + def read_bytes(self) -> bytes: + """ + Read contents of self as bytes + """ + with self.open('rb') as strm: + return strm.read() + + def read_text(self, encoding: Optional[str] = None) -> str: + """ + Read contents of self as text + """ + with self.open(encoding=encoding) as strm: + return strm.read() + + @abc.abstractmethod + def is_dir(self) -> bool: + """ + Return True if self is a directory + """ + + @abc.abstractmethod + def is_file(self) -> bool: + """ + Return True if self is a file + """ + + def joinpath(self, *descendants: StrPath) -> "Traversable": + """ + Return Traversable resolved with any descendants applied. + + Each descendant should be a path segment relative to self + and each may contain multiple levels separated by + ``posixpath.sep`` (``/``). + """ + if not descendants: + return self + names = itertools.chain.from_iterable( + path.parts for path in map(pathlib.PurePosixPath, descendants) + ) + target = next(names) + matches = ( + traversable for traversable in self.iterdir() if traversable.name == target + ) + try: + match = next(matches) + except StopIteration: + raise TraversalError( + "Target not found during traversal.", target, list(names) + ) + return match.joinpath(*names) + + def __truediv__(self, child: StrPath) -> "Traversable": + """ + Return Traversable child in self + """ + return self.joinpath(child) + + @abc.abstractmethod + def open(self, mode='r', *args, **kwargs): + """ + mode may be 'r' or 'rb' to open as text or binary. Return a handle + suitable for reading (same as pathlib.Path.open). + + When opening as text, accepts encoding parameters such as those + accepted by io.TextIOWrapper. + """ + + @property + @abc.abstractmethod + def name(self) -> str: + """ + The base name of this object without any parent references. + """ + + +class TraversableResources(ResourceReader): + """ + The required interface for providing traversable + resources. + """ + + @abc.abstractmethod + def files(self) -> "Traversable": + """Return a Traversable object for the loaded package.""" + + def open_resource(self, resource: StrPath) -> io.BufferedReader: + return self.files().joinpath(resource).open('rb') + + def resource_path(self, resource: Any) -> NoReturn: + raise FileNotFoundError(resource) + + def is_resource(self, path: StrPath) -> bool: + return self.files().joinpath(path).is_file() + + def contents(self) -> Iterator[str]: + return (item.name for item in self.files().iterdir()) diff --git a/stdlib/importlib/resources/readers.py b/stdlib/importlib/resources/readers.py new file mode 100644 index 000000000..70fc7e2b9 --- /dev/null +++ b/stdlib/importlib/resources/readers.py @@ -0,0 +1,203 @@ +from __future__ import annotations + +import collections +import contextlib +import itertools +import pathlib +import operator +import re +import warnings +import zipfile +from collections.abc import Iterator + +from . import abc + +from ._itertools import only + + +def remove_duplicates(items): + return iter(collections.OrderedDict.fromkeys(items)) + + +class FileReader(abc.TraversableResources): + def __init__(self, loader): + self.path = pathlib.Path(loader.path).parent + + def resource_path(self, resource): + """ + Return the file system path to prevent + `resources.path()` from creating a temporary + copy. + """ + return str(self.path.joinpath(resource)) + + def files(self): + return self.path + + +class ZipReader(abc.TraversableResources): + def __init__(self, loader, module): + self.prefix = loader.prefix.replace('\\', '/') + if loader.is_package(module): + _, _, name = module.rpartition('.') + self.prefix += name + '/' + self.archive = loader.archive + + def open_resource(self, resource): + try: + return super().open_resource(resource) + except KeyError as exc: + raise FileNotFoundError(exc.args[0]) + + def is_resource(self, path): + """ + Workaround for `zipfile.Path.is_file` returning true + for non-existent paths. + """ + target = self.files().joinpath(path) + return target.is_file() and target.exists() + + def files(self): + return zipfile.Path(self.archive, self.prefix) + + +class MultiplexedPath(abc.Traversable): + """ + Given a series of Traversable objects, implement a merged + version of the interface across all objects. Useful for + namespace packages which may be multihomed at a single + name. + """ + + def __init__(self, *paths): + self._paths = list(map(_ensure_traversable, remove_duplicates(paths))) + if not self._paths: + message = 'MultiplexedPath must contain at least one path' + raise FileNotFoundError(message) + if not all(path.is_dir() for path in self._paths): + raise NotADirectoryError('MultiplexedPath only supports directories') + + def iterdir(self): + children = (child for path in self._paths for child in path.iterdir()) + by_name = operator.attrgetter('name') + groups = itertools.groupby(sorted(children, key=by_name), key=by_name) + return map(self._follow, (locs for name, locs in groups)) + + def read_bytes(self): + raise FileNotFoundError(f'{self} is not a file') + + def read_text(self, *args, **kwargs): + raise FileNotFoundError(f'{self} is not a file') + + def is_dir(self): + return True + + def is_file(self): + return False + + def joinpath(self, *descendants): + try: + return super().joinpath(*descendants) + except abc.TraversalError: + # One of the paths did not resolve (a directory does not exist). + # Just return something that will not exist. + return self._paths[0].joinpath(*descendants) + + @classmethod + def _follow(cls, children): + """ + Construct a MultiplexedPath if needed. + + If children contains a sole element, return it. + Otherwise, return a MultiplexedPath of the items. + Unless one of the items is not a Directory, then return the first. + """ + subdirs, one_dir, one_file = itertools.tee(children, 3) + + try: + return only(one_dir) + except ValueError: + try: + return cls(*subdirs) + except NotADirectoryError: + return next(one_file) + + def open(self, *args, **kwargs): + raise FileNotFoundError(f'{self} is not a file') + + @property + def name(self): + return self._paths[0].name + + def __repr__(self): + paths = ', '.join(f"'{path}'" for path in self._paths) + return f'MultiplexedPath({paths})' + + +class NamespaceReader(abc.TraversableResources): + def __init__(self, namespace_path): + if 'NamespacePath' not in str(namespace_path): + raise ValueError('Invalid path') + self.path = MultiplexedPath(*filter(bool, map(self._resolve, namespace_path))) + + @classmethod + def _resolve(cls, path_str) -> abc.Traversable | None: + r""" + Given an item from a namespace path, resolve it to a Traversable. + + path_str might be a directory on the filesystem or a path to a + zipfile plus the path within the zipfile, e.g. ``/foo/bar`` or + ``/foo/baz.zip/inner_dir`` or ``foo\baz.zip\inner_dir\sub``. + + path_str might also be a sentinel used by editable packages to + trigger other behaviors (see python/importlib_resources#311). + In that case, return None. + """ + dirs = (cand for cand in cls._candidate_paths(path_str) if cand.is_dir()) + return next(dirs, None) + + @classmethod + def _candidate_paths(cls, path_str: str) -> Iterator[abc.Traversable]: + yield pathlib.Path(path_str) + yield from cls._resolve_zip_path(path_str) + + @staticmethod + def _resolve_zip_path(path_str: str): + for match in reversed(list(re.finditer(r'[\\/]', path_str))): + with contextlib.suppress( + FileNotFoundError, + IsADirectoryError, + NotADirectoryError, + PermissionError, + ): + inner = path_str[match.end() :].replace('\\', '/') + '/' + yield zipfile.Path(path_str[: match.start()], inner.lstrip('/')) + + def resource_path(self, resource): + """ + Return the file system path to prevent + `resources.path()` from creating a temporary + copy. + """ + return str(self.path.joinpath(resource)) + + def files(self): + return self.path + + +def _ensure_traversable(path): + """ + Convert deprecated string arguments to traversables (pathlib.Path). + + Remove with Python 3.15. + """ + if not isinstance(path, str): + return path + + warnings.warn( + "String arguments are deprecated. Pass a Traversable instead.", + DeprecationWarning, + stacklevel=3, + ) + + return pathlib.Path(path) diff --git a/stdlib/importlib/resources/simple.py b/stdlib/importlib/resources/simple.py new file mode 100644 index 000000000..2e75299b1 --- /dev/null +++ b/stdlib/importlib/resources/simple.py @@ -0,0 +1,106 @@ +""" +Interface adapters for low-level readers. +""" + +import abc +import io +import itertools +from typing import BinaryIO, List + +from .abc import Traversable, TraversableResources + + +class SimpleReader(abc.ABC): + """ + The minimum, low-level interface required from a resource + provider. + """ + + @property + @abc.abstractmethod + def package(self) -> str: + """ + The name of the package for which this reader loads resources. + """ + + @abc.abstractmethod + def children(self) -> List['SimpleReader']: + """ + Obtain an iterable of SimpleReader for available + child containers (e.g. directories). + """ + + @abc.abstractmethod + def resources(self) -> List[str]: + """ + Obtain available named resources for this virtual package. + """ + + @abc.abstractmethod + def open_binary(self, resource: str) -> BinaryIO: + """ + Obtain a File-like for a named resource. + """ + + @property + def name(self): + return self.package.split('.')[-1] + + +class ResourceContainer(Traversable): + """ + Traversable container for a package's resources via its reader. + """ + + def __init__(self, reader: SimpleReader): + self.reader = reader + + def is_dir(self): + return True + + def is_file(self): + return False + + def iterdir(self): + files = (ResourceHandle(self, name) for name in self.reader.resources) + dirs = map(ResourceContainer, self.reader.children()) + return itertools.chain(files, dirs) + + def open(self, *args, **kwargs): + raise IsADirectoryError() + + +class ResourceHandle(Traversable): + """ + Handle to a named resource in a ResourceReader. + """ + + def __init__(self, parent: ResourceContainer, name: str): + self.parent = parent + self.name = name # type: ignore[misc] + + def is_file(self): + return True + + def is_dir(self): + return False + + def open(self, mode='r', *args, **kwargs): + stream = self.parent.reader.open_binary(self.name) + if 'b' not in mode: + stream = io.TextIOWrapper(stream, *args, **kwargs) + return stream + + def joinpath(self, name): + raise RuntimeError("Cannot traverse into a resource") + + +class TraversableReader(TraversableResources, SimpleReader): + """ + A TraversableResources based on SimpleReader. Resource providers + may derive from this class to provide the TraversableResources + interface by supplying the SimpleReader interface. + """ + + def files(self): + return ResourceContainer(self) diff --git a/stdlib/importlib/simple.py b/stdlib/importlib/simple.py new file mode 100644 index 000000000..845bb9036 --- /dev/null +++ b/stdlib/importlib/simple.py @@ -0,0 +1,14 @@ +""" +Compatibility shim for .resources.simple as found on Python 3.10. + +Consumers that can rely on Python 3.11 should use the other +module directly. +""" + +from .resources.simple import ( + SimpleReader, ResourceHandle, ResourceContainer, TraversableReader, +) + +__all__ = [ + 'SimpleReader', 'ResourceHandle', 'ResourceContainer', 'TraversableReader', +] diff --git a/stdlib/importlib/util.py b/stdlib/importlib/util.py index aadfcae60..2b564e9b5 100644 --- a/stdlib/importlib/util.py +++ b/stdlib/importlib/util.py @@ -1,207 +1,279 @@ -"""importlib.util: gopy stub for the parts pkgutil/unittest.mock need. - -CPython's Lib/importlib/util.py re-exports symbols from the import -machinery's _bootstrap and _bootstrap_external modules, which gopy -doesn't fully ship. The pkgutil/unittest.mock load path only references -MAGIC_NUMBER at module load (inside a function body) plus find_spec -later; resolve_name doesn't touch util at all. Until spec 1711 Phase -9 wires the full importlib port this stub keeps the import chain -green. - -CPython: Lib/importlib/util.py -""" - -import os +"""Utility code for constructing importers, etc.""" +from ._abc import Loader +from ._bootstrap import module_from_spec +from ._bootstrap import _resolve_name +from ._bootstrap import spec_from_loader +from ._bootstrap import _find_spec +from ._bootstrap_external import MAGIC_NUMBER +from ._bootstrap_external import cache_from_source +from ._bootstrap_external import decode_source +from ._bootstrap_external import source_from_cache +from ._bootstrap_external import spec_from_file_location + +import _imp import sys import types -from importlib._bootstrap_external import ( - MAGIC_NUMBER, - cache_from_source, - decode_source, - source_from_cache, - source_hash, -) +def source_hash(source_bytes): + "Return the hash of *source_bytes* as used in hash-based pyc files." + return _imp.source_hash(_imp.pyc_magic_number_token, source_bytes) -class _SourceFileLoader: - """Minimal SourceFileLoader: reads the .py file and compiles it. - CPython: Lib/importlib/_bootstrap_external.py:962 SourceFileLoader - """ - - def __init__(self, name, path): - self.name = name - self.path = path - - def get_filename(self, fullname=None): - return self.path +def resolve_name(name, package): + """Resolve a relative module name to an absolute one.""" + if not name.startswith('.'): + return name + elif not package: + raise ImportError(f'no package specified for {repr(name)} ' + '(required for relative module names)') + level = 0 + for character in name: + if character != '.': + break + level += 1 + return _resolve_name(name[level:], package, level) - def get_source(self, fullname=None): - with open(self.path, "rb") as f: - data = f.read() - try: - return data.decode("utf-8") - except UnicodeDecodeError: - return data.decode("latin-1") - def get_code(self, fullname): - source = self.get_source(fullname) - return compile(source, self.path, "exec") +def _find_spec_from_path(name, path=None): + """Return the spec for the specified module. + First, sys.modules is checked to see if the module was already imported. If + so, then sys.modules[name].__spec__ is returned. If that happens to be + set to None, then ValueError is raised. If the module is not in + sys.modules, then sys.meta_path is searched for a suitable spec with the + value of 'path' given to the finders. None is returned if no spec could + be found. -class _ModuleSpec: - """Stripped-down ModuleSpec mirroring importlib.machinery.ModuleSpec. + Dotted names do not have their parent packages implicitly imported. You will + most likely need to explicitly import all parent packages in the proper + order for a submodule to get the correct spec. - CPython: Lib/importlib/_bootstrap.py:392 ModuleSpec """ - - def __init__(self, name, loader, *, origin=None, is_package=False): - self.name = name - self.loader = loader - self.origin = origin - self.submodule_search_locations = [] if is_package else None - self.has_location = origin is not None - self.cached = None - self.parent = name.rpartition(".")[0] if is_package else name.rpartition(".")[0] - - -def _resolve_search_paths(name): - parent, _, _ = name.rpartition(".") - if not parent: - return sys.path - pkg = sys.modules.get(parent) - if pkg is None: - try: - __import__(parent) - except ImportError: + if name not in sys.modules: + return _find_spec(name, path) + else: + module = sys.modules[name] + if module is None: return None - pkg = sys.modules.get(parent) - if pkg is None: - return None - return getattr(pkg, "__path__", None) + try: + spec = module.__spec__ + except AttributeError: + raise ValueError(f'{name}.__spec__ is not set') from None + else: + if spec is None: + raise ValueError(f'{name}.__spec__ is None') + return spec def find_spec(name, package=None): - """Locate name on sys.path (or the parent package's __path__) and - return a ModuleSpec the caller can drive through .loader.get_code(). + """Return the spec for the specified module. - CPython: Lib/importlib/util.py:90 find_spec - """ - if name.startswith("."): - if package is None: - raise ValueError("relative module name requires package") - name = resolve_name(name, package) - if name in sys.modules: - mod = sys.modules[name] - spec = getattr(mod, "__spec__", None) - if spec is not None: - return spec - search = _resolve_search_paths(name) - if search is None: - return None - tail = name.rpartition(".")[2] - for entry in search: - directory = entry if entry else "." - pkg_init = os.path.join(directory, tail, "__init__.py") - if os.path.isfile(pkg_init): - loader = _SourceFileLoader(name, pkg_init) - spec = _ModuleSpec(name, loader, origin=pkg_init, is_package=True) - spec.submodule_search_locations = [os.path.join(directory, tail)] - return spec - mod_file = os.path.join(directory, tail + ".py") - if os.path.isfile(mod_file): - return _ModuleSpec(name, _SourceFileLoader(name, mod_file), - origin=mod_file) - return None + First, sys.modules is checked to see if the module was already imported. If + so, then sys.modules[name].__spec__ is returned. If that happens to be + set to None, then ValueError is raised. If the module is not in + sys.modules, then sys.meta_path is searched for a suitable spec with the + value of 'path' given to the finders. None is returned if no spec could + be found. + If the name is for submodule (contains a dot), the parent module is + automatically imported. -def module_from_spec(spec): - """Create a new module based on spec and spec.loader.create_module. + The name and package arguments work the same as importlib.import_module(). + In other words, relative module names (with leading dots) work. - CPython: Lib/importlib/_bootstrap.py:571 module_from_spec - """ - import types - module = None - if hasattr(spec.loader, 'create_module'): - module = spec.loader.create_module(spec) - if module is None: - module = types.ModuleType(spec.name) - module.__loader__ = spec.loader - module.__spec__ = spec - module.__package__ = spec.name.rpartition('.')[0] - if spec.origin is not None: - module.__file__ = spec.origin - if spec.submodule_search_locations is not None: - module.__path__ = list(spec.submodule_search_locations) - return module - - -def spec_from_loader(name, loader, *, origin=None, is_package=None): - """Return a ModuleSpec based on a loader. - - CPython: Lib/importlib/util.py:44 spec_from_loader """ - if origin is None and hasattr(loader, 'get_filename'): - try: - origin = loader.get_filename(name) - except (ImportError, AttributeError): - pass - if is_package is None: - if hasattr(loader, 'is_package'): + fullname = resolve_name(name, package) if name.startswith('.') else name + if fullname not in sys.modules: + parent_name = fullname.rpartition('.')[0] + if parent_name: + parent = __import__(parent_name, fromlist=['__path__']) try: - is_package = loader.is_package(name) - except ImportError: - is_package = False + parent_path = parent.__path__ + except AttributeError as e: + raise ModuleNotFoundError( + f"__path__ attribute not found on {parent_name!r} " + f"while trying to find {fullname!r}", name=fullname) from e else: - is_package = False - return _ModuleSpec(name, loader, origin=origin, is_package=bool(is_package)) + parent_path = None + return _find_spec(fullname, parent_path) + else: + module = sys.modules[fullname] + if module is None: + return None + try: + spec = module.__spec__ + except AttributeError: + raise ValueError(f'{name}.__spec__ is not set') from None + else: + if spec is None: + raise ValueError(f'{name}.__spec__ is None') + return spec -def spec_from_file_location(name, location=None, *, loader=None, - submodule_search_locations=None): - """Return a ModuleSpec for the specified module, using file location. +# Normally we would use contextlib.contextmanager. However, this module +# is imported by runpy, which means we want to avoid any unnecessary +# dependencies. Thus we use a class. - CPython: Lib/importlib/util.py:132 spec_from_file_location - """ - if location is None and loader is None: - return None - if loader is None and location is not None: - loader = _SourceFileLoader(name, str(location)) - origin = str(location) if location is not None else getattr(loader, 'path', None) - is_package = submodule_search_locations is not None - spec = _ModuleSpec(name, loader, origin=origin, is_package=is_package) - if submodule_search_locations is not None: - spec.submodule_search_locations = list(submodule_search_locations) - return spec +class _incompatible_extension_module_restrictions: + """A context manager that can temporarily skip the compatibility check. + NOTE: This function is meant to accommodate an unusual case; one + which is likely to eventually go away. There's is a pretty good + chance this is not what you were looking for. -def resolve_name(name, package): - """Resolve a relative module name to an absolute one.""" - if not name.startswith('.'): - return name - if not package: - raise ImportError(f'no package specified for {name!r} ' - '(required for relative module names)') - level = 0 - for character in name: - if character != '.': - break - level += 1 - return _resolve_name(name[level:], package, level) + WARNING: Using this function to disable the check can lead to + unexpected behavior and even crashes. It should only be used during + extension module development. + If "disable_check" is True then the compatibility check will not + happen while the context manager is active. Otherwise the check + *will* happen. -def _resolve_name(name, package, level): - bits = package.rsplit('.', level - 1) - if len(bits) < level: - raise ImportError('attempted relative import beyond top-level package') - base = bits[0] - return f'{base}.{name}' if name else base + Normally, extensions that do not support multiple interpreters + may not be imported in a subinterpreter. That implies modules + that do not implement multi-phase init or that explicitly of out. + Likewise for modules import in a subinterpreter with its own GIL + when the extension does not support a per-interpreter GIL. This + implies the module does not have a Py_mod_multiple_interpreters slot + set to Py_MOD_PER_INTERPRETER_GIL_SUPPORTED. -class LazyLoader: - """Stub: not used by the unittest.mock import chain.""" + In both cases, this context manager may be used to temporarily + disable the check for compatible extension modules. + + You can get the same effect as this function by implementing the + basic interface of multi-phase init (PEP 489) and lying about + support for multiple interpreters (or per-interpreter GIL). + """ + + def __init__(self, *, disable_check): + self.disable_check = bool(disable_check) + + def __enter__(self): + self.old = _imp._override_multi_interp_extensions_check(self.override) + return self + + def __exit__(self, *args): + old = self.old + del self.old + _imp._override_multi_interp_extensions_check(old) + + @property + def override(self): + return -1 if self.disable_check else 1 + + +class _LazyModule(types.ModuleType): + + """A subclass of the module type which triggers loading upon attribute access.""" + + def __getattribute__(self, attr): + """Trigger the load of the module and return the attribute.""" + __spec__ = object.__getattribute__(self, '__spec__') + loader_state = __spec__.loader_state + with loader_state['lock']: + # Only the first thread to get the lock should trigger the load + # and reset the module's class. The rest can now getattr(). + if object.__getattribute__(self, '__class__') is _LazyModule: + __class__ = loader_state['__class__'] + + # Reentrant calls from the same thread must be allowed to proceed without + # triggering the load again. + # exec_module() and self-referential imports are the primary ways this can + # happen, but in any case we must return something to avoid deadlock. + if loader_state['is_loading']: + return __class__.__getattribute__(self, attr) + loader_state['is_loading'] = True + + __dict__ = __class__.__getattribute__(self, '__dict__') + + # All module metadata must be gathered from __spec__ in order to avoid + # using mutated values. + # Get the original name to make sure no object substitution occurred + # in sys.modules. + original_name = __spec__.name + # Figure out exactly what attributes were mutated between the creation + # of the module and now. + attrs_then = loader_state['__dict__'] + attrs_now = __dict__ + attrs_updated = {} + for key, value in attrs_now.items(): + # Code that set an attribute may have kept a reference to the + # assigned object, making identity more important than equality. + if key not in attrs_then: + attrs_updated[key] = value + elif id(attrs_now[key]) != id(attrs_then[key]): + attrs_updated[key] = value + __spec__.loader.exec_module(self) + # If exec_module() was used directly there is no guarantee the module + # object was put into sys.modules. + if original_name in sys.modules: + if id(self) != id(sys.modules[original_name]): + raise ValueError(f"module object for {original_name!r} " + "substituted in sys.modules during a lazy " + "load") + # Update after loading since that's what would happen in an eager + # loading situation. + __dict__.update(attrs_updated) + # Finally, stop triggering this method, if the module did not + # already update its own __class__. + if isinstance(self, _LazyModule): + object.__setattr__(self, '__class__', __class__) + + return getattr(self, attr) + + def __delattr__(self, attr): + """Trigger the load and then perform the deletion.""" + # To trigger the load and raise an exception if the attribute + # doesn't exist. + self.__getattribute__(attr) + delattr(self, attr) + + +class LazyLoader(Loader): + + """A loader that creates a module which defers loading until attribute access.""" + + @staticmethod + def __check_eager_loader(loader): + if not hasattr(loader, 'exec_module'): + raise TypeError('loader must define exec_module()') @classmethod def factory(cls, loader): - raise NotImplementedError("importlib.util.LazyLoader is unavailable in gopy") + """Construct a callable which returns the eager loader made lazy.""" + cls.__check_eager_loader(loader) + return lambda *args, **kwargs: cls(loader(*args, **kwargs)) + + def __init__(self, loader): + self.__check_eager_loader(loader) + self.loader = loader + + def create_module(self, spec): + return self.loader.create_module(spec) + + def exec_module(self, module): + """Make the module load lazily.""" + # Threading is only needed for lazy loading, and importlib.util can + # be pulled in at interpreter startup, so defer until needed. + import threading + module.__spec__.loader = self.loader + module.__loader__ = self.loader + # Don't need to worry about deep-copying as trying to set an attribute + # on an object would have triggered the load, + # e.g. ``module.__spec__.loader = None`` would trigger a load from + # trying to access module.__spec__. + loader_state = {} + loader_state['__dict__'] = module.__dict__.copy() + loader_state['__class__'] = module.__class__ + loader_state['lock'] = threading.RLock() + loader_state['is_loading'] = False + module.__spec__.loader_state = loader_state + module.__class__ = _LazyModule + + +__all__ = ['LazyLoader', 'Loader', 'MAGIC_NUMBER', + 'cache_from_source', 'decode_source', 'find_spec', + 'module_from_spec', 'resolve_name', 'source_from_cache', + 'source_hash', 'spec_from_file_location', 'spec_from_loader'] diff --git a/stdlib/modulefinder.py b/stdlib/modulefinder.py new file mode 100644 index 000000000..ac478ee7f --- /dev/null +++ b/stdlib/modulefinder.py @@ -0,0 +1,671 @@ +"""Find modules used by a script, using introspection.""" + +import dis +import importlib._bootstrap_external +import importlib.machinery +import marshal +import os +import io +import sys + +# Old imp constants: + +_SEARCH_ERROR = 0 +_PY_SOURCE = 1 +_PY_COMPILED = 2 +_C_EXTENSION = 3 +_PKG_DIRECTORY = 5 +_C_BUILTIN = 6 +_PY_FROZEN = 7 + +# Modulefinder does a good job at simulating Python's, but it can not +# handle __path__ modifications packages make at runtime. Therefore there +# is a mechanism whereby you can register extra paths in this map for a +# package, and it will be honored. + +# Note this is a mapping is lists of paths. +packagePathMap = {} + +# A Public interface +def AddPackagePath(packagename, path): + packagePathMap.setdefault(packagename, []).append(path) + +replacePackageMap = {} + +# This ReplacePackage mechanism allows modulefinder to work around +# situations in which a package injects itself under the name +# of another package into sys.modules at runtime by calling +# ReplacePackage("real_package_name", "faked_package_name") +# before running ModuleFinder. + +def ReplacePackage(oldname, newname): + replacePackageMap[oldname] = newname + + +def _find_module(name, path=None): + """An importlib reimplementation of imp.find_module (for our purposes).""" + + # It's necessary to clear the caches for our Finder first, in case any + # modules are being added/deleted/modified at runtime. In particular, + # test_modulefinder.py changes file tree contents in a cache-breaking way: + + importlib.machinery.PathFinder.invalidate_caches() + + spec = importlib.machinery.PathFinder.find_spec(name, path) + + if spec is None: + raise ImportError("No module named {name!r}".format(name=name), name=name) + + # Some special cases: + + if spec.loader is importlib.machinery.BuiltinImporter: + return None, None, ("", "", _C_BUILTIN) + + if spec.loader is importlib.machinery.FrozenImporter: + return None, None, ("", "", _PY_FROZEN) + + file_path = spec.origin + + if spec.loader.is_package(name): + return None, os.path.dirname(file_path), ("", "", _PKG_DIRECTORY) + + if isinstance(spec.loader, importlib.machinery.SourceFileLoader): + kind = _PY_SOURCE + + elif isinstance( + spec.loader, ( + importlib.machinery.ExtensionFileLoader, + importlib.machinery.AppleFrameworkLoader, + ) + ): + kind = _C_EXTENSION + + elif isinstance(spec.loader, importlib.machinery.SourcelessFileLoader): + kind = _PY_COMPILED + + else: # Should never happen. + return None, None, ("", "", _SEARCH_ERROR) + + file = io.open_code(file_path) + suffix = os.path.splitext(file_path)[-1] + + return file, file_path, (suffix, "rb", kind) + + +class Module: + + def __init__(self, name, file=None, path=None): + self.__name__ = name + self.__file__ = file + self.__path__ = path + self.__code__ = None + # The set of global names that are assigned to in the module. + # This includes those names imported through starimports of + # Python modules. + self.globalnames = {} + # The set of starimports this module did that could not be + # resolved, ie. a starimport from a non-Python module. + self.starimports = {} + + def __repr__(self): + s = "Module(%r" % (self.__name__,) + if self.__file__ is not None: + s = s + ", %r" % (self.__file__,) + if self.__path__ is not None: + s = s + ", %r" % (self.__path__,) + s = s + ")" + return s + +class ModuleFinder: + + def __init__(self, path=None, debug=0, excludes=None, replace_paths=None): + if path is None: + path = sys.path + self.path = path + self.modules = {} + self.badmodules = {} + self.debug = debug + self.indent = 0 + self.excludes = excludes if excludes is not None else [] + self.replace_paths = replace_paths if replace_paths is not None else [] + self.processed_paths = [] # Used in debugging only + + def msg(self, level, str, *args): + if level <= self.debug: + for i in range(self.indent): + print(" ", end=' ') + print(str, end=' ') + for arg in args: + print(repr(arg), end=' ') + print() + + def msgin(self, *args): + level = args[0] + if level <= self.debug: + self.indent = self.indent + 1 + self.msg(*args) + + def msgout(self, *args): + level = args[0] + if level <= self.debug: + self.indent = self.indent - 1 + self.msg(*args) + + def run_script(self, pathname): + self.msg(2, "run_script", pathname) + with io.open_code(pathname) as fp: + stuff = ("", "rb", _PY_SOURCE) + self.load_module('__main__', fp, pathname, stuff) + + def load_file(self, pathname): + dir, name = os.path.split(pathname) + name, ext = os.path.splitext(name) + with io.open_code(pathname) as fp: + stuff = (ext, "rb", _PY_SOURCE) + self.load_module(name, fp, pathname, stuff) + + def import_hook(self, name, caller=None, fromlist=None, level=-1): + self.msg(3, "import_hook", name, caller, fromlist, level) + parent = self.determine_parent(caller, level=level) + q, tail = self.find_head_package(parent, name) + m = self.load_tail(q, tail) + if not fromlist: + return q + if m.__path__: + self.ensure_fromlist(m, fromlist) + return None + + def determine_parent(self, caller, level=-1): + self.msgin(4, "determine_parent", caller, level) + if not caller or level == 0: + self.msgout(4, "determine_parent -> None") + return None + pname = caller.__name__ + if level >= 1: # relative import + if caller.__path__: + level -= 1 + if level == 0: + parent = self.modules[pname] + assert parent is caller + self.msgout(4, "determine_parent ->", parent) + return parent + if pname.count(".") < level: + raise ImportError("relative importpath too deep") + pname = ".".join(pname.split(".")[:-level]) + parent = self.modules[pname] + self.msgout(4, "determine_parent ->", parent) + return parent + if caller.__path__: + parent = self.modules[pname] + assert caller is parent + self.msgout(4, "determine_parent ->", parent) + return parent + if '.' in pname: + i = pname.rfind('.') + pname = pname[:i] + parent = self.modules[pname] + assert parent.__name__ == pname + self.msgout(4, "determine_parent ->", parent) + return parent + self.msgout(4, "determine_parent -> None") + return None + + def find_head_package(self, parent, name): + self.msgin(4, "find_head_package", parent, name) + if '.' in name: + i = name.find('.') + head = name[:i] + tail = name[i+1:] + else: + head = name + tail = "" + if parent: + qname = "%s.%s" % (parent.__name__, head) + else: + qname = head + q = self.import_module(head, qname, parent) + if q: + self.msgout(4, "find_head_package ->", (q, tail)) + return q, tail + if parent: + qname = head + parent = None + q = self.import_module(head, qname, parent) + if q: + self.msgout(4, "find_head_package ->", (q, tail)) + return q, tail + self.msgout(4, "raise ImportError: No module named", qname) + raise ImportError("No module named " + qname) + + def load_tail(self, q, tail): + self.msgin(4, "load_tail", q, tail) + m = q + while tail: + i = tail.find('.') + if i < 0: i = len(tail) + head, tail = tail[:i], tail[i+1:] + mname = "%s.%s" % (m.__name__, head) + m = self.import_module(head, mname, m) + if not m: + self.msgout(4, "raise ImportError: No module named", mname) + raise ImportError("No module named " + mname) + self.msgout(4, "load_tail ->", m) + return m + + def ensure_fromlist(self, m, fromlist, recursive=0): + self.msg(4, "ensure_fromlist", m, fromlist, recursive) + for sub in fromlist: + if sub == "*": + if not recursive: + all = self.find_all_submodules(m) + if all: + self.ensure_fromlist(m, all, 1) + elif not hasattr(m, sub): + subname = "%s.%s" % (m.__name__, sub) + submod = self.import_module(sub, subname, m) + if not submod: + raise ImportError("No module named " + subname) + + def find_all_submodules(self, m): + if not m.__path__: + return + modules = {} + # 'suffixes' used to be a list hardcoded to [".py", ".pyc"]. + # But we must also collect Python extension modules - although + # we cannot separate normal dlls from Python extensions. + suffixes = [] + suffixes += importlib.machinery.EXTENSION_SUFFIXES[:] + suffixes += importlib.machinery.SOURCE_SUFFIXES[:] + suffixes += importlib.machinery.BYTECODE_SUFFIXES[:] + for dir in m.__path__: + try: + names = os.listdir(dir) + except OSError: + self.msg(2, "can't list directory", dir) + continue + for name in names: + mod = None + for suff in suffixes: + n = len(suff) + if name[-n:] == suff: + mod = name[:-n] + break + if mod and mod != "__init__": + modules[mod] = mod + return modules.keys() + + def import_module(self, partname, fqname, parent): + self.msgin(3, "import_module", partname, fqname, parent) + try: + m = self.modules[fqname] + except KeyError: + pass + else: + self.msgout(3, "import_module ->", m) + return m + if fqname in self.badmodules: + self.msgout(3, "import_module -> None") + return None + if parent and parent.__path__ is None: + self.msgout(3, "import_module -> None") + return None + try: + fp, pathname, stuff = self.find_module(partname, + parent and parent.__path__, parent) + except ImportError: + self.msgout(3, "import_module ->", None) + return None + + try: + m = self.load_module(fqname, fp, pathname, stuff) + finally: + if fp: + fp.close() + if parent: + setattr(parent, partname, m) + self.msgout(3, "import_module ->", m) + return m + + def load_module(self, fqname, fp, pathname, file_info): + suffix, mode, type = file_info + self.msgin(2, "load_module", fqname, fp and "fp", pathname) + if type == _PKG_DIRECTORY: + m = self.load_package(fqname, pathname) + self.msgout(2, "load_module ->", m) + return m + if type == _PY_SOURCE: + co = compile(fp.read(), pathname, 'exec') + elif type == _PY_COMPILED: + try: + data = fp.read() + importlib._bootstrap_external._classify_pyc(data, fqname, {}) + except ImportError as exc: + self.msgout(2, "raise ImportError: " + str(exc), pathname) + raise + co = marshal.loads(memoryview(data)[16:]) + else: + co = None + m = self.add_module(fqname) + m.__file__ = pathname + if co: + if self.replace_paths: + co = self.replace_paths_in_code(co) + m.__code__ = co + self.scan_code(co, m) + self.msgout(2, "load_module ->", m) + return m + + def _add_badmodule(self, name, caller): + if name not in self.badmodules: + self.badmodules[name] = {} + if caller: + self.badmodules[name][caller.__name__] = 1 + else: + self.badmodules[name]["-"] = 1 + + def _safe_import_hook(self, name, caller, fromlist, level=-1): + # wrapper for self.import_hook() that won't raise ImportError + if name in self.badmodules: + self._add_badmodule(name, caller) + return + try: + self.import_hook(name, caller, level=level) + except ImportError as msg: + self.msg(2, "ImportError:", str(msg)) + self._add_badmodule(name, caller) + except SyntaxError as msg: + self.msg(2, "SyntaxError:", str(msg)) + self._add_badmodule(name, caller) + else: + if fromlist: + for sub in fromlist: + fullname = name + "." + sub + if fullname in self.badmodules: + self._add_badmodule(fullname, caller) + continue + try: + self.import_hook(name, caller, [sub], level=level) + except ImportError as msg: + self.msg(2, "ImportError:", str(msg)) + self._add_badmodule(fullname, caller) + + def scan_opcodes(self, co): + # Scan the code, and yield 'interesting' opcode combinations + for name in dis._find_store_names(co): + yield "store", (name,) + for name, level, fromlist in dis._find_imports(co): + if level == 0: # absolute import + yield "absolute_import", (fromlist, name) + else: # relative import + yield "relative_import", (level, fromlist, name) + + def scan_code(self, co, m): + code = co.co_code + scanner = self.scan_opcodes + for what, args in scanner(co): + if what == "store": + name, = args + m.globalnames[name] = 1 + elif what == "absolute_import": + fromlist, name = args + have_star = 0 + if fromlist is not None: + if "*" in fromlist: + have_star = 1 + fromlist = [f for f in fromlist if f != "*"] + self._safe_import_hook(name, m, fromlist, level=0) + if have_star: + # We've encountered an "import *". If it is a Python module, + # the code has already been parsed and we can suck out the + # global names. + mm = None + if m.__path__: + # At this point we don't know whether 'name' is a + # submodule of 'm' or a global module. Let's just try + # the full name first. + mm = self.modules.get(m.__name__ + "." + name) + if mm is None: + mm = self.modules.get(name) + if mm is not None: + m.globalnames.update(mm.globalnames) + m.starimports.update(mm.starimports) + if mm.__code__ is None: + m.starimports[name] = 1 + else: + m.starimports[name] = 1 + elif what == "relative_import": + level, fromlist, name = args + if name: + self._safe_import_hook(name, m, fromlist, level=level) + else: + parent = self.determine_parent(m, level=level) + self._safe_import_hook(parent.__name__, None, fromlist, level=0) + else: + # We don't expect anything else from the generator. + raise RuntimeError(what) + + for c in co.co_consts: + if isinstance(c, type(co)): + self.scan_code(c, m) + + def load_package(self, fqname, pathname): + self.msgin(2, "load_package", fqname, pathname) + newname = replacePackageMap.get(fqname) + if newname: + fqname = newname + m = self.add_module(fqname) + m.__file__ = pathname + m.__path__ = [pathname] + + # As per comment at top of file, simulate runtime __path__ additions. + m.__path__ = m.__path__ + packagePathMap.get(fqname, []) + + fp, buf, stuff = self.find_module("__init__", m.__path__) + try: + self.load_module(fqname, fp, buf, stuff) + self.msgout(2, "load_package ->", m) + return m + finally: + if fp: + fp.close() + + def add_module(self, fqname): + if fqname in self.modules: + return self.modules[fqname] + self.modules[fqname] = m = Module(fqname) + return m + + def find_module(self, name, path, parent=None): + if parent is not None: + # assert path is not None + fullname = parent.__name__+'.'+name + else: + fullname = name + if fullname in self.excludes: + self.msgout(3, "find_module -> Excluded", fullname) + raise ImportError(name) + + if path is None: + if name in sys.builtin_module_names: + return (None, None, ("", "", _C_BUILTIN)) + + path = self.path + + return _find_module(name, path) + + def report(self): + """Print a report to stdout, listing the found modules with their + paths, as well as modules that are missing, or seem to be missing. + """ + print() + print(" %-25s %s" % ("Name", "File")) + print(" %-25s %s" % ("----", "----")) + # Print modules found + keys = sorted(self.modules.keys()) + for key in keys: + m = self.modules[key] + if m.__path__: + print("P", end=' ') + else: + print("m", end=' ') + print("%-25s" % key, m.__file__ or "") + + # Print missing modules + missing, maybe = self.any_missing_maybe() + if missing: + print() + print("Missing modules:") + for name in missing: + mods = sorted(self.badmodules[name].keys()) + print("?", name, "imported from", ', '.join(mods)) + # Print modules that may be missing, but then again, maybe not... + if maybe: + print() + print("Submodules that appear to be missing, but could also be", end=' ') + print("global names in the parent package:") + for name in maybe: + mods = sorted(self.badmodules[name].keys()) + print("?", name, "imported from", ', '.join(mods)) + + def any_missing(self): + """Return a list of modules that appear to be missing. Use + any_missing_maybe() if you want to know which modules are + certain to be missing, and which *may* be missing. + """ + missing, maybe = self.any_missing_maybe() + return missing + maybe + + def any_missing_maybe(self): + """Return two lists, one with modules that are certainly missing + and one with modules that *may* be missing. The latter names could + either be submodules *or* just global names in the package. + + The reason it can't always be determined is that it's impossible to + tell which names are imported when "from module import *" is done + with an extension module, short of actually importing it. + """ + missing = [] + maybe = [] + for name in self.badmodules: + if name in self.excludes: + continue + i = name.rfind(".") + if i < 0: + missing.append(name) + continue + subname = name[i+1:] + pkgname = name[:i] + pkg = self.modules.get(pkgname) + if pkg is not None: + if pkgname in self.badmodules[name]: + # The package tried to import this module itself and + # failed. It's definitely missing. + missing.append(name) + elif subname in pkg.globalnames: + # It's a global in the package: definitely not missing. + pass + elif pkg.starimports: + # It could be missing, but the package did an "import *" + # from a non-Python module, so we simply can't be sure. + maybe.append(name) + else: + # It's not a global in the package, the package didn't + # do funny star imports, it's very likely to be missing. + # The symbol could be inserted into the package from the + # outside, but since that's not good style we simply list + # it missing. + missing.append(name) + else: + missing.append(name) + missing.sort() + maybe.sort() + return missing, maybe + + def replace_paths_in_code(self, co): + new_filename = original_filename = os.path.normpath(co.co_filename) + for f, r in self.replace_paths: + if original_filename.startswith(f): + new_filename = r + original_filename[len(f):] + break + + if self.debug and original_filename not in self.processed_paths: + if new_filename != original_filename: + self.msgout(2, "co_filename %r changed to %r" \ + % (original_filename,new_filename,)) + else: + self.msgout(2, "co_filename %r remains unchanged" \ + % (original_filename,)) + self.processed_paths.append(original_filename) + + consts = list(co.co_consts) + for i in range(len(consts)): + if isinstance(consts[i], type(co)): + consts[i] = self.replace_paths_in_code(consts[i]) + + return co.replace(co_consts=tuple(consts), co_filename=new_filename) + + +def test(): + # Parse command line + import getopt + try: + opts, args = getopt.getopt(sys.argv[1:], "dmp:qx:") + except getopt.error as msg: + print(msg) + return + + # Process options + debug = 1 + domods = 0 + addpath = [] + exclude = [] + for o, a in opts: + if o == '-d': + debug = debug + 1 + if o == '-m': + domods = 1 + if o == '-p': + addpath = addpath + a.split(os.pathsep) + if o == '-q': + debug = 0 + if o == '-x': + exclude.append(a) + + # Provide default arguments + if not args: + script = "hello.py" + else: + script = args[0] + + # Set the path based on sys.path and the script directory + path = sys.path[:] + path[0] = os.path.dirname(script) + path = addpath + path + if debug > 1: + print("path:") + for item in path: + print(" ", repr(item)) + + # Create the module finder and turn its crank + mf = ModuleFinder(path, debug, exclude) + for arg in args[1:]: + if arg == '-m': + domods = 1 + continue + if domods: + if arg[-2:] == '.*': + mf.import_hook(arg[:-2], None, ["*"]) + else: + mf.import_hook(arg) + else: + mf.load_file(arg) + mf.run_script(script) + mf.report() + return mf # for -i debugging + + +if __name__ == '__main__': + try: + mf = test() + except KeyboardInterrupt: + print("\n[interrupted]") diff --git a/stdlib/pyclbr.py b/stdlib/pyclbr.py new file mode 100644 index 000000000..37f86995d --- /dev/null +++ b/stdlib/pyclbr.py @@ -0,0 +1,314 @@ +"""Parse a Python module and describe its classes and functions. + +Parse enough of a Python file to recognize imports and class and +function definitions, and to find out the superclasses of a class. + +The interface consists of a single function: + readmodule_ex(module, path=None) +where module is the name of a Python module, and path is an optional +list of directories where the module is to be searched. If present, +path is prepended to the system search path sys.path. The return value +is a dictionary. The keys of the dictionary are the names of the +classes and functions defined in the module (including classes that are +defined via the from XXX import YYY construct). The values are +instances of classes Class and Function. One special key/value pair is +present for packages: the key '__path__' has a list as its value which +contains the package search path. + +Classes and Functions have a common superclass: _Object. Every instance +has the following attributes: + module -- name of the module; + name -- name of the object; + file -- file in which the object is defined; + lineno -- line in the file where the object's definition starts; + end_lineno -- line in the file where the object's definition ends; + parent -- parent of this object, if any; + children -- nested objects contained in this object. +The 'children' attribute is a dictionary mapping names to objects. + +Instances of Function describe functions with the attributes from _Object, +plus the following: + is_async -- if a function is defined with an 'async' prefix + +Instances of Class describe classes with the attributes from _Object, +plus the following: + super -- list of super classes (Class instances if possible); + methods -- mapping of method names to beginning line numbers. +If the name of a super class is not recognized, the corresponding +entry in the list of super classes is not a class instance but a +string giving the name of the super class. Since import statements +are recognized and imported modules are scanned as well, this +shouldn't happen often. +""" + +import ast +import sys +import importlib.util + +__all__ = ["readmodule", "readmodule_ex", "Class", "Function"] + +_modules = {} # Initialize cache of modules we've seen. + + +class _Object: + "Information about Python class or function." + def __init__(self, module, name, file, lineno, end_lineno, parent): + self.module = module + self.name = name + self.file = file + self.lineno = lineno + self.end_lineno = end_lineno + self.parent = parent + self.children = {} + if parent is not None: + parent.children[name] = self + + +# Odd Function and Class signatures are for back-compatibility. +class Function(_Object): + "Information about a Python function, including methods." + def __init__(self, module, name, file, lineno, + parent=None, is_async=False, *, end_lineno=None): + super().__init__(module, name, file, lineno, end_lineno, parent) + self.is_async = is_async + if isinstance(parent, Class): + parent.methods[name] = lineno + + +class Class(_Object): + "Information about a Python class." + def __init__(self, module, name, super_, file, lineno, + parent=None, *, end_lineno=None): + super().__init__(module, name, file, lineno, end_lineno, parent) + self.super = super_ or [] + self.methods = {} + + +# These 2 functions are used in these tests +# Lib/test/test_pyclbr, Lib/idlelib/idle_test/test_browser.py +def _nest_function(ob, func_name, lineno, end_lineno, is_async=False): + "Return a Function after nesting within ob." + return Function(ob.module, func_name, ob.file, lineno, + parent=ob, is_async=is_async, end_lineno=end_lineno) + +def _nest_class(ob, class_name, lineno, end_lineno, super=None): + "Return a Class after nesting within ob." + return Class(ob.module, class_name, super, ob.file, lineno, + parent=ob, end_lineno=end_lineno) + + +def readmodule(module, path=None): + """Return Class objects for the top-level classes in module. + + This is the original interface, before Functions were added. + """ + + res = {} + for key, value in _readmodule(module, path or []).items(): + if isinstance(value, Class): + res[key] = value + return res + +def readmodule_ex(module, path=None): + """Return a dictionary with all functions and classes in module. + + Search for module in PATH + sys.path. + If possible, include imported superclasses. + Do this by reading source, without importing (and executing) it. + """ + return _readmodule(module, path or []) + + +def _readmodule(module, path, inpackage=None): + """Do the hard work for readmodule[_ex]. + + If inpackage is given, it must be the dotted name of the package in + which we are searching for a submodule, and then PATH must be the + package search path; otherwise, we are searching for a top-level + module, and path is combined with sys.path. + """ + # Compute the full module name (prepending inpackage if set). + if inpackage is not None: + fullmodule = "%s.%s" % (inpackage, module) + else: + fullmodule = module + + # Check in the cache. + if fullmodule in _modules: + return _modules[fullmodule] + + # Initialize the dict for this module's contents. + tree = {} + + # Check if it is a built-in module; we don't do much for these. + if module in sys.builtin_module_names and inpackage is None: + _modules[module] = tree + return tree + + # Check for a dotted module name. + i = module.rfind('.') + if i >= 0: + package = module[:i] + submodule = module[i+1:] + parent = _readmodule(package, path, inpackage) + if inpackage is not None: + package = "%s.%s" % (inpackage, package) + if not '__path__' in parent: + raise ImportError('No package named {}'.format(package)) + return _readmodule(submodule, parent['__path__'], package) + + # Search the path for the module. + f = None + if inpackage is not None: + search_path = path + else: + search_path = path + sys.path + spec = importlib.util._find_spec_from_path(fullmodule, search_path) + if spec is None: + raise ModuleNotFoundError(f"no module named {fullmodule!r}", name=fullmodule) + _modules[fullmodule] = tree + # Is module a package? + if spec.submodule_search_locations is not None: + tree['__path__'] = spec.submodule_search_locations + try: + source = spec.loader.get_source(fullmodule) + except (AttributeError, ImportError): + # If module is not Python source, we cannot do anything. + return tree + else: + if source is None: + return tree + + fname = spec.loader.get_filename(fullmodule) + return _create_tree(fullmodule, path, fname, source, tree, inpackage) + + +class _ModuleBrowser(ast.NodeVisitor): + def __init__(self, module, path, file, tree, inpackage): + self.path = path + self.tree = tree + self.file = file + self.module = module + self.inpackage = inpackage + self.stack = [] + + def visit_ClassDef(self, node): + bases = [] + for base in node.bases: + name = ast.unparse(base) + if name in self.tree: + # We know this super class. + bases.append(self.tree[name]) + elif len(names := name.split(".")) > 1: + # Super class form is module.class: + # look in module for class. + *_, module, class_ = names + if module in _modules: + bases.append(_modules[module].get(class_, name)) + else: + bases.append(name) + + parent = self.stack[-1] if self.stack else None + class_ = Class(self.module, node.name, bases, self.file, node.lineno, + parent=parent, end_lineno=node.end_lineno) + if parent is None: + self.tree[node.name] = class_ + self.stack.append(class_) + self.generic_visit(node) + self.stack.pop() + + def visit_FunctionDef(self, node, *, is_async=False): + parent = self.stack[-1] if self.stack else None + function = Function(self.module, node.name, self.file, node.lineno, + parent, is_async, end_lineno=node.end_lineno) + if parent is None: + self.tree[node.name] = function + self.stack.append(function) + self.generic_visit(node) + self.stack.pop() + + def visit_AsyncFunctionDef(self, node): + self.visit_FunctionDef(node, is_async=True) + + def visit_Import(self, node): + if node.col_offset != 0: + return + + for module in node.names: + try: + try: + _readmodule(module.name, self.path, self.inpackage) + except ImportError: + _readmodule(module.name, []) + except (ImportError, SyntaxError): + # If we can't find or parse the imported module, + # too bad -- don't die here. + continue + + def visit_ImportFrom(self, node): + if node.col_offset != 0: + return + try: + module = "." * node.level + if node.module: + module += node.module + module = _readmodule(module, self.path, self.inpackage) + except (ImportError, SyntaxError): + return + + for name in node.names: + if name.name in module: + self.tree[name.asname or name.name] = module[name.name] + elif name.name == "*": + for import_name, import_value in module.items(): + if import_name.startswith("_"): + continue + self.tree[import_name] = import_value + + +def _create_tree(fullmodule, path, fname, source, tree, inpackage): + mbrowser = _ModuleBrowser(fullmodule, path, fname, tree, inpackage) + mbrowser.visit(ast.parse(source)) + return mbrowser.tree + + +def _main(): + "Print module output (default this file) for quick visual check." + import os + try: + mod = sys.argv[1] + except: + mod = __file__ + if os.path.exists(mod): + path = [os.path.dirname(mod)] + mod = os.path.basename(mod) + if mod.lower().endswith(".py"): + mod = mod[:-3] + else: + path = [] + tree = readmodule_ex(mod, path) + lineno_key = lambda a: getattr(a, 'lineno', 0) + objs = sorted(tree.values(), key=lineno_key, reverse=True) + indent_level = 2 + while objs: + obj = objs.pop() + if isinstance(obj, list): + # Value is a __path__ key. + continue + if not hasattr(obj, 'indent'): + obj.indent = 0 + + if isinstance(obj, _Object): + new_objs = sorted(obj.children.values(), + key=lineno_key, reverse=True) + for ob in new_objs: + ob.indent = obj.indent + indent_level + objs.extend(new_objs) + if isinstance(obj, Class): + print("{}class {} {} {}" + .format(' ' * obj.indent, obj.name, obj.super, obj.lineno)) + elif isinstance(obj, Function): + print("{}def {} {}".format(' ' * obj.indent, obj.name, obj.lineno)) + +if __name__ == "__main__": + _main() diff --git a/stdlib/site.py b/stdlib/site.py new file mode 100644 index 000000000..aeb7c6cfc --- /dev/null +++ b/stdlib/site.py @@ -0,0 +1,779 @@ +"""Append module search paths for third-party packages to sys.path. + +**************************************************************** +* This module is automatically imported during initialization. * +**************************************************************** + +This will append site-specific paths to the module search path. On +Unix (including Mac OSX), it starts with sys.prefix and +sys.exec_prefix (if different) and appends +lib/python/site-packages. +On other platforms (such as Windows), it tries each of the +prefixes directly, as well as with lib/site-packages appended. The +resulting directories, if they exist, are appended to sys.path, and +also inspected for path configuration files. + +If a file named "pyvenv.cfg" exists one directory above sys.executable, +sys.prefix and sys.exec_prefix are set to that directory and +it is also checked for site-packages (sys.base_prefix and +sys.base_exec_prefix will always be the "real" prefixes of the Python +installation). If "pyvenv.cfg" (a bootstrap configuration file) contains +the key "include-system-site-packages" set to anything other than "false" +(case-insensitive), the system-level prefixes will still also be +searched for site-packages; otherwise they won't. + +All of the resulting site-specific directories, if they exist, are +appended to sys.path, and also inspected for path configuration +files. + +A path configuration file is a file whose name has the form +.pth; its contents are additional directories (one per line) +to be added to sys.path. Non-existing directories (or +non-directories) are never added to sys.path; no directory is added to +sys.path more than once. Blank lines and lines beginning with +'#' are skipped. Lines starting with 'import' are executed. + +For example, suppose sys.prefix and sys.exec_prefix are set to +/usr/local and there is a directory /usr/local/lib/python2.5/site-packages +with three subdirectories, foo, bar and spam, and two path +configuration files, foo.pth and bar.pth. Assume foo.pth contains the +following: + + # foo package configuration + foo + bar + bletch + +and bar.pth contains: + + # bar package configuration + bar + +Then the following directories are added to sys.path, in this order: + + /usr/local/lib/python2.5/site-packages/bar + /usr/local/lib/python2.5/site-packages/foo + +Note that bletch is omitted because it doesn't exist; bar precedes foo +because bar.pth comes alphabetically before foo.pth; and spam is +omitted because it is not mentioned in either path configuration file. + +The readline module is also automatically configured to enable +completion for systems that support it. This can be overridden in +sitecustomize, usercustomize or PYTHONSTARTUP. Starting Python in +isolated mode (-I) disables automatic readline configuration. + +After these operations, an attempt is made to import a module +named sitecustomize, which can perform arbitrary additional +site-specific customizations. If this import fails with an +ImportError exception, it is silently ignored. +""" + +import sys +import os +import builtins +import _sitebuiltins +import _io as io +import stat +import errno + +# Prefixes for site-packages; add additional prefixes like /usr/local here +PREFIXES = [sys.prefix, sys.exec_prefix] +# Enable per user site-packages directory +# set it to False to disable the feature or True to force the feature +ENABLE_USER_SITE = None + +# for distutils.commands.install +# These values are initialized by the getuserbase() and getusersitepackages() +# functions, through the main() function when Python starts. +USER_SITE = None +USER_BASE = None + + +def _trace(message): + if sys.flags.verbose: + print(message, file=sys.stderr) + + +def _warn(*args, **kwargs): + import warnings + + warnings.warn(*args, **kwargs) + + +def makepath(*paths): + dir = os.path.join(*paths) + try: + dir = os.path.abspath(dir) + except OSError: + pass + return dir, os.path.normcase(dir) + + +def abs_paths(): + """Set all module __file__ and __cached__ attributes to an absolute path""" + for m in set(sys.modules.values()): + loader_module = None + try: + loader_module = m.__loader__.__module__ + except AttributeError: + try: + loader_module = m.__spec__.loader.__module__ + except AttributeError: + pass + if loader_module not in {'_frozen_importlib', '_frozen_importlib_external'}: + continue # don't mess with a PEP 302-supplied __file__ + try: + m.__file__ = os.path.abspath(m.__file__) + except (AttributeError, OSError, TypeError): + pass + try: + m.__cached__ = os.path.abspath(m.__cached__) + except (AttributeError, OSError, TypeError): + pass + + +def removeduppaths(): + """ Remove duplicate entries from sys.path along with making them + absolute""" + # This ensures that the initial path provided by the interpreter contains + # only absolute pathnames, even if we're running from the build directory. + L = [] + known_paths = set() + for dir in sys.path: + # Filter out duplicate paths (on case-insensitive file systems also + # if they only differ in case); turn relative paths into absolute + # paths. + dir, dircase = makepath(dir) + if dircase not in known_paths: + L.append(dir) + known_paths.add(dircase) + sys.path[:] = L + return known_paths + + +def _init_pathinfo(): + """Return a set containing all existing file system items from sys.path.""" + d = set() + for item in sys.path: + try: + if os.path.exists(item): + _, itemcase = makepath(item) + d.add(itemcase) + except TypeError: + continue + return d + + +def addpackage(sitedir, name, known_paths): + """Process a .pth file within the site-packages directory: + For each line in the file, either combine it with sitedir to a path + and add that to known_paths, or execute it if it starts with 'import '. + """ + if known_paths is None: + known_paths = _init_pathinfo() + reset = True + else: + reset = False + fullname = os.path.join(sitedir, name) + try: + st = os.lstat(fullname) + except OSError: + return + if ((getattr(st, 'st_flags', 0) & stat.UF_HIDDEN) or + (getattr(st, 'st_file_attributes', 0) & stat.FILE_ATTRIBUTE_HIDDEN)): + _trace(f"Skipping hidden .pth file: {fullname!r}") + return + _trace(f"Processing .pth file: {fullname!r}") + try: + with io.open_code(fullname) as f: + pth_content = f.read() + except OSError: + return + + try: + # Accept BOM markers in .pth files as we do in source files + # (Windows PowerShell 5.1 makes it hard to emit UTF-8 files without a BOM) + pth_content = pth_content.decode("utf-8-sig") + except UnicodeDecodeError: + # Fallback to locale encoding for backward compatibility. + # We will deprecate this fallback in the future. + import locale + pth_content = pth_content.decode(locale.getencoding()) + _trace(f"Cannot read {fullname!r} as UTF-8. " + f"Using fallback encoding {locale.getencoding()!r}") + + for n, line in enumerate(pth_content.splitlines(), 1): + if line.startswith("#"): + continue + if line.strip() == "": + continue + try: + if line.startswith(("import ", "import\t")): + exec(line) + continue + line = line.rstrip() + dir, dircase = makepath(sitedir, line) + if dircase not in known_paths and os.path.exists(dir): + sys.path.append(dir) + known_paths.add(dircase) + except Exception as exc: + print(f"Error processing line {n:d} of {fullname}:\n", + file=sys.stderr) + import traceback + for record in traceback.format_exception(exc): + for line in record.splitlines(): + print(' '+line, file=sys.stderr) + print("\nRemainder of file ignored", file=sys.stderr) + break + if reset: + known_paths = None + return known_paths + + +def addsitedir(sitedir, known_paths=None): + """Add 'sitedir' argument to sys.path if missing and handle .pth files in + 'sitedir'""" + _trace(f"Adding directory: {sitedir!r}") + if known_paths is None: + known_paths = _init_pathinfo() + reset = True + else: + reset = False + sitedir, sitedircase = makepath(sitedir) + if not sitedircase in known_paths: + sys.path.append(sitedir) # Add path component + known_paths.add(sitedircase) + try: + names = os.listdir(sitedir) + except OSError: + return + names = [name for name in names + if name.endswith(".pth") and not name.startswith(".")] + for name in sorted(names): + addpackage(sitedir, name, known_paths) + if reset: + known_paths = None + return known_paths + + +def check_enableusersite(): + """Check if user site directory is safe for inclusion + + The function tests for the command line flag (including environment var), + process uid/gid equal to effective uid/gid. + + None: Disabled for security reasons + False: Disabled by user (command line option) + True: Safe and enabled + """ + if sys.flags.no_user_site: + return False + + if hasattr(os, "getuid") and hasattr(os, "geteuid"): + # check process uid == effective uid + if os.geteuid() != os.getuid(): + return None + if hasattr(os, "getgid") and hasattr(os, "getegid"): + # check process gid == effective gid + if os.getegid() != os.getgid(): + return None + + return True + + +# NOTE: sysconfig and it's dependencies are relatively large but site module +# needs very limited part of them. +# To speedup startup time, we have copy of them. +# +# See https://bugs.python.org/issue29585 + +# Copy of sysconfig._get_implementation() +def _get_implementation(): + return 'Python' + +# Copy of sysconfig._getuserbase() +def _getuserbase(): + env_base = os.environ.get("PYTHONUSERBASE", None) + if env_base: + return env_base + + # Emscripten, iOS, tvOS, VxWorks, WASI, and watchOS have no home directories + if sys.platform in {"emscripten", "ios", "tvos", "vxworks", "wasi", "watchos"}: + return None + + def joinuser(*args): + return os.path.expanduser(os.path.join(*args)) + + if os.name == "nt": + base = os.environ.get("APPDATA") or "~" + return joinuser(base, _get_implementation()) + + if sys.platform == "darwin" and sys._framework: + return joinuser("~", "Library", sys._framework, + "%d.%d" % sys.version_info[:2]) + + return joinuser("~", ".local") + + +# Same to sysconfig.get_path('purelib', os.name+'_user') +def _get_path(userbase): + version = sys.version_info + if hasattr(sys, 'abiflags') and 't' in sys.abiflags: + abi_thread = 't' + else: + abi_thread = '' + + implementation = _get_implementation() + implementation_lower = implementation.lower() + if os.name == 'nt': + ver_nodot = sys.winver.replace('.', '') + return f'{userbase}\\{implementation}{ver_nodot}\\site-packages' + + if sys.platform == 'darwin' and sys._framework: + return f'{userbase}/lib/{implementation_lower}/site-packages' + + return f'{userbase}/lib/python{version[0]}.{version[1]}{abi_thread}/site-packages' + + +def getuserbase(): + """Returns the `user base` directory path. + + The `user base` directory can be used to store data. If the global + variable ``USER_BASE`` is not initialized yet, this function will also set + it. + """ + global USER_BASE + if USER_BASE is None: + USER_BASE = _getuserbase() + return USER_BASE + + +def getusersitepackages(): + """Returns the user-specific site-packages directory path. + + If the global variable ``USER_SITE`` is not initialized yet, this + function will also set it. + """ + global USER_SITE, ENABLE_USER_SITE + userbase = getuserbase() # this will also set USER_BASE + + if USER_SITE is None: + if userbase is None: + ENABLE_USER_SITE = False # disable user site and return None + else: + USER_SITE = _get_path(userbase) + + return USER_SITE + +def addusersitepackages(known_paths): + """Add a per user site-package to sys.path + + Each user has its own python directory with site-packages in the + home directory. + """ + # get the per user site-package path + # this call will also make sure USER_BASE and USER_SITE are set + _trace("Processing user site-packages") + user_site = getusersitepackages() + + if ENABLE_USER_SITE and os.path.isdir(user_site): + addsitedir(user_site, known_paths) + return known_paths + +def getsitepackages(prefixes=None): + """Returns a list containing all global site-packages directories. + + For each directory present in ``prefixes`` (or the global ``PREFIXES``), + this function will find its `site-packages` subdirectory depending on the + system environment, and will return a list of full paths. + """ + sitepackages = [] + seen = set() + + if prefixes is None: + prefixes = PREFIXES + + for prefix in prefixes: + if not prefix or prefix in seen: + continue + seen.add(prefix) + + implementation = _get_implementation().lower() + ver = sys.version_info + if hasattr(sys, 'abiflags') and 't' in sys.abiflags: + abi_thread = 't' + else: + abi_thread = '' + if os.sep == '/': + libdirs = [sys.platlibdir] + if sys.platlibdir != "lib": + libdirs.append("lib") + + for libdir in libdirs: + path = os.path.join(prefix, libdir, + f"{implementation}{ver[0]}.{ver[1]}{abi_thread}", + "site-packages") + sitepackages.append(path) + else: + sitepackages.append(prefix) + sitepackages.append(os.path.join(prefix, "Lib", "site-packages")) + return sitepackages + +def addsitepackages(known_paths, prefixes=None): + """Add site-packages to sys.path""" + _trace("Processing global site-packages") + for sitedir in getsitepackages(prefixes): + if os.path.isdir(sitedir): + addsitedir(sitedir, known_paths) + + return known_paths + +def setquit(): + """Define new builtins 'quit' and 'exit'. + + These are objects which make the interpreter exit when called. + The repr of each object contains a hint at how it works. + + """ + if os.sep == '\\': + eof = 'Ctrl-Z plus Return' + else: + eof = 'Ctrl-D (i.e. EOF)' + + builtins.quit = _sitebuiltins.Quitter('quit', eof) + builtins.exit = _sitebuiltins.Quitter('exit', eof) + + +def setcopyright(): + """Set 'copyright' and 'credits' in builtins""" + builtins.copyright = _sitebuiltins._Printer("copyright", sys.copyright) + builtins.credits = _sitebuiltins._Printer("credits", """\ +Thanks to CWI, CNRI, BeOpen, Zope Corporation, the Python Software +Foundation, and a cast of thousands for supporting Python +development. See www.python.org for more information.""") + files, dirs = [], [] + # Not all modules are required to have a __file__ attribute. See + # PEP 420 for more details. + here = getattr(sys, '_stdlib_dir', None) + if not here and hasattr(os, '__file__'): + here = os.path.dirname(os.__file__) + if here: + files.extend(["LICENSE.txt", "LICENSE"]) + dirs.extend([os.path.join(here, os.pardir), here, os.curdir]) + builtins.license = _sitebuiltins._Printer( + "license", + "See https://www.python.org/psf/license/", + files, dirs) + + +def sethelper(): + builtins.help = _sitebuiltins._Helper() + + +def gethistoryfile(): + """Check if the PYTHON_HISTORY environment variable is set and define + it as the .python_history file. If PYTHON_HISTORY is not set, use the + default .python_history file. + """ + if not sys.flags.ignore_environment: + history = os.environ.get("PYTHON_HISTORY") + if history: + return history + return os.path.join(os.path.expanduser('~'), + '.python_history') + + +def enablerlcompleter(): + """Enable default readline configuration on interactive prompts, by + registering a sys.__interactivehook__. + """ + sys.__interactivehook__ = register_readline + + +def register_readline(): + """Configure readline completion on interactive prompts. + + If the readline module can be imported, the hook will set the Tab key + as completion key and register ~/.python_history as history file. + This can be overridden in the sitecustomize or usercustomize module, + or in a PYTHONSTARTUP file. + """ + if not sys.flags.ignore_environment: + PYTHON_BASIC_REPL = os.getenv("PYTHON_BASIC_REPL") + else: + PYTHON_BASIC_REPL = False + + import atexit + + try: + try: + import readline + except ImportError: + readline = None + else: + import rlcompleter # noqa: F401 + except ImportError: + return + + try: + if PYTHON_BASIC_REPL: + CAN_USE_PYREPL = False + else: + original_path = sys.path + sys.path = [p for p in original_path if p != ''] + try: + import _pyrepl.readline + if os.name == "nt": + import _pyrepl.windows_console + console_errors = (_pyrepl.windows_console._error,) + else: + import _pyrepl.unix_console + console_errors = _pyrepl.unix_console._error + from _pyrepl.main import CAN_USE_PYREPL + finally: + sys.path = original_path + except ImportError: + return + + if readline is not None: + # Reading the initialization (config) file may not be enough to set a + # completion key, so we set one first and then read the file. + if readline.backend == 'editline': + readline.parse_and_bind('bind ^I rl_complete') + else: + readline.parse_and_bind('tab: complete') + + try: + readline.read_init_file() + except OSError: + # An OSError here could have many causes, but the most likely one + # is that there's no .inputrc file (or .editrc file in the case of + # Mac OS X + libedit) in the expected location. In that case, we + # want to ignore the exception. + pass + + if readline is None or readline.get_current_history_length() == 0: + # If no history was loaded, default to .python_history, + # or PYTHON_HISTORY. + # The guard is necessary to avoid doubling history size at + # each interpreter exit when readline was already configured + # through a PYTHONSTARTUP hook, see: + # http://bugs.python.org/issue5845#msg198636 + history = gethistoryfile() + + if CAN_USE_PYREPL: + readline_module = _pyrepl.readline + exceptions = (OSError, *console_errors) + else: + if readline is None: + return + readline_module = readline + exceptions = OSError + + try: + readline_module.read_history_file(history) + except exceptions: + pass + + def write_history(): + try: + readline_module.write_history_file(history) + except FileNotFoundError, PermissionError: + # home directory does not exist or is not writable + # https://bugs.python.org/issue19891 + pass + except OSError: + if errno.EROFS: + pass # gh-128066: read-only file system + else: + raise + + atexit.register(write_history) + + +def venv(known_paths): + global PREFIXES, ENABLE_USER_SITE + + env = os.environ + if sys.platform == 'darwin' and '__PYVENV_LAUNCHER__' in env: + executable = sys._base_executable = os.environ['__PYVENV_LAUNCHER__'] + else: + executable = sys.executable + exe_dir = os.path.dirname(os.path.abspath(executable)) + site_prefix = os.path.dirname(exe_dir) + sys._home = None + conf_basename = 'pyvenv.cfg' + candidate_conf = next( + ( + conffile for conffile in ( + os.path.join(exe_dir, conf_basename), + os.path.join(site_prefix, conf_basename) + ) + if os.path.isfile(conffile) + ), + None + ) + + if candidate_conf: + virtual_conf = candidate_conf + system_site = "true" + # Issue 25185: Use UTF-8, as that's what the venv module uses when + # writing the file. + with open(virtual_conf, encoding='utf-8') as f: + for line in f: + if '=' in line: + key, _, value = line.partition('=') + key = key.strip().lower() + value = value.strip() + if key == 'include-system-site-packages': + system_site = value.lower() + elif key == 'home': + sys._home = value + + if sys.prefix != site_prefix: + _warn(f'Unexpected value in sys.prefix, expected {site_prefix}, got {sys.prefix}', RuntimeWarning) + if sys.exec_prefix != site_prefix: + _warn(f'Unexpected value in sys.exec_prefix, expected {site_prefix}, got {sys.exec_prefix}', RuntimeWarning) + + # Doing this here ensures venv takes precedence over user-site + addsitepackages(known_paths, [sys.prefix]) + + if system_site == "true": + PREFIXES += [sys.base_prefix, sys.base_exec_prefix] + else: + ENABLE_USER_SITE = False + + return known_paths + + +def execsitecustomize(): + """Run custom site specific code, if available.""" + try: + try: + import sitecustomize # noqa: F401 + except ImportError as exc: + if exc.name == 'sitecustomize': + pass + else: + raise + except Exception as err: + if sys.flags.verbose: + sys.excepthook(*sys.exc_info()) + else: + sys.stderr.write( + "Error in sitecustomize; set PYTHONVERBOSE for traceback:\n" + "%s: %s\n" % + (err.__class__.__name__, err)) + + +def execusercustomize(): + """Run custom user specific code, if available.""" + try: + try: + import usercustomize # noqa: F401 + except ImportError as exc: + if exc.name == 'usercustomize': + pass + else: + raise + except Exception as err: + if sys.flags.verbose: + sys.excepthook(*sys.exc_info()) + else: + sys.stderr.write( + "Error in usercustomize; set PYTHONVERBOSE for traceback:\n" + "%s: %s\n" % + (err.__class__.__name__, err)) + + +def main(): + """Add standard site-specific directories to the module search path. + + This function is called automatically when this module is imported, + unless the python interpreter was started with the -S flag. + """ + global ENABLE_USER_SITE + + orig_path = sys.path[:] + known_paths = removeduppaths() + if orig_path != sys.path: + # removeduppaths() might make sys.path absolute. + # fix __file__ and __cached__ of already imported modules too. + abs_paths() + + known_paths = venv(known_paths) + if ENABLE_USER_SITE is None: + ENABLE_USER_SITE = check_enableusersite() + known_paths = addusersitepackages(known_paths) + known_paths = addsitepackages(known_paths) + setquit() + setcopyright() + sethelper() + if not sys.flags.isolated: + enablerlcompleter() + execsitecustomize() + if ENABLE_USER_SITE: + execusercustomize() + +# Prevent extending of sys.path when python was started with -S and +# site is imported later. +if not sys.flags.no_site: + main() + +def _script(): + help = """\ + %s [--user-base] [--user-site] + + Without arguments print some useful information + With arguments print the value of USER_BASE and/or USER_SITE separated + by '%s'. + + Exit codes with --user-base or --user-site: + 0 - user site directory is enabled + 1 - user site directory is disabled by user + 2 - user site directory is disabled by super user + or for security reasons + >2 - unknown error + """ + args = sys.argv[1:] + if not args: + user_base = getuserbase() + user_site = getusersitepackages() + print("sys.path = [") + for dir in sys.path: + print(" %r," % (dir,)) + print("]") + def exists(path): + if path is not None and os.path.isdir(path): + return "exists" + else: + return "doesn't exist" + print(f"USER_BASE: {user_base!r} ({exists(user_base)})") + print(f"USER_SITE: {user_site!r} ({exists(user_site)})") + print(f"ENABLE_USER_SITE: {ENABLE_USER_SITE!r}") + sys.exit(0) + + buffer = [] + if '--user-base' in args: + buffer.append(USER_BASE) + if '--user-site' in args: + buffer.append(USER_SITE) + + if buffer: + print(os.pathsep.join(buffer)) + if ENABLE_USER_SITE: + sys.exit(0) + elif ENABLE_USER_SITE is False: + sys.exit(1) + elif ENABLE_USER_SITE is None: + sys.exit(2) + else: + sys.exit(3) + else: + import textwrap + print(textwrap.dedent(help % (sys.argv[0], os.pathsep))) + sys.exit(10) + +if __name__ == '__main__': + _script() diff --git a/stdlib/sre_compile.py b/stdlib/sre_compile.py new file mode 100644 index 000000000..f9da61e64 --- /dev/null +++ b/stdlib/sre_compile.py @@ -0,0 +1,7 @@ +import warnings +warnings.warn(f"module {__name__!r} is deprecated", + DeprecationWarning, + stacklevel=2) + +from re import _compiler as _ +globals().update({k: v for k, v in vars(_).items() if k[:2] != '__'}) diff --git a/stdlib/sre_constants.py b/stdlib/sre_constants.py new file mode 100644 index 000000000..fa09d0442 --- /dev/null +++ b/stdlib/sre_constants.py @@ -0,0 +1,7 @@ +import warnings +warnings.warn(f"module {__name__!r} is deprecated", + DeprecationWarning, + stacklevel=2) + +from re import _constants as _ +globals().update({k: v for k, v in vars(_).items() if k[:2] != '__'}) diff --git a/stdlib/sre_parse.py b/stdlib/sre_parse.py new file mode 100644 index 000000000..25a3f557d --- /dev/null +++ b/stdlib/sre_parse.py @@ -0,0 +1,7 @@ +import warnings +warnings.warn(f"module {__name__!r} is deprecated", + DeprecationWarning, + stacklevel=2) + +from re import _parser as _ +globals().update({k: v for k, v in vars(_).items() if k[:2] != '__'}) diff --git a/stdlib/test/lock_tests.py b/stdlib/test/lock_tests.py new file mode 100644 index 000000000..fb11f4828 --- /dev/null +++ b/stdlib/test/lock_tests.py @@ -0,0 +1,1260 @@ +""" +Various tests for synchronization primitives. +""" + +import gc +import sys +import time +from _thread import start_new_thread, TIMEOUT_MAX +import threading +import unittest +import weakref + +from test import support +from test.support import threading_helper + + +requires_fork = unittest.skipUnless(support.has_fork_support, + "platform doesn't support fork " + "(no _at_fork_reinit method)") + + +def wait_threads_blocked(nthread): + # Arbitrary sleep to wait until N threads are blocked, + # like waiting for a lock. + time.sleep(0.010 * nthread) + + +class Bunch(object): + """ + A bunch of threads. + """ + def __init__(self, func, nthread, wait_before_exit=False): + """ + Construct a bunch of `nthread` threads running the same function `func`. + If `wait_before_exit` is True, the threads won't terminate until + do_finish() is called. + """ + self.func = func + self.nthread = nthread + self.started = [] + self.finished = [] + self.exceptions = [] + self._can_exit = not wait_before_exit + self._wait_thread = None + + def task(self): + tid = threading.get_ident() + self.started.append(tid) + try: + self.func() + except BaseException as exc: + self.exceptions.append(exc) + finally: + self.finished.append(tid) + for _ in support.sleeping_retry(support.SHORT_TIMEOUT): + if self._can_exit: + break + + def __enter__(self): + self._wait_thread = threading_helper.wait_threads_exit(support.SHORT_TIMEOUT) + self._wait_thread.__enter__() + + try: + for _ in range(self.nthread): + start_new_thread(self.task, ()) + except: + self._can_exit = True + raise + + for _ in support.sleeping_retry(support.SHORT_TIMEOUT): + if len(self.started) >= self.nthread: + break + + return self + + def __exit__(self, exc_type, exc_value, traceback): + for _ in support.sleeping_retry(support.SHORT_TIMEOUT): + if len(self.finished) >= self.nthread: + break + + # Wait until threads completely exit according to _thread._count() + self._wait_thread.__exit__(None, None, None) + + # Break reference cycle + exceptions = self.exceptions + self.exceptions = None + if exceptions: + raise ExceptionGroup(f"{self.func} threads raised exceptions", + exceptions) + + def do_finish(self): + self._can_exit = True + + +class BaseTestCase(unittest.TestCase): + def setUp(self): + self._threads = threading_helper.threading_setup() + + def tearDown(self): + threading_helper.threading_cleanup(*self._threads) + support.reap_children() + + def assertTimeout(self, actual, expected): + # The waiting and/or time.monotonic() can be imprecise, which + # is why comparing to the expected value would sometimes fail + # (especially under Windows). + self.assertGreaterEqual(actual, expected * 0.6) + # Test nothing insane happened + self.assertLess(actual, expected * 10.0) + + +class BaseLockTests(BaseTestCase): + """ + Tests for both recursive and non-recursive locks. + """ + + def wait_phase(self, phase, expected): + for _ in support.sleeping_retry(support.SHORT_TIMEOUT): + if len(phase) >= expected: + break + self.assertEqual(len(phase), expected) + + def test_constructor(self): + lock = self.locktype() + del lock + + def test_repr(self): + lock = self.locktype() + self.assertRegex(repr(lock), "") + del lock + + def test_locked_repr(self): + lock = self.locktype() + lock.acquire() + self.assertRegex(repr(lock), "") + del lock + + def test_acquire_destroy(self): + lock = self.locktype() + lock.acquire() + del lock + + def test_acquire_release(self): + lock = self.locktype() + lock.acquire() + lock.release() + del lock + + def test_try_acquire(self): + lock = self.locktype() + self.assertTrue(lock.acquire(False)) + lock.release() + + def test_try_acquire_contended(self): + lock = self.locktype() + lock.acquire() + result = [] + def f(): + result.append(lock.acquire(False)) + with Bunch(f, 1): + pass + self.assertFalse(result[0]) + lock.release() + + def test_acquire_contended(self): + lock = self.locktype() + lock.acquire() + def f(): + lock.acquire() + lock.release() + + N = 5 + with Bunch(f, N) as bunch: + # Threads block on lock.acquire() + wait_threads_blocked(N) + self.assertEqual(len(bunch.finished), 0) + + # Threads unblocked + lock.release() + + self.assertEqual(len(bunch.finished), N) + + def test_with(self): + lock = self.locktype() + def f(): + lock.acquire() + lock.release() + + def with_lock(err=None): + with lock: + if err is not None: + raise err + + # Acquire the lock, do nothing, with releases the lock + with lock: + pass + + # Check that the lock is unacquired + with Bunch(f, 1): + pass + + # Acquire the lock, raise an exception, with releases the lock + with self.assertRaises(TypeError): + with lock: + raise TypeError + + # Check that the lock is unacquired even if after an exception + # was raised in the previous "with lock:" block + with Bunch(f, 1): + pass + + def test_thread_leak(self): + # The lock shouldn't leak a Thread instance when used from a foreign + # (non-threading) thread. + lock = self.locktype() + def f(): + lock.acquire() + lock.release() + + # We run many threads in the hope that existing threads ids won't + # be recycled. + with Bunch(f, 15): + pass + + def test_timeout(self): + lock = self.locktype() + # Can't set timeout if not blocking + self.assertRaises(ValueError, lock.acquire, False, 1) + # Invalid timeout values + self.assertRaises(ValueError, lock.acquire, timeout=-100) + self.assertRaises(OverflowError, lock.acquire, timeout=1e100) + self.assertRaises(OverflowError, lock.acquire, timeout=TIMEOUT_MAX + 1) + # TIMEOUT_MAX is ok + lock.acquire(timeout=TIMEOUT_MAX) + lock.release() + t1 = time.monotonic() + self.assertTrue(lock.acquire(timeout=5)) + t2 = time.monotonic() + # Just a sanity test that it didn't actually wait for the timeout. + self.assertLess(t2 - t1, 5) + results = [] + def f(): + t1 = time.monotonic() + results.append(lock.acquire(timeout=0.5)) + t2 = time.monotonic() + results.append(t2 - t1) + with Bunch(f, 1): + pass + self.assertFalse(results[0]) + self.assertTimeout(results[1], 0.5) + + def test_weakref_exists(self): + lock = self.locktype() + ref = weakref.ref(lock) + self.assertIsNotNone(ref()) + + def test_weakref_deleted(self): + lock = self.locktype() + ref = weakref.ref(lock) + del lock + gc.collect() # For PyPy or other GCs. + self.assertIsNone(ref()) + + +class LockTests(BaseLockTests): + """ + Tests for non-recursive, weak locks + (which can be acquired and released from different threads). + """ + def test_reacquire(self): + # Lock needs to be released before re-acquiring. + lock = self.locktype() + phase = [] + + def f(): + lock.acquire() + phase.append(None) + lock.acquire() + phase.append(None) + + with threading_helper.wait_threads_exit(): + # Thread blocked on lock.acquire() + start_new_thread(f, ()) + self.wait_phase(phase, 1) + + # Thread unblocked + lock.release() + self.wait_phase(phase, 2) + + def test_different_thread(self): + # Lock can be released from a different thread. + lock = self.locktype() + lock.acquire() + def f(): + lock.release() + with Bunch(f, 1): + pass + lock.acquire() + lock.release() + + def test_state_after_timeout(self): + # Issue #11618: check that lock is in a proper state after a + # (non-zero) timeout. + lock = self.locktype() + lock.acquire() + self.assertFalse(lock.acquire(timeout=0.01)) + lock.release() + self.assertFalse(lock.locked()) + self.assertTrue(lock.acquire(blocking=False)) + + @requires_fork + def test_at_fork_reinit(self): + def use_lock(lock): + # make sure that the lock still works normally + # after _at_fork_reinit() + lock.acquire() + lock.release() + + # unlocked + lock = self.locktype() + lock._at_fork_reinit() + use_lock(lock) + + # locked: _at_fork_reinit() resets the lock to the unlocked state + lock2 = self.locktype() + lock2.acquire() + lock2._at_fork_reinit() + use_lock(lock2) + + +class RLockTests(BaseLockTests): + """ + Tests for recursive locks. + """ + def test_repr_count(self): + # see gh-134322: check that count values are correct: + # when a rlock is just created, + # in a second thread when rlock is acquired in the main thread. + lock = self.locktype() + self.assertIn("count=0", repr(lock)) + self.assertIn("") + evt.set() + self.assertRegex(repr(evt), r"<\w+\.Event at .*: set>") + + +class ConditionTests(BaseTestCase): + """ + Tests for condition variables. + """ + + def test_acquire(self): + cond = self.condtype() + # Be default we have an RLock: the condition can be acquired multiple + # times. + cond.acquire() + cond.acquire() + cond.release() + cond.release() + lock = threading.Lock() + cond = self.condtype(lock) + cond.acquire() + self.assertFalse(lock.acquire(False)) + cond.release() + self.assertTrue(lock.acquire(False)) + self.assertFalse(cond.acquire(False)) + lock.release() + with cond: + self.assertFalse(lock.acquire(False)) + + def test_unacquired_wait(self): + cond = self.condtype() + self.assertRaises(RuntimeError, cond.wait) + + def test_unacquired_notify(self): + cond = self.condtype() + self.assertRaises(RuntimeError, cond.notify) + + def _check_notify(self, cond): + # Note that this test is sensitive to timing. If the worker threads + # don't execute in a timely fashion, the main thread may think they + # are further along then they are. The main thread therefore issues + # wait_threads_blocked() statements to try to make sure that it doesn't + # race ahead of the workers. + # Secondly, this test assumes that condition variables are not subject + # to spurious wakeups. The absence of spurious wakeups is an implementation + # detail of Condition Variables in current CPython, but in general, not + # a guaranteed property of condition variables as a programming + # construct. In particular, it is possible that this can no longer + # be conveniently guaranteed should their implementation ever change. + ready = [] + results1 = [] + results2 = [] + phase_num = 0 + def f(): + cond.acquire() + ready.append(phase_num) + result = cond.wait() + + cond.release() + results1.append((result, phase_num)) + + cond.acquire() + ready.append(phase_num) + + result = cond.wait() + cond.release() + results2.append((result, phase_num)) + + N = 5 + with Bunch(f, N): + # first wait, to ensure all workers settle into cond.wait() before + # we continue. See issues #8799 and #30727. + for _ in support.sleeping_retry(support.SHORT_TIMEOUT): + if len(ready) >= N: + break + + ready.clear() + self.assertEqual(results1, []) + + # Notify 3 threads at first + count1 = 3 + cond.acquire() + cond.notify(count1) + wait_threads_blocked(count1) + + # Phase 1 + phase_num = 1 + cond.release() + for _ in support.sleeping_retry(support.SHORT_TIMEOUT): + if len(results1) >= count1: + break + + self.assertEqual(results1, [(True, 1)] * count1) + self.assertEqual(results2, []) + + # Wait until awaken workers are blocked on cond.wait() + for _ in support.sleeping_retry(support.SHORT_TIMEOUT): + if len(ready) >= count1 : + break + + # Notify 5 threads: they might be in their first or second wait + cond.acquire() + cond.notify(5) + wait_threads_blocked(N) + + # Phase 2 + phase_num = 2 + cond.release() + for _ in support.sleeping_retry(support.SHORT_TIMEOUT): + if len(results1) + len(results2) >= (N + count1): + break + + count2 = N - count1 + self.assertEqual(results1, [(True, 1)] * count1 + [(True, 2)] * count2) + self.assertEqual(results2, [(True, 2)] * count1) + + # Make sure all workers settle into cond.wait() + for _ in support.sleeping_retry(support.SHORT_TIMEOUT): + if len(ready) >= N: + break + + # Notify all threads: they are all in their second wait + cond.acquire() + cond.notify_all() + wait_threads_blocked(N) + + # Phase 3 + phase_num = 3 + cond.release() + for _ in support.sleeping_retry(support.SHORT_TIMEOUT): + if len(results2) >= N: + break + self.assertEqual(results1, [(True, 1)] * count1 + [(True, 2)] * count2) + self.assertEqual(results2, [(True, 2)] * count1 + [(True, 3)] * count2) + + def test_notify(self): + cond = self.condtype() + self._check_notify(cond) + # A second time, to check internal state is still ok. + self._check_notify(cond) + + def test_timeout(self): + cond = self.condtype() + timeout = 0.5 + results = [] + def f(): + cond.acquire() + t1 = time.monotonic() + result = cond.wait(timeout) + t2 = time.monotonic() + cond.release() + results.append((t2 - t1, result)) + + N = 5 + with Bunch(f, N): + pass + self.assertEqual(len(results), N) + + for dt, result in results: + self.assertTimeout(dt, timeout) + # Note that conceptually (that"s the condition variable protocol) + # a wait() may succeed even if no one notifies us and before any + # timeout occurs. Spurious wakeups can occur. + # This makes it hard to verify the result value. + # In practice, this implementation has no spurious wakeups. + self.assertFalse(result) + + def test_waitfor(self): + cond = self.condtype() + state = 0 + def f(): + with cond: + result = cond.wait_for(lambda: state == 4) + self.assertTrue(result) + self.assertEqual(state, 4) + + with Bunch(f, 1): + for i in range(4): + time.sleep(0.010) + with cond: + state += 1 + cond.notify() + + def test_waitfor_timeout(self): + cond = self.condtype() + state = 0 + success = [] + def f(): + with cond: + dt = time.monotonic() + result = cond.wait_for(lambda : state==4, timeout=0.1) + dt = time.monotonic() - dt + self.assertFalse(result) + self.assertTimeout(dt, 0.1) + success.append(None) + + with Bunch(f, 1): + # Only increment 3 times, so state == 4 is never reached. + for i in range(3): + time.sleep(0.010) + with cond: + state += 1 + cond.notify() + + self.assertEqual(len(success), 1) + + +class BaseSemaphoreTests(BaseTestCase): + """ + Common tests for {bounded, unbounded} semaphore objects. + """ + + def test_constructor(self): + self.assertRaises(ValueError, self.semtype, value = -1) + self.assertRaises(ValueError, self.semtype, value = -sys.maxsize) + + def test_acquire(self): + sem = self.semtype(1) + sem.acquire() + sem.release() + sem = self.semtype(2) + sem.acquire() + sem.acquire() + sem.release() + sem.release() + + def test_acquire_destroy(self): + sem = self.semtype() + sem.acquire() + del sem + + def test_acquire_contended(self): + sem_value = 7 + sem = self.semtype(sem_value) + sem.acquire() + + sem_results = [] + results1 = [] + results2 = [] + phase_num = 0 + + def func(): + sem_results.append(sem.acquire()) + results1.append(phase_num) + + sem_results.append(sem.acquire()) + results2.append(phase_num) + + def wait_count(count): + for _ in support.sleeping_retry(support.SHORT_TIMEOUT): + if len(results1) + len(results2) >= count: + break + + N = 10 + with Bunch(func, N): + # Phase 0 + count1 = sem_value - 1 + wait_count(count1) + self.assertEqual(results1 + results2, [0] * count1) + + # Phase 1 + phase_num = 1 + for i in range(sem_value): + sem.release() + count2 = sem_value + wait_count(count1 + count2) + self.assertEqual(sorted(results1 + results2), + [0] * count1 + [1] * count2) + + # Phase 2 + phase_num = 2 + count3 = (sem_value - 1) + for i in range(count3): + sem.release() + wait_count(count1 + count2 + count3) + self.assertEqual(sorted(results1 + results2), + [0] * count1 + [1] * count2 + [2] * count3) + # The semaphore is still locked + self.assertFalse(sem.acquire(False)) + + # Final release, to let the last thread finish + count4 = 1 + sem.release() + + self.assertEqual(sem_results, + [True] * (count1 + count2 + count3 + count4)) + + def test_multirelease(self): + sem_value = 7 + sem = self.semtype(sem_value) + sem.acquire() + + results1 = [] + results2 = [] + phase_num = 0 + def func(): + sem.acquire() + results1.append(phase_num) + + sem.acquire() + results2.append(phase_num) + + def wait_count(count): + for _ in support.sleeping_retry(support.SHORT_TIMEOUT): + if len(results1) + len(results2) >= count: + break + + with Bunch(func, 10): + # Phase 0 + count1 = sem_value - 1 + wait_count(count1) + self.assertEqual(results1 + results2, [0] * count1) + + # Phase 1 + phase_num = 1 + count2 = sem_value + sem.release(count2) + wait_count(count1 + count2) + self.assertEqual(sorted(results1 + results2), + [0] * count1 + [1] * count2) + + # Phase 2 + phase_num = 2 + count3 = sem_value - 1 + sem.release(count3) + wait_count(count1 + count2 + count3) + self.assertEqual(sorted(results1 + results2), + [0] * count1 + [1] * count2 + [2] * count3) + # The semaphore is still locked + self.assertFalse(sem.acquire(False)) + + # Final release, to let the last thread finish + sem.release() + + def test_try_acquire(self): + sem = self.semtype(2) + self.assertTrue(sem.acquire(False)) + self.assertTrue(sem.acquire(False)) + self.assertFalse(sem.acquire(False)) + sem.release() + self.assertTrue(sem.acquire(False)) + + def test_try_acquire_contended(self): + sem = self.semtype(4) + sem.acquire() + results = [] + def f(): + results.append(sem.acquire(False)) + results.append(sem.acquire(False)) + with Bunch(f, 5): + pass + # There can be a thread switch between acquiring the semaphore and + # appending the result, therefore results will not necessarily be + # ordered. + self.assertEqual(sorted(results), [False] * 7 + [True] * 3 ) + + def test_acquire_timeout(self): + sem = self.semtype(2) + self.assertRaises(ValueError, sem.acquire, False, timeout=1.0) + self.assertTrue(sem.acquire(timeout=0.005)) + self.assertTrue(sem.acquire(timeout=0.005)) + self.assertFalse(sem.acquire(timeout=0.005)) + sem.release() + self.assertTrue(sem.acquire(timeout=0.005)) + t = time.monotonic() + self.assertFalse(sem.acquire(timeout=0.5)) + dt = time.monotonic() - t + self.assertTimeout(dt, 0.5) + + def test_default_value(self): + # The default initial value is 1. + sem = self.semtype() + sem.acquire() + def f(): + sem.acquire() + sem.release() + + with Bunch(f, 1) as bunch: + # Thread blocked on sem.acquire() + wait_threads_blocked(1) + self.assertFalse(bunch.finished) + + # Thread unblocked + sem.release() + + def test_with(self): + sem = self.semtype(2) + def _with(err=None): + with sem: + self.assertTrue(sem.acquire(False)) + sem.release() + with sem: + self.assertFalse(sem.acquire(False)) + if err: + raise err + _with() + self.assertTrue(sem.acquire(False)) + sem.release() + self.assertRaises(TypeError, _with, TypeError) + self.assertTrue(sem.acquire(False)) + sem.release() + +class SemaphoreTests(BaseSemaphoreTests): + """ + Tests for unbounded semaphores. + """ + + def test_release_unacquired(self): + # Unbounded releases are allowed and increment the semaphore's value + sem = self.semtype(1) + sem.release() + sem.acquire() + sem.acquire() + sem.release() + + def test_repr(self): + sem = self.semtype(3) + self.assertRegex(repr(sem), r"<\w+\.Semaphore at .*: value=3>") + sem.acquire() + self.assertRegex(repr(sem), r"<\w+\.Semaphore at .*: value=2>") + sem.release() + sem.release() + self.assertRegex(repr(sem), r"<\w+\.Semaphore at .*: value=4>") + + +class BoundedSemaphoreTests(BaseSemaphoreTests): + """ + Tests for bounded semaphores. + """ + + def test_release_unacquired(self): + # Cannot go past the initial value + sem = self.semtype() + self.assertRaises(ValueError, sem.release) + sem.acquire() + sem.release() + self.assertRaises(ValueError, sem.release) + + def test_repr(self): + sem = self.semtype(3) + self.assertRegex(repr(sem), r"<\w+\.BoundedSemaphore at .*: value=3/3>") + sem.acquire() + self.assertRegex(repr(sem), r"<\w+\.BoundedSemaphore at .*: value=2/3>") + + +class BarrierTests(BaseTestCase): + """ + Tests for Barrier objects. + """ + N = 5 + defaultTimeout = 2.0 + + def setUp(self): + self.barrier = self.barriertype(self.N, timeout=self.defaultTimeout) + + def tearDown(self): + self.barrier.abort() + + def run_threads(self, f): + with Bunch(f, self.N): + pass + + def multipass(self, results, n): + m = self.barrier.parties + self.assertEqual(m, self.N) + for i in range(n): + results[0].append(True) + self.assertEqual(len(results[1]), i * m) + self.barrier.wait() + results[1].append(True) + self.assertEqual(len(results[0]), (i + 1) * m) + self.barrier.wait() + self.assertEqual(self.barrier.n_waiting, 0) + self.assertFalse(self.barrier.broken) + + def test_constructor(self): + self.assertRaises(ValueError, self.barriertype, parties=0) + self.assertRaises(ValueError, self.barriertype, parties=-1) + + def test_barrier(self, passes=1): + """ + Test that a barrier is passed in lockstep + """ + results = [[],[]] + def f(): + self.multipass(results, passes) + self.run_threads(f) + + def test_barrier_10(self): + """ + Test that a barrier works for 10 consecutive runs + """ + return self.test_barrier(10) + + def test_wait_return(self): + """ + test the return value from barrier.wait + """ + results = [] + def f(): + r = self.barrier.wait() + results.append(r) + + self.run_threads(f) + self.assertEqual(sum(results), sum(range(self.N))) + + def test_action(self): + """ + Test the 'action' callback + """ + results = [] + def action(): + results.append(True) + barrier = self.barriertype(self.N, action) + def f(): + barrier.wait() + self.assertEqual(len(results), 1) + + self.run_threads(f) + + def test_abort(self): + """ + Test that an abort will put the barrier in a broken state + """ + results1 = [] + results2 = [] + def f(): + try: + i = self.barrier.wait() + if i == self.N//2: + raise RuntimeError + self.barrier.wait() + results1.append(True) + except threading.BrokenBarrierError: + results2.append(True) + except RuntimeError: + self.barrier.abort() + pass + + self.run_threads(f) + self.assertEqual(len(results1), 0) + self.assertEqual(len(results2), self.N-1) + self.assertTrue(self.barrier.broken) + + def test_reset(self): + """ + Test that a 'reset' on a barrier frees the waiting threads + """ + results1 = [] + results2 = [] + results3 = [] + def f(): + i = self.barrier.wait() + if i == self.N//2: + # Wait until the other threads are all in the barrier. + for _ in support.sleeping_retry(support.SHORT_TIMEOUT): + if self.barrier.n_waiting >= (self.N - 1): + break + self.barrier.reset() + else: + try: + self.barrier.wait() + results1.append(True) + except threading.BrokenBarrierError: + results2.append(True) + # Now, pass the barrier again + self.barrier.wait() + results3.append(True) + + self.run_threads(f) + self.assertEqual(len(results1), 0) + self.assertEqual(len(results2), self.N-1) + self.assertEqual(len(results3), self.N) + + + def test_abort_and_reset(self): + """ + Test that a barrier can be reset after being broken. + """ + results1 = [] + results2 = [] + results3 = [] + barrier2 = self.barriertype(self.N) + def f(): + try: + i = self.barrier.wait() + if i == self.N//2: + raise RuntimeError + self.barrier.wait() + results1.append(True) + except threading.BrokenBarrierError: + results2.append(True) + except RuntimeError: + self.barrier.abort() + pass + # Synchronize and reset the barrier. Must synchronize first so + # that everyone has left it when we reset, and after so that no + # one enters it before the reset. + if barrier2.wait() == self.N//2: + self.barrier.reset() + barrier2.wait() + self.barrier.wait() + results3.append(True) + + self.run_threads(f) + self.assertEqual(len(results1), 0) + self.assertEqual(len(results2), self.N-1) + self.assertEqual(len(results3), self.N) + + def test_timeout(self): + """ + Test wait(timeout) + """ + def f(): + i = self.barrier.wait() + if i == self.N // 2: + # One thread is late! + time.sleep(self.defaultTimeout / 2) + # Default timeout is 2.0, so this is shorter. + self.assertRaises(threading.BrokenBarrierError, + self.barrier.wait, self.defaultTimeout / 4) + self.run_threads(f) + + def test_default_timeout(self): + """ + Test the barrier's default timeout + """ + timeout = 0.100 + barrier = self.barriertype(2, timeout=timeout) + def f(): + self.assertRaises(threading.BrokenBarrierError, + barrier.wait) + + start_time = time.monotonic() + with Bunch(f, 1): + pass + dt = time.monotonic() - start_time + self.assertGreaterEqual(dt, timeout) + + def test_single_thread(self): + b = self.barriertype(1) + b.wait() + b.wait() + + def test_repr(self): + barrier = self.barriertype(3) + timeout = support.LONG_TIMEOUT + self.assertRegex(repr(barrier), r"<\w+\.Barrier at .*: waiters=0/3>") + def f(): + barrier.wait(timeout) + + N = 2 + with Bunch(f, N): + # Threads blocked on barrier.wait() + for _ in support.sleeping_retry(support.SHORT_TIMEOUT): + if barrier.n_waiting >= N: + break + self.assertRegex(repr(barrier), + r"<\w+\.Barrier at .*: waiters=2/3>") + + # Threads unblocked + barrier.wait(timeout) + + self.assertRegex(repr(barrier), + r"<\w+\.Barrier at .*: waiters=0/3>") + + # Abort the barrier + barrier.abort() + self.assertRegex(repr(barrier), + r"<\w+\.Barrier at .*: broken>") diff --git a/stdlib/test/pyclbr_input.py b/stdlib/test/pyclbr_input.py new file mode 100644 index 000000000..5535edbfa --- /dev/null +++ b/stdlib/test/pyclbr_input.py @@ -0,0 +1,85 @@ +"""Test cases for test_pyclbr.py""" + +def f(): pass + +class Other(object): + @classmethod + def foo(c): pass + + def om(self): pass + +class B (object): + def bm(self): pass + +class C (B): + d = 10 + + # This one is correctly considered by both test_pyclbr.py and pyclbr.py + # as a non-method of C. + foo = Other().foo + + # This causes test_pyclbr.py to fail, but only because the + # introspection-based is_method() code in the test can't + # distinguish between this and a genuine method function like m(). + # + # The pyclbr.py module gets this right as it parses the text. + om = Other.om + f = f + + def m(self): pass + + @staticmethod + def sm(self): pass + + @classmethod + def cm(self): pass + +# Check that mangling is correctly handled + +class a: + def a(self): pass + def _(self): pass + def _a(self): pass + def __(self): pass + def ___(self): pass + def __a(self): pass + +class _: + def a(self): pass + def _(self): pass + def _a(self): pass + def __(self): pass + def ___(self): pass + def __a(self): pass + +class __: + def a(self): pass + def _(self): pass + def _a(self): pass + def __(self): pass + def ___(self): pass + def __a(self): pass + +class ___: + def a(self): pass + def _(self): pass + def _a(self): pass + def __(self): pass + def ___(self): pass + def __a(self): pass + +class _a: + def a(self): pass + def _(self): pass + def _a(self): pass + def __(self): pass + def ___(self): pass + def __a(self): pass + +class __a: + def a(self): pass + def _(self): pass + def _a(self): pass + def __(self): pass + def ___(self): pass + def __a(self): pass diff --git a/stdlib/test/relimport.py b/stdlib/test/relimport.py new file mode 100644 index 000000000..50aa497f7 --- /dev/null +++ b/stdlib/test/relimport.py @@ -0,0 +1 @@ +from .test_import import * diff --git a/stdlib/test/test_import/__init__.py b/stdlib/test/test_import/__init__.py new file mode 100644 index 000000000..2e1c6d72f --- /dev/null +++ b/stdlib/test/test_import/__init__.py @@ -0,0 +1,3407 @@ +import builtins +import errno +import glob +import json +import importlib.util +from importlib._bootstrap_external import _get_sourcefile +from importlib.machinery import ( + AppleFrameworkLoader, + BuiltinImporter, + ExtensionFileLoader, + FrozenImporter, + SourceFileLoader, +) +import marshal +import os +import py_compile +import random +import shutil +import stat +import subprocess +import sys +import textwrap +import threading +import time +import types +import unittest +from unittest import mock +import _imp + +from test.support import os_helper +from test.support import ( + STDLIB_DIR, + swap_attr, + swap_item, + cpython_only, + is_apple_mobile, + is_emscripten, + is_wasm32, + run_in_subinterp, + run_in_subinterp_with_config, + Py_TRACE_REFS, + requires_gil_enabled, + Py_GIL_DISABLED, + no_rerun, + force_not_colorized_test_class, + catch_unraisable_exception +) +from test.support.import_helper import ( + forget, make_legacy_pyc, unlink, unload, ready_to_import, + DirsOnSysPath, CleanImport, import_module) +from test.support.os_helper import ( + TESTFN, rmtree, temp_umask, TESTFN_UNENCODABLE) +from test.support import script_helper +from test.support import threading_helper +from test.test_importlib.util import uncache +from types import ModuleType +try: + import _testsinglephase +except ImportError: + _testsinglephase = None +try: + import _testmultiphase +except ImportError: + _testmultiphase = None +try: + import _interpreters +except ModuleNotFoundError: + _interpreters = None +try: + import _testinternalcapi +except ImportError: + _testinternalcapi = None + + +skip_if_dont_write_bytecode = unittest.skipIf( + sys.dont_write_bytecode, + "test meaningful only when writing bytecode") + + +def _require_loader(module, loader, skip): + if isinstance(module, str): + module = __import__(module) + + MODULE_KINDS = { + BuiltinImporter: 'built-in', + ExtensionFileLoader: 'extension', + AppleFrameworkLoader: 'framework extension', + FrozenImporter: 'frozen', + SourceFileLoader: 'pure Python', + } + + expected = loader + assert isinstance(expected, type), expected + expected = MODULE_KINDS[expected] + + actual = module.__spec__.loader + if not isinstance(actual, type): + actual = type(actual) + actual = MODULE_KINDS[actual] + + if actual != expected: + err = f'expected module to be {expected}, got {module.__spec__}' + if skip: + raise unittest.SkipTest(err) + raise Exception(err) + return module + +def require_builtin(module, *, skip=False): + module = _require_loader(module, BuiltinImporter, skip) + assert module.__spec__.origin == 'built-in', module.__spec__ + +def require_extension(module, *, skip=False): + # Apple extensions must be distributed as frameworks. This requires + # a specialist loader. + if is_apple_mobile: + _require_loader(module, AppleFrameworkLoader, skip) + else: + _require_loader(module, ExtensionFileLoader, skip) + +def require_frozen(module, *, skip=True): + module = _require_loader(module, FrozenImporter, skip) + assert module.__spec__.origin == 'frozen', module.__spec__ + +def require_pure_python(module, *, skip=False): + _require_loader(module, SourceFileLoader, skip) + +def create_extension_loader(modname, filename): + # Apple extensions must be distributed as frameworks. This requires + # a specialist loader. + if is_apple_mobile: + return AppleFrameworkLoader(modname, filename) + else: + return ExtensionFileLoader(modname, filename) + +def import_extension_from_file(modname, filename, *, put_in_sys_modules=True): + loader = create_extension_loader(modname, filename) + spec = importlib.util.spec_from_loader(modname, loader) + module = importlib.util.module_from_spec(spec) + loader.exec_module(module) + if put_in_sys_modules: + sys.modules[modname] = module + return module + + +def remove_files(name): + for f in (name + ".py", + name + ".pyc", + name + ".pyw", + name + "$py.class"): + unlink(f) + rmtree('__pycache__') + + +if _testsinglephase is not None: + def restore__testsinglephase(*, _orig=_testsinglephase): + # We started with the module imported and want to restore + # it to its nominal state. + sys.modules.pop('_testsinglephase', None) + _orig._clear_globals() + origin = _orig.__spec__.origin + _testinternalcapi.clear_extension('_testsinglephase', origin) + import _testsinglephase + + +def requires_singlephase_init(meth): + """Decorator to skip if single-phase init modules are not supported.""" + if not isinstance(meth, type): + def meth(self, _meth=meth): + try: + return _meth(self) + finally: + restore__testsinglephase() + meth = cpython_only(meth) + msg = "gh-117694: free-threaded build does not currently support single-phase init modules in sub-interpreters" + meth = requires_gil_enabled(msg)(meth) + return unittest.skipIf(_testsinglephase is None, + 'test requires _testsinglephase module')(meth) + + +def requires_subinterpreters(meth): + """Decorator to skip a test if subinterpreters are not supported.""" + return unittest.skipIf(_interpreters is None, + 'subinterpreters required')(meth) + + +class ModuleSnapshot(types.SimpleNamespace): + """A representation of a module for testing. + + Fields: + + * id - the module's object ID + * module - the actual module or an adequate substitute + * __file__ + * __spec__ + * name + * origin + * ns - a copy (dict) of the module's __dict__ (or None) + * ns_id - the object ID of the module's __dict__ + * cached - the sys.modules[mod.__spec__.name] entry (or None) + * cached_id - the object ID of the sys.modules entry (or None) + + In cases where the value is not available (e.g. due to serialization), + the value will be None. + """ + _fields = tuple('id module ns ns_id cached cached_id'.split()) + + @classmethod + def from_module(cls, mod): + name = mod.__spec__.name + cached = sys.modules.get(name) + return cls( + id=id(mod), + module=mod, + ns=types.SimpleNamespace(**mod.__dict__), + ns_id=id(mod.__dict__), + cached=cached, + cached_id=id(cached), + ) + + SCRIPT = textwrap.dedent(''' + {imports} + + name = {name!r} + + {prescript} + + mod = {name} + + {body} + + {postscript} + ''') + IMPORTS = textwrap.dedent(''' + import sys + ''').strip() + SCRIPT_BODY = textwrap.dedent(''' + # Capture the snapshot data. + cached = sys.modules.get(name) + snapshot = dict( + id=id(mod), + module=dict( + __file__=mod.__file__, + __spec__=dict( + name=mod.__spec__.name, + origin=mod.__spec__.origin, + ), + ), + ns=None, + ns_id=id(mod.__dict__), + cached=None, + cached_id=id(cached) if cached else None, + ) + ''').strip() + CLEANUP_SCRIPT = textwrap.dedent(''' + # Clean up the module. + sys.modules.pop(name, None) + ''').strip() + + @classmethod + def build_script(cls, name, *, + prescript=None, + import_first=False, + postscript=None, + postcleanup=False, + ): + if postcleanup is True: + postcleanup = cls.CLEANUP_SCRIPT + elif isinstance(postcleanup, str): + postcleanup = textwrap.dedent(postcleanup).strip() + postcleanup = cls.CLEANUP_SCRIPT + os.linesep + postcleanup + else: + postcleanup = '' + prescript = textwrap.dedent(prescript).strip() if prescript else '' + postscript = textwrap.dedent(postscript).strip() if postscript else '' + + if postcleanup: + if postscript: + postscript = postscript + os.linesep * 2 + postcleanup + else: + postscript = postcleanup + + if import_first: + prescript += textwrap.dedent(f''' + + # Now import the module. + assert name not in sys.modules + import {name}''') + + return cls.SCRIPT.format( + imports=cls.IMPORTS.strip(), + name=name, + prescript=prescript.strip(), + body=cls.SCRIPT_BODY.strip(), + postscript=postscript, + ) + + @classmethod + def parse(cls, text): + raw = json.loads(text) + mod = raw['module'] + mod['__spec__'] = types.SimpleNamespace(**mod['__spec__']) + raw['module'] = types.SimpleNamespace(**mod) + return cls(**raw) + + @classmethod + def from_subinterp(cls, name, interpid=None, *, pipe=None, **script_kwds): + if pipe is not None: + return cls._from_subinterp(name, interpid, pipe, script_kwds) + pipe = os.pipe() + try: + return cls._from_subinterp(name, interpid, pipe, script_kwds) + finally: + r, w = pipe + os.close(r) + os.close(w) + + @classmethod + def _from_subinterp(cls, name, interpid, pipe, script_kwargs): + r, w = pipe + + # Build the script. + postscript = textwrap.dedent(f''' + # Send the result over the pipe. + import json + import os + os.write({w}, json.dumps(snapshot).encode()) + + ''') + _postscript = script_kwargs.get('postscript') + if _postscript: + _postscript = textwrap.dedent(_postscript).lstrip() + postscript += _postscript + script_kwargs['postscript'] = postscript.strip() + script = cls.build_script(name, **script_kwargs) + + # Run the script. + if interpid is None: + ret = run_in_subinterp(script) + if ret != 0: + raise AssertionError(f'{ret} != 0') + else: + _interpreters.run_string(interpid, script) + + # Parse the results. + text = os.read(r, 1000) + return cls.parse(text.decode()) + + +@force_not_colorized_test_class +class ImportTests(unittest.TestCase): + + def setUp(self): + remove_files(TESTFN) + importlib.invalidate_caches() + + def tearDown(self): + unload(TESTFN) + + def test_import_raises_ModuleNotFoundError(self): + with self.assertRaises(ModuleNotFoundError): + import something_that_should_not_exist_anywhere + + def test_from_import_missing_module_raises_ModuleNotFoundError(self): + with self.assertRaises(ModuleNotFoundError): + from something_that_should_not_exist_anywhere import blah + + def test_from_import_missing_attr_raises_ImportError(self): + with self.assertRaises(ImportError): + from importlib import something_that_should_not_exist_anywhere + + def test_from_import_missing_attr_has_name_and_path(self): + with CleanImport('os'): + import os + with self.assertRaises(ImportError) as cm: + from os import i_dont_exist + self.assertEqual(cm.exception.name, 'os') + self.assertEqual(cm.exception.path, os.__file__) + self.assertRegex(str(cm.exception), r"cannot import name 'i_dont_exist' from 'os' \(.*os.py\)") + + @cpython_only + def test_from_import_missing_attr_has_name_and_so_path(self): + _testcapi = import_module("_testcapi") + with self.assertRaises(ImportError) as cm: + from _testcapi import i_dont_exist + self.assertEqual(cm.exception.name, '_testcapi') + if hasattr(_testcapi, "__file__"): + # The path on the exception is strictly the spec origin, not the + # module's __file__. For most cases, these are the same; but on + # iOS, the Framework relocation process results in the exception + # being raised from the spec location. + self.assertEqual(cm.exception.path, _testcapi.__spec__.origin) + self.assertRegex( + str(cm.exception), + r"cannot import name 'i_dont_exist' from '_testcapi' \(.*(\.(so|pyd))?\)" + ) + else: + self.assertEqual( + str(cm.exception), + "cannot import name 'i_dont_exist' from '_testcapi' (unknown location)" + ) + + def test_from_import_missing_attr_has_name(self): + with self.assertRaises(ImportError) as cm: + # _warning has no path as it's a built-in module. + from _warning import i_dont_exist + self.assertEqual(cm.exception.name, '_warning') + self.assertIsNone(cm.exception.path) + + def test_from_import_missing_attr_path_is_canonical(self): + with self.assertRaises(ImportError) as cm: + from os.path import i_dont_exist + self.assertIn(cm.exception.name, {'posixpath', 'ntpath'}) + self.assertIsNotNone(cm.exception) + + def test_from_import_star_invalid_type(self): + import re + with ready_to_import() as (name, path): + with open(path, 'w', encoding='utf-8') as f: + f.write("__all__ = [b'invalid_type']") + globals = {} + with self.assertRaisesRegex( + TypeError, f"{re.escape(name)}\\.__all__ must be str" + ): + exec(f"from {name} import *", globals) + self.assertNotIn(b"invalid_type", globals) + with ready_to_import() as (name, path): + with open(path, 'w', encoding='utf-8') as f: + f.write("globals()[b'invalid_type'] = object()") + globals = {} + with self.assertRaisesRegex( + TypeError, f"{re.escape(name)}\\.__dict__ must be str" + ): + exec(f"from {name} import *", globals) + self.assertNotIn(b"invalid_type", globals) + + def test_case_sensitivity(self): + # Brief digression to test that import is case-sensitive: if we got + # this far, we know for sure that "random" exists. + with self.assertRaises(ImportError): + import RAnDoM + + def test_double_const(self): + # Importing double_const checks that float constants + # serialized by marshal as PYC files don't lose precision + # (SF bug 422177). + from test.test_import.data import double_const + unload('test.test_import.data.double_const') + from test.test_import.data import double_const # noqa: F811 + + def test_import(self): + def test_with_extension(ext): + # The extension is normally ".py", perhaps ".pyw". + source = TESTFN + ext + pyc = TESTFN + ".pyc" + + with open(source, "w", encoding='utf-8') as f: + print("# This tests Python's ability to import a", + ext, "file.", file=f) + a = random.randrange(1000) + b = random.randrange(1000) + print("a =", a, file=f) + print("b =", b, file=f) + + if TESTFN in sys.modules: + del sys.modules[TESTFN] + importlib.invalidate_caches() + try: + try: + mod = __import__(TESTFN) + except ImportError as err: + self.fail("import from %s failed: %s" % (ext, err)) + + self.assertEqual(mod.a, a, + "module loaded (%s) but contents invalid" % mod) + self.assertEqual(mod.b, b, + "module loaded (%s) but contents invalid" % mod) + finally: + forget(TESTFN) + unlink(source) + unlink(pyc) + + sys.path.insert(0, os.curdir) + try: + test_with_extension(".py") + if sys.platform.startswith("win"): + for ext in [".PY", ".Py", ".pY", ".pyw", ".PYW", ".pYw"]: + test_with_extension(ext) + finally: + del sys.path[0] + + def test_module_with_large_stack(self, module='longlist'): + # Regression test for http://bugs.python.org/issue561858. + filename = module + '.py' + + # Create a file with a list of 65000 elements. + with open(filename, 'w', encoding='utf-8') as f: + f.write('d = [\n') + for i in range(65000): + f.write('"",\n') + f.write(']') + + try: + # Compile & remove .py file; we only need .pyc. + # Bytecode must be relocated from the PEP 3147 bytecode-only location. + py_compile.compile(filename) + finally: + unlink(filename) + + # Need to be able to load from current dir. + sys.path.append('') + importlib.invalidate_caches() + + namespace = {} + try: + make_legacy_pyc(filename) + # This used to crash. + exec('import ' + module, None, namespace) + finally: + # Cleanup. + del sys.path[-1] + unlink(filename + 'c') + unlink(filename + 'o') + + # Remove references to the module (unload the module) + namespace.clear() + try: + del sys.modules[module] + except KeyError: + pass + + def test_failing_import_sticks(self): + source = TESTFN + ".py" + with open(source, "w", encoding='utf-8') as f: + print("a = 1/0", file=f) + + # New in 2.4, we shouldn't be able to import that no matter how often + # we try. + sys.path.insert(0, os.curdir) + importlib.invalidate_caches() + if TESTFN in sys.modules: + del sys.modules[TESTFN] + try: + for i in [1, 2, 3]: + self.assertRaises(ZeroDivisionError, __import__, TESTFN) + self.assertNotIn(TESTFN, sys.modules, + "damaged module in sys.modules on %i try" % i) + finally: + del sys.path[0] + remove_files(TESTFN) + + def test_import_name_binding(self): + # import x.y.z binds x in the current namespace + import test as x + import test.support + self.assertIs(x, test, x.__name__) + self.assertHasAttr(test.support, "__file__") + + # import x.y.z as w binds z as w + import test.support as y + self.assertIs(y, test.support, y.__name__) + + def test_issue31286(self): + # import in a 'finally' block resulted in SystemError + try: + x = ... + finally: + import test.support.script_helper as x + + # import in a 'while' loop resulted in stack overflow + i = 0 + while i < 10: + import test.support.script_helper as x + i += 1 + + # import in a 'for' loop resulted in segmentation fault + for i in range(2): + import test.support.script_helper as x # noqa: F811 + + def test_failing_reload(self): + # A failing reload should leave the module object in sys.modules. + source = TESTFN + os.extsep + "py" + with open(source, "w", encoding='utf-8') as f: + f.write("a = 1\nb=2\n") + + sys.path.insert(0, os.curdir) + try: + mod = __import__(TESTFN) + self.assertIn(TESTFN, sys.modules) + self.assertEqual(mod.a, 1, "module has wrong attribute values") + self.assertEqual(mod.b, 2, "module has wrong attribute values") + + # On WinXP, just replacing the .py file wasn't enough to + # convince reload() to reparse it. Maybe the timestamp didn't + # move enough. We force it to get reparsed by removing the + # compiled file too. + remove_files(TESTFN) + + # Now damage the module. + with open(source, "w", encoding='utf-8') as f: + f.write("a = 10\nb=20//0\n") + + self.assertRaises(ZeroDivisionError, importlib.reload, mod) + # But we still expect the module to be in sys.modules. + mod = sys.modules.get(TESTFN) + self.assertIsNotNone(mod, "expected module to be in sys.modules") + + # We should have replaced a w/ 10, but the old b value should + # stick. + self.assertEqual(mod.a, 10, "module has wrong attribute values") + self.assertEqual(mod.b, 2, "module has wrong attribute values") + + finally: + del sys.path[0] + remove_files(TESTFN) + unload(TESTFN) + + @skip_if_dont_write_bytecode + def test_file_to_source(self): + # check if __file__ points to the source file where available + source = TESTFN + ".py" + with open(source, "w", encoding='utf-8') as f: + f.write("test = None\n") + + sys.path.insert(0, os.curdir) + try: + mod = __import__(TESTFN) + self.assertEndsWith(mod.__file__, '.py') + os.remove(source) + del sys.modules[TESTFN] + make_legacy_pyc(source) + importlib.invalidate_caches() + mod = __import__(TESTFN) + base, ext = os.path.splitext(mod.__file__) + self.assertEqual(ext, '.pyc') + finally: + del sys.path[0] + remove_files(TESTFN) + if TESTFN in sys.modules: + del sys.modules[TESTFN] + + def test_import_by_filename(self): + path = os.path.abspath(TESTFN) + encoding = sys.getfilesystemencoding() + try: + path.encode(encoding) + except UnicodeEncodeError: + self.skipTest('path is not encodable to {}'.format(encoding)) + with self.assertRaises(ImportError) as c: + __import__(path) + + def test_import_in_del_does_not_crash(self): + # Issue 4236 + testfn = script_helper.make_script('', TESTFN, textwrap.dedent("""\ + import sys + class C: + def __del__(self): + import importlib + sys.argv.insert(0, C()) + """)) + script_helper.assert_python_ok(testfn) + + @skip_if_dont_write_bytecode + def test_timestamp_overflow(self): + # A modification timestamp larger than 2**32 should not be a problem + # when importing a module (issue #11235). + sys.path.insert(0, os.curdir) + try: + source = TESTFN + ".py" + compiled = importlib.util.cache_from_source(source) + with open(source, 'w', encoding='utf-8') as f: + pass + try: + os.utime(source, (2 ** 33 - 5, 2 ** 33 - 5)) + except OverflowError: + self.skipTest("cannot set modification time to large integer") + except OSError as e: + if e.errno not in (getattr(errno, 'EOVERFLOW', None), + getattr(errno, 'EINVAL', None)): + raise + self.skipTest("cannot set modification time to large integer ({})".format(e)) + __import__(TESTFN) + # The pyc file was created. + os.stat(compiled) + finally: + del sys.path[0] + remove_files(TESTFN) + + def test_bogus_fromlist(self): + try: + __import__('http', fromlist=['blah']) + except ImportError: + self.fail("fromlist must allow bogus names") + + @cpython_only + def test_delete_builtins_import(self): + args = ["-c", "del __builtins__.__import__; import os"] + popen = script_helper.spawn_python(*args) + stdout, stderr = popen.communicate() + self.assertIn(b"ImportError", stdout) + + def test_from_import_message_for_nonexistent_module(self): + with self.assertRaisesRegex(ImportError, "^No module named 'bogus'"): + from bogus import foo + + def test_from_import_message_for_existing_module(self): + with self.assertRaisesRegex(ImportError, "^cannot import name 'bogus'"): + from re import bogus + + def test_from_import_AttributeError(self): + # Issue #24492: trying to import an attribute that raises an + # AttributeError should lead to an ImportError. + class AlwaysAttributeError: + def __getattr__(self, _): + raise AttributeError + + module_name = 'test_from_import_AttributeError' + self.addCleanup(unload, module_name) + sys.modules[module_name] = AlwaysAttributeError() + with self.assertRaises(ImportError) as cm: + from test_from_import_AttributeError import does_not_exist + + self.assertEqual(str(cm.exception), + "cannot import name 'does_not_exist' from '' (unknown location)") + + @cpython_only + def test_issue31492(self): + # There shouldn't be an assertion failure in case of failing to import + # from a module with a bad __name__ attribute, or in case of failing + # to access an attribute of such a module. + with swap_attr(os, '__name__', None): + with self.assertRaises(ImportError): + from os import does_not_exist + + with self.assertRaises(AttributeError): + os.does_not_exist + + @threading_helper.requires_working_threading() + def test_concurrency(self): + # bpo 38091: this is a hack to slow down the code that calls + # has_deadlock(); the logic was itself sometimes deadlocking. + def delay_has_deadlock(frame, event, arg): + if event == 'call' and frame.f_code.co_name == 'has_deadlock': + time.sleep(0.1) + + sys.path.insert(0, os.path.join(os.path.dirname(__file__), 'data')) + try: + exc = None + def run(): + sys.settrace(delay_has_deadlock) + event.wait() + try: + import package + except BaseException as e: + nonlocal exc + exc = e + sys.settrace(None) + + for i in range(10): + event = threading.Event() + threads = [threading.Thread(target=run) for x in range(2)] + try: + with threading_helper.start_threads(threads, event.set): + time.sleep(0) + finally: + sys.modules.pop('package', None) + sys.modules.pop('package.submodule', None) + if exc is not None: + raise exc + finally: + del sys.path[0] + + @unittest.skipUnless(sys.platform == "win32", "Windows-specific") + def test_dll_dependency_import(self): + from _winapi import GetModuleFileName + dllname = GetModuleFileName(sys.dllhandle) + pydname = importlib.util.find_spec("_sqlite3").origin + depname = os.path.join( + os.path.dirname(pydname), + "sqlite3{}.dll".format("_d" if "_d" in pydname else "")) + + with os_helper.temp_dir() as tmp: + tmp2 = os.path.join(tmp, "DLLs") + os.mkdir(tmp2) + + pyexe = os.path.join(tmp, os.path.basename(sys.executable)) + shutil.copy(sys.executable, pyexe) + shutil.copy(dllname, tmp) + for f in glob.glob(os.path.join(glob.escape(sys.prefix), "vcruntime*.dll")): + shutil.copy(f, tmp) + + shutil.copy(pydname, tmp2) + + env = None + env = {k.upper(): os.environ[k] for k in os.environ} + env["PYTHONPATH"] = tmp2 + ";" + STDLIB_DIR + + # Test 1: import with added DLL directory + subprocess.check_call([ + pyexe, "-Sc", ";".join([ + "import os", + "p = os.add_dll_directory({!r})".format( + os.path.dirname(depname)), + "import _sqlite3", + "p.close" + ])], + stderr=subprocess.STDOUT, + env=env, + cwd=os.path.dirname(pyexe)) + + # Test 2: import with DLL adjacent to PYD + shutil.copy(depname, tmp2) + subprocess.check_call([pyexe, "-Sc", "import _sqlite3"], + stderr=subprocess.STDOUT, + env=env, + cwd=os.path.dirname(pyexe)) + + def test_issue105979(self): + # this used to crash + with self.assertRaises(ImportError) as cm: + _imp.get_frozen_object("x", b"6\'\xd5Cu\x12") + self.assertIn("Frozen object named 'x' is invalid", + str(cm.exception)) + + def test_frozen_module_from_import_error(self): + with self.assertRaises(ImportError) as cm: + from os import this_will_never_exist + self.assertIn( + f"cannot import name 'this_will_never_exist' from 'os' ({os.__file__})", + str(cm.exception), + ) + with self.assertRaises(ImportError) as cm: + from sys import this_will_never_exist + self.assertIn( + "cannot import name 'this_will_never_exist' from 'sys' (unknown location)", + str(cm.exception), + ) + + scripts = [ + """ +import os +os.__spec__.has_location = False +os.__file__ = [] +from os import this_will_never_exist +""", + """ +import os +os.__spec__.has_location = False +del os.__file__ +from os import this_will_never_exist +""", + """ +import os +os.__spec__.origin = [] +os.__file__ = [] +from os import this_will_never_exist +""" + ] + for script in scripts: + with self.subTest(script=script): + expected_error = ( + b"cannot import name 'this_will_never_exist' " + b"from 'os' (unknown location)" + ) + popen = script_helper.spawn_python("-c", script) + stdout, stderr = popen.communicate() + self.assertIn(expected_error, stdout) + + def test_non_module_from_import_error(self): + prefix = """ +import sys +class NotAModule: ... +nm = NotAModule() +nm.symbol = 123 +sys.modules["not_a_module"] = nm +from not_a_module import symbol +""" + scripts = [ + prefix + "from not_a_module import missing_symbol", + prefix + "nm.__spec__ = []\nfrom not_a_module import missing_symbol", + ] + for script in scripts: + with self.subTest(script=script): + expected_error = ( + b"ImportError: cannot import name 'missing_symbol' from " + b"'' (unknown location)" + ) + popen = script_helper.spawn_python("-c", script) + stdout, stderr = popen.communicate() + self.assertIn(expected_error, stdout) + + def test_script_shadowing_stdlib(self): + script_errors = [ + ( + "import fractions\nfractions.Fraction", + rb"AttributeError: module 'fractions' has no attribute 'Fraction'" + ), + ( + "from fractions import Fraction", + rb"ImportError: cannot import name 'Fraction' from 'fractions'" + ) + ] + for script, error in script_errors: + with self.subTest(script=script), os_helper.temp_dir() as tmp: + with open(os.path.join(tmp, "fractions.py"), "w", encoding='utf-8') as f: + f.write(script) + + expected_error = error + ( + rb" \(consider renaming '.*fractions.py' since it has the " + rb"same name as the standard library module named 'fractions' " + rb"and prevents importing that standard library module\)" + ) + + popen = script_helper.spawn_python(os.path.join(tmp, "fractions.py"), cwd=tmp) + stdout, stderr = popen.communicate() + self.assertRegex(stdout, expected_error) + + popen = script_helper.spawn_python('-m', 'fractions', cwd=tmp) + stdout, stderr = popen.communicate() + self.assertRegex(stdout, expected_error) + + popen = script_helper.spawn_python('-c', 'import fractions', cwd=tmp) + stdout, stderr = popen.communicate() + self.assertRegex(stdout, expected_error) + + # and there's no error at all when using -P + popen = script_helper.spawn_python('-P', 'fractions.py', cwd=tmp) + stdout, stderr = popen.communicate() + self.assertEqual(stdout, b'') + + tmp_child = os.path.join(tmp, "child") + os.mkdir(tmp_child) + + # test the logic with different cwd + popen = script_helper.spawn_python(os.path.join(tmp, "fractions.py"), cwd=tmp_child) + stdout, stderr = popen.communicate() + self.assertRegex(stdout, expected_error) + + popen = script_helper.spawn_python('-m', 'fractions', cwd=tmp_child) + stdout, stderr = popen.communicate() + self.assertEqual(stdout, b'') # no error + + popen = script_helper.spawn_python('-c', 'import fractions', cwd=tmp_child) + stdout, stderr = popen.communicate() + self.assertEqual(stdout, b'') # no error + + def test_package_shadowing_stdlib_module(self): + script_errors = [ + ( + "fractions.Fraction", + rb"AttributeError: module 'fractions' has no attribute 'Fraction'" + ), + ( + "from fractions import Fraction", + rb"ImportError: cannot import name 'Fraction' from 'fractions'" + ) + ] + for script, error in script_errors: + with self.subTest(script=script), os_helper.temp_dir() as tmp: + os.mkdir(os.path.join(tmp, "fractions")) + with open( + os.path.join(tmp, "fractions", "__init__.py"), "w", encoding='utf-8' + ) as f: + f.write("shadowing_module = True") + with open(os.path.join(tmp, "main.py"), "w", encoding='utf-8') as f: + f.write("import fractions; fractions.shadowing_module\n") + f.write(script) + + expected_error = error + ( + rb" \(consider renaming '.*[\\/]fractions[\\/]+__init__.py' since it has the " + rb"same name as the standard library module named 'fractions' " + rb"and prevents importing that standard library module\)" + ) + + popen = script_helper.spawn_python(os.path.join(tmp, "main.py"), cwd=tmp) + stdout, stderr = popen.communicate() + self.assertRegex(stdout, expected_error) + + popen = script_helper.spawn_python('-m', 'main', cwd=tmp) + stdout, stderr = popen.communicate() + self.assertRegex(stdout, expected_error) + + # and there's no shadowing at all when using -P + popen = script_helper.spawn_python('-P', 'main.py', cwd=tmp) + stdout, stderr = popen.communicate() + self.assertRegex(stdout, b"module 'fractions' has no attribute 'shadowing_module'") + + def test_script_shadowing_third_party(self): + script_errors = [ + ( + "import numpy\nnumpy.array", + rb"AttributeError: module 'numpy' has no attribute 'array'" + ), + ( + "from numpy import array", + rb"ImportError: cannot import name 'array' from 'numpy'" + ) + ] + for script, error in script_errors: + with self.subTest(script=script), os_helper.temp_dir() as tmp: + with open(os.path.join(tmp, "numpy.py"), "w", encoding='utf-8') as f: + f.write(script) + + expected_error = error + ( + rb" \(consider renaming '.*numpy.py' if it has the " + rb"same name as a library you intended to import\)\s+\z" + ) + + popen = script_helper.spawn_python(os.path.join(tmp, "numpy.py")) + stdout, stderr = popen.communicate() + self.assertRegex(stdout, expected_error) + + popen = script_helper.spawn_python('-m', 'numpy', cwd=tmp) + stdout, stderr = popen.communicate() + self.assertRegex(stdout, expected_error) + + popen = script_helper.spawn_python('-c', 'import numpy', cwd=tmp) + stdout, stderr = popen.communicate() + self.assertRegex(stdout, expected_error) + + def test_script_maybe_not_shadowing_third_party(self): + with os_helper.temp_dir() as tmp: + with open(os.path.join(tmp, "numpy.py"), "w", encoding='utf-8') as f: + f.write("this_script_does_not_attempt_to_import_numpy = True") + + expected_error = ( + rb"AttributeError: module 'numpy' has no attribute 'attr'\s+\z" + ) + popen = script_helper.spawn_python('-c', 'import numpy; numpy.attr', cwd=tmp) + stdout, stderr = popen.communicate() + self.assertRegex(stdout, expected_error) + + expected_error = ( + rb"ImportError: cannot import name 'attr' from 'numpy' \(.*\)\s+\z" + ) + popen = script_helper.spawn_python('-c', 'from numpy import attr', cwd=tmp) + stdout, stderr = popen.communicate() + self.assertRegex(stdout, expected_error) + + def test_script_shadowing_stdlib_edge_cases(self): + with os_helper.temp_dir() as tmp: + with open(os.path.join(tmp, "fractions.py"), "w", encoding='utf-8') as f: + f.write("shadowing_module = True") + + # Unhashable str subclass + with open(os.path.join(tmp, "main.py"), "w", encoding='utf-8') as f: + f.write(""" +import fractions +fractions.shadowing_module +class substr(str): + __hash__ = None +fractions.__name__ = substr('fractions') +try: + fractions.Fraction +except TypeError as e: + print(str(e)) +""") + popen = script_helper.spawn_python("main.py", cwd=tmp) + stdout, stderr = popen.communicate() + self.assertIn(b"unhashable type: 'substr'", stdout.rstrip()) + + with open(os.path.join(tmp, "main.py"), "w", encoding='utf-8') as f: + f.write(""" +import fractions +fractions.shadowing_module +class substr(str): + __hash__ = None +fractions.__name__ = substr('fractions') +try: + from fractions import Fraction +except TypeError as e: + print(str(e)) +""") + + popen = script_helper.spawn_python("main.py", cwd=tmp) + stdout, stderr = popen.communicate() + self.assertIn(b"unhashable type: 'substr'", stdout.rstrip()) + + # Various issues with sys module + with open(os.path.join(tmp, "main.py"), "w", encoding='utf-8') as f: + f.write(""" +import fractions +fractions.shadowing_module + +import sys +sys.stdlib_module_names = None +try: + fractions.Fraction +except AttributeError as e: + print(str(e)) + +del sys.stdlib_module_names +try: + fractions.Fraction +except AttributeError as e: + print(str(e)) + +sys.path = [0] +try: + fractions.Fraction +except AttributeError as e: + print(str(e)) +""") + popen = script_helper.spawn_python("main.py", cwd=tmp) + stdout, stderr = popen.communicate() + lines = stdout.splitlines() + self.assertEqual(len(lines), 3) + for line in lines: + self.assertEqual(line, b"module 'fractions' has no attribute 'Fraction'") + + with open(os.path.join(tmp, "main.py"), "w", encoding='utf-8') as f: + f.write(""" +import fractions +fractions.shadowing_module + +import sys +sys.stdlib_module_names = None +try: + from fractions import Fraction +except ImportError as e: + print(str(e)) + +del sys.stdlib_module_names +try: + from fractions import Fraction +except ImportError as e: + print(str(e)) + +sys.path = [0] +try: + from fractions import Fraction +except ImportError as e: + print(str(e)) +""") + popen = script_helper.spawn_python("main.py", cwd=tmp) + stdout, stderr = popen.communicate() + lines = stdout.splitlines() + self.assertEqual(len(lines), 3) + for line in lines: + self.assertRegex(line, rb"cannot import name 'Fraction' from 'fractions' \(.*\)") + + # Various issues with origin + with open(os.path.join(tmp, "main.py"), "w", encoding='utf-8') as f: + f.write(""" +import fractions +fractions.shadowing_module +del fractions.__spec__.origin +try: + fractions.Fraction +except AttributeError as e: + print(str(e)) + +fractions.__spec__.origin = [] +try: + fractions.Fraction +except AttributeError as e: + print(str(e)) +""") + + popen = script_helper.spawn_python("main.py", cwd=tmp) + stdout, stderr = popen.communicate() + lines = stdout.splitlines() + self.assertEqual(len(lines), 2) + for line in lines: + self.assertEqual(line, b"module 'fractions' has no attribute 'Fraction'") + + with open(os.path.join(tmp, "main.py"), "w", encoding='utf-8') as f: + f.write(""" +import fractions +fractions.shadowing_module +del fractions.__spec__.origin +try: + from fractions import Fraction +except ImportError as e: + print(str(e)) + +fractions.__spec__.origin = [] +try: + from fractions import Fraction +except ImportError as e: + print(str(e)) +""") + popen = script_helper.spawn_python("main.py", cwd=tmp) + stdout, stderr = popen.communicate() + lines = stdout.splitlines() + self.assertEqual(len(lines), 2) + for line in lines: + self.assertRegex(line, rb"cannot import name 'Fraction' from 'fractions' \(.*\)") + + @unittest.skipIf(sys.platform == 'win32', 'Cannot delete cwd on Windows') + @unittest.skipIf(sys.platform == 'sunos5', 'Cannot delete cwd on Solaris/Illumos') + @unittest.skipIf(sys.platform.startswith('aix'), 'Cannot delete cwd on AIX') + def test_script_shadowing_stdlib_cwd_failure(self): + with os_helper.temp_dir() as tmp: + subtmp = os.path.join(tmp, "subtmp") + os.mkdir(subtmp) + with open(os.path.join(subtmp, "main.py"), "w", encoding='utf-8') as f: + f.write(f""" +import sys +assert sys.path[0] == '' + +import os +import shutil +shutil.rmtree(os.getcwd()) + +os.does_not_exist +""") + # Use -c to ensure sys.path[0] is "" + popen = script_helper.spawn_python("-c", "import main", cwd=subtmp) + stdout, stderr = popen.communicate() + expected_error = rb"AttributeError: module 'os' has no attribute 'does_not_exist'" + self.assertRegex(stdout, expected_error) + + def test_script_shadowing_stdlib_sys_path_modification(self): + script_errors = [ + ( + "import fractions\nfractions.Fraction", + rb"AttributeError: module 'fractions' has no attribute 'Fraction'" + ), + ( + "from fractions import Fraction", + rb"ImportError: cannot import name 'Fraction' from 'fractions'" + ) + ] + for script, error in script_errors: + with self.subTest(script=script), os_helper.temp_dir() as tmp: + with open(os.path.join(tmp, "fractions.py"), "w", encoding='utf-8') as f: + f.write("shadowing_module = True") + with open(os.path.join(tmp, "main.py"), "w", encoding='utf-8') as f: + f.write('import sys; sys.path.insert(0, "this_folder_does_not_exist")\n') + f.write(script) + expected_error = error + ( + rb" \(consider renaming '.*fractions.py' since it has the " + rb"same name as the standard library module named 'fractions' " + rb"and prevents importing that standard library module\)" + ) + + popen = script_helper.spawn_python("main.py", cwd=tmp) + stdout, stderr = popen.communicate() + self.assertRegex(stdout, expected_error) + + def test_create_dynamic_null(self): + with self.assertRaisesRegex(ValueError, 'embedded null character'): + class Spec: + name = "a\x00b" + origin = "abc" + _imp.create_dynamic(Spec()) + + with self.assertRaisesRegex(ValueError, 'embedded null character'): + class Spec2: + name = "abc" + origin = "a\x00b" + _imp.create_dynamic(Spec2()) + + +@skip_if_dont_write_bytecode +class FilePermissionTests(unittest.TestCase): + # tests for file mode on cached .pyc files + + @unittest.skipUnless(os.name == 'posix', + "test meaningful only on posix systems") + @unittest.skipIf( + is_wasm32, + "Emscripten's/WASI's umask is a stub." + ) + def test_creation_mode(self): + mask = 0o022 + with temp_umask(mask), ready_to_import() as (name, path): + cached_path = importlib.util.cache_from_source(path) + module = __import__(name) + if not os.path.exists(cached_path): + self.fail("__import__ did not result in creation of " + "a .pyc file") + stat_info = os.stat(cached_path) + + # Check that the umask is respected, and the executable bits + # aren't set. + self.assertEqual(oct(stat.S_IMODE(stat_info.st_mode)), + oct(0o666 & ~mask)) + + @unittest.skipUnless(os.name == 'posix', + "test meaningful only on posix systems") + @os_helper.skip_unless_working_chmod + def test_cached_mode_issue_2051(self): + # permissions of .pyc should match those of .py, regardless of mask + mode = 0o600 + with temp_umask(0o022), ready_to_import() as (name, path): + cached_path = importlib.util.cache_from_source(path) + os.chmod(path, mode) + __import__(name) + if not os.path.exists(cached_path): + self.fail("__import__ did not result in creation of " + "a .pyc file") + stat_info = os.stat(cached_path) + + self.assertEqual(oct(stat.S_IMODE(stat_info.st_mode)), oct(mode)) + + @unittest.skipUnless(os.name == 'posix', + "test meaningful only on posix systems") + @os_helper.skip_unless_working_chmod + def test_cached_readonly(self): + mode = 0o400 + with temp_umask(0o022), ready_to_import() as (name, path): + cached_path = importlib.util.cache_from_source(path) + os.chmod(path, mode) + __import__(name) + if not os.path.exists(cached_path): + self.fail("__import__ did not result in creation of " + "a .pyc file") + stat_info = os.stat(cached_path) + + expected = mode | 0o200 # Account for fix for issue #6074 + self.assertEqual(oct(stat.S_IMODE(stat_info.st_mode)), oct(expected)) + + def test_pyc_always_writable(self): + # Initially read-only .pyc files on Windows used to cause problems + # with later updates, see issue #6074 for details + with ready_to_import() as (name, path): + # Write a Python file, make it read-only and import it + with open(path, 'w', encoding='utf-8') as f: + f.write("x = 'original'\n") + # Tweak the mtime of the source to ensure pyc gets updated later + s = os.stat(path) + os.utime(path, (s.st_atime, s.st_mtime-100000000)) + os.chmod(path, 0o400) + m = __import__(name) + self.assertEqual(m.x, 'original') + # Change the file and then reimport it + os.chmod(path, 0o600) + with open(path, 'w', encoding='utf-8') as f: + f.write("x = 'rewritten'\n") + unload(name) + importlib.invalidate_caches() + m = __import__(name) + self.assertEqual(m.x, 'rewritten') + # Now delete the source file and check the pyc was rewritten + unlink(path) + unload(name) + importlib.invalidate_caches() + bytecode_only = path + "c" + os.rename(importlib.util.cache_from_source(path), bytecode_only) + m = __import__(name) + self.assertEqual(m.x, 'rewritten') + + +class PycRewritingTests(unittest.TestCase): + # Test that the `co_filename` attribute on code objects always points + # to the right file, even when various things happen (e.g. both the .py + # and the .pyc file are renamed). + + module_name = "unlikely_module_name" + module_source = """ +import sys +code_filename = sys._getframe().f_code.co_filename +module_filename = __file__ +constant = 1000 +def func(): + pass +func_filename = func.__code__.co_filename +""" + dir_name = os.path.abspath(TESTFN) + file_name = os.path.join(dir_name, module_name) + os.extsep + "py" + compiled_name = importlib.util.cache_from_source(file_name) + + def setUp(self): + self.sys_path = sys.path[:] + self.orig_module = sys.modules.pop(self.module_name, None) + os.mkdir(self.dir_name) + with open(self.file_name, "w", encoding='utf-8') as f: + f.write(self.module_source) + sys.path.insert(0, self.dir_name) + importlib.invalidate_caches() + + def tearDown(self): + sys.path[:] = self.sys_path + if self.orig_module is not None: + sys.modules[self.module_name] = self.orig_module + else: + unload(self.module_name) + unlink(self.file_name) + unlink(self.compiled_name) + rmtree(self.dir_name) + + def import_module(self): + ns = globals() + __import__(self.module_name, ns, ns) + return sys.modules[self.module_name] + + def test_basics(self): + mod = self.import_module() + self.assertEqual(mod.module_filename, self.file_name) + self.assertEqual(mod.code_filename, self.file_name) + self.assertEqual(mod.func_filename, self.file_name) + del sys.modules[self.module_name] + mod = self.import_module() + self.assertEqual(mod.module_filename, self.file_name) + self.assertEqual(mod.code_filename, self.file_name) + self.assertEqual(mod.func_filename, self.file_name) + + def test_incorrect_code_name(self): + py_compile.compile(self.file_name, dfile="another_module.py") + mod = self.import_module() + self.assertEqual(mod.module_filename, self.file_name) + self.assertEqual(mod.code_filename, self.file_name) + self.assertEqual(mod.func_filename, self.file_name) + + def test_module_without_source(self): + target = "another_module.py" + py_compile.compile(self.file_name, dfile=target) + os.remove(self.file_name) + pyc_file = make_legacy_pyc(self.file_name) + importlib.invalidate_caches() + mod = self.import_module() + self.assertEqual(mod.module_filename, pyc_file) + self.assertEqual(mod.code_filename, target) + self.assertEqual(mod.func_filename, target) + + def test_foreign_code(self): + py_compile.compile(self.file_name) + with open(self.compiled_name, "rb") as f: + header = f.read(16) + code = marshal.load(f) + constants = list(code.co_consts) + foreign_code = importlib.import_module.__code__ + pos = constants.index(1000) + constants[pos] = foreign_code + code = code.replace(co_consts=tuple(constants)) + with open(self.compiled_name, "wb") as f: + f.write(header) + marshal.dump(code, f) + mod = self.import_module() + self.assertEqual(mod.constant.co_filename, foreign_code.co_filename) + + +class PathsTests(unittest.TestCase): + SAMPLES = ('test', 'test\u00e4\u00f6\u00fc\u00df', 'test\u00e9\u00e8', + 'test\u00b0\u00b3\u00b2') + path = TESTFN + + def setUp(self): + os.mkdir(self.path) + self.syspath = sys.path[:] + + def tearDown(self): + rmtree(self.path) + sys.path[:] = self.syspath + + # Regression test for http://bugs.python.org/issue1293. + def test_trailing_slash(self): + with open(os.path.join(self.path, 'test_trailing_slash.py'), + 'w', encoding='utf-8') as f: + f.write("testdata = 'test_trailing_slash'") + sys.path.append(self.path+'/') + mod = __import__("test_trailing_slash") + self.assertEqual(mod.testdata, 'test_trailing_slash') + unload("test_trailing_slash") + + # Regression test for http://bugs.python.org/issue3677. + @unittest.skipUnless(sys.platform == 'win32', 'Windows-specific') + def test_UNC_path(self): + with open(os.path.join(self.path, 'test_unc_path.py'), 'w') as f: + f.write("testdata = 'test_unc_path'") + importlib.invalidate_caches() + # Create the UNC path, like \\myhost\c$\foo\bar. + path = os.path.abspath(self.path) + import socket + hn = socket.gethostname() + drive = path[0] + unc = "\\\\%s\\%s$"%(hn, drive) + unc += path[2:] + try: + os.listdir(unc) + except OSError as e: + if e.errno in (errno.EPERM, errno.EACCES, errno.ENOENT): + # See issue #15338 + self.skipTest("cannot access administrative share %r" % (unc,)) + raise + sys.path.insert(0, unc) + try: + mod = __import__("test_unc_path") + except ImportError as e: + self.fail("could not import 'test_unc_path' from %r: %r" + % (unc, e)) + self.assertEqual(mod.testdata, 'test_unc_path') + self.assertStartsWith(mod.__file__, unc) + unload("test_unc_path") + + +class RelativeImportTests(unittest.TestCase): + + def tearDown(self): + unload("test.relimport") + setUp = tearDown + + def test_relimport_star(self): + # This will import * from .test_import. + from .. import relimport + self.assertHasAttr(relimport, "RelativeImportTests") + + def test_issue3221(self): + # Note for mergers: the 'absolute' tests from the 2.x branch + # are missing in Py3k because implicit relative imports are + # a thing of the past + # + # Regression test for http://bugs.python.org/issue3221. + def check_relative(): + exec("from . import relimport", ns) + + # Check relative import OK with __package__ and __name__ correct + ns = dict(__package__='test', __name__='test.notarealmodule') + check_relative() + + # Check relative import OK with only __name__ wrong + ns = dict(__package__='test', __name__='notarealpkg.notarealmodule') + check_relative() + + # Check relative import fails with only __package__ wrong + ns = dict(__package__='foo', __name__='test.notarealmodule') + self.assertRaises(ModuleNotFoundError, check_relative) + + # Check relative import fails with __package__ and __name__ wrong + ns = dict(__package__='foo', __name__='notarealpkg.notarealmodule') + self.assertRaises(ModuleNotFoundError, check_relative) + + # Check relative import fails with package set to a non-string + ns = dict(__package__=object()) + self.assertRaises(TypeError, check_relative) + + def test_parentless_import_shadowed_by_global(self): + # Test as if this were done from the REPL where this error most commonly occurs (bpo-37409). + script_helper.assert_python_failure('-W', 'ignore', '-c', + "foo = 1; from . import foo") + + def test_absolute_import_without_future(self): + # If explicit relative import syntax is used, then do not try + # to perform an absolute import in the face of failure. + # Issue #7902. + with self.assertRaises(ImportError): + from .os import sep + self.fail("explicit relative import triggered an " + "implicit absolute import") + + def test_import_from_non_package(self): + path = os.path.join(os.path.dirname(__file__), 'data', 'package2') + with uncache('submodule1', 'submodule2'), DirsOnSysPath(path): + with self.assertRaises(ImportError): + import submodule1 + self.assertNotIn('submodule1', sys.modules) + self.assertNotIn('submodule2', sys.modules) + + def test_import_from_unloaded_package(self): + with uncache('package2', 'package2.submodule1', 'package2.submodule2'), \ + DirsOnSysPath(os.path.join(os.path.dirname(__file__), 'data')): + import package2.submodule1 + package2.submodule1.submodule2 + + def test_rebinding(self): + # The same data is also used for testing pkgutil.resolve_name() + # in test_pkgutil and mock.patch in test_unittest. + path = os.path.join(os.path.dirname(__file__), 'data') + with uncache('package3', 'package3.submodule'), DirsOnSysPath(path): + from package3 import submodule + self.assertEqual(submodule.attr, 'rebound') + import package3.submodule as submodule + self.assertEqual(submodule.attr, 'rebound') + with uncache('package3', 'package3.submodule'), DirsOnSysPath(path): + import package3.submodule as submodule + self.assertEqual(submodule.attr, 'rebound') + from package3 import submodule + self.assertEqual(submodule.attr, 'rebound') + + def test_rebinding2(self): + path = os.path.join(os.path.dirname(__file__), 'data') + with uncache('package4', 'package4.submodule'), DirsOnSysPath(path): + import package4.submodule as submodule + self.assertEqual(submodule.attr, 'submodule') + from package4 import submodule + self.assertEqual(submodule.attr, 'submodule') + with uncache('package4', 'package4.submodule'), DirsOnSysPath(path): + from package4 import submodule + self.assertEqual(submodule.attr, 'origin') + import package4.submodule as submodule + self.assertEqual(submodule.attr, 'submodule') + + +class OverridingImportBuiltinTests(unittest.TestCase): + def test_override_builtin(self): + # Test that overriding builtins.__import__ can bypass sys.modules. + import os + + def foo(): + import os + return os + self.assertEqual(foo(), os) # Quick sanity check. + + with swap_attr(builtins, "__import__", lambda *x: 5): + self.assertEqual(foo(), 5) + + # Test what happens when we shadow __import__ in globals(); this + # currently does not impact the import process, but if this changes, + # other code will need to change, so keep this test as a tripwire. + with swap_item(globals(), "__import__", lambda *x: 5): + self.assertEqual(foo(), os) + + +class PycacheTests(unittest.TestCase): + # Test the various PEP 3147/488-related behaviors. + + def _clean(self): + forget(TESTFN) + rmtree('__pycache__') + unlink(self.source) + + def setUp(self): + self.source = TESTFN + '.py' + self._clean() + with open(self.source, 'w', encoding='utf-8') as fp: + print('# This is a test file written by test_import.py', file=fp) + sys.path.insert(0, os.curdir) + importlib.invalidate_caches() + + def tearDown(self): + assert sys.path[0] == os.curdir, 'Unexpected sys.path[0]' + del sys.path[0] + self._clean() + + @skip_if_dont_write_bytecode + def test_import_pyc_path(self): + self.assertFalse(os.path.exists('__pycache__')) + __import__(TESTFN) + self.assertTrue(os.path.exists('__pycache__')) + pyc_path = importlib.util.cache_from_source(self.source) + self.assertTrue(os.path.exists(pyc_path), + 'bytecode file {!r} for {!r} does not ' + 'exist'.format(pyc_path, TESTFN)) + + @unittest.skipUnless(os.name == 'posix', + "test meaningful only on posix systems") + @skip_if_dont_write_bytecode + @os_helper.skip_unless_working_chmod + @os_helper.skip_if_dac_override + @unittest.skipIf(is_emscripten, "umask is a stub") + def test_unwritable_directory(self): + # When the umask causes the new __pycache__ directory to be + # unwritable, the import still succeeds but no .pyc file is written. + with temp_umask(0o222): + __import__(TESTFN) + self.assertTrue(os.path.exists('__pycache__')) + pyc_path = importlib.util.cache_from_source(self.source) + self.assertFalse(os.path.exists(pyc_path), + 'bytecode file {!r} for {!r} ' + 'exists'.format(pyc_path, TESTFN)) + + @skip_if_dont_write_bytecode + def test_missing_source(self): + # With PEP 3147 cache layout, removing the source but leaving the pyc + # file does not satisfy the import. + __import__(TESTFN) + pyc_file = importlib.util.cache_from_source(self.source) + self.assertTrue(os.path.exists(pyc_file)) + os.remove(self.source) + forget(TESTFN) + importlib.invalidate_caches() + self.assertRaises(ImportError, __import__, TESTFN) + + @skip_if_dont_write_bytecode + def test_missing_source_legacy(self): + # Like test_missing_source() except that for backward compatibility, + # when the pyc file lives where the py file would have been (and named + # without the tag), it is importable. The __file__ of the imported + # module is the pyc location. + __import__(TESTFN) + # pyc_file gets removed in _clean() via tearDown(). + pyc_file = make_legacy_pyc(self.source) + os.remove(self.source) + unload(TESTFN) + importlib.invalidate_caches() + m = __import__(TESTFN) + try: + self.assertEqual(m.__file__, + os.path.join(os.getcwd(), os.path.relpath(pyc_file))) + finally: + os.remove(pyc_file) + + def test___cached__(self): + # Modules now also have an __cached__ that points to the pyc file. + m = __import__(TESTFN) + pyc_file = importlib.util.cache_from_source(TESTFN + '.py') + self.assertEqual(m.__cached__, os.path.join(os.getcwd(), pyc_file)) + + @skip_if_dont_write_bytecode + def test___cached___legacy_pyc(self): + # Like test___cached__() except that for backward compatibility, + # when the pyc file lives where the py file would have been (and named + # without the tag), it is importable. The __cached__ of the imported + # module is the pyc location. + __import__(TESTFN) + # pyc_file gets removed in _clean() via tearDown(). + pyc_file = make_legacy_pyc(self.source) + os.remove(self.source) + unload(TESTFN) + importlib.invalidate_caches() + m = __import__(TESTFN) + self.assertEqual(m.__cached__, + os.path.join(os.getcwd(), os.path.relpath(pyc_file))) + + @skip_if_dont_write_bytecode + def test_package___cached__(self): + # Like test___cached__ but for packages. + def cleanup(): + rmtree('pep3147') + unload('pep3147.foo') + unload('pep3147') + os.mkdir('pep3147') + self.addCleanup(cleanup) + # Touch the __init__.py + with open(os.path.join('pep3147', '__init__.py'), 'wb'): + pass + with open(os.path.join('pep3147', 'foo.py'), 'wb'): + pass + importlib.invalidate_caches() + m = __import__('pep3147.foo') + init_pyc = importlib.util.cache_from_source( + os.path.join('pep3147', '__init__.py')) + self.assertEqual(m.__cached__, os.path.join(os.getcwd(), init_pyc)) + foo_pyc = importlib.util.cache_from_source(os.path.join('pep3147', 'foo.py')) + self.assertEqual(sys.modules['pep3147.foo'].__cached__, + os.path.join(os.getcwd(), foo_pyc)) + + def test_package___cached___from_pyc(self): + # Like test___cached__ but ensuring __cached__ when imported from a + # PEP 3147 pyc file. + def cleanup(): + rmtree('pep3147') + unload('pep3147.foo') + unload('pep3147') + os.mkdir('pep3147') + self.addCleanup(cleanup) + # Touch the __init__.py + with open(os.path.join('pep3147', '__init__.py'), 'wb'): + pass + with open(os.path.join('pep3147', 'foo.py'), 'wb'): + pass + importlib.invalidate_caches() + m = __import__('pep3147.foo') + unload('pep3147.foo') + unload('pep3147') + importlib.invalidate_caches() + m = __import__('pep3147.foo') + init_pyc = importlib.util.cache_from_source( + os.path.join('pep3147', '__init__.py')) + self.assertEqual(m.__cached__, os.path.join(os.getcwd(), init_pyc)) + foo_pyc = importlib.util.cache_from_source(os.path.join('pep3147', 'foo.py')) + self.assertEqual(sys.modules['pep3147.foo'].__cached__, + os.path.join(os.getcwd(), foo_pyc)) + + def test_recompute_pyc_same_second(self): + # Even when the source file doesn't change timestamp, a change in + # source size is enough to trigger recomputation of the pyc file. + __import__(TESTFN) + unload(TESTFN) + with open(self.source, 'a', encoding='utf-8') as fp: + print("x = 5", file=fp) + m = __import__(TESTFN) + self.assertEqual(m.x, 5) + + +class TestSymbolicallyLinkedPackage(unittest.TestCase): + package_name = 'sample' + tagged = package_name + '-tagged' + + def setUp(self): + os_helper.rmtree(self.tagged) + os_helper.rmtree(self.package_name) + self.orig_sys_path = sys.path[:] + + # create a sample package; imagine you have a package with a tag and + # you want to symbolically link it from its untagged name. + os.mkdir(self.tagged) + self.addCleanup(os_helper.rmtree, self.tagged) + init_file = os.path.join(self.tagged, '__init__.py') + os_helper.create_empty_file(init_file) + assert os.path.exists(init_file) + + # now create a symlink to the tagged package + # sample -> sample-tagged + os.symlink(self.tagged, self.package_name, target_is_directory=True) + self.addCleanup(os_helper.unlink, self.package_name) + importlib.invalidate_caches() + + self.assertEqual(os.path.isdir(self.package_name), True) + + assert os.path.isfile(os.path.join(self.package_name, '__init__.py')) + + def tearDown(self): + sys.path[:] = self.orig_sys_path + + # regression test for issue6727 + @unittest.skipUnless( + not hasattr(sys, 'getwindowsversion') + or sys.getwindowsversion() >= (6, 0), + "Windows Vista or later required") + @os_helper.skip_unless_symlink + def test_symlinked_dir_importable(self): + # make sure sample can only be imported from the current directory. + sys.path[:] = ['.'] + assert os.path.exists(self.package_name) + assert os.path.exists(os.path.join(self.package_name, '__init__.py')) + + # Try to import the package + importlib.import_module(self.package_name) + + +@cpython_only +class ImportlibBootstrapTests(unittest.TestCase): + # These tests check that importlib is bootstrapped. + + def test_frozen_importlib(self): + mod = sys.modules['_frozen_importlib'] + self.assertTrue(mod) + + def test_frozen_importlib_is_bootstrap(self): + from importlib import _bootstrap + mod = sys.modules['_frozen_importlib'] + self.assertIs(mod, _bootstrap) + self.assertEqual(mod.__name__, 'importlib._bootstrap') + self.assertEqual(mod.__package__, 'importlib') + self.assertEndsWith(mod.__file__, '_bootstrap.py') + + def test_frozen_importlib_external_is_bootstrap_external(self): + from importlib import _bootstrap_external + mod = sys.modules['_frozen_importlib_external'] + self.assertIs(mod, _bootstrap_external) + self.assertEqual(mod.__name__, 'importlib._bootstrap_external') + self.assertEqual(mod.__package__, 'importlib') + self.assertEndsWith(mod.__file__, '_bootstrap_external.py') + + def test_there_can_be_only_one(self): + # Issue #15386 revealed a tricky loophole in the bootstrapping + # This test is technically redundant, since the bug caused importing + # this test module to crash completely, but it helps prove the point + from importlib import machinery + mod = sys.modules['_frozen_importlib'] + self.assertIs(machinery.ModuleSpec, mod.ModuleSpec) + + +@cpython_only +class GetSourcefileTests(unittest.TestCase): + + """Test importlib._bootstrap_external._get_sourcefile() as used by the C API. + + Because of the peculiarities of the need of this function, the tests are + knowingly whitebox tests. + + """ + + def test_get_sourcefile(self): + # Given a valid bytecode path, return the path to the corresponding + # source file if it exists. + with mock.patch('importlib._bootstrap_external._path_isfile') as _path_isfile: + _path_isfile.return_value = True + path = TESTFN + '.pyc' + expect = TESTFN + '.py' + self.assertEqual(_get_sourcefile(path), expect) + + def test_get_sourcefile_no_source(self): + # Given a valid bytecode path without a corresponding source path, + # return the original bytecode path. + with mock.patch('importlib._bootstrap_external._path_isfile') as _path_isfile: + _path_isfile.return_value = False + path = TESTFN + '.pyc' + self.assertEqual(_get_sourcefile(path), path) + + def test_get_sourcefile_bad_ext(self): + # Given a path with an invalid bytecode extension, return the + # bytecode path passed as the argument. + path = TESTFN + '.bad_ext' + self.assertEqual(_get_sourcefile(path), path) + + +class ImportTracebackTests(unittest.TestCase): + + def setUp(self): + os.mkdir(TESTFN) + self.old_path = sys.path[:] + sys.path.insert(0, TESTFN) + + def tearDown(self): + sys.path[:] = self.old_path + rmtree(TESTFN) + + def create_module(self, mod, contents, ext=".py"): + fname = os.path.join(TESTFN, mod + ext) + with open(fname, "w", encoding='utf-8') as f: + f.write(contents) + self.addCleanup(unload, mod) + importlib.invalidate_caches() + return fname + + def assert_traceback(self, tb, files): + deduped_files = [] + while tb: + code = tb.tb_frame.f_code + fn = code.co_filename + if not deduped_files or fn != deduped_files[-1]: + deduped_files.append(fn) + tb = tb.tb_next + self.assertEqual(len(deduped_files), len(files), deduped_files) + for fn, pat in zip(deduped_files, files): + self.assertIn(pat, fn) + + def test_nonexistent_module(self): + try: + # assertRaises() clears __traceback__ + import nonexistent_xyzzy + except ImportError as e: + tb = e.__traceback__ + else: + self.fail("ImportError should have been raised") + self.assert_traceback(tb, [__file__]) + + def test_nonexistent_module_nested(self): + self.create_module("foo", "import nonexistent_xyzzy") + try: + import foo + except ImportError as e: + tb = e.__traceback__ + else: + self.fail("ImportError should have been raised") + self.assert_traceback(tb, [__file__, 'foo.py']) + + def test_exec_failure(self): + self.create_module("foo", "1/0") + try: + import foo + except ZeroDivisionError as e: + tb = e.__traceback__ + else: + self.fail("ZeroDivisionError should have been raised") + self.assert_traceback(tb, [__file__, 'foo.py']) + + def test_exec_failure_nested(self): + self.create_module("foo", "import bar") + self.create_module("bar", "1/0") + try: + import foo + except ZeroDivisionError as e: + tb = e.__traceback__ + else: + self.fail("ZeroDivisionError should have been raised") + self.assert_traceback(tb, [__file__, 'foo.py', 'bar.py']) + + # A few more examples from issue #15425 + def test_syntax_error(self): + self.create_module("foo", "invalid syntax is invalid") + try: + import foo + except SyntaxError as e: + tb = e.__traceback__ + else: + self.fail("SyntaxError should have been raised") + self.assert_traceback(tb, [__file__]) + + def _setup_broken_package(self, parent, child): + pkg_name = "_parent_foo" + self.addCleanup(unload, pkg_name) + pkg_path = os.path.join(TESTFN, pkg_name) + os.mkdir(pkg_path) + # Touch the __init__.py + init_path = os.path.join(pkg_path, '__init__.py') + with open(init_path, 'w', encoding='utf-8') as f: + f.write(parent) + bar_path = os.path.join(pkg_path, 'bar.py') + with open(bar_path, 'w', encoding='utf-8') as f: + f.write(child) + importlib.invalidate_caches() + return init_path, bar_path + + def test_broken_submodule(self): + init_path, bar_path = self._setup_broken_package("", "1/0") + try: + import _parent_foo.bar + except ZeroDivisionError as e: + tb = e.__traceback__ + else: + self.fail("ZeroDivisionError should have been raised") + self.assert_traceback(tb, [__file__, bar_path]) + + def test_broken_from(self): + init_path, bar_path = self._setup_broken_package("", "1/0") + try: + from _parent_foo import bar + except ZeroDivisionError as e: + tb = e.__traceback__ + else: + self.fail("ImportError should have been raised") + self.assert_traceback(tb, [__file__, bar_path]) + + def test_broken_parent(self): + init_path, bar_path = self._setup_broken_package("1/0", "") + try: + import _parent_foo.bar + except ZeroDivisionError as e: + tb = e.__traceback__ + else: + self.fail("ZeroDivisionError should have been raised") + self.assert_traceback(tb, [__file__, init_path]) + + def test_broken_parent_from(self): + init_path, bar_path = self._setup_broken_package("1/0", "") + try: + from _parent_foo import bar + except ZeroDivisionError as e: + tb = e.__traceback__ + else: + self.fail("ZeroDivisionError should have been raised") + self.assert_traceback(tb, [__file__, init_path]) + + @cpython_only + def test_import_bug(self): + # We simulate a bug in importlib and check that it's not stripped + # away from the traceback. + self.create_module("foo", "") + importlib = sys.modules['_frozen_importlib_external'] + if 'load_module' in vars(importlib.SourceLoader): + old_exec_module = importlib.SourceLoader.exec_module + else: + old_exec_module = None + try: + def exec_module(*args): + 1/0 + importlib.SourceLoader.exec_module = exec_module + try: + import foo + except ZeroDivisionError as e: + tb = e.__traceback__ + else: + self.fail("ZeroDivisionError should have been raised") + self.assert_traceback(tb, [__file__, 'imports.modules_by_index was set for the module). + self.assertEqual(snap.lookedup_id, snap.id) + self.assertEqual(snap.cached_id, snap.id) + with self.assertRaises(AttributeError): + snap.spam + else: + self.assertIs(snap.lookedup, mod) + self.assertIs(snap.cached, mod) + + def check_direct(self, loaded): + # The module has its own PyModuleDef, with a matching name. + self.assertEqual(loaded.module.__name__, loaded.name) + self.assertIs(loaded.snapshot.lookedup, loaded.module) + + def check_indirect(self, loaded, orig): + # The module re-uses another's PyModuleDef, with a different name. + assert orig is not loaded.module + assert orig.__name__ != loaded.name + self.assertNotEqual(loaded.module.__name__, loaded.name) + self.assertIs(loaded.snapshot.lookedup, loaded.module) + + def check_basic(self, loaded, expected_init_count): + # m_size == -1 + # The module loads fresh the first time and copies m_copy after. + snap = loaded.snapshot + self.assertIsNot(snap.state_initialized, None) + self.assertIsInstance(snap.init_count, int) + self.assertGreater(snap.init_count, 0) + self.assertEqual(snap.init_count, expected_init_count) + + def check_with_reinit(self, loaded): + # m_size >= 0 + # The module loads fresh every time. + pass + + def check_fresh(self, loaded): + """ + The module had not been loaded before (at least since fully reset). + """ + snap = loaded.snapshot + # The module's init func was run. + # A copy of the module's __dict__ was stored in def->m_base.m_copy. + # The previous m_copy was deleted first. + # _PyRuntime.imports.extensions was set. + self.assertEqual(snap.init_count, 1) + # The global state was initialized. + # The module attrs were initialized from that state. + self.assertEqual(snap.module._module_initialized, + snap.state_initialized) + + def check_semi_fresh(self, loaded, base, prev): + """ + The module had been loaded before and then reset + (but the module global state wasn't). + """ + snap = loaded.snapshot + # The module's init func was run again. + # A copy of the module's __dict__ was stored in def->m_base.m_copy. + # The previous m_copy was deleted first. + # The module globals did not get reset. + self.assertNotEqual(snap.id, base.snapshot.id) + self.assertNotEqual(snap.id, prev.snapshot.id) + self.assertEqual(snap.init_count, prev.snapshot.init_count + 1) + # The global state was updated. + # The module attrs were initialized from that state. + self.assertEqual(snap.module._module_initialized, + snap.state_initialized) + self.assertNotEqual(snap.state_initialized, + base.snapshot.state_initialized) + self.assertNotEqual(snap.state_initialized, + prev.snapshot.state_initialized) + + def check_copied(self, loaded, base): + """ + The module had been loaded before and never reset. + """ + snap = loaded.snapshot + # The module's init func was not run again. + # The interpreter copied m_copy, as set by the other interpreter, + # with objects owned by the other interpreter. + # The module globals did not get reset. + self.assertNotEqual(snap.id, base.snapshot.id) + self.assertEqual(snap.init_count, base.snapshot.init_count) + # The global state was not updated since the init func did not run. + # The module attrs were not directly initialized from that state. + # The state and module attrs still match the previous loading. + self.assertEqual(snap.module._module_initialized, + snap.state_initialized) + self.assertEqual(snap.state_initialized, + base.snapshot.state_initialized) + + ######################### + # the tests + + def test_cleared_globals(self): + loaded = self.load(self.NAME) + _testsinglephase = loaded.module + init_before = _testsinglephase.state_initialized() + + _testsinglephase._clear_globals() + init_after = _testsinglephase.state_initialized() + init_count = _testsinglephase.initialized_count() + + self.assertGreater(init_before, 0) + self.assertEqual(init_after, 0) + self.assertEqual(init_count, -1) + + def test_variants(self): + # Exercise the most meaningful variants described in Python/import.c. + self.maxDiff = None + + # Check the "basic" module. + + name = self.NAME + expected_init_count = 1 + with self.subTest(name): + loaded = self.load(name) + + self.check_common(loaded) + self.check_direct(loaded) + self.check_basic(loaded, expected_init_count) + basic = loaded.module + + # Check its indirect variants. + + name = f'{self.NAME}_basic_wrapper' + self.add_module_cleanup(name) + expected_init_count += 1 + with self.subTest(name): + loaded = self.load(name) + + self.check_common(loaded) + self.check_indirect(loaded, basic) + self.check_basic(loaded, expected_init_count) + + # Currently PyState_AddModule() always replaces the cached module. + self.assertIs(basic.look_up_self(), loaded.module) + self.assertEqual(basic.initialized_count(), expected_init_count) + + # The cached module shouldn't change after this point. + basic_lookedup = loaded.module + + # Check its direct variant. + + name = f'{self.NAME}_basic_copy' + self.add_module_cleanup(name) + expected_init_count += 1 + with self.subTest(name): + loaded = self.load(name) + + self.check_common(loaded) + self.check_direct(loaded) + self.check_basic(loaded, expected_init_count) + + # This should change the cached module for _testsinglephase. + self.assertIs(basic.look_up_self(), basic_lookedup) + self.assertEqual(basic.initialized_count(), expected_init_count) + + # Check the non-basic variant that has no state. + + name = f'{self.NAME}_with_reinit' + self.add_module_cleanup(name) + with self.subTest(name): + loaded = self.load(name) + + self.check_common(loaded) + self.assertIs(loaded.snapshot.state_initialized, None) + self.check_direct(loaded) + self.check_with_reinit(loaded) + + # This should change the cached module for _testsinglephase. + self.assertIs(basic.look_up_self(), basic_lookedup) + self.assertEqual(basic.initialized_count(), expected_init_count) + + # Check the basic variant that has state. + + name = f'{self.NAME}_with_state' + self.add_module_cleanup(name) + with self.subTest(name): + loaded = self.load(name) + self.addCleanup(loaded.module._clear_module_state) + + self.check_common(loaded) + self.assertIsNot(loaded.snapshot.state_initialized, None) + self.check_direct(loaded) + self.check_with_reinit(loaded) + + # This should change the cached module for _testsinglephase. + self.assertIs(basic.look_up_self(), basic_lookedup) + self.assertEqual(basic.initialized_count(), expected_init_count) + + def test_basic_reloaded(self): + # m_copy is copied into the existing module object. + # Global state is not changed. + self.maxDiff = None + + for name in [ + self.NAME, # the "basic" module + f'{self.NAME}_basic_wrapper', # the indirect variant + f'{self.NAME}_basic_copy', # the direct variant + ]: + self.add_module_cleanup(name) + with self.subTest(name): + loaded = self.load(name) + reloaded = self.re_load(name, loaded.module) + + self.check_common(loaded) + self.check_common(reloaded) + + # Make sure the original __dict__ did not get replaced. + self.assertEqual(id(loaded.module.__dict__), + loaded.snapshot.ns_id) + self.assertEqual(loaded.snapshot.ns.__dict__, + loaded.module.__dict__) + + self.assertEqual(reloaded.module.__spec__.name, reloaded.name) + self.assertEqual(reloaded.module.__name__, + reloaded.snapshot.ns.__name__) + + self.assertIs(reloaded.module, loaded.module) + self.assertIs(reloaded.module.__dict__, loaded.module.__dict__) + # It only happens to be the same but that's good enough here. + # We really just want to verify that the re-loaded attrs + # didn't change. + self.assertIs(reloaded.snapshot.lookedup, + loaded.snapshot.lookedup) + self.assertEqual(reloaded.snapshot.state_initialized, + loaded.snapshot.state_initialized) + self.assertEqual(reloaded.snapshot.init_count, + loaded.snapshot.init_count) + + self.assertIs(reloaded.snapshot.cached, reloaded.module) + + def test_with_reinit_reloaded(self): + # The module's m_init func is run again. + self.maxDiff = None + + # Keep a reference around. + basic = self.load(self.NAME) + + for name, has_state in [ + (f'{self.NAME}_with_reinit', False), # m_size == 0 + (f'{self.NAME}_with_state', True), # m_size > 0 + ]: + self.add_module_cleanup(name) + with self.subTest(name=name, has_state=has_state): + loaded = self.load(name) + if has_state: + self.addCleanup(loaded.module._clear_module_state) + + reloaded = self.re_load(name, loaded.module) + if has_state: + self.addCleanup(reloaded.module._clear_module_state) + + self.check_common(loaded) + self.check_common(reloaded) + + # Make sure the original __dict__ did not get replaced. + self.assertEqual(id(loaded.module.__dict__), + loaded.snapshot.ns_id) + self.assertEqual(loaded.snapshot.ns.__dict__, + loaded.module.__dict__) + + self.assertEqual(reloaded.module.__spec__.name, reloaded.name) + self.assertEqual(reloaded.module.__name__, + reloaded.snapshot.ns.__name__) + + self.assertIsNot(reloaded.module, loaded.module) + self.assertNotEqual(reloaded.module.__dict__, + loaded.module.__dict__) + self.assertIs(reloaded.snapshot.lookedup, reloaded.module) + if loaded.snapshot.state_initialized is None: + self.assertIs(reloaded.snapshot.state_initialized, None) + else: + self.assertGreater(reloaded.snapshot.state_initialized, + loaded.snapshot.state_initialized) + + self.assertIs(reloaded.snapshot.cached, reloaded.module) + + @unittest.skipIf(_testinternalcapi is None, "requires _testinternalcapi") + def test_check_state_first(self): + for variant in ['', '_with_reinit', '_with_state']: + name = f'{self.NAME}{variant}_check_cache_first' + with self.subTest(name): + mod = self._load_dynamic(name, self.ORIGIN) + self.assertEqual(mod.__name__, name) + sys.modules.pop(name, None) + _testinternalcapi.clear_extension(name, self.ORIGIN) + + # Currently, for every single-phrase init module loaded + # in multiple interpreters, those interpreters share a + # PyModuleDef for that object, which can be a problem. + # Also, we test with a single-phase module that has global state, + # which is shared by all interpreters. + + @no_rerun(reason="module state is not cleared (see gh-140657)") + @requires_subinterpreters + def test_basic_multiple_interpreters_main_no_reset(self): + # without resetting; already loaded in main interpreter + + # At this point: + # * alive in 0 interpreters + # * module def may or may not be loaded already + # * module def not in _PyRuntime.imports.extensions + # * mod init func has not run yet (since reset, at least) + # * m_copy not set (hasn't been loaded yet or already cleared) + # * module's global state has not been initialized yet + # (or already cleared) + + main_loaded = self.load(self.NAME) + _testsinglephase = main_loaded.module + # Attrs set after loading are not in m_copy. + _testsinglephase.spam = 'spam, spam, spam, spam, eggs, and spam' + + self.check_common(main_loaded) + self.check_fresh(main_loaded) + + interpid1 = self.add_subinterpreter() + interpid2 = self.add_subinterpreter() + + # At this point: + # * alive in 1 interpreter (main) + # * module def in _PyRuntime.imports.extensions + # * mod init func ran for the first time (since reset, at least) + # * m_copy was copied from the main interpreter (was NULL) + # * module's global state was initialized + + # Use an interpreter that gets destroyed right away. + loaded = self.import_in_subinterp() + self.check_common(loaded) + self.check_copied(loaded, main_loaded) + + # At this point: + # * alive in 1 interpreter (main) + # * module def still in _PyRuntime.imports.extensions + # * mod init func ran again + # * m_copy is NULL (cleared when the interpreter was destroyed) + # (was from main interpreter) + # * module's global state was updated, not reset + + # Use a subinterpreter that sticks around. + loaded = self.import_in_subinterp(interpid1) + self.check_common(loaded) + self.check_copied(loaded, main_loaded) + + # At this point: + # * alive in 2 interpreters (main, interp1) + # * module def still in _PyRuntime.imports.extensions + # * mod init func ran again + # * m_copy was copied from interp1 + # * module's global state was updated, not reset + + # Use a subinterpreter while the previous one is still alive. + loaded = self.import_in_subinterp(interpid2) + self.check_common(loaded) + self.check_copied(loaded, main_loaded) + + # At this point: + # * alive in 3 interpreters (main, interp1, interp2) + # * module def still in _PyRuntime.imports.extensions + # * mod init func ran again + # * m_copy was copied from interp2 (was from interp1) + # * module's global state was updated, not reset + + @no_rerun(reason="rerun not possible; module state is never cleared (see gh-102251)") + @requires_subinterpreters + def test_basic_multiple_interpreters_deleted_no_reset(self): + # without resetting; already loaded in a deleted interpreter + + if Py_TRACE_REFS: + # It's a Py_TRACE_REFS build. + # This test breaks interpreter isolation a little, + # which causes problems on Py_TRACE_REF builds. + raise unittest.SkipTest('crashes on Py_TRACE_REFS builds') + + # At this point: + # * alive in 0 interpreters + # * module def may or may not be loaded already + # * module def not in _PyRuntime.imports.extensions + # * mod init func has not run yet (since reset, at least) + # * m_copy not set (hasn't been loaded yet or already cleared) + # * module's global state has not been initialized yet + # (or already cleared) + + interpid1 = self.add_subinterpreter() + interpid2 = self.add_subinterpreter() + + # First, load in the main interpreter but then completely clear it. + loaded_main = self.load(self.NAME) + loaded_main.module._clear_globals() + _testinternalcapi.clear_extension(self.NAME, self.ORIGIN) + + # At this point: + # * alive in 0 interpreters + # * module def loaded already + # * module def was in _PyRuntime.imports.extensions, but cleared + # * mod init func ran for the first time (since reset, at least) + # * m_copy was set, but cleared (was NULL) + # * module's global state was initialized but cleared + + # Start with an interpreter that gets destroyed right away. + base = self.import_in_subinterp( + postscript=''' + # Attrs set after loading are not in m_copy. + mod.spam = 'spam, spam, mash, spam, eggs, and spam' + ''') + self.check_common(base) + self.check_fresh(base) + + # At this point: + # * alive in 0 interpreters + # * module def in _PyRuntime.imports.extensions + # * mod init func ran for the first time (since reset) + # * m_copy is still set (owned by main interpreter) + # * module's global state was initialized, not reset + + # Use a subinterpreter that sticks around. + loaded_interp1 = self.import_in_subinterp(interpid1) + self.check_common(loaded_interp1) + self.check_copied(loaded_interp1, base) + + # At this point: + # * alive in 1 interpreter (interp1) + # * module def still in _PyRuntime.imports.extensions + # * mod init func did not run again + # * m_copy was not changed + # * module's global state was not touched + + # Use a subinterpreter while the previous one is still alive. + loaded_interp2 = self.import_in_subinterp(interpid2) + self.check_common(loaded_interp2) + self.check_copied(loaded_interp2, loaded_interp1) + + # At this point: + # * alive in 2 interpreters (interp1, interp2) + # * module def still in _PyRuntime.imports.extensions + # * mod init func did not run again + # * m_copy was not changed + # * module's global state was not touched + + @requires_subinterpreters + def test_basic_multiple_interpreters_reset_each(self): + # resetting between each interpreter + + # At this point: + # * alive in 0 interpreters + # * module def may or may not be loaded already + # * module def not in _PyRuntime.imports.extensions + # * mod init func has not run yet (since reset, at least) + # * m_copy not set (hasn't been loaded yet or already cleared) + # * module's global state has not been initialized yet + # (or already cleared) + + interpid1 = self.add_subinterpreter() + interpid2 = self.add_subinterpreter() + + # Use an interpreter that gets destroyed right away. + loaded = self.import_in_subinterp( + postscript=''' + # Attrs set after loading are not in m_copy. + mod.spam = 'spam, spam, mash, spam, eggs, and spam' + ''', + postcleanup=True, + ) + self.check_common(loaded) + self.check_fresh(loaded) + + # At this point: + # * alive in 0 interpreters + # * module def in _PyRuntime.imports.extensions + # * mod init func ran for the first time (since reset, at least) + # * m_copy is NULL (cleared when the interpreter was destroyed) + # * module's global state was initialized, not reset + + # Use a subinterpreter that sticks around. + loaded = self.import_in_subinterp(interpid1, postcleanup=True) + self.check_common(loaded) + self.check_fresh(loaded) + + # At this point: + # * alive in 1 interpreter (interp1) + # * module def still in _PyRuntime.imports.extensions + # * mod init func ran again + # * m_copy was copied from interp1 (was NULL) + # * module's global state was initialized, not reset + + # Use a subinterpreter while the previous one is still alive. + loaded = self.import_in_subinterp(interpid2, postcleanup=True) + self.check_common(loaded) + self.check_fresh(loaded) + + # At this point: + # * alive in 2 interpreters (interp2, interp2) + # * module def still in _PyRuntime.imports.extensions + # * mod init func ran again + # * m_copy was copied from interp2 (was from interp1) + # * module's global state was initialized, not reset + + +@cpython_only +class TestMagicNumber(unittest.TestCase): + def test_magic_number_endianness(self): + magic_number_bytes = _imp.pyc_magic_number_token.to_bytes(4, 'little') + self.assertEqual(magic_number_bytes[2:], b'\r\n') + # Starting with Python 3.11, Python 3.n starts with magic number 2900+50n. + magic_number = int.from_bytes(magic_number_bytes[:2], 'little') + start = 2900 + sys.version_info.minor * 50 + self.assertIn(magic_number, range(start, start + 50)) + + +if __name__ == '__main__': + # Test needs to be a package, so we can do relative imports. + unittest.main() diff --git a/stdlib/test/test_import/__main__.py b/stdlib/test/test_import/__main__.py new file mode 100644 index 000000000..24f02a171 --- /dev/null +++ b/stdlib/test/test_import/__main__.py @@ -0,0 +1,3 @@ +import unittest + +unittest.main('test.test_import') diff --git a/stdlib/test/test_import/data/circular_imports/basic.py b/stdlib/test/test_import/data/circular_imports/basic.py new file mode 100644 index 000000000..3e41e395d --- /dev/null +++ b/stdlib/test/test_import/data/circular_imports/basic.py @@ -0,0 +1,2 @@ +"""Circular imports through direct, relative imports.""" +from . import basic2 diff --git a/stdlib/test/test_import/data/circular_imports/basic2.py b/stdlib/test/test_import/data/circular_imports/basic2.py new file mode 100644 index 000000000..00bd2f29f --- /dev/null +++ b/stdlib/test/test_import/data/circular_imports/basic2.py @@ -0,0 +1 @@ +from . import basic diff --git a/stdlib/test/test_import/data/circular_imports/binding.py b/stdlib/test/test_import/data/circular_imports/binding.py new file mode 100644 index 000000000..1fbf929ab --- /dev/null +++ b/stdlib/test/test_import/data/circular_imports/binding.py @@ -0,0 +1 @@ +import test.test_import.data.circular_imports.binding2 as binding2 diff --git a/stdlib/test/test_import/data/circular_imports/binding2.py b/stdlib/test/test_import/data/circular_imports/binding2.py new file mode 100644 index 000000000..3d6693769 --- /dev/null +++ b/stdlib/test/test_import/data/circular_imports/binding2.py @@ -0,0 +1 @@ +import test.test_import.data.circular_imports.binding as binding diff --git a/stdlib/test/test_import/data/circular_imports/from_cycle1.py b/stdlib/test/test_import/data/circular_imports/from_cycle1.py new file mode 100644 index 000000000..aacfd5f46 --- /dev/null +++ b/stdlib/test/test_import/data/circular_imports/from_cycle1.py @@ -0,0 +1,2 @@ +from .from_cycle2 import a +b = 1 diff --git a/stdlib/test/test_import/data/circular_imports/from_cycle2.py b/stdlib/test/test_import/data/circular_imports/from_cycle2.py new file mode 100644 index 000000000..62a66e1cf --- /dev/null +++ b/stdlib/test/test_import/data/circular_imports/from_cycle2.py @@ -0,0 +1,2 @@ +from .from_cycle1 import b +a = 1 diff --git a/stdlib/test/test_import/data/circular_imports/import_cycle.py b/stdlib/test/test_import/data/circular_imports/import_cycle.py new file mode 100644 index 000000000..cd9507b5f --- /dev/null +++ b/stdlib/test/test_import/data/circular_imports/import_cycle.py @@ -0,0 +1,3 @@ +import test.test_import.data.circular_imports.import_cycle as m + +m.some_attribute diff --git a/stdlib/test/test_import/data/circular_imports/indirect.py b/stdlib/test/test_import/data/circular_imports/indirect.py new file mode 100644 index 000000000..6925788d6 --- /dev/null +++ b/stdlib/test/test_import/data/circular_imports/indirect.py @@ -0,0 +1 @@ +from . import basic, basic2 diff --git a/stdlib/test/test_import/data/circular_imports/rebinding.py b/stdlib/test/test_import/data/circular_imports/rebinding.py new file mode 100644 index 000000000..2b7737555 --- /dev/null +++ b/stdlib/test/test_import/data/circular_imports/rebinding.py @@ -0,0 +1,3 @@ +"""Test the binding of names when a circular import shares the same name as an +attribute.""" +from .rebinding2 import util diff --git a/stdlib/test/test_import/data/circular_imports/rebinding2.py b/stdlib/test/test_import/data/circular_imports/rebinding2.py new file mode 100644 index 000000000..57a9e6945 --- /dev/null +++ b/stdlib/test/test_import/data/circular_imports/rebinding2.py @@ -0,0 +1,3 @@ +from .subpkg import util +from . import rebinding +util = util.util diff --git a/stdlib/test/test_import/data/circular_imports/singlephase.py b/stdlib/test/test_import/data/circular_imports/singlephase.py new file mode 100644 index 000000000..05618bc72 --- /dev/null +++ b/stdlib/test/test_import/data/circular_imports/singlephase.py @@ -0,0 +1,13 @@ +"""Circular import involving a single-phase-init extension. + +This module is imported from the _testsinglephase_circular module from +_testsinglephase, and imports that module again. +""" + +import importlib +import _testsinglephase +from test.test_import import import_extension_from_file + +name = '_testsinglephase_circular' +filename = _testsinglephase.__file__ +mod = import_extension_from_file(name, filename) diff --git a/stdlib/test/test_import/data/circular_imports/source.py b/stdlib/test/test_import/data/circular_imports/source.py new file mode 100644 index 000000000..f10409904 --- /dev/null +++ b/stdlib/test/test_import/data/circular_imports/source.py @@ -0,0 +1,2 @@ +from . import use +spam = 1 diff --git a/stdlib/test/test_import/data/circular_imports/subpackage.py b/stdlib/test/test_import/data/circular_imports/subpackage.py new file mode 100644 index 000000000..7b412f76f --- /dev/null +++ b/stdlib/test/test_import/data/circular_imports/subpackage.py @@ -0,0 +1,2 @@ +"""Circular import involving a sub-package.""" +from .subpkg import subpackage2 diff --git a/stdlib/test/test_import/data/circular_imports/subpkg/subpackage2.py b/stdlib/test/test_import/data/circular_imports/subpkg/subpackage2.py new file mode 100644 index 000000000..17b893a1a --- /dev/null +++ b/stdlib/test/test_import/data/circular_imports/subpkg/subpackage2.py @@ -0,0 +1,2 @@ +#from .util import util +from .. import subpackage diff --git a/stdlib/test/test_import/data/circular_imports/subpkg/util.py b/stdlib/test/test_import/data/circular_imports/subpkg/util.py new file mode 100644 index 000000000..343bd843b --- /dev/null +++ b/stdlib/test/test_import/data/circular_imports/subpkg/util.py @@ -0,0 +1,2 @@ +def util(): + pass diff --git a/stdlib/test/test_import/data/circular_imports/subpkg2/__init__.py b/stdlib/test/test_import/data/circular_imports/subpkg2/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/stdlib/test/test_import/data/circular_imports/subpkg2/parent/__init__.py b/stdlib/test/test_import/data/circular_imports/subpkg2/parent/__init__.py new file mode 100644 index 000000000..9745f60a7 --- /dev/null +++ b/stdlib/test/test_import/data/circular_imports/subpkg2/parent/__init__.py @@ -0,0 +1 @@ +import test.test_import.data.circular_imports.subpkg2.parent.child diff --git a/stdlib/test/test_import/data/circular_imports/subpkg2/parent/child.py b/stdlib/test/test_import/data/circular_imports/subpkg2/parent/child.py new file mode 100644 index 000000000..1995a3730 --- /dev/null +++ b/stdlib/test/test_import/data/circular_imports/subpkg2/parent/child.py @@ -0,0 +1,3 @@ +import test.test_import.data.circular_imports.subpkg2.parent + +test.test_import.data.circular_imports.subpkg2.parent diff --git a/stdlib/test/test_import/data/circular_imports/use.py b/stdlib/test/test_import/data/circular_imports/use.py new file mode 100644 index 000000000..418f9e268 --- /dev/null +++ b/stdlib/test/test_import/data/circular_imports/use.py @@ -0,0 +1,2 @@ +from . import source +source.spam diff --git a/stdlib/test/test_import/data/circular_imports/util.py b/stdlib/test/test_import/data/circular_imports/util.py new file mode 100644 index 000000000..343bd843b --- /dev/null +++ b/stdlib/test/test_import/data/circular_imports/util.py @@ -0,0 +1,2 @@ +def util(): + pass diff --git a/stdlib/test/test_import/data/double_const.py b/stdlib/test/test_import/data/double_const.py new file mode 100644 index 000000000..67852aaf9 --- /dev/null +++ b/stdlib/test/test_import/data/double_const.py @@ -0,0 +1,30 @@ +from test.support import TestFailed + +# A test for SF bug 422177: manifest float constants varied way too much in +# precision depending on whether Python was loading a module for the first +# time, or reloading it from a precompiled .pyc. The "expected" failure +# mode is that when test_import imports this after all .pyc files have been +# erased, it passes, but when test_import imports this from +# double_const.pyc, it fails. This indicates a woeful loss of precision in +# the marshal format for doubles. It's also possible that repr() doesn't +# produce enough digits to get reasonable precision for this box. + +PI = 3.14159265358979324 +TWOPI = 6.28318530717958648 + +PI_str = "3.14159265358979324" +TWOPI_str = "6.28318530717958648" + +# Verify that the double x is within a few bits of eval(x_str). +def check_ok(x, x_str): + assert x > 0.0 + x2 = eval(x_str) + assert x2 > 0.0 + diff = abs(x - x2) + # If diff is no larger than 3 ULP (wrt x2), then diff/8 is no larger + # than 0.375 ULP, so adding diff/8 to x2 should have no effect. + if x2 + (diff / 8.) != x2: + raise TestFailed("Manifest const %s lost too much precision " % x_str) + +check_ok(PI, PI_str) +check_ok(TWOPI, TWOPI_str) diff --git a/stdlib/test/test_import/data/package/__init__.py b/stdlib/test/test_import/data/package/__init__.py new file mode 100644 index 000000000..a4f2bc340 --- /dev/null +++ b/stdlib/test/test_import/data/package/__init__.py @@ -0,0 +1,2 @@ +import package.submodule +package.submodule diff --git a/stdlib/test/test_import/data/package/submodule.py b/stdlib/test/test_import/data/package/submodule.py new file mode 100644 index 000000000..e69de29bb diff --git a/stdlib/test/test_import/data/package2/submodule1.py b/stdlib/test/test_import/data/package2/submodule1.py new file mode 100644 index 000000000..0698ed6de --- /dev/null +++ b/stdlib/test/test_import/data/package2/submodule1.py @@ -0,0 +1,3 @@ +import sys +sys.modules.pop(__package__, None) +from . import submodule2 diff --git a/stdlib/test/test_import/data/package2/submodule2.py b/stdlib/test/test_import/data/package2/submodule2.py new file mode 100644 index 000000000..e69de29bb diff --git a/stdlib/test/test_import/data/package3/__init__.py b/stdlib/test/test_import/data/package3/__init__.py new file mode 100644 index 000000000..7033c22a7 --- /dev/null +++ b/stdlib/test/test_import/data/package3/__init__.py @@ -0,0 +1,2 @@ +"""Rebinding the package attribute after importing the module.""" +from .submodule import submodule diff --git a/stdlib/test/test_import/data/package3/submodule.py b/stdlib/test/test_import/data/package3/submodule.py new file mode 100644 index 000000000..cd7b30db1 --- /dev/null +++ b/stdlib/test/test_import/data/package3/submodule.py @@ -0,0 +1,7 @@ +attr = 'submodule' +class A: + attr = 'submodule' +class submodule: + attr = 'rebound' + class B: + attr = 'rebound' diff --git a/stdlib/test/test_import/data/package4/__init__.py b/stdlib/test/test_import/data/package4/__init__.py new file mode 100644 index 000000000..d8af60ab3 --- /dev/null +++ b/stdlib/test/test_import/data/package4/__init__.py @@ -0,0 +1,5 @@ +"""Binding the package attribute without importing the module.""" +class submodule: + attr = 'origin' + class B: + attr = 'origin' diff --git a/stdlib/test/test_import/data/package4/submodule.py b/stdlib/test/test_import/data/package4/submodule.py new file mode 100644 index 000000000..c861417ae --- /dev/null +++ b/stdlib/test/test_import/data/package4/submodule.py @@ -0,0 +1,3 @@ +attr = 'submodule' +class A: + attr = 'submodule' diff --git a/stdlib/test/test_import/data/unwritable/__init__.py b/stdlib/test/test_import/data/unwritable/__init__.py new file mode 100644 index 000000000..da4ddb3d0 --- /dev/null +++ b/stdlib/test/test_import/data/unwritable/__init__.py @@ -0,0 +1,12 @@ +import sys + +class MyMod(object): + __slots__ = ['__builtins__', '__cached__', '__doc__', + '__file__', '__loader__', '__name__', + '__package__', '__path__', '__spec__'] + def __init__(self): + for attr in self.__slots__: + setattr(self, attr, globals()[attr]) + + +sys.modules[__name__] = MyMod() diff --git a/stdlib/test/test_import/data/unwritable/x.py b/stdlib/test/test_import/data/unwritable/x.py new file mode 100644 index 000000000..e69de29bb diff --git a/stdlib/test/test_importlib/__init__.py b/stdlib/test/test_importlib/__init__.py new file mode 100644 index 000000000..4b16ecc31 --- /dev/null +++ b/stdlib/test/test_importlib/__init__.py @@ -0,0 +1,5 @@ +import os +from test.support import load_package_tests + +def load_tests(*args): + return load_package_tests(os.path.dirname(__file__), *args) diff --git a/stdlib/test/test_importlib/__main__.py b/stdlib/test/test_importlib/__main__.py new file mode 100644 index 000000000..40a23a297 --- /dev/null +++ b/stdlib/test/test_importlib/__main__.py @@ -0,0 +1,4 @@ +from . import load_tests +import unittest + +unittest.main() diff --git a/stdlib/test/test_importlib/abc.py b/stdlib/test/test_importlib/abc.py new file mode 100644 index 000000000..5d4b95876 --- /dev/null +++ b/stdlib/test/test_importlib/abc.py @@ -0,0 +1,93 @@ +import abc + + +class FinderTests(metaclass=abc.ABCMeta): + + """Basic tests for a finder to pass.""" + + @abc.abstractmethod + def test_module(self): + # Test importing a top-level module. + pass + + @abc.abstractmethod + def test_package(self): + # Test importing a package. + pass + + @abc.abstractmethod + def test_module_in_package(self): + # Test importing a module contained within a package. + # A value for 'path' should be used if for a meta_path finder. + pass + + @abc.abstractmethod + def test_package_in_package(self): + # Test importing a subpackage. + # A value for 'path' should be used if for a meta_path finder. + pass + + @abc.abstractmethod + def test_package_over_module(self): + # Test that packages are chosen over modules. + pass + + @abc.abstractmethod + def test_failure(self): + # Test trying to find a module that cannot be handled. + pass + + +class LoaderTests(metaclass=abc.ABCMeta): + + @abc.abstractmethod + def test_module(self): + """A module should load without issue. + + After the loader returns the module should be in sys.modules. + + Attributes to verify: + + * __file__ + * __loader__ + * __name__ + * No __path__ + + """ + pass + + @abc.abstractmethod + def test_package(self): + """Loading a package should work. + + After the loader returns the module should be in sys.modules. + + Attributes to verify: + + * __name__ + * __file__ + * __package__ + * __path__ + * __loader__ + + """ + pass + + @abc.abstractmethod + def test_lacking_parent(self): + """A loader should not be dependent on it's parent package being + imported.""" + pass + + @abc.abstractmethod + def test_state_after_failure(self): + """If a module is already in sys.modules and a reload fails + (e.g. a SyntaxError), the module should be in the state it was before + the reload began.""" + pass + + @abc.abstractmethod + def test_unloadable(self): + """Test ImportError is raised when the loader is asked to load a module + it can't.""" + pass diff --git a/stdlib/test/test_importlib/builtin/__init__.py b/stdlib/test/test_importlib/builtin/__init__.py new file mode 100644 index 000000000..4b16ecc31 --- /dev/null +++ b/stdlib/test/test_importlib/builtin/__init__.py @@ -0,0 +1,5 @@ +import os +from test.support import load_package_tests + +def load_tests(*args): + return load_package_tests(os.path.dirname(__file__), *args) diff --git a/stdlib/test/test_importlib/builtin/__main__.py b/stdlib/test/test_importlib/builtin/__main__.py new file mode 100644 index 000000000..40a23a297 --- /dev/null +++ b/stdlib/test/test_importlib/builtin/__main__.py @@ -0,0 +1,4 @@ +from . import load_tests +import unittest + +unittest.main() diff --git a/stdlib/test/test_importlib/builtin/test_finder.py b/stdlib/test/test_importlib/builtin/test_finder.py new file mode 100644 index 000000000..1fb1d2f9e --- /dev/null +++ b/stdlib/test/test_importlib/builtin/test_finder.py @@ -0,0 +1,46 @@ +from test.test_importlib import abc, util + +machinery = util.import_importlib('importlib.machinery') + +import sys +import unittest + + +@unittest.skipIf(util.BUILTINS.good_name is None, 'no reasonable builtin module') +class FindSpecTests(abc.FinderTests): + + """Test find_spec() for built-in modules.""" + + def test_module(self): + # Common case. + with util.uncache(util.BUILTINS.good_name): + found = self.machinery.BuiltinImporter.find_spec(util.BUILTINS.good_name) + self.assertTrue(found) + self.assertEqual(found.origin, 'built-in') + + # Built-in modules cannot be a package. + test_package = None + + # Built-in modules cannot be in a package. + test_module_in_package = None + + # Built-in modules cannot be a package. + test_package_in_package = None + + # Built-in modules cannot be a package. + test_package_over_module = None + + def test_failure(self): + name = 'importlib' + assert name not in sys.builtin_module_names + spec = self.machinery.BuiltinImporter.find_spec(name) + self.assertIsNone(spec) + + +(Frozen_FindSpecTests, + Source_FindSpecTests + ) = util.test_both(FindSpecTests, machinery=machinery) + + +if __name__ == '__main__': + unittest.main() diff --git a/stdlib/test/test_importlib/builtin/test_loader.py b/stdlib/test/test_importlib/builtin/test_loader.py new file mode 100644 index 000000000..7e9d1b196 --- /dev/null +++ b/stdlib/test/test_importlib/builtin/test_loader.py @@ -0,0 +1,110 @@ +from test.test_importlib import abc, util + +machinery = util.import_importlib('importlib.machinery') + +import sys +import types +import unittest +import warnings + +@unittest.skipIf(util.BUILTINS.good_name is None, 'no reasonable builtin module') +class LoaderTests(abc.LoaderTests): + + """Test load_module() for built-in modules.""" + + def setUp(self): + self.verification = {'__name__': 'errno', '__package__': '', + '__loader__': self.machinery.BuiltinImporter} + + def verify(self, module): + """Verify that the module matches against what it should have.""" + self.assertIsInstance(module, types.ModuleType) + for attr, value in self.verification.items(): + self.assertEqual(getattr(module, attr), value) + self.assertIn(module.__name__, sys.modules) + + def load_module(self, name): + with warnings.catch_warnings(): + warnings.simplefilter("ignore", DeprecationWarning) + return self.machinery.BuiltinImporter.load_module(name) + + def test_module(self): + # Common case. + with util.uncache(util.BUILTINS.good_name): + module = self.load_module(util.BUILTINS.good_name) + self.verify(module) + + # Built-in modules cannot be a package. + test_package = test_lacking_parent = None + + # No way to force an import failure. + test_state_after_failure = None + + def test_module_reuse(self): + # Test that the same module is used in a reload. + with util.uncache(util.BUILTINS.good_name): + module1 = self.load_module(util.BUILTINS.good_name) + module2 = self.load_module(util.BUILTINS.good_name) + self.assertIs(module1, module2) + + def test_unloadable(self): + name = 'dssdsdfff' + assert name not in sys.builtin_module_names + with self.assertRaises(ImportError) as cm: + self.load_module(name) + self.assertEqual(cm.exception.name, name) + + def test_already_imported(self): + # Using the name of a module already imported but not a built-in should + # still fail. + module_name = 'builtin_reload_test' + assert module_name not in sys.builtin_module_names + with util.uncache(module_name): + module = types.ModuleType(module_name) + sys.modules[module_name] = module + with self.assertRaises(ImportError) as cm: + self.load_module(module_name) + self.assertEqual(cm.exception.name, module_name) + + +(Frozen_LoaderTests, + Source_LoaderTests + ) = util.test_both(LoaderTests, machinery=machinery) + + +@unittest.skipIf(util.BUILTINS.good_name is None, 'no reasonable builtin module') +class InspectLoaderTests: + + """Tests for InspectLoader methods for BuiltinImporter.""" + + def test_get_code(self): + # There is no code object. + result = self.machinery.BuiltinImporter.get_code(util.BUILTINS.good_name) + self.assertIsNone(result) + + def test_get_source(self): + # There is no source. + result = self.machinery.BuiltinImporter.get_source(util.BUILTINS.good_name) + self.assertIsNone(result) + + def test_is_package(self): + # Cannot be a package. + result = self.machinery.BuiltinImporter.is_package(util.BUILTINS.good_name) + self.assertFalse(result) + + @unittest.skipIf(util.BUILTINS.bad_name is None, 'all modules are built in') + def test_not_builtin(self): + # Modules not built-in should raise ImportError. + for meth_name in ('get_code', 'get_source', 'is_package'): + method = getattr(self.machinery.BuiltinImporter, meth_name) + with self.assertRaises(ImportError) as cm: + method(util.BUILTINS.bad_name) + + +(Frozen_InspectLoaderTests, + Source_InspectLoaderTests + ) = util.test_both(InspectLoaderTests, machinery=machinery) + + +if __name__ == '__main__': + unittest.main() diff --git a/stdlib/test/test_importlib/extension/__init__.py b/stdlib/test/test_importlib/extension/__init__.py new file mode 100644 index 000000000..4b16ecc31 --- /dev/null +++ b/stdlib/test/test_importlib/extension/__init__.py @@ -0,0 +1,5 @@ +import os +from test.support import load_package_tests + +def load_tests(*args): + return load_package_tests(os.path.dirname(__file__), *args) diff --git a/stdlib/test/test_importlib/extension/__main__.py b/stdlib/test/test_importlib/extension/__main__.py new file mode 100644 index 000000000..40a23a297 --- /dev/null +++ b/stdlib/test/test_importlib/extension/__main__.py @@ -0,0 +1,4 @@ +from . import load_tests +import unittest + +unittest.main() diff --git a/stdlib/test/test_importlib/extension/_test_nonmodule_cases.py b/stdlib/test/test_importlib/extension/_test_nonmodule_cases.py new file mode 100644 index 000000000..8ffd18d22 --- /dev/null +++ b/stdlib/test/test_importlib/extension/_test_nonmodule_cases.py @@ -0,0 +1,44 @@ +import types +import unittest +from test.test_importlib import util + +machinery = util.import_importlib('importlib.machinery') + +from test.test_importlib.extension.test_loader import MultiPhaseExtensionModuleTests + + +class NonModuleExtensionTests: + setUp = MultiPhaseExtensionModuleTests.setUp + load_module_by_name = MultiPhaseExtensionModuleTests.load_module_by_name + + def _test_nonmodule(self): + # Test returning a non-module object from create works. + name = self.name + '_nonmodule' + mod = self.load_module_by_name(name) + self.assertNotEqual(type(mod), type(unittest)) + self.assertEqual(mod.three, 3) + + # issue 27782 + def test_nonmodule_with_methods(self): + # Test creating a non-module object with methods defined. + name = self.name + '_nonmodule_with_methods' + mod = self.load_module_by_name(name) + self.assertNotEqual(type(mod), type(unittest)) + self.assertEqual(mod.three, 3) + self.assertEqual(mod.bar(10, 1), 9) + + def test_null_slots(self): + # Test that NULL slots aren't a problem. + name = self.name + '_null_slots' + module = self.load_module_by_name(name) + self.assertIsInstance(module, types.ModuleType) + self.assertEqual(module.__name__, name) + + +(Frozen_NonModuleExtensionTests, + Source_NonModuleExtensionTests + ) = util.test_both(NonModuleExtensionTests, machinery=machinery) + + +if __name__ == '__main__': + unittest.main() diff --git a/stdlib/test/test_importlib/extension/test_case_sensitivity.py b/stdlib/test/test_importlib/extension/test_case_sensitivity.py new file mode 100644 index 000000000..518371916 --- /dev/null +++ b/stdlib/test/test_importlib/extension/test_case_sensitivity.py @@ -0,0 +1,48 @@ +from test.support import os_helper +import unittest +import sys +from test.test_importlib import util + +importlib = util.import_importlib('importlib') +machinery = util.import_importlib('importlib.machinery') + + +@unittest.skipIf(util.EXTENSIONS is None or util.EXTENSIONS.filename is None, + 'dynamic loading not supported or test module not available') +@util.case_insensitive_tests +class ExtensionModuleCaseSensitivityTest(util.CASEOKTestBase): + + def find_spec(self): + good_name = util.EXTENSIONS.name + bad_name = good_name.upper() + assert good_name != bad_name + finder = self.machinery.FileFinder(util.EXTENSIONS.path, + (self.machinery.ExtensionFileLoader, + self.machinery.EXTENSION_SUFFIXES)) + return finder.find_spec(bad_name) + + @unittest.skipIf(sys.flags.ignore_environment, 'ignore_environment flag was set') + def test_case_sensitive(self): + with os_helper.EnvironmentVarGuard() as env: + env.unset('PYTHONCASEOK') + self.caseok_env_changed(should_exist=False) + spec = self.find_spec() + self.assertIsNone(spec) + + @unittest.skipIf(sys.flags.ignore_environment, 'ignore_environment flag was set') + def test_case_insensitivity(self): + with os_helper.EnvironmentVarGuard() as env: + env.set('PYTHONCASEOK', '1') + self.caseok_env_changed(should_exist=True) + spec = self.find_spec() + self.assertTrue(spec) + + +(Frozen_ExtensionCaseSensitivity, + Source_ExtensionCaseSensitivity + ) = util.test_both(ExtensionModuleCaseSensitivityTest, importlib=importlib, + machinery=machinery) + + +if __name__ == '__main__': + unittest.main() diff --git a/stdlib/test/test_importlib/extension/test_finder.py b/stdlib/test/test_importlib/extension/test_finder.py new file mode 100644 index 000000000..cdc8884d6 --- /dev/null +++ b/stdlib/test/test_importlib/extension/test_finder.py @@ -0,0 +1,69 @@ +from test.support import is_apple_mobile +from test.test_importlib import abc, util + +machinery = util.import_importlib('importlib.machinery') + +import unittest +import sys + + +class FinderTests(abc.FinderTests): + + """Test the finder for extension modules.""" + + def setUp(self): + if not self.machinery.EXTENSION_SUFFIXES or not util.EXTENSIONS: + raise unittest.SkipTest("Requires dynamic loading support.") + if util.EXTENSIONS.name in sys.builtin_module_names: + raise unittest.SkipTest( + f"{util.EXTENSIONS.name} is a builtin module" + ) + + def find_spec(self, fullname): + if is_apple_mobile: + # Apple mobile platforms require a specialist loader that uses + # .fwork files as placeholders for the true `.so` files. + loaders = [ + ( + self.machinery.AppleFrameworkLoader, + [ + ext.replace(".so", ".fwork") + for ext in self.machinery.EXTENSION_SUFFIXES + ] + ) + ] + else: + loaders = [ + ( + self.machinery.ExtensionFileLoader, + self.machinery.EXTENSION_SUFFIXES + ) + ] + + importer = self.machinery.FileFinder(util.EXTENSIONS.path, *loaders) + + return importer.find_spec(fullname) + + def test_module(self): + self.assertTrue(self.find_spec(util.EXTENSIONS.name)) + + # No extension module as an __init__ available for testing. + test_package = test_package_in_package = None + + # No extension module in a package available for testing. + test_module_in_package = None + + # Extension modules cannot be an __init__ for a package. + test_package_over_module = None + + def test_failure(self): + self.assertIsNone(self.find_spec('asdfjkl;')) + + +(Frozen_FinderTests, + Source_FinderTests + ) = util.test_both(FinderTests, machinery=machinery) + + +if __name__ == '__main__': + unittest.main() diff --git a/stdlib/test/test_importlib/extension/test_loader.py b/stdlib/test/test_importlib/extension/test_loader.py new file mode 100644 index 000000000..0dd21e079 --- /dev/null +++ b/stdlib/test/test_importlib/extension/test_loader.py @@ -0,0 +1,392 @@ +from test.support import is_apple_mobile +from test.test_importlib import abc, util + +machinery = util.import_importlib('importlib.machinery') + +import os.path +import sys +import types +import unittest +import warnings +import importlib.util +import importlib +from test import support +from test.support import MISSING_C_DOCSTRINGS, script_helper + + +class LoaderTests: + + """Test ExtensionFileLoader.""" + + def setUp(self): + if not self.machinery.EXTENSION_SUFFIXES or not util.EXTENSIONS: + raise unittest.SkipTest("Requires dynamic loading support.") + if util.EXTENSIONS.name in sys.builtin_module_names: + raise unittest.SkipTest( + f"{util.EXTENSIONS.name} is a builtin module" + ) + + # Apple extensions must be distributed as frameworks. This requires + # a specialist loader. + if is_apple_mobile: + self.LoaderClass = self.machinery.AppleFrameworkLoader + else: + self.LoaderClass = self.machinery.ExtensionFileLoader + + self.loader = self.LoaderClass(util.EXTENSIONS.name, util.EXTENSIONS.file_path) + + def load_module(self, fullname): + with warnings.catch_warnings(): + warnings.simplefilter("ignore", DeprecationWarning) + return self.loader.load_module(fullname) + + def test_equality(self): + other = self.LoaderClass(util.EXTENSIONS.name, util.EXTENSIONS.file_path) + self.assertEqual(self.loader, other) + + def test_inequality(self): + other = self.LoaderClass('_' + util.EXTENSIONS.name, util.EXTENSIONS.file_path) + self.assertNotEqual(self.loader, other) + + def test_load_module_API(self): + # Test the default argument for load_module(). + with warnings.catch_warnings(): + warnings.simplefilter("ignore", DeprecationWarning) + self.loader.load_module() + self.loader.load_module(None) + with self.assertRaises(ImportError): + self.load_module('XXX') + + def test_module(self): + with util.uncache(util.EXTENSIONS.name): + module = self.load_module(util.EXTENSIONS.name) + for attr, value in [('__name__', util.EXTENSIONS.name), + ('__file__', util.EXTENSIONS.file_path), + ('__package__', '')]: + self.assertEqual(getattr(module, attr), value) + self.assertIn(util.EXTENSIONS.name, sys.modules) + self.assertIsInstance(module.__loader__, self.LoaderClass) + + # No extension module as __init__ available for testing. + test_package = None + + # No extension module in a package available for testing. + test_lacking_parent = None + + # No easy way to trigger a failure after a successful import. + test_state_after_failure = None + + def test_unloadable(self): + name = 'asdfjkl;' + with self.assertRaises(ImportError) as cm: + self.load_module(name) + self.assertEqual(cm.exception.name, name) + + def test_module_reuse(self): + with util.uncache(util.EXTENSIONS.name): + module1 = self.load_module(util.EXTENSIONS.name) + module2 = self.load_module(util.EXTENSIONS.name) + self.assertIs(module1, module2) + + def test_is_package(self): + self.assertFalse(self.loader.is_package(util.EXTENSIONS.name)) + for suffix in self.machinery.EXTENSION_SUFFIXES: + path = os.path.join('some', 'path', 'pkg', '__init__' + suffix) + loader = self.LoaderClass('pkg', path) + self.assertTrue(loader.is_package('pkg')) + + +(Frozen_LoaderTests, + Source_LoaderTests + ) = util.test_both(LoaderTests, machinery=machinery) + + +class SinglePhaseExtensionModuleTests(abc.LoaderTests): + # Test loading extension modules without multi-phase initialization. + + def setUp(self): + if not self.machinery.EXTENSION_SUFFIXES or not util.EXTENSIONS: + raise unittest.SkipTest("Requires dynamic loading support.") + + # Apple extensions must be distributed as frameworks. This requires + # a specialist loader. + if is_apple_mobile: + self.LoaderClass = self.machinery.AppleFrameworkLoader + else: + self.LoaderClass = self.machinery.ExtensionFileLoader + + self.name = '_testsinglephase' + if self.name in sys.builtin_module_names: + raise unittest.SkipTest( + f"{self.name} is a builtin module" + ) + finder = self.machinery.FileFinder(None) + self.spec = importlib.util.find_spec(self.name) + assert self.spec + + self.loader = self.LoaderClass(self.name, self.spec.origin) + + def load_module(self): + with warnings.catch_warnings(): + warnings.simplefilter("ignore", DeprecationWarning) + return self.loader.load_module(self.name) + + def load_module_by_name(self, fullname): + # Load a module from the test extension by name. + origin = self.spec.origin + loader = self.LoaderClass(fullname, origin) + spec = importlib.util.spec_from_loader(fullname, loader) + module = importlib.util.module_from_spec(spec) + loader.exec_module(module) + return module + + def test_module(self): + # Test loading an extension module. + with util.uncache(self.name): + module = self.load_module() + for attr, value in [('__name__', self.name), + ('__file__', self.spec.origin), + ('__package__', '')]: + self.assertEqual(getattr(module, attr), value) + with self.assertRaises(AttributeError): + module.__path__ + self.assertIs(module, sys.modules[self.name]) + self.assertIsInstance(module.__loader__, self.LoaderClass) + + # No extension module as __init__ available for testing. + test_package = None + + # No extension module in a package available for testing. + test_lacking_parent = None + + # No easy way to trigger a failure after a successful import. + test_state_after_failure = None + + def test_unloadable(self): + name = 'asdfjkl;' + with self.assertRaises(ImportError) as cm: + self.load_module_by_name(name) + self.assertEqual(cm.exception.name, name) + + def test_unloadable_nonascii(self): + # Test behavior with nonexistent module with non-ASCII name. + name = 'fo\xf3' + with self.assertRaises(ImportError) as cm: + self.load_module_by_name(name) + self.assertEqual(cm.exception.name, name) + + # It may make sense to add the equivalent to + # the following MultiPhaseExtensionModuleTests tests: + # + # * test_nonmodule + # * test_nonmodule_with_methods + # * test_bad_modules + # * test_nonascii + + +(Frozen_SinglePhaseExtensionModuleTests, + Source_SinglePhaseExtensionModuleTests + ) = util.test_both(SinglePhaseExtensionModuleTests, machinery=machinery) + + +class MultiPhaseExtensionModuleTests(abc.LoaderTests): + # Test loading extension modules with multi-phase initialization (PEP 489). + + def setUp(self): + if not self.machinery.EXTENSION_SUFFIXES or not util.EXTENSIONS: + raise unittest.SkipTest("Requires dynamic loading support.") + + # Apple extensions must be distributed as frameworks. This requires + # a specialist loader. + if is_apple_mobile: + self.LoaderClass = self.machinery.AppleFrameworkLoader + else: + self.LoaderClass = self.machinery.ExtensionFileLoader + + self.name = '_testmultiphase' + if self.name in sys.builtin_module_names: + raise unittest.SkipTest( + f"{self.name} is a builtin module" + ) + finder = self.machinery.FileFinder(None) + self.spec = importlib.util.find_spec(self.name) + assert self.spec + self.loader = self.LoaderClass(self.name, self.spec.origin) + + def load_module(self): + # Load the module from the test extension. + with warnings.catch_warnings(): + warnings.simplefilter("ignore", DeprecationWarning) + return self.loader.load_module(self.name) + + def load_module_by_name(self, fullname): + # Load a module from the test extension by name. + origin = self.spec.origin + loader = self.LoaderClass(fullname, origin) + spec = importlib.util.spec_from_loader(fullname, loader) + module = importlib.util.module_from_spec(spec) + loader.exec_module(module) + return module + + # No extension module as __init__ available for testing. + test_package = None + + # No extension module in a package available for testing. + test_lacking_parent = None + + # Handling failure on reload is the up to the module. + test_state_after_failure = None + + def test_module(self): + # Test loading an extension module. + with util.uncache(self.name): + module = self.load_module() + for attr, value in [('__name__', self.name), + ('__file__', self.spec.origin), + ('__package__', '')]: + self.assertEqual(getattr(module, attr), value) + with self.assertRaises(AttributeError): + module.__path__ + self.assertIs(module, sys.modules[self.name]) + self.assertIsInstance(module.__loader__, self.LoaderClass) + + def test_functionality(self): + # Test basic functionality of stuff defined in an extension module. + with util.uncache(self.name): + module = self.load_module() + self.assertIsInstance(module, types.ModuleType) + ex = module.Example() + self.assertEqual(ex.demo('abcd'), 'abcd') + self.assertEqual(ex.demo(), None) + with self.assertRaises(AttributeError): + ex.abc + ex.abc = 0 + self.assertEqual(ex.abc, 0) + self.assertEqual(module.foo(9, 9), 18) + self.assertIsInstance(module.Str(), str) + self.assertEqual(module.Str(1) + '23', '123') + with self.assertRaises(module.error): + raise module.error() + self.assertEqual(module.int_const, 1969) + self.assertEqual(module.str_const, 'something different') + + def test_reload(self): + # Test that reload didn't re-set the module's attributes. + with util.uncache(self.name): + module = self.load_module() + ex_class = module.Example + importlib.reload(module) + self.assertIs(ex_class, module.Example) + + def test_try_registration(self): + # Assert that the PyState_{Find,Add,Remove}Module C API doesn't work. + with util.uncache(self.name): + module = self.load_module() + with self.subTest('PyState_FindModule'): + self.assertEqual(module.call_state_registration_func(0), None) + with self.subTest('PyState_AddModule'): + with self.assertRaises(SystemError): + module.call_state_registration_func(1) + with self.subTest('PyState_RemoveModule'): + with self.assertRaises(SystemError): + module.call_state_registration_func(2) + + def test_load_submodule(self): + # Test loading a simulated submodule. + module = self.load_module_by_name('pkg.' + self.name) + self.assertIsInstance(module, types.ModuleType) + self.assertEqual(module.__name__, 'pkg.' + self.name) + self.assertEqual(module.str_const, 'something different') + + def test_load_short_name(self): + # Test loading module with a one-character name. + module = self.load_module_by_name('x') + self.assertIsInstance(module, types.ModuleType) + self.assertEqual(module.__name__, 'x') + self.assertEqual(module.str_const, 'something different') + self.assertNotIn('x', sys.modules) + + def test_load_twice(self): + # Test that 2 loads result in 2 module objects. + module1 = self.load_module_by_name(self.name) + module2 = self.load_module_by_name(self.name) + self.assertIsNot(module1, module2) + + def test_unloadable(self): + # Test nonexistent module. + name = 'asdfjkl;' + with self.assertRaises(ImportError) as cm: + self.load_module_by_name(name) + self.assertEqual(cm.exception.name, name) + + def test_unloadable_nonascii(self): + # Test behavior with nonexistent module with non-ASCII name. + name = 'fo\xf3' + with self.assertRaises(ImportError) as cm: + self.load_module_by_name(name) + self.assertEqual(cm.exception.name, name) + + def test_bad_modules(self): + # Test SystemError is raised for misbehaving extensions. + for name_base in [ + 'bad_slot_large', + 'bad_slot_negative', + 'create_int_with_state', + 'negative_size', + 'export_null', + 'export_uninitialized', + 'export_raise', + 'export_unreported_exception', + 'create_null', + 'create_raise', + 'create_unreported_exception', + 'nonmodule_with_exec_slots', + 'exec_err', + 'exec_raise', + 'exec_unreported_exception', + 'multiple_create_slots', + 'multiple_multiple_interpreters_slots', + ]: + with self.subTest(name_base): + name = self.name + '_' + name_base + with self.assertRaises(SystemError) as cm: + self.load_module_by_name(name) + + # If there is an unreported exception, it should be chained + # with the `SystemError`. + if "unreported_exception" in name_base: + self.assertIsNotNone(cm.exception.__cause__) + + def test_nonascii(self): + # Test that modules with non-ASCII names can be loaded. + # punycode behaves slightly differently in some-ASCII and no-ASCII + # cases, so test both. + cases = [ + (self.name + '_zkou\u0161ka_na\u010dten\xed', 'Czech'), + ('\uff3f\u30a4\u30f3\u30dd\u30fc\u30c8\u30c6\u30b9\u30c8', + 'Japanese'), + ] + for name, lang in cases: + with self.subTest(name): + module = self.load_module_by_name(name) + self.assertEqual(module.__name__, name) + if not MISSING_C_DOCSTRINGS: + self.assertEqual(module.__doc__, "Module named in %s" % lang) + + +(Frozen_MultiPhaseExtensionModuleTests, + Source_MultiPhaseExtensionModuleTests + ) = util.test_both(MultiPhaseExtensionModuleTests, machinery=machinery) + + +class NonModuleExtensionTests(unittest.TestCase): + def test_nonmodule_cases(self): + # The test cases in this file cause the GIL to be enabled permanently + # in free-threaded builds, so they are run in a subprocess to isolate + # this effect. + script = support.findfile("test_importlib/extension/_test_nonmodule_cases.py") + script_helper.run_test_script(script) + + +if __name__ == '__main__': + unittest.main() diff --git a/stdlib/test/test_importlib/extension/test_path_hook.py b/stdlib/test/test_importlib/extension/test_path_hook.py new file mode 100644 index 000000000..941dcd543 --- /dev/null +++ b/stdlib/test/test_importlib/extension/test_path_hook.py @@ -0,0 +1,33 @@ +from test.test_importlib import util + +machinery = util.import_importlib('importlib.machinery') + +import unittest + + +@unittest.skipIf(util.EXTENSIONS is None or util.EXTENSIONS.filename is None, + 'dynamic loading not supported or test module not available') +class PathHookTests: + + """Test the path hook for extension modules.""" + # XXX Should it only succeed for pre-existing directories? + # XXX Should it only work for directories containing an extension module? + + def hook(self, entry): + return self.machinery.FileFinder.path_hook( + (self.machinery.ExtensionFileLoader, + self.machinery.EXTENSION_SUFFIXES))(entry) + + def test_success(self): + # Path hook should handle a directory where a known extension module + # exists. + self.assertHasAttr(self.hook(util.EXTENSIONS.path), 'find_spec') + + +(Frozen_PathHooksTests, + Source_PathHooksTests + ) = util.test_both(PathHookTests, machinery=machinery) + + +if __name__ == '__main__': + unittest.main() diff --git a/stdlib/test/test_importlib/frozen/__init__.py b/stdlib/test/test_importlib/frozen/__init__.py new file mode 100644 index 000000000..4b16ecc31 --- /dev/null +++ b/stdlib/test/test_importlib/frozen/__init__.py @@ -0,0 +1,5 @@ +import os +from test.support import load_package_tests + +def load_tests(*args): + return load_package_tests(os.path.dirname(__file__), *args) diff --git a/stdlib/test/test_importlib/frozen/__main__.py b/stdlib/test/test_importlib/frozen/__main__.py new file mode 100644 index 000000000..40a23a297 --- /dev/null +++ b/stdlib/test/test_importlib/frozen/__main__.py @@ -0,0 +1,4 @@ +from . import load_tests +import unittest + +unittest.main() diff --git a/stdlib/test/test_importlib/frozen/test_finder.py b/stdlib/test/test_importlib/frozen/test_finder.py new file mode 100644 index 000000000..971cc28b6 --- /dev/null +++ b/stdlib/test/test_importlib/frozen/test_finder.py @@ -0,0 +1,183 @@ +from test.test_importlib import abc, util + +machinery = util.import_importlib('importlib.machinery') + +import os.path +import unittest + +from test.support import import_helper, REPO_ROOT, STDLIB_DIR + + +def resolve_stdlib_file(name, ispkg=False): + assert name + if ispkg: + return os.path.join(STDLIB_DIR, *name.split('.'), '__init__.py') + else: + return os.path.join(STDLIB_DIR, *name.split('.')) + '.py' + + +class FindSpecTests(abc.FinderTests): + + """Test finding frozen modules.""" + + def find(self, name, **kwargs): + finder = self.machinery.FrozenImporter + with import_helper.frozen_modules(): + return finder.find_spec(name, **kwargs) + + def check_basic(self, spec, name, ispkg=False): + self.assertEqual(spec.name, name) + self.assertIs(spec.loader, self.machinery.FrozenImporter) + self.assertEqual(spec.origin, 'frozen') + self.assertFalse(spec.has_location) + if ispkg: + self.assertIsNotNone(spec.submodule_search_locations) + else: + self.assertIsNone(spec.submodule_search_locations) + self.assertIsNotNone(spec.loader_state) + + def check_loader_state(self, spec, origname=None, filename=None): + if not filename: + if not origname: + origname = spec.name + filename = resolve_stdlib_file(origname) + + actual = dict(vars(spec.loader_state)) + + # Check the rest of spec.loader_state. + expected = dict( + origname=origname, + filename=filename if origname else None, + ) + self.assertDictEqual(actual, expected) + + def check_search_locations(self, spec): + """This is only called when testing packages.""" + missing = object() + filename = getattr(spec.loader_state, 'filename', missing) + origname = getattr(spec.loader_state, 'origname', None) + if not origname or filename is missing: + # We deal with this in check_loader_state(). + return + if not filename: + expected = [] + elif origname != spec.name and not origname.startswith('<'): + expected = [] + else: + expected = [os.path.dirname(filename)] + self.assertListEqual(spec.submodule_search_locations, expected) + + def test_module(self): + modules = [ + '__hello__', + '__phello__.spam', + '__phello__.ham.eggs', + ] + for name in modules: + with self.subTest(f'{name} -> {name}'): + spec = self.find(name) + self.check_basic(spec, name) + self.check_loader_state(spec) + modules = { + '__hello_alias__': '__hello__', + '_frozen_importlib': 'importlib._bootstrap', + } + for name, origname in modules.items(): + with self.subTest(f'{name} -> {origname}'): + spec = self.find(name) + self.check_basic(spec, name) + self.check_loader_state(spec, origname) + modules = [ + '__phello__.__init__', + '__phello__.ham.__init__', + ] + for name in modules: + origname = '<' + name.rpartition('.')[0] + filename = resolve_stdlib_file(name) + with self.subTest(f'{name} -> {origname}'): + spec = self.find(name) + self.check_basic(spec, name) + self.check_loader_state(spec, origname, filename) + modules = { + '__hello_only__': ('Tools', 'freeze', 'flag.py'), + } + for name, path in modules.items(): + origname = None + filename = os.path.join(REPO_ROOT, *path) + with self.subTest(f'{name} -> {filename}'): + spec = self.find(name) + self.check_basic(spec, name) + self.check_loader_state(spec, origname, filename) + + def test_package(self): + packages = [ + '__phello__', + '__phello__.ham', + ] + for name in packages: + filename = resolve_stdlib_file(name, ispkg=True) + with self.subTest(f'{name} -> {name}'): + spec = self.find(name) + self.check_basic(spec, name, ispkg=True) + self.check_loader_state(spec, name, filename) + self.check_search_locations(spec) + packages = { + '__phello_alias__': '__hello__', + } + for name, origname in packages.items(): + filename = resolve_stdlib_file(origname, ispkg=False) + with self.subTest(f'{name} -> {origname}'): + spec = self.find(name) + self.check_basic(spec, name, ispkg=True) + self.check_loader_state(spec, origname, filename) + self.check_search_locations(spec) + + # These are covered by test_module() and test_package(). + test_module_in_package = None + test_package_in_package = None + + # No easy way to test. + test_package_over_module = None + + def test_path_ignored(self): + for name in ('__hello__', '__phello__', '__phello__.spam'): + actual = self.find(name) + for path in (None, object(), '', 'eggs', [], [''], ['eggs']): + with self.subTest((name, path)): + spec = self.find(name, path=path) + self.assertEqual(spec, actual) + + def test_target_ignored(self): + imported = ('__hello__', '__phello__') + with import_helper.CleanImport(*imported, usefrozen=True): + import __hello__ as match + import __phello__ as nonmatch + name = '__hello__' + actual = self.find(name) + for target in (None, match, nonmatch, object(), 'not-a-module-object'): + with self.subTest(target): + spec = self.find(name, target=target) + self.assertEqual(spec, actual) + + def test_failure(self): + spec = self.find('') + self.assertIsNone(spec) + + def test_not_using_frozen(self): + finder = self.machinery.FrozenImporter + with import_helper.frozen_modules(enabled=False): + # both frozen and not frozen + spec1 = finder.find_spec('__hello__') + # only frozen + spec2 = finder.find_spec('__hello_only__') + self.assertIsNone(spec1) + self.assertIsNone(spec2) + + +(Frozen_FindSpecTests, + Source_FindSpecTests + ) = util.test_both(FindSpecTests, machinery=machinery) + + +if __name__ == '__main__': + unittest.main() diff --git a/stdlib/test/test_importlib/frozen/test_loader.py b/stdlib/test/test_importlib/frozen/test_loader.py new file mode 100644 index 000000000..c808bb732 --- /dev/null +++ b/stdlib/test/test_importlib/frozen/test_loader.py @@ -0,0 +1,172 @@ +from test.test_importlib import abc, util + +machinery = util.import_importlib('importlib.machinery') + +from test.support import captured_stdout, import_helper, STDLIB_DIR +import contextlib +import os.path +import types +import unittest +import warnings + + +@contextlib.contextmanager +def deprecated(): + with warnings.catch_warnings(): + warnings.simplefilter('ignore', DeprecationWarning) + yield + + +@contextlib.contextmanager +def fresh(name, *, oldapi=False): + with util.uncache(name): + with import_helper.frozen_modules(): + if oldapi: + with deprecated(): + yield + else: + yield + + +def resolve_stdlib_file(name, ispkg=False): + assert name + if ispkg: + return os.path.join(STDLIB_DIR, *name.split('.'), '__init__.py') + else: + return os.path.join(STDLIB_DIR, *name.split('.')) + '.py' + + +class ExecModuleTests(abc.LoaderTests): + + def exec_module(self, name, origname=None): + with import_helper.frozen_modules(): + is_package = self.machinery.FrozenImporter.is_package(name) + spec = self.machinery.ModuleSpec( + name, + self.machinery.FrozenImporter, + origin='frozen', + is_package=is_package, + loader_state=types.SimpleNamespace( + origname=origname or name, + filename=resolve_stdlib_file(origname or name, is_package), + ), + ) + module = types.ModuleType(name) + module.__spec__ = spec + assert not hasattr(module, 'initialized') + + with fresh(name): + self.machinery.FrozenImporter.exec_module(module) + with captured_stdout() as stdout: + module.main() + + self.assertTrue(module.initialized) + self.assertHasAttr(module, '__spec__') + self.assertEqual(module.__spec__.origin, 'frozen') + return module, stdout.getvalue() + + def test_module(self): + name = '__hello__' + module, output = self.exec_module(name) + check = {'__name__': name} + for attr, value in check.items(): + self.assertEqual(getattr(module, attr), value) + self.assertEqual(output, 'Hello world!\n') + self.assertHasAttr(module, '__spec__') + self.assertEqual(module.__spec__.loader_state.origname, name) + + def test_package(self): + name = '__phello__' + module, output = self.exec_module(name) + check = {'__name__': name} + for attr, value in check.items(): + attr_value = getattr(module, attr) + self.assertEqual(attr_value, value, + 'for {name}.{attr}, {given!r} != {expected!r}'.format( + name=name, attr=attr, given=attr_value, + expected=value)) + self.assertEqual(output, 'Hello world!\n') + self.assertEqual(module.__spec__.loader_state.origname, name) + + def test_lacking_parent(self): + name = '__phello__.spam' + with util.uncache('__phello__'): + module, output = self.exec_module(name) + check = {'__name__': name} + for attr, value in check.items(): + attr_value = getattr(module, attr) + self.assertEqual(attr_value, value, + 'for {name}.{attr}, {given} != {expected!r}'.format( + name=name, attr=attr, given=attr_value, + expected=value)) + self.assertEqual(output, 'Hello world!\n') + + def test_module_repr_indirect_through_spec(self): + name = '__hello__' + module, output = self.exec_module(name) + self.assertEqual(repr(module), + "") + + # No way to trigger an error in a frozen module. + test_state_after_failure = None + + def test_unloadable(self): + with import_helper.frozen_modules(): + assert self.machinery.FrozenImporter.find_spec('_not_real') is None + with self.assertRaises(ImportError) as cm: + self.exec_module('_not_real') + self.assertEqual(cm.exception.name, '_not_real') + + +(Frozen_ExecModuleTests, + Source_ExecModuleTests + ) = util.test_both(ExecModuleTests, machinery=machinery) + + +class InspectLoaderTests: + + """Tests for the InspectLoader methods for FrozenImporter.""" + + def test_get_code(self): + # Make sure that the code object is good. + name = '__hello__' + with import_helper.frozen_modules(): + code = self.machinery.FrozenImporter.get_code(name) + mod = types.ModuleType(name) + exec(code, mod.__dict__) + with captured_stdout() as stdout: + mod.main() + self.assertHasAttr(mod, 'initialized') + self.assertEqual(stdout.getvalue(), 'Hello world!\n') + + def test_get_source(self): + # Should always return None. + with import_helper.frozen_modules(): + result = self.machinery.FrozenImporter.get_source('__hello__') + self.assertIsNone(result) + + def test_is_package(self): + # Should be able to tell what is a package. + test_for = (('__hello__', False), ('__phello__', True), + ('__phello__.spam', False)) + for name, is_package in test_for: + with import_helper.frozen_modules(): + result = self.machinery.FrozenImporter.is_package(name) + self.assertEqual(bool(result), is_package) + + def test_failure(self): + # Raise ImportError for modules that are not frozen. + for meth_name in ('get_code', 'get_source', 'is_package'): + method = getattr(self.machinery.FrozenImporter, meth_name) + with self.assertRaises(ImportError) as cm: + with import_helper.frozen_modules(): + method('importlib') + self.assertEqual(cm.exception.name, 'importlib') + +(Frozen_ILTests, + Source_ILTests + ) = util.test_both(InspectLoaderTests, machinery=machinery) + + +if __name__ == '__main__': + unittest.main() diff --git a/stdlib/test/test_importlib/import_/__init__.py b/stdlib/test/test_importlib/import_/__init__.py new file mode 100644 index 000000000..4b16ecc31 --- /dev/null +++ b/stdlib/test/test_importlib/import_/__init__.py @@ -0,0 +1,5 @@ +import os +from test.support import load_package_tests + +def load_tests(*args): + return load_package_tests(os.path.dirname(__file__), *args) diff --git a/stdlib/test/test_importlib/import_/__main__.py b/stdlib/test/test_importlib/import_/__main__.py new file mode 100644 index 000000000..40a23a297 --- /dev/null +++ b/stdlib/test/test_importlib/import_/__main__.py @@ -0,0 +1,4 @@ +from . import load_tests +import unittest + +unittest.main() diff --git a/stdlib/test/test_importlib/import_/test___loader__.py b/stdlib/test/test_importlib/import_/test___loader__.py new file mode 100644 index 000000000..858b37eff --- /dev/null +++ b/stdlib/test/test_importlib/import_/test___loader__.py @@ -0,0 +1,34 @@ +from importlib import machinery +import unittest + +from test.test_importlib import util + + +class SpecLoaderMock: + + def find_spec(self, fullname, path=None, target=None): + return machinery.ModuleSpec(fullname, self) + + def create_module(self, spec): + return None + + def exec_module(self, module): + pass + + +class SpecLoaderAttributeTests: + + def test___loader__(self): + loader = SpecLoaderMock() + with util.uncache('blah'), util.import_state(meta_path=[loader]): + module = self.__import__('blah') + self.assertEqual(loader, module.__loader__) + + +(Frozen_SpecTests, + Source_SpecTests + ) = util.test_both(SpecLoaderAttributeTests, __import__=util.__import__) + + +if __name__ == '__main__': + unittest.main() diff --git a/stdlib/test/test_importlib/import_/test___package__.py b/stdlib/test/test_importlib/import_/test___package__.py new file mode 100644 index 000000000..7130c99a6 --- /dev/null +++ b/stdlib/test/test_importlib/import_/test___package__.py @@ -0,0 +1,152 @@ +"""PEP 366 ("Main module explicit relative imports") specifies the +semantics for the __package__ attribute on modules. This attribute is +used, when available, to detect which package a module belongs to (instead +of using the typical __path__/__name__ test). + +""" +import unittest +import warnings +from test.test_importlib import util + + +class Using__package__: + + """Use of __package__ supersedes the use of __name__/__path__ to calculate + what package a module belongs to. The basic algorithm is [__package__]:: + + def resolve_name(name, package, level): + level -= 1 + base = package.rsplit('.', level)[0] + return '{0}.{1}'.format(base, name) + + But since there is no guarantee that __package__ has been set (or not been + set to None [None]), there has to be a way to calculate the attribute's value + [__name__]:: + + def calc_package(caller_name, has___path__): + if has__path__: + return caller_name + else: + return caller_name.rsplit('.', 1)[0] + + Then the normal algorithm for relative name imports can proceed as if + __package__ had been set. + + """ + + def import_module(self, globals_): + with self.mock_modules('pkg.__init__', 'pkg.fake') as importer: + with util.import_state(meta_path=[importer]): + self.__import__('pkg.fake') + module = self.__import__('', + globals=globals_, + fromlist=['attr'], level=2) + return module + + def test_using___package__(self): + # [__package__] + module = self.import_module({'__package__': 'pkg.fake'}) + self.assertEqual(module.__name__, 'pkg') + + def test_using___name__(self): + # [__name__] + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + module = self.import_module({'__name__': 'pkg.fake', + '__path__': []}) + self.assertEqual(module.__name__, 'pkg') + + def test_warn_when_using___name__(self): + with self.assertWarns(ImportWarning): + self.import_module({'__name__': 'pkg.fake', '__path__': []}) + + def test_None_as___package__(self): + # [None] + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + module = self.import_module({ + '__name__': 'pkg.fake', '__path__': [], '__package__': None }) + self.assertEqual(module.__name__, 'pkg') + + def test_spec_fallback(self): + # If __package__ isn't defined, fall back on __spec__.parent. + module = self.import_module({'__spec__': FakeSpec('pkg.fake')}) + self.assertEqual(module.__name__, 'pkg') + + def test_warn_when_package_and_spec_disagree(self): + # Raise a DeprecationWarning if __package__ != __spec__.parent. + with self.assertWarns(DeprecationWarning): + self.import_module({'__package__': 'pkg.fake', + '__spec__': FakeSpec('pkg.fakefake')}) + + def test_bad__package__(self): + globals = {'__package__': ''} + with self.assertRaises(ModuleNotFoundError): + self.__import__('', globals, {}, ['relimport'], 1) + + def test_bunk__package__(self): + globals = {'__package__': 42} + with self.assertRaises(TypeError): + self.__import__('', globals, {}, ['relimport'], 1) + + +class FakeSpec: + def __init__(self, parent): + self.parent = parent + + +class Using__package__PEP451(Using__package__): + mock_modules = util.mock_spec + + +(Frozen_UsingPackagePEP451, + Source_UsingPackagePEP451 + ) = util.test_both(Using__package__PEP451, __import__=util.__import__) + + +class Setting__package__: + + """Because __package__ is a new feature, it is not always set by a loader. + Import will set it as needed to help with the transition to relying on + __package__. + + For a top-level module, __package__ is set to None [top-level]. For a + package __name__ is used for __package__ [package]. For submodules the + value is __name__.rsplit('.', 1)[0] [submodule]. + + """ + + __import__ = util.__import__['Source'] + + # [top-level] + def test_top_level(self): + with self.mock_modules('top_level') as mock: + with util.import_state(meta_path=[mock]): + del mock['top_level'].__package__ + module = self.__import__('top_level') + self.assertEqual(module.__package__, '') + + # [package] + def test_package(self): + with self.mock_modules('pkg.__init__') as mock: + with util.import_state(meta_path=[mock]): + del mock['pkg'].__package__ + module = self.__import__('pkg') + self.assertEqual(module.__package__, 'pkg') + + # [submodule] + def test_submodule(self): + with self.mock_modules('pkg.__init__', 'pkg.mod') as mock: + with util.import_state(meta_path=[mock]): + del mock['pkg.mod'].__package__ + pkg = self.__import__('pkg.mod') + module = getattr(pkg, 'mod') + self.assertEqual(module.__package__, 'pkg') + + +class Setting__package__PEP451(Setting__package__, unittest.TestCase): + mock_modules = util.mock_spec + + +if __name__ == '__main__': + unittest.main() diff --git a/stdlib/test/test_importlib/import_/test_api.py b/stdlib/test/test_importlib/import_/test_api.py new file mode 100644 index 000000000..d6ad590b3 --- /dev/null +++ b/stdlib/test/test_importlib/import_/test_api.py @@ -0,0 +1,145 @@ +from test.test_importlib import util + +from importlib import machinery +import sys +import types +import unittest +import warnings + +PKG_NAME = 'fine' +SUBMOD_NAME = 'fine.bogus' + + +class BadSpecFinderLoader: + @classmethod + def find_spec(cls, fullname, path=None, target=None): + if fullname == SUBMOD_NAME: + spec = machinery.ModuleSpec(fullname, cls) + return spec + + @staticmethod + def create_module(spec): + return None + + @staticmethod + def exec_module(module): + if module.__name__ == SUBMOD_NAME: + raise ImportError('I cannot be loaded!') + + +class BadLoaderFinder: + @classmethod + def load_module(cls, fullname): + if fullname == SUBMOD_NAME: + raise ImportError('I cannot be loaded!') + + +class APITest: + + """Test API-specific details for __import__ (e.g. raising the right + exception when passing in an int for the module name).""" + + def test_raises_ModuleNotFoundError(self): + with self.assertRaises(ModuleNotFoundError): + util.import_importlib('some module that does not exist') + + def test_name_requires_rparition(self): + # Raise TypeError if a non-string is passed in for the module name. + with self.assertRaises(TypeError): + self.__import__(42) + + def test_negative_level(self): + # Raise ValueError when a negative level is specified. + # PEP 328 did away with sys.module None entries and the ambiguity of + # absolute/relative imports. + with self.assertRaises(ValueError): + self.__import__('os', globals(), level=-1) + + def test_nonexistent_fromlist_entry(self): + # If something in fromlist doesn't exist, that's okay. + # issue15715 + mod = types.ModuleType(PKG_NAME) + mod.__path__ = ['XXX'] + with util.import_state(meta_path=[self.bad_finder_loader]): + with util.uncache(PKG_NAME): + sys.modules[PKG_NAME] = mod + self.__import__(PKG_NAME, fromlist=['not here']) + + def test_fromlist_load_error_propagates(self): + # If something in fromlist triggers an exception not related to not + # existing, let that exception propagate. + # issue15316 + mod = types.ModuleType(PKG_NAME) + mod.__path__ = ['XXX'] + with util.import_state(meta_path=[self.bad_finder_loader]): + with util.uncache(PKG_NAME): + sys.modules[PKG_NAME] = mod + with self.assertRaises(ImportError): + self.__import__(PKG_NAME, + fromlist=[SUBMOD_NAME.rpartition('.')[-1]]) + + def test_blocked_fromlist(self): + # If fromlist entry is None, let a ModuleNotFoundError propagate. + # issue31642 + mod = types.ModuleType(PKG_NAME) + mod.__path__ = [] + with util.import_state(meta_path=[self.bad_finder_loader]): + with util.uncache(PKG_NAME, SUBMOD_NAME): + sys.modules[PKG_NAME] = mod + sys.modules[SUBMOD_NAME] = None + with self.assertRaises(ModuleNotFoundError) as cm: + self.__import__(PKG_NAME, + fromlist=[SUBMOD_NAME.rpartition('.')[-1]]) + self.assertEqual(cm.exception.name, SUBMOD_NAME) + + +class OldAPITests(APITest): + bad_finder_loader = BadLoaderFinder + + def test_raises_ModuleNotFoundError(self): + with warnings.catch_warnings(): + warnings.simplefilter("ignore", ImportWarning) + super().test_raises_ModuleNotFoundError() + + def test_name_requires_rparition(self): + with warnings.catch_warnings(): + warnings.simplefilter("ignore", ImportWarning) + super().test_name_requires_rparition() + + def test_negative_level(self): + with warnings.catch_warnings(): + warnings.simplefilter("ignore", ImportWarning) + super().test_negative_level() + + def test_nonexistent_fromlist_entry(self): + with warnings.catch_warnings(): + warnings.simplefilter("ignore", ImportWarning) + super().test_nonexistent_fromlist_entry() + + def test_fromlist_load_error_propagates(self): + with warnings.catch_warnings(): + warnings.simplefilter("ignore", ImportWarning) + super().test_fromlist_load_error_propagates + + def test_blocked_fromlist(self): + with warnings.catch_warnings(): + warnings.simplefilter("ignore", ImportWarning) + super().test_blocked_fromlist() + + +(Frozen_OldAPITests, + Source_OldAPITests + ) = util.test_both(OldAPITests, __import__=util.__import__) + + +class SpecAPITests(APITest): + bad_finder_loader = BadSpecFinderLoader + + +(Frozen_SpecAPITests, + Source_SpecAPITests + ) = util.test_both(SpecAPITests, __import__=util.__import__) + + +if __name__ == '__main__': + unittest.main() diff --git a/stdlib/test/test_importlib/import_/test_caching.py b/stdlib/test/test_importlib/import_/test_caching.py new file mode 100644 index 000000000..718e7d041 --- /dev/null +++ b/stdlib/test/test_importlib/import_/test_caching.py @@ -0,0 +1,97 @@ +"""Test that sys.modules is used properly by import.""" +from test.test_importlib import util +import sys +from types import MethodType +import unittest +import warnings + + +class UseCache: + + """When it comes to sys.modules, import prefers it over anything else. + + Once a name has been resolved, sys.modules is checked to see if it contains + the module desired. If so, then it is returned [use cache]. If it is not + found, then the proper steps are taken to perform the import, but + sys.modules is still used to return the imported module (e.g., not what a + loader returns) [from cache on return]. This also applies to imports of + things contained within a package and thus get assigned as an attribute + [from cache to attribute] or pulled in thanks to a fromlist import + [from cache for fromlist]. But if sys.modules contains None then + ImportError is raised [None in cache]. + + """ + + def test_using_cache(self): + # [use cache] + module_to_use = "some module found!" + with util.uncache('some_module'): + sys.modules['some_module'] = module_to_use + module = self.__import__('some_module') + self.assertEqual(id(module_to_use), id(module)) + + def test_None_in_cache(self): + #[None in cache] + name = 'using_None' + with util.uncache(name): + sys.modules[name] = None + with self.assertRaises(ImportError) as cm: + self.__import__(name) + self.assertEqual(cm.exception.name, name) + + +(Frozen_UseCache, + Source_UseCache + ) = util.test_both(UseCache, __import__=util.__import__) + + +class ImportlibUseCache(UseCache, unittest.TestCase): + + # Pertinent only to PEP 302; exec_module() doesn't return a module. + + __import__ = util.__import__['Source'] + + def create_mock(self, *names, return_=None): + mock = util.mock_spec(*names) + original_spec = mock.find_spec + def find_spec(self, fullname, path, target=None): + return original_spec(fullname) + mock.find_spec = MethodType(find_spec, mock) + return mock + + # __import__ inconsistent between loaders and built-in import when it comes + # to when to use the module in sys.modules and when not to. + def test_using_cache_after_loader(self): + # [from cache on return] + with warnings.catch_warnings(): + warnings.simplefilter("ignore", ImportWarning) + with self.create_mock('module') as mock: + with util.import_state(meta_path=[mock]): + module = self.__import__('module') + self.assertEqual(id(module), id(sys.modules['module'])) + + # See test_using_cache_after_loader() for reasoning. + def test_using_cache_for_assigning_to_attribute(self): + # [from cache to attribute] + with warnings.catch_warnings(): + warnings.simplefilter("ignore", ImportWarning) + with self.create_mock('pkg.__init__', 'pkg.module') as importer: + with util.import_state(meta_path=[importer]): + module = self.__import__('pkg.module') + self.assertHasAttr(module, 'module') + self.assertEqual(id(module.module), + id(sys.modules['pkg.module'])) + + # See test_using_cache_after_loader() for reasoning. + def test_using_cache_for_fromlist(self): + # [from cache for fromlist] + with self.create_mock('pkg.__init__', 'pkg.module') as importer: + with util.import_state(meta_path=[importer]): + module = self.__import__('pkg', fromlist=['module']) + self.assertHasAttr(module, 'module') + self.assertEqual(id(module.module), + id(sys.modules['pkg.module'])) + + +if __name__ == '__main__': + unittest.main() diff --git a/stdlib/test/test_importlib/import_/test_fromlist.py b/stdlib/test/test_importlib/import_/test_fromlist.py new file mode 100644 index 000000000..feccc7be0 --- /dev/null +++ b/stdlib/test/test_importlib/import_/test_fromlist.py @@ -0,0 +1,175 @@ +"""Test that the semantics relating to the 'fromlist' argument are correct.""" +from test.test_importlib import util +import warnings +import unittest + + +class ReturnValue: + + """The use of fromlist influences what import returns. + + If direct ``import ...`` statement is used, the root module or package is + returned [import return]. But if fromlist is set, then the specified module + is actually returned (whether it is a relative import or not) + [from return]. + + """ + + def test_return_from_import(self): + # [import return] + with util.mock_spec('pkg.__init__', 'pkg.module') as importer: + with util.import_state(meta_path=[importer]): + module = self.__import__('pkg.module') + self.assertEqual(module.__name__, 'pkg') + + def test_return_from_from_import(self): + # [from return] + with util.mock_spec('pkg.__init__', 'pkg.module')as importer: + with util.import_state(meta_path=[importer]): + module = self.__import__('pkg.module', fromlist=['attr']) + self.assertEqual(module.__name__, 'pkg.module') + + +(Frozen_ReturnValue, + Source_ReturnValue + ) = util.test_both(ReturnValue, __import__=util.__import__) + + +class HandlingFromlist: + + """Using fromlist triggers different actions based on what is being asked + of it. + + If fromlist specifies an object on a module, nothing special happens + [object case]. This is even true if the object does not exist [bad object]. + + If a package is being imported, then what is listed in fromlist may be + treated as a module to be imported [module]. And this extends to what is + contained in __all__ when '*' is imported [using *]. And '*' does not need + to be the only name in the fromlist [using * with others]. + + """ + + def test_object(self): + # [object case] + with util.mock_spec('module') as importer: + with util.import_state(meta_path=[importer]): + module = self.__import__('module', fromlist=['attr']) + self.assertEqual(module.__name__, 'module') + + def test_nonexistent_object(self): + # [bad object] + with util.mock_spec('module') as importer: + with util.import_state(meta_path=[importer]): + module = self.__import__('module', fromlist=['non_existent']) + self.assertEqual(module.__name__, 'module') + self.assertNotHasAttr(module, 'non_existent') + + def test_module_from_package(self): + # [module] + with util.mock_spec('pkg.__init__', 'pkg.module') as importer: + with util.import_state(meta_path=[importer]): + module = self.__import__('pkg', fromlist=['module']) + self.assertEqual(module.__name__, 'pkg') + self.assertHasAttr(module, 'module') + self.assertEqual(module.module.__name__, 'pkg.module') + + def test_nonexistent_from_package(self): + with util.mock_spec('pkg.__init__') as importer: + with util.import_state(meta_path=[importer]): + module = self.__import__('pkg', fromlist=['non_existent']) + self.assertEqual(module.__name__, 'pkg') + self.assertNotHasAttr(module, 'non_existent') + + def test_module_from_package_triggers_ModuleNotFoundError(self): + # If a submodule causes an ModuleNotFoundError because it tries + # to import a module which doesn't exist, that should let the + # ModuleNotFoundError propagate. + def module_code(): + import i_do_not_exist + with util.mock_spec('pkg.__init__', 'pkg.mod', + module_code={'pkg.mod': module_code}) as importer: + with util.import_state(meta_path=[importer]): + with self.assertRaises(ModuleNotFoundError) as exc: + self.__import__('pkg', fromlist=['mod']) + self.assertEqual('i_do_not_exist', exc.exception.name) + + def test_empty_string(self): + with util.mock_spec('pkg.__init__', 'pkg.mod') as importer: + with util.import_state(meta_path=[importer]): + module = self.__import__('pkg.mod', fromlist=['']) + self.assertEqual(module.__name__, 'pkg.mod') + + def basic_star_test(self, fromlist=['*']): + # [using *] + with util.mock_spec('pkg.__init__', 'pkg.module') as mock: + with util.import_state(meta_path=[mock]): + mock['pkg'].__all__ = ['module'] + module = self.__import__('pkg', fromlist=fromlist) + self.assertEqual(module.__name__, 'pkg') + self.assertHasAttr(module, 'module') + self.assertEqual(module.module.__name__, 'pkg.module') + + def test_using_star(self): + # [using *] + self.basic_star_test() + + def test_fromlist_as_tuple(self): + self.basic_star_test(('*',)) + + def test_star_with_others(self): + # [using * with others] + context = util.mock_spec('pkg.__init__', 'pkg.module1', 'pkg.module2') + with context as mock: + with util.import_state(meta_path=[mock]): + mock['pkg'].__all__ = ['module1'] + module = self.__import__('pkg', fromlist=['module2', '*']) + self.assertEqual(module.__name__, 'pkg') + self.assertHasAttr(module, 'module1') + self.assertHasAttr(module, 'module2') + self.assertEqual(module.module1.__name__, 'pkg.module1') + self.assertEqual(module.module2.__name__, 'pkg.module2') + + def test_nonexistent_in_all(self): + with util.mock_spec('pkg.__init__') as importer: + with util.import_state(meta_path=[importer]): + importer['pkg'].__all__ = ['non_existent'] + module = self.__import__('pkg', fromlist=['*']) + self.assertEqual(module.__name__, 'pkg') + self.assertNotHasAttr(module, 'non_existent') + + def test_star_in_all(self): + with util.mock_spec('pkg.__init__') as importer: + with util.import_state(meta_path=[importer]): + importer['pkg'].__all__ = ['*'] + module = self.__import__('pkg', fromlist=['*']) + self.assertEqual(module.__name__, 'pkg') + self.assertNotHasAttr(module, '*') + + def test_invalid_type(self): + with util.mock_spec('pkg.__init__') as importer: + with util.import_state(meta_path=[importer]), \ + warnings.catch_warnings(): + warnings.simplefilter('error', BytesWarning) + with self.assertRaisesRegex(TypeError, r'\bfrom\b'): + self.__import__('pkg', fromlist=[b'attr']) + with self.assertRaisesRegex(TypeError, r'\bfrom\b'): + self.__import__('pkg', fromlist=iter([b'attr'])) + + def test_invalid_type_in_all(self): + with util.mock_spec('pkg.__init__') as importer: + with util.import_state(meta_path=[importer]), \ + warnings.catch_warnings(): + warnings.simplefilter('error', BytesWarning) + importer['pkg'].__all__ = [b'attr'] + with self.assertRaisesRegex(TypeError, r'\bpkg\.__all__\b'): + self.__import__('pkg', fromlist=['*']) + + +(Frozen_FromList, + Source_FromList + ) = util.test_both(HandlingFromlist, __import__=util.__import__) + + +if __name__ == '__main__': + unittest.main() diff --git a/stdlib/test/test_importlib/import_/test_helpers.py b/stdlib/test/test_importlib/import_/test_helpers.py new file mode 100644 index 000000000..550f88d1d --- /dev/null +++ b/stdlib/test/test_importlib/import_/test_helpers.py @@ -0,0 +1,184 @@ +"""Tests for helper functions used by import.c .""" + +from importlib import _bootstrap_external, machinery +import os.path +from types import ModuleType, SimpleNamespace +import unittest +import warnings + +from .. import util + + +class FixUpModuleTests: + + def test_no_loader_but_spec(self): + loader = object() + name = "hello" + path = "hello.py" + spec = machinery.ModuleSpec(name, loader) + ns = {"__spec__": spec} + _bootstrap_external._fix_up_module(ns, name, path) + + expected = {"__spec__": spec, "__loader__": loader, "__file__": path, + "__cached__": None} + self.assertEqual(ns, expected) + + def test_no_loader_no_spec_but_sourceless(self): + name = "hello" + path = "hello.py" + ns = {} + _bootstrap_external._fix_up_module(ns, name, path, path) + + expected = {"__file__": path, "__cached__": path} + + for key, val in expected.items(): + with self.subTest(f"{key}: {val}"): + self.assertEqual(ns[key], val) + + spec = ns["__spec__"] + self.assertIsInstance(spec, machinery.ModuleSpec) + self.assertEqual(spec.name, name) + self.assertEqual(spec.origin, os.path.abspath(path)) + self.assertEqual(spec.cached, os.path.abspath(path)) + self.assertIsInstance(spec.loader, machinery.SourcelessFileLoader) + self.assertEqual(spec.loader.name, name) + self.assertEqual(spec.loader.path, path) + self.assertEqual(spec.loader, ns["__loader__"]) + + def test_no_loader_no_spec_but_source(self): + name = "hello" + path = "hello.py" + ns = {} + _bootstrap_external._fix_up_module(ns, name, path) + + expected = {"__file__": path, "__cached__": None} + + for key, val in expected.items(): + with self.subTest(f"{key}: {val}"): + self.assertEqual(ns[key], val) + + spec = ns["__spec__"] + self.assertIsInstance(spec, machinery.ModuleSpec) + self.assertEqual(spec.name, name) + self.assertEqual(spec.origin, os.path.abspath(path)) + self.assertIsInstance(spec.loader, machinery.SourceFileLoader) + self.assertEqual(spec.loader.name, name) + self.assertEqual(spec.loader.path, path) + self.assertEqual(spec.loader, ns["__loader__"]) + + +FrozenFixUpModuleTests, SourceFixUpModuleTests = util.test_both(FixUpModuleTests) + + +class TestBlessMyLoader(unittest.TestCase): + # GH#86298 is part of the migration away from module attributes and toward + # __spec__ attributes. There are several cases to test here. This will + # have to change in Python 3.14 when we actually remove/ignore __loader__ + # in favor of requiring __spec__.loader. + + def test_gh86298_no_loader_and_no_spec(self): + bar = ModuleType('bar') + del bar.__loader__ + del bar.__spec__ + # 2022-10-06(warsaw): For backward compatibility with the + # implementation in _warnings.c, this can't raise an + # AttributeError. See _bless_my_loader() in _bootstrap_external.py + # If working with a module: + ## self.assertRaises( + ## AttributeError, _bootstrap_external._bless_my_loader, + ## bar.__dict__) + self.assertIsNone(_bootstrap_external._bless_my_loader(bar.__dict__)) + + def test_gh86298_loader_is_none_and_no_spec(self): + bar = ModuleType('bar') + bar.__loader__ = None + del bar.__spec__ + # 2022-10-06(warsaw): For backward compatibility with the + # implementation in _warnings.c, this can't raise an + # AttributeError. See _bless_my_loader() in _bootstrap_external.py + # If working with a module: + ## self.assertRaises( + ## AttributeError, _bootstrap_external._bless_my_loader, + ## bar.__dict__) + self.assertIsNone(_bootstrap_external._bless_my_loader(bar.__dict__)) + + def test_gh86298_no_loader_and_spec_is_none(self): + bar = ModuleType('bar') + del bar.__loader__ + bar.__spec__ = None + self.assertRaises( + ValueError, + _bootstrap_external._bless_my_loader, bar.__dict__) + + def test_gh86298_loader_is_none_and_spec_is_none(self): + bar = ModuleType('bar') + bar.__loader__ = None + bar.__spec__ = None + self.assertRaises( + ValueError, + _bootstrap_external._bless_my_loader, bar.__dict__) + + def test_gh86298_loader_is_none_and_spec_loader_is_none(self): + bar = ModuleType('bar') + bar.__loader__ = None + bar.__spec__ = SimpleNamespace(loader=None) + self.assertRaises( + ValueError, + _bootstrap_external._bless_my_loader, bar.__dict__) + + def test_gh86298_no_spec(self): + bar = ModuleType('bar') + bar.__loader__ = object() + del bar.__spec__ + with warnings.catch_warnings(): + self.assertWarns( + DeprecationWarning, + _bootstrap_external._bless_my_loader, bar.__dict__) + + def test_gh86298_spec_is_none(self): + bar = ModuleType('bar') + bar.__loader__ = object() + bar.__spec__ = None + with warnings.catch_warnings(): + self.assertWarns( + DeprecationWarning, + _bootstrap_external._bless_my_loader, bar.__dict__) + + def test_gh86298_no_spec_loader(self): + bar = ModuleType('bar') + bar.__loader__ = object() + bar.__spec__ = SimpleNamespace() + with warnings.catch_warnings(): + self.assertWarns( + DeprecationWarning, + _bootstrap_external._bless_my_loader, bar.__dict__) + + def test_gh86298_loader_and_spec_loader_disagree(self): + bar = ModuleType('bar') + bar.__loader__ = object() + bar.__spec__ = SimpleNamespace(loader=object()) + with warnings.catch_warnings(): + self.assertWarns( + DeprecationWarning, + _bootstrap_external._bless_my_loader, bar.__dict__) + + def test_gh86298_no_loader_and_no_spec_loader(self): + bar = ModuleType('bar') + del bar.__loader__ + bar.__spec__ = SimpleNamespace() + self.assertRaises( + AttributeError, + _bootstrap_external._bless_my_loader, bar.__dict__) + + def test_gh86298_no_loader_with_spec_loader_okay(self): + bar = ModuleType('bar') + del bar.__loader__ + loader = object() + bar.__spec__ = SimpleNamespace(loader=loader) + self.assertEqual( + _bootstrap_external._bless_my_loader(bar.__dict__), + loader) + + +if __name__ == "__main__": + unittest.main() diff --git a/stdlib/test/test_importlib/import_/test_meta_path.py b/stdlib/test/test_importlib/import_/test_meta_path.py new file mode 100644 index 000000000..4c00f6068 --- /dev/null +++ b/stdlib/test/test_importlib/import_/test_meta_path.py @@ -0,0 +1,127 @@ +from test.test_importlib import util +import importlib._bootstrap +import sys +from types import MethodType +import unittest +import warnings + + +class CallingOrder: + + """Calls to the importers on sys.meta_path happen in order that they are + specified in the sequence, starting with the first importer + [first called], and then continuing on down until one is found that doesn't + return None [continuing].""" + + + def test_first_called(self): + # [first called] + mod = 'top_level' + with util.mock_spec(mod) as first, util.mock_spec(mod) as second: + with util.import_state(meta_path=[first, second]): + self.assertIs(self.__import__(mod), first.modules[mod]) + + def test_continuing(self): + # [continuing] + mod_name = 'for_real' + with util.mock_spec('nonexistent') as first, \ + util.mock_spec(mod_name) as second: + first.find_spec = lambda self, fullname, path=None, parent=None: None + with util.import_state(meta_path=[first, second]): + self.assertIs(self.__import__(mod_name), second.modules[mod_name]) + + def test_empty(self): + # Raise an ImportWarning if sys.meta_path is empty. + module_name = 'nothing' + try: + del sys.modules[module_name] + except KeyError: + pass + with util.import_state(meta_path=[]): + with warnings.catch_warnings(record=True) as w: + warnings.simplefilter('always') + self.assertIsNone(importlib._bootstrap._find_spec('nothing', + None)) + self.assertEqual(len(w), 1) + self.assertIsSubclass(w[-1].category, ImportWarning) + + +(Frozen_CallingOrder, + Source_CallingOrder + ) = util.test_both(CallingOrder, __import__=util.__import__) + + +class CallSignature: + + """If there is no __path__ entry on the parent module, then 'path' is None + [no path]. Otherwise, the value for __path__ is passed in for the 'path' + argument [path set].""" + + def log_finder(self, importer): + fxn = getattr(importer, self.finder_name) + log = [] + def wrapper(self, *args, **kwargs): + log.append([args, kwargs]) + return fxn(*args, **kwargs) + return log, wrapper + + def test_no_path(self): + # [no path] + mod_name = 'top_level' + assert '.' not in mod_name + with self.mock_modules(mod_name) as importer: + log, wrapped_call = self.log_finder(importer) + setattr(importer, self.finder_name, MethodType(wrapped_call, importer)) + with util.import_state(meta_path=[importer]): + self.__import__(mod_name) + assert len(log) == 1 + args = log[0][0] + # Assuming all arguments are positional. + self.assertEqual(args[0], mod_name) + self.assertIsNone(args[1]) + + def test_with_path(self): + # [path set] + pkg_name = 'pkg' + mod_name = pkg_name + '.module' + path = [42] + assert '.' in mod_name + with self.mock_modules(pkg_name+'.__init__', mod_name) as importer: + importer.modules[pkg_name].__path__ = path + log, wrapped_call = self.log_finder(importer) + setattr(importer, self.finder_name, MethodType(wrapped_call, importer)) + with util.import_state(meta_path=[importer]): + self.__import__(mod_name) + assert len(log) == 2 + args = log[1][0] + kwargs = log[1][1] + # Assuming all arguments are positional. + self.assertFalse(kwargs) + self.assertEqual(args[0], mod_name) + self.assertIs(args[1], path) + +class CallSignoreSuppressImportWarning(CallSignature): + + def test_no_path(self): + with warnings.catch_warnings(): + warnings.simplefilter("ignore", ImportWarning) + super().test_no_path() + + def test_with_path(self): + with warnings.catch_warnings(): + warnings.simplefilter("ignore", ImportWarning) + super().test_no_path() + + +class CallSignaturePEP451(CallSignature): + mock_modules = util.mock_spec + finder_name = 'find_spec' + + +(Frozen_CallSignaturePEP451, + Source_CallSignaturePEP451 + ) = util.test_both(CallSignaturePEP451, __import__=util.__import__) + + +if __name__ == '__main__': + unittest.main() diff --git a/stdlib/test/test_importlib/import_/test_packages.py b/stdlib/test/test_importlib/import_/test_packages.py new file mode 100644 index 000000000..0c29d6083 --- /dev/null +++ b/stdlib/test/test_importlib/import_/test_packages.py @@ -0,0 +1,110 @@ +from test.test_importlib import util +import sys +import unittest +from test.support import import_helper + + +class ParentModuleTests: + + """Importing a submodule should import the parent modules.""" + + def test_import_parent(self): + with util.mock_spec('pkg.__init__', 'pkg.module') as mock: + with util.import_state(meta_path=[mock]): + module = self.__import__('pkg.module') + self.assertIn('pkg', sys.modules) + + def test_bad_parent(self): + with util.mock_spec('pkg.module') as mock: + with util.import_state(meta_path=[mock]): + with self.assertRaises(ImportError) as cm: + self.__import__('pkg.module') + self.assertEqual(cm.exception.name, 'pkg') + + def test_raising_parent_after_importing_child(self): + def __init__(): + import pkg.module + 1/0 + mock = util.mock_spec('pkg.__init__', 'pkg.module', + module_code={'pkg': __init__}) + with mock: + with util.import_state(meta_path=[mock]): + with self.assertRaises(ZeroDivisionError): + self.__import__('pkg') + self.assertNotIn('pkg', sys.modules) + self.assertIn('pkg.module', sys.modules) + with self.assertRaises(ZeroDivisionError): + self.__import__('pkg.module') + self.assertNotIn('pkg', sys.modules) + self.assertIn('pkg.module', sys.modules) + + def test_raising_parent_after_relative_importing_child(self): + def __init__(): + from . import module + 1/0 + mock = util.mock_spec('pkg.__init__', 'pkg.module', + module_code={'pkg': __init__}) + with mock: + with util.import_state(meta_path=[mock]): + with self.assertRaises((ZeroDivisionError, ImportError)): + # This raises ImportError on the "from . import module" + # line, not sure why. + self.__import__('pkg') + self.assertNotIn('pkg', sys.modules) + with self.assertRaises((ZeroDivisionError, ImportError)): + self.__import__('pkg.module') + self.assertNotIn('pkg', sys.modules) + # XXX False + #self.assertIn('pkg.module', sys.modules) + + def test_raising_parent_after_double_relative_importing_child(self): + def __init__(): + from ..subpkg import module + 1/0 + mock = util.mock_spec('pkg.__init__', 'pkg.subpkg.__init__', + 'pkg.subpkg.module', + module_code={'pkg.subpkg': __init__}) + with mock: + with util.import_state(meta_path=[mock]): + with self.assertRaises((ZeroDivisionError, ImportError)): + # This raises ImportError on the "from ..subpkg import module" + # line, not sure why. + self.__import__('pkg.subpkg') + self.assertNotIn('pkg.subpkg', sys.modules) + with self.assertRaises((ZeroDivisionError, ImportError)): + self.__import__('pkg.subpkg.module') + self.assertNotIn('pkg.subpkg', sys.modules) + # XXX False + #self.assertIn('pkg.subpkg.module', sys.modules) + + def test_module_not_package(self): + # Try to import a submodule from a non-package should raise ImportError. + assert not hasattr(sys, '__path__') + with self.assertRaises(ImportError) as cm: + self.__import__('sys.no_submodules_here') + self.assertEqual(cm.exception.name, 'sys.no_submodules_here') + + def test_module_not_package_but_side_effects(self): + # If a module injects something into sys.modules as a side-effect, then + # pick up on that fact. + name = 'mod' + subname = name + '.b' + def module_injection(): + sys.modules[subname] = 'total bunk' + mock_spec = util.mock_spec('mod', + module_code={'mod': module_injection}) + with mock_spec as mock: + with util.import_state(meta_path=[mock]): + try: + submodule = self.__import__(subname) + finally: + import_helper.unload(subname) + + +(Frozen_ParentTests, + Source_ParentTests + ) = util.test_both(ParentModuleTests, __import__=util.__import__) + + +if __name__ == '__main__': + unittest.main() diff --git a/stdlib/test/test_importlib/import_/test_path.py b/stdlib/test/test_importlib/import_/test_path.py new file mode 100644 index 000000000..79e0bdca9 --- /dev/null +++ b/stdlib/test/test_importlib/import_/test_path.py @@ -0,0 +1,269 @@ +from test.support import os_helper +from test.test_importlib import util + +importlib = util.import_importlib('importlib') +machinery = util.import_importlib('importlib.machinery') + +import os +import sys +import tempfile +from types import ModuleType +import unittest +import warnings +import zipimport + + +class FinderTests: + + """Tests for PathFinder.""" + + find = None + check_found = None + + def test_failure(self): + # Test None returned upon not finding a suitable loader. + module = '' + with util.import_state(): + self.assertIsNone(self.find(module)) + + def test_sys_path(self): + # Test that sys.path is used when 'path' is None. + # Implicitly tests that sys.path_importer_cache is used. + module = '' + path = '' + importer = util.mock_spec(module) + with util.import_state(path_importer_cache={path: importer}, + path=[path]): + found = self.find(module) + self.check_found(found, importer) + + def test_path(self): + # Test that 'path' is used when set. + # Implicitly tests that sys.path_importer_cache is used. + module = '' + path = '' + importer = util.mock_spec(module) + with util.import_state(path_importer_cache={path: importer}): + found = self.find(module, [path]) + self.check_found(found, importer) + + def test_empty_list(self): + # An empty list should not count as asking for sys.path. + module = 'module' + path = '' + importer = util.mock_spec(module) + with util.import_state(path_importer_cache={path: importer}, + path=[path]): + self.assertIsNone(self.find('module', [])) + + def test_path_hooks(self): + # Test that sys.path_hooks is used. + # Test that sys.path_importer_cache is set. + module = '' + path = '' + importer = util.mock_spec(module) + hook = util.mock_path_hook(path, importer=importer) + with util.import_state(path_hooks=[hook]): + found = self.find(module, [path]) + self.check_found(found, importer) + self.assertIn(path, sys.path_importer_cache) + self.assertIs(sys.path_importer_cache[path], importer) + + def test_empty_path_hooks(self): + # Test that if sys.path_hooks is empty a warning is raised, + # sys.path_importer_cache gets None set, and PathFinder returns None. + path_entry = 'bogus_path' + with util.import_state(path_importer_cache={}, path_hooks=[], + path=[path_entry]): + with warnings.catch_warnings(record=True) as w: + warnings.simplefilter('always', ImportWarning) + warnings.simplefilter('ignore', DeprecationWarning) + self.assertIsNone(self.find('os')) + self.assertIsNone(sys.path_importer_cache[path_entry]) + self.assertEqual(len(w), 1) + self.assertIsSubclass(w[-1].category, ImportWarning) + + def test_path_importer_cache_empty_string(self): + # The empty string should create a finder using the cwd. + path = '' + module = '' + importer = util.mock_spec(module) + hook = util.mock_path_hook(os.getcwd(), importer=importer) + with util.import_state(path=[path], path_hooks=[hook]): + found = self.find(module) + self.check_found(found, importer) + self.assertIn(os.getcwd(), sys.path_importer_cache) + + def test_None_on_sys_path(self): + # Putting None in sys.path[0] caused an import regression from Python + # 3.2: http://bugs.python.org/issue16514 + new_path = sys.path[:] + new_path.insert(0, None) + new_path_importer_cache = sys.path_importer_cache.copy() + new_path_importer_cache.pop(None, None) + new_path_hooks = [zipimport.zipimporter, + self.machinery.FileFinder.path_hook( + *self.importlib._bootstrap_external._get_supported_file_loaders())] + missing = object() + email = sys.modules.pop('email', missing) + try: + with util.import_state(meta_path=sys.meta_path[:], + path=new_path, + path_importer_cache=new_path_importer_cache, + path_hooks=new_path_hooks): + module = self.importlib.import_module('email') + self.assertIsInstance(module, ModuleType) + finally: + if email is not missing: + sys.modules['email'] = email + + def test_finder_with_find_spec(self): + class TestFinder: + spec = None + def find_spec(self, fullname, target=None): + return self.spec + path = 'testing path' + with util.import_state(path_importer_cache={path: TestFinder()}): + self.assertIsNone( + self.machinery.PathFinder.find_spec('whatever', [path])) + success_finder = TestFinder() + success_finder.spec = self.machinery.ModuleSpec('whatever', __loader__) + with util.import_state(path_importer_cache={path: success_finder}): + got = self.machinery.PathFinder.find_spec('whatever', [path]) + self.assertEqual(got, success_finder.spec) + + def test_deleted_cwd(self): + # Issue #22834 + old_dir = os.getcwd() + self.addCleanup(os.chdir, old_dir) + new_dir = tempfile.mkdtemp() + try: + os.chdir(new_dir) + try: + os.rmdir(new_dir) + except OSError: + # EINVAL on Solaris, EBUSY on AIX, ENOTEMPTY on Windows + self.skipTest("platform does not allow " + "the deletion of the cwd") + except: + os.chdir(old_dir) + os.rmdir(new_dir) + raise + + with util.import_state(path=['']): + # Do not want FileNotFoundError raised. + self.assertIsNone(self.machinery.PathFinder.find_spec('whatever')) + + @os_helper.skip_unless_working_chmod + def test_permission_error_cwd(self): + # gh-115911: Test that an unreadable CWD does not break imports, in + # particular during early stages of interpreter startup. + + def noop_hook(*args): + raise ImportError + + with ( + os_helper.temp_dir() as new_dir, + os_helper.save_mode(new_dir), + os_helper.change_cwd(new_dir), + util.import_state(path=[''], path_hooks=[noop_hook]), + ): + # chmod() is done here (inside the 'with' block) because the order + # of teardown operations cannot be the reverse of setup order. See + # https://github.com/python/cpython/pull/116131#discussion_r1739649390 + try: + os.chmod(new_dir, 0o000) + except OSError: + self.skipTest("platform does not allow " + "changing mode of the cwd") + + # Do not want PermissionError raised. + self.assertIsNone(self.machinery.PathFinder.find_spec('whatever')) + + def test_invalidate_caches_finders(self): + # Finders with an invalidate_caches() method have it called. + class FakeFinder: + def __init__(self): + self.called = False + + def invalidate_caches(self): + self.called = True + + key = os.path.abspath('finder_to_invalidate') + cache = {'leave_alone': object(), key: FakeFinder()} + with util.import_state(path_importer_cache=cache): + self.machinery.PathFinder.invalidate_caches() + self.assertTrue(cache[key].called) + + def test_invalidate_caches_clear_out_None(self): + # Clear out None in sys.path_importer_cache() when invalidating caches. + cache = {'clear_out': None} + with util.import_state(path_importer_cache=cache): + self.machinery.PathFinder.invalidate_caches() + self.assertEqual(len(cache), 0) + + def test_invalidate_caches_clear_out_relative_path(self): + class FakeFinder: + def invalidate_caches(self): + pass + + cache = {'relative_path': FakeFinder()} + with util.import_state(path_importer_cache=cache): + self.machinery.PathFinder.invalidate_caches() + self.assertEqual(cache, {}) + + +class FindModuleTests(FinderTests): + def find(self, *args, **kwargs): + spec = self.machinery.PathFinder.find_spec(*args, **kwargs) + return None if spec is None else spec.loader + + def check_found(self, found, importer): + self.assertIs(found, importer) + + +(Frozen_FindModuleTests, + Source_FindModuleTests +) = util.test_both(FindModuleTests, importlib=importlib, machinery=machinery) + + +class FindSpecTests(FinderTests): + def find(self, *args, **kwargs): + return self.machinery.PathFinder.find_spec(*args, **kwargs) + def check_found(self, found, importer): + self.assertIs(found.loader, importer) + + +(Frozen_FindSpecTests, + Source_FindSpecTests + ) = util.test_both(FindSpecTests, importlib=importlib, machinery=machinery) + + +class PathEntryFinderTests: + + def test_finder_with_failing_find_spec(self): + class Finder: + path_location = 'test_finder_with_find_spec' + def __init__(self, path): + if path != self.path_location: + raise ImportError + + @staticmethod + def find_spec(fullname, target=None): + return None + + + with util.import_state(path=[Finder.path_location]+sys.path[:], + path_hooks=[Finder]): + with warnings.catch_warnings(): + warnings.simplefilter("ignore", ImportWarning) + self.machinery.PathFinder.find_spec('importlib') + + +(Frozen_PEFTests, + Source_PEFTests + ) = util.test_both(PathEntryFinderTests, machinery=machinery) + + +if __name__ == '__main__': + unittest.main() diff --git a/stdlib/test/test_importlib/import_/test_relative_imports.py b/stdlib/test/test_importlib/import_/test_relative_imports.py new file mode 100644 index 000000000..1549cbe96 --- /dev/null +++ b/stdlib/test/test_importlib/import_/test_relative_imports.py @@ -0,0 +1,248 @@ +"""Test relative imports (PEP 328).""" +from test.test_importlib import util +import unittest +import warnings + + +class RelativeImports: + + """PEP 328 introduced relative imports. This allows for imports to occur + from within a package without having to specify the actual package name. + + A simple example is to import another module within the same package + [module from module]:: + + # From pkg.mod1 with pkg.mod2 being a module. + from . import mod2 + + This also works for getting an attribute from a module that is specified + in a relative fashion [attr from module]:: + + # From pkg.mod1. + from .mod2 import attr + + But this is in no way restricted to working between modules; it works + from [package to module],:: + + # From pkg, importing pkg.module which is a module. + from . import module + + [module to package],:: + + # Pull attr from pkg, called from pkg.module which is a module. + from . import attr + + and [package to package]:: + + # From pkg.subpkg1 (both pkg.subpkg[1,2] are packages). + from .. import subpkg2 + + The number of dots used is in no way restricted [deep import]:: + + # Import pkg.attr from pkg.pkg1.pkg2.pkg3.pkg4.pkg5. + from ...... import attr + + To prevent someone from accessing code that is outside of a package, one + cannot reach the location containing the root package itself:: + + # From pkg.__init__ [too high from package] + from .. import top_level + + # From pkg.module [too high from module] + from .. import top_level + + Relative imports are the only type of import that allow for an empty + module name for an import [empty name]. + + """ + + def relative_import_test(self, create, globals_, callback): + """Abstract out boilerplace for setting up for an import test.""" + uncache_names = [] + for name in create: + if not name.endswith('.__init__'): + uncache_names.append(name) + else: + uncache_names.append(name[:-len('.__init__')]) + with util.mock_spec(*create) as importer: + with util.import_state(meta_path=[importer]): + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + for global_ in globals_: + with util.uncache(*uncache_names): + callback(global_) + + + def test_module_from_module(self): + # [module from module] + create = 'pkg.__init__', 'pkg.mod2' + globals_ = {'__package__': 'pkg'}, {'__name__': 'pkg.mod1'} + def callback(global_): + self.__import__('pkg') # For __import__(). + module = self.__import__('', global_, fromlist=['mod2'], level=1) + self.assertEqual(module.__name__, 'pkg') + self.assertHasAttr(module, 'mod2') + self.assertEqual(module.mod2.attr, 'pkg.mod2') + self.relative_import_test(create, globals_, callback) + + def test_attr_from_module(self): + # [attr from module] + create = 'pkg.__init__', 'pkg.mod2' + globals_ = {'__package__': 'pkg'}, {'__name__': 'pkg.mod1'} + def callback(global_): + self.__import__('pkg') # For __import__(). + module = self.__import__('mod2', global_, fromlist=['attr'], + level=1) + self.assertEqual(module.__name__, 'pkg.mod2') + self.assertEqual(module.attr, 'pkg.mod2') + self.relative_import_test(create, globals_, callback) + + def test_package_to_module(self): + # [package to module] + create = 'pkg.__init__', 'pkg.module' + globals_ = ({'__package__': 'pkg'}, + {'__name__': 'pkg', '__path__': ['blah']}) + def callback(global_): + self.__import__('pkg') # For __import__(). + module = self.__import__('', global_, fromlist=['module'], + level=1) + self.assertEqual(module.__name__, 'pkg') + self.assertHasAttr(module, 'module') + self.assertEqual(module.module.attr, 'pkg.module') + self.relative_import_test(create, globals_, callback) + + def test_module_to_package(self): + # [module to package] + create = 'pkg.__init__', 'pkg.module' + globals_ = {'__package__': 'pkg'}, {'__name__': 'pkg.module'} + def callback(global_): + self.__import__('pkg') # For __import__(). + module = self.__import__('', global_, fromlist=['attr'], level=1) + self.assertEqual(module.__name__, 'pkg') + self.relative_import_test(create, globals_, callback) + + def test_package_to_package(self): + # [package to package] + create = ('pkg.__init__', 'pkg.subpkg1.__init__', + 'pkg.subpkg2.__init__') + globals_ = ({'__package__': 'pkg.subpkg1'}, + {'__name__': 'pkg.subpkg1', '__path__': ['blah']}) + def callback(global_): + module = self.__import__('', global_, fromlist=['subpkg2'], + level=2) + self.assertEqual(module.__name__, 'pkg') + self.assertHasAttr(module, 'subpkg2') + self.assertEqual(module.subpkg2.attr, 'pkg.subpkg2.__init__') + self.relative_import_test(create, globals_, callback) + + def test_deep_import(self): + # [deep import] + create = ['pkg.__init__'] + for count in range(1,6): + create.append('{0}.pkg{1}.__init__'.format( + create[-1][:-len('.__init__')], count)) + globals_ = ({'__package__': 'pkg.pkg1.pkg2.pkg3.pkg4.pkg5'}, + {'__name__': 'pkg.pkg1.pkg2.pkg3.pkg4.pkg5', + '__path__': ['blah']}) + def callback(global_): + self.__import__(globals_[0]['__package__']) + module = self.__import__('', global_, fromlist=['attr'], level=6) + self.assertEqual(module.__name__, 'pkg') + self.relative_import_test(create, globals_, callback) + + def test_too_high_from_package(self): + # [too high from package] + create = ['top_level', 'pkg.__init__'] + globals_ = ({'__package__': 'pkg'}, + {'__name__': 'pkg', '__path__': ['blah']}) + def callback(global_): + self.__import__('pkg') + with self.assertRaises(ImportError): + self.__import__('', global_, fromlist=['top_level'], + level=2) + self.relative_import_test(create, globals_, callback) + + def test_too_high_from_module(self): + # [too high from module] + create = ['top_level', 'pkg.__init__', 'pkg.module'] + globals_ = {'__package__': 'pkg'}, {'__name__': 'pkg.module'} + def callback(global_): + self.__import__('pkg') + with self.assertRaises(ImportError): + self.__import__('', global_, fromlist=['top_level'], + level=2) + self.relative_import_test(create, globals_, callback) + + def test_empty_name_w_level_0(self): + # [empty name] + with self.assertRaises(ValueError): + self.__import__('') + + def test_import_from_different_package(self): + # Test importing from a different package than the caller. + # in pkg.subpkg1.mod + # from ..subpkg2 import mod + create = ['__runpy_pkg__.__init__', + '__runpy_pkg__.__runpy_pkg__.__init__', + '__runpy_pkg__.uncle.__init__', + '__runpy_pkg__.uncle.cousin.__init__', + '__runpy_pkg__.uncle.cousin.nephew'] + globals_ = {'__package__': '__runpy_pkg__.__runpy_pkg__'} + def callback(global_): + self.__import__('__runpy_pkg__.__runpy_pkg__') + module = self.__import__('uncle.cousin', globals_, {}, + fromlist=['nephew'], + level=2) + self.assertEqual(module.__name__, '__runpy_pkg__.uncle.cousin') + self.relative_import_test(create, globals_, callback) + + def test_import_relative_import_no_fromlist(self): + # Import a relative module w/ no fromlist. + create = ['crash.__init__', 'crash.mod'] + globals_ = [{'__package__': 'crash', '__name__': 'crash'}] + def callback(global_): + self.__import__('crash') + mod = self.__import__('mod', global_, {}, [], 1) + self.assertEqual(mod.__name__, 'crash.mod') + self.relative_import_test(create, globals_, callback) + + def test_relative_import_no_globals(self): + # No globals for a relative import is an error. + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + with self.assertRaises(KeyError): + self.__import__('sys', level=1) + + def test_relative_import_no_package(self): + with self.assertRaises(ImportError): + self.__import__('a', {'__package__': '', '__spec__': None}, + level=1) + + def test_relative_import_no_package_exists_absolute(self): + with self.assertRaises(ImportError): + self.__import__('sys', {'__package__': '', '__spec__': None}, + level=1) + + def test_malicious_relative_import(self): + # https://github.com/python/cpython/issues/134100 + # Test to make sure UAF bug with error msg doesn't come back to life + import sys + loooong = "".ljust(0x23000, "b") + name = f"a.{loooong}.c" + + with util.uncache(name): + sys.modules[name] = {} + with self.assertRaisesRegex( + KeyError, + r"'a\.b+' not in sys\.modules as expected" + ): + __import__(f"{loooong}.c", {"__package__": "a"}, level=1) + + +(Frozen_RelativeImports, + Source_RelativeImports + ) = util.test_both(RelativeImports, __import__=util.__import__) + + +if __name__ == '__main__': + unittest.main() diff --git a/stdlib/test/test_importlib/metadata/__init__.py b/stdlib/test/test_importlib/metadata/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/stdlib/test/test_importlib/metadata/_context.py b/stdlib/test/test_importlib/metadata/_context.py new file mode 100644 index 000000000..8a53eb55d --- /dev/null +++ b/stdlib/test/test_importlib/metadata/_context.py @@ -0,0 +1,13 @@ +import contextlib + + +# from jaraco.context 4.3 +class suppress(contextlib.suppress, contextlib.ContextDecorator): + """ + A version of contextlib.suppress with decorator support. + + >>> @suppress(KeyError) + ... def key_error(): + ... {}[''] + >>> key_error() + """ diff --git a/stdlib/test/test_importlib/metadata/_path.py b/stdlib/test/test_importlib/metadata/_path.py new file mode 100644 index 000000000..b3cfb9cd5 --- /dev/null +++ b/stdlib/test/test_importlib/metadata/_path.py @@ -0,0 +1,115 @@ +# from jaraco.path 3.7 + +import functools +import pathlib +from typing import Dict, Protocol, Union +from typing import runtime_checkable + + +class Symlink(str): + """ + A string indicating the target of a symlink. + """ + + +FilesSpec = Dict[str, Union[str, bytes, Symlink, 'FilesSpec']] # type: ignore + + +@runtime_checkable +class TreeMaker(Protocol): + def __truediv__(self, *args, **kwargs): ... # pragma: no cover + + def mkdir(self, **kwargs): ... # pragma: no cover + + def write_text(self, content, **kwargs): ... # pragma: no cover + + def write_bytes(self, content): ... # pragma: no cover + + def symlink_to(self, target): ... # pragma: no cover + + +def _ensure_tree_maker(obj: Union[str, TreeMaker]) -> TreeMaker: + return obj if isinstance(obj, TreeMaker) else pathlib.Path(obj) # type: ignore + + +def build( + spec: FilesSpec, + prefix: Union[str, TreeMaker] = pathlib.Path(), # type: ignore +): + """ + Build a set of files/directories, as described by the spec. + + Each key represents a pathname, and the value represents + the content. Content may be a nested directory. + + >>> spec = { + ... 'README.txt': "A README file", + ... "foo": { + ... "__init__.py": "", + ... "bar": { + ... "__init__.py": "", + ... }, + ... "baz.py": "# Some code", + ... "bar.py": Symlink("baz.py"), + ... }, + ... "bing": Symlink("foo"), + ... } + >>> target = getfixture('tmp_path') + >>> build(spec, target) + >>> target.joinpath('foo/baz.py').read_text(encoding='utf-8') + '# Some code' + >>> target.joinpath('bing/bar.py').read_text(encoding='utf-8') + '# Some code' + """ + for name, contents in spec.items(): + create(contents, _ensure_tree_maker(prefix) / name) + + +@functools.singledispatch +def create(content: Union[str, bytes, FilesSpec], path): + path.mkdir(exist_ok=True) + build(content, prefix=path) # type: ignore + + +@create.register +def _(content: bytes, path): + path.write_bytes(content) + + +@create.register +def _(content: str, path): + path.write_text(content, encoding='utf-8') + + +@create.register +def _(content: Symlink, path): + path.symlink_to(content) + + +class Recording: + """ + A TreeMaker object that records everything that would be written. + + >>> r = Recording() + >>> build({'foo': {'foo1.txt': 'yes'}, 'bar.txt': 'abc'}, r) + >>> r.record + ['foo/foo1.txt', 'bar.txt'] + """ + + def __init__(self, loc=pathlib.PurePosixPath(), record=None): + self.loc = loc + self.record = record if record is not None else [] + + def __truediv__(self, other): + return Recording(self.loc / other, self.record) + + def write_text(self, content, **kwargs): + self.record.append(str(self.loc)) + + write_bytes = write_text + + def mkdir(self, **kwargs): + return + + def symlink_to(self, target): + pass diff --git a/stdlib/test/test_importlib/metadata/data/__init__.py b/stdlib/test/test_importlib/metadata/data/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/stdlib/test/test_importlib/metadata/data/example-21.12-py3-none-any.whl b/stdlib/test/test_importlib/metadata/data/example-21.12-py3-none-any.whl new file mode 100644 index 000000000..641ab07f7 Binary files /dev/null and b/stdlib/test/test_importlib/metadata/data/example-21.12-py3-none-any.whl differ diff --git a/stdlib/test/test_importlib/metadata/data/example-21.12-py3.6.egg b/stdlib/test/test_importlib/metadata/data/example-21.12-py3.6.egg new file mode 100644 index 000000000..cdb298a19 Binary files /dev/null and b/stdlib/test/test_importlib/metadata/data/example-21.12-py3.6.egg differ diff --git a/stdlib/test/test_importlib/metadata/data/example2-1.0.0-py3-none-any.whl b/stdlib/test/test_importlib/metadata/data/example2-1.0.0-py3-none-any.whl new file mode 100644 index 000000000..5ca93657f Binary files /dev/null and b/stdlib/test/test_importlib/metadata/data/example2-1.0.0-py3-none-any.whl differ diff --git a/stdlib/test/test_importlib/metadata/data/sources/example/example/__init__.py b/stdlib/test/test_importlib/metadata/data/sources/example/example/__init__.py new file mode 100644 index 000000000..ba73b7433 --- /dev/null +++ b/stdlib/test/test_importlib/metadata/data/sources/example/example/__init__.py @@ -0,0 +1,2 @@ +def main(): + return 'example' diff --git a/stdlib/test/test_importlib/metadata/data/sources/example/setup.py b/stdlib/test/test_importlib/metadata/data/sources/example/setup.py new file mode 100644 index 000000000..479488a03 --- /dev/null +++ b/stdlib/test/test_importlib/metadata/data/sources/example/setup.py @@ -0,0 +1,11 @@ +from setuptools import setup + +setup( + name='example', + version='21.12', + license='Apache Software License', + packages=['example'], + entry_points={ + 'console_scripts': ['example = example:main', 'Example=example:main'], + }, +) diff --git a/stdlib/test/test_importlib/metadata/data/sources/example2/example2/__init__.py b/stdlib/test/test_importlib/metadata/data/sources/example2/example2/__init__.py new file mode 100644 index 000000000..de645c2e8 --- /dev/null +++ b/stdlib/test/test_importlib/metadata/data/sources/example2/example2/__init__.py @@ -0,0 +1,2 @@ +def main(): + return "example" diff --git a/stdlib/test/test_importlib/metadata/data/sources/example2/pyproject.toml b/stdlib/test/test_importlib/metadata/data/sources/example2/pyproject.toml new file mode 100644 index 000000000..011f4751f --- /dev/null +++ b/stdlib/test/test_importlib/metadata/data/sources/example2/pyproject.toml @@ -0,0 +1,10 @@ +[build-system] +build-backend = 'trampolim' +requires = ['trampolim'] + +[project] +name = 'example2' +version = '1.0.0' + +[project.scripts] +example = 'example2:main' diff --git a/stdlib/test/test_importlib/metadata/fixtures.py b/stdlib/test/test_importlib/metadata/fixtures.py new file mode 100644 index 000000000..826b1b325 --- /dev/null +++ b/stdlib/test/test_importlib/metadata/fixtures.py @@ -0,0 +1,395 @@ +import sys +import copy +import json +import shutil +import pathlib +import textwrap +import functools +import contextlib + +from test.support import import_helper +from test.support import os_helper +from test.support import requires_zlib + +from . import _path +from ._path import FilesSpec + + +try: + from importlib import resources # type: ignore + + getattr(resources, 'files') + getattr(resources, 'as_file') +except (ImportError, AttributeError): + import importlib_resources as resources # type: ignore + + +@contextlib.contextmanager +def tmp_path(): + """ + Like os_helper.temp_dir, but yields a pathlib.Path. + """ + with os_helper.temp_dir() as path: + yield pathlib.Path(path) + + +@contextlib.contextmanager +def install_finder(finder): + sys.meta_path.append(finder) + try: + yield + finally: + sys.meta_path.remove(finder) + + +class Fixtures: + def setUp(self): + self.fixtures = contextlib.ExitStack() + self.addCleanup(self.fixtures.close) + + +class SiteDir(Fixtures): + def setUp(self): + super().setUp() + self.site_dir = self.fixtures.enter_context(tmp_path()) + + +class OnSysPath(Fixtures): + @staticmethod + @contextlib.contextmanager + def add_sys_path(dir): + sys.path[:0] = [str(dir)] + try: + yield + finally: + sys.path.remove(str(dir)) + + def setUp(self): + super().setUp() + self.fixtures.enter_context(self.add_sys_path(self.site_dir)) + self.fixtures.enter_context(import_helper.isolated_modules()) + + +class SiteBuilder(SiteDir): + def setUp(self): + super().setUp() + for cls in self.__class__.mro(): + with contextlib.suppress(AttributeError): + build_files(cls.files, prefix=self.site_dir) + + +class DistInfoPkg(OnSysPath, SiteBuilder): + files: FilesSpec = { + "distinfo_pkg-1.0.0.dist-info": { + "METADATA": """ + Name: distinfo-pkg + Author: Steven Ma + Version: 1.0.0 + Requires-Dist: wheel >= 1.0 + Requires-Dist: pytest; extra == 'test' + Keywords: sample package + + Once upon a time + There was a distinfo pkg + """, + "RECORD": "mod.py,sha256=abc,20\n", + "entry_points.txt": """ + [entries] + main = mod:main + ns:sub = mod:main + """, + }, + "mod.py": """ + def main(): + print("hello world") + """, + } + + def make_uppercase(self): + """ + Rewrite metadata with everything uppercase. + """ + shutil.rmtree(self.site_dir / "distinfo_pkg-1.0.0.dist-info") + files = copy.deepcopy(DistInfoPkg.files) + info = files["distinfo_pkg-1.0.0.dist-info"] + info["METADATA"] = info["METADATA"].upper() + build_files(files, self.site_dir) + + +class DistInfoPkgEditable(DistInfoPkg): + """ + Package with a PEP 660 direct_url.json. + """ + + some_hash = '524127ce937f7cb65665130c695abd18ca386f60bb29687efb976faa1596fdcc' + files: FilesSpec = { + 'distinfo_pkg-1.0.0.dist-info': { + 'direct_url.json': json.dumps({ + "archive_info": { + "hash": f"sha256={some_hash}", + "hashes": {"sha256": f"{some_hash}"}, + }, + "url": "file:///path/to/distinfo_pkg-1.0.0.editable-py3-none-any.whl", + }) + }, + } + + +class DistInfoPkgWithDot(OnSysPath, SiteBuilder): + files: FilesSpec = { + "pkg_dot-1.0.0.dist-info": { + "METADATA": """ + Name: pkg.dot + Version: 1.0.0 + """, + }, + } + + +class DistInfoPkgWithDotLegacy(OnSysPath, SiteBuilder): + files: FilesSpec = { + "pkg.dot-1.0.0.dist-info": { + "METADATA": """ + Name: pkg.dot + Version: 1.0.0 + """, + }, + "pkg.lot.egg-info": { + "METADATA": """ + Name: pkg.lot + Version: 1.0.0 + """, + }, + } + + +class DistInfoPkgOffPath(SiteBuilder): + files = DistInfoPkg.files + + +class EggInfoPkg(OnSysPath, SiteBuilder): + files: FilesSpec = { + "egginfo_pkg.egg-info": { + "PKG-INFO": """ + Name: egginfo-pkg + Author: Steven Ma + License: Unknown + Version: 1.0.0 + Classifier: Intended Audience :: Developers + Classifier: Topic :: Software Development :: Libraries + Keywords: sample package + Description: Once upon a time + There was an egginfo package + """, + "SOURCES.txt": """ + mod.py + egginfo_pkg.egg-info/top_level.txt + """, + "entry_points.txt": """ + [entries] + main = mod:main + """, + "requires.txt": """ + wheel >= 1.0; python_version >= "2.7" + [test] + pytest + """, + "top_level.txt": "mod\n", + }, + "mod.py": """ + def main(): + print("hello world") + """, + } + + +class EggInfoPkgPipInstalledNoToplevel(OnSysPath, SiteBuilder): + files: FilesSpec = { + "egg_with_module_pkg.egg-info": { + "PKG-INFO": "Name: egg_with_module-pkg", + # SOURCES.txt is made from the source archive, and contains files + # (setup.py) that are not present after installation. + "SOURCES.txt": """ + egg_with_module.py + setup.py + egg_with_module_pkg.egg-info/PKG-INFO + egg_with_module_pkg.egg-info/SOURCES.txt + egg_with_module_pkg.egg-info/top_level.txt + """, + # installed-files.txt is written by pip, and is a strictly more + # accurate source than SOURCES.txt as to the installed contents of + # the package. + "installed-files.txt": """ + ../egg_with_module.py + PKG-INFO + SOURCES.txt + top_level.txt + """, + # missing top_level.txt (to trigger fallback to installed-files.txt) + }, + "egg_with_module.py": """ + def main(): + print("hello world") + """, + } + + +class EggInfoPkgPipInstalledExternalDataFiles(OnSysPath, SiteBuilder): + files: FilesSpec = { + "egg_with_module_pkg.egg-info": { + "PKG-INFO": "Name: egg_with_module-pkg", + # SOURCES.txt is made from the source archive, and contains files + # (setup.py) that are not present after installation. + "SOURCES.txt": """ + egg_with_module.py + setup.py + egg_with_module.json + egg_with_module_pkg.egg-info/PKG-INFO + egg_with_module_pkg.egg-info/SOURCES.txt + egg_with_module_pkg.egg-info/top_level.txt + """, + # installed-files.txt is written by pip, and is a strictly more + # accurate source than SOURCES.txt as to the installed contents of + # the package. + "installed-files.txt": """ + ../../../etc/jupyter/jupyter_notebook_config.d/relative.json + /etc/jupyter/jupyter_notebook_config.d/absolute.json + ../egg_with_module.py + PKG-INFO + SOURCES.txt + top_level.txt + """, + # missing top_level.txt (to trigger fallback to installed-files.txt) + }, + "egg_with_module.py": """ + def main(): + print("hello world") + """, + } + + +class EggInfoPkgPipInstalledNoModules(OnSysPath, SiteBuilder): + files: FilesSpec = { + "egg_with_no_modules_pkg.egg-info": { + "PKG-INFO": "Name: egg_with_no_modules-pkg", + # SOURCES.txt is made from the source archive, and contains files + # (setup.py) that are not present after installation. + "SOURCES.txt": """ + setup.py + egg_with_no_modules_pkg.egg-info/PKG-INFO + egg_with_no_modules_pkg.egg-info/SOURCES.txt + egg_with_no_modules_pkg.egg-info/top_level.txt + """, + # installed-files.txt is written by pip, and is a strictly more + # accurate source than SOURCES.txt as to the installed contents of + # the package. + "installed-files.txt": """ + PKG-INFO + SOURCES.txt + top_level.txt + """, + # top_level.txt correctly reflects that no modules are installed + "top_level.txt": b"\n", + }, + } + + +class EggInfoPkgSourcesFallback(OnSysPath, SiteBuilder): + files: FilesSpec = { + "sources_fallback_pkg.egg-info": { + "PKG-INFO": "Name: sources_fallback-pkg", + # SOURCES.txt is made from the source archive, and contains files + # (setup.py) that are not present after installation. + "SOURCES.txt": """ + sources_fallback.py + setup.py + sources_fallback_pkg.egg-info/PKG-INFO + sources_fallback_pkg.egg-info/SOURCES.txt + """, + # missing installed-files.txt (i.e. not installed by pip) and + # missing top_level.txt (to trigger fallback to SOURCES.txt) + }, + "sources_fallback.py": """ + def main(): + print("hello world") + """, + } + + +class EggInfoFile(OnSysPath, SiteBuilder): + files: FilesSpec = { + "egginfo_file.egg-info": """ + Metadata-Version: 1.0 + Name: egginfo_file + Version: 0.1 + Summary: An example package + Home-page: www.example.com + Author: Eric Haffa-Vee + Author-email: eric@example.coms + License: UNKNOWN + Description: UNKNOWN + Platform: UNKNOWN + """, + } + + +# dedent all text strings before writing +orig = _path.create.registry[str] +_path.create.register(str, lambda content, path: orig(DALS(content), path)) + + +build_files = _path.build + + +def build_record(file_defs): + return ''.join(f'{name},,\n' for name in record_names(file_defs)) + + +def record_names(file_defs): + recording = _path.Recording() + _path.build(file_defs, recording) + return recording.record + + +class FileBuilder: + def unicode_filename(self): + return os_helper.FS_NONASCII or self.skip( + "File system does not support non-ascii." + ) + + +def DALS(str): + "Dedent and left-strip" + return textwrap.dedent(str).lstrip() + + +@requires_zlib() +class ZipFixtures: + root = 'test.test_importlib.metadata.data' + + def _fixture_on_path(self, filename): + pkg_file = resources.files(self.root).joinpath(filename) + file = self.resources.enter_context(resources.as_file(pkg_file)) + assert file.name.startswith('example'), file.name + sys.path.insert(0, str(file)) + self.resources.callback(sys.path.pop, 0) + + def setUp(self): + # Add self.zip_name to the front of sys.path. + self.resources = contextlib.ExitStack() + self.addCleanup(self.resources.close) + + +def parameterize(*args_set): + """Run test method with a series of parameters.""" + + def wrapper(func): + @functools.wraps(func) + def _inner(self): + for args in args_set: + with self.subTest(**args): + func(self, **args) + + return _inner + + return wrapper diff --git a/stdlib/test/test_importlib/metadata/stubs.py b/stdlib/test/test_importlib/metadata/stubs.py new file mode 100644 index 000000000..e5b011c39 --- /dev/null +++ b/stdlib/test/test_importlib/metadata/stubs.py @@ -0,0 +1,10 @@ +import unittest + + +class fake_filesystem_unittest: + """ + Stubbed version of the pyfakefs module + """ + class TestCase(unittest.TestCase): + def setUpPyfakefs(self): + self.skipTest("pyfakefs not available") diff --git a/stdlib/test/test_importlib/metadata/test_api.py b/stdlib/test/test_importlib/metadata/test_api.py new file mode 100644 index 000000000..2256e0c50 --- /dev/null +++ b/stdlib/test/test_importlib/metadata/test_api.py @@ -0,0 +1,323 @@ +import re +import textwrap +import unittest +import warnings +import importlib +import contextlib + +from . import fixtures +from importlib.metadata import ( + Distribution, + PackageNotFoundError, + distribution, + entry_points, + files, + metadata, + requires, + version, +) + + +@contextlib.contextmanager +def suppress_known_deprecation(): + with warnings.catch_warnings(record=True) as ctx: + warnings.simplefilter('default', category=DeprecationWarning) + yield ctx + + +class APITests( + fixtures.EggInfoPkg, + fixtures.EggInfoPkgPipInstalledNoToplevel, + fixtures.EggInfoPkgPipInstalledNoModules, + fixtures.EggInfoPkgPipInstalledExternalDataFiles, + fixtures.EggInfoPkgSourcesFallback, + fixtures.DistInfoPkg, + fixtures.DistInfoPkgWithDot, + fixtures.EggInfoFile, + unittest.TestCase, +): + version_pattern = r'\d+\.\d+(\.\d)?' + + def test_retrieves_version_of_self(self): + pkg_version = version('egginfo-pkg') + assert isinstance(pkg_version, str) + assert re.match(self.version_pattern, pkg_version) + + def test_retrieves_version_of_distinfo_pkg(self): + pkg_version = version('distinfo-pkg') + assert isinstance(pkg_version, str) + assert re.match(self.version_pattern, pkg_version) + + def test_for_name_does_not_exist(self): + with self.assertRaises(PackageNotFoundError): + distribution('does-not-exist') + + def test_name_normalization(self): + names = 'pkg.dot', 'pkg_dot', 'pkg-dot', 'pkg..dot', 'Pkg.Dot' + for name in names: + with self.subTest(name): + assert distribution(name).metadata['Name'] == 'pkg.dot' + + def test_prefix_not_matched(self): + prefixes = 'p', 'pkg', 'pkg.' + for prefix in prefixes: + with self.subTest(prefix): + with self.assertRaises(PackageNotFoundError): + distribution(prefix) + + def test_for_top_level(self): + tests = [ + ('egginfo-pkg', 'mod'), + ('egg_with_no_modules-pkg', ''), + ] + for pkg_name, expect_content in tests: + with self.subTest(pkg_name): + self.assertEqual( + distribution(pkg_name).read_text('top_level.txt').strip(), + expect_content, + ) + + def test_read_text(self): + tests = [ + ('egginfo-pkg', 'mod\n'), + ('egg_with_no_modules-pkg', '\n'), + ] + for pkg_name, expect_content in tests: + with self.subTest(pkg_name): + top_level = [ + path for path in files(pkg_name) if path.name == 'top_level.txt' + ][0] + self.assertEqual(top_level.read_text(), expect_content) + + def test_entry_points(self): + eps = entry_points() + assert 'entries' in eps.groups + entries = eps.select(group='entries') + assert 'main' in entries.names + ep = entries['main'] + self.assertEqual(ep.value, 'mod:main') + self.assertEqual(ep.extras, []) + + def test_entry_points_distribution(self): + entries = entry_points(group='entries') + for entry in ("main", "ns:sub"): + ep = entries[entry] + self.assertIn(ep.dist.name, ('distinfo-pkg', 'egginfo-pkg')) + self.assertEqual(ep.dist.version, "1.0.0") + + def test_entry_points_unique_packages_normalized(self): + """ + Entry points should only be exposed for the first package + on sys.path with a given name (even when normalized). + """ + alt_site_dir = self.fixtures.enter_context(fixtures.tmp_path()) + self.fixtures.enter_context(self.add_sys_path(alt_site_dir)) + alt_pkg = { + "DistInfo_pkg-1.1.0.dist-info": { + "METADATA": """ + Name: distinfo-pkg + Version: 1.1.0 + """, + "entry_points.txt": """ + [entries] + main = mod:altmain + """, + }, + } + fixtures.build_files(alt_pkg, alt_site_dir) + entries = entry_points(group='entries') + assert not any( + ep.dist.name == 'distinfo-pkg' and ep.dist.version == '1.0.0' + for ep in entries + ) + # ns:sub doesn't exist in alt_pkg + assert 'ns:sub' not in entries.names + + def test_entry_points_missing_name(self): + with self.assertRaises(KeyError): + entry_points(group='entries')['missing'] + + def test_entry_points_missing_group(self): + assert entry_points(group='missing') == () + + def test_entry_points_allows_no_attributes(self): + ep = entry_points().select(group='entries', name='main') + with self.assertRaises(AttributeError): + ep.foo = 4 + + def test_metadata_for_this_package(self): + md = metadata('egginfo-pkg') + assert md['author'] == 'Steven Ma' + assert md['LICENSE'] == 'Unknown' + assert md['Name'] == 'egginfo-pkg' + classifiers = md.get_all('Classifier') + assert 'Topic :: Software Development :: Libraries' in classifiers + + def test_missing_key_legacy(self): + """ + Requesting a missing key will still return None, but warn. + """ + md = metadata('distinfo-pkg') + with suppress_known_deprecation(): + assert md['does-not-exist'] is None + + def test_get_key(self): + """ + Getting a key gets the key. + """ + md = metadata('egginfo-pkg') + assert md.get('Name') == 'egginfo-pkg' + + def test_get_missing_key(self): + """ + Requesting a missing key will return None. + """ + md = metadata('distinfo-pkg') + assert md.get('does-not-exist') is None + + @staticmethod + def _test_files(files): + root = files[0].root + for file in files: + assert file.root == root + assert not file.hash or file.hash.value + assert not file.hash or file.hash.mode == 'sha256' + assert not file.size or file.size >= 0 + assert file.locate().exists() + assert isinstance(file.read_binary(), bytes) + if file.name.endswith('.py'): + file.read_text() + + def test_file_hash_repr(self): + util = [p for p in files('distinfo-pkg') if p.name == 'mod.py'][0] + self.assertRegex(repr(util.hash), '') + + def test_files_dist_info(self): + self._test_files(files('distinfo-pkg')) + + def test_files_egg_info(self): + self._test_files(files('egginfo-pkg')) + self._test_files(files('egg_with_module-pkg')) + self._test_files(files('egg_with_no_modules-pkg')) + self._test_files(files('sources_fallback-pkg')) + + def test_version_egg_info_file(self): + self.assertEqual(version('egginfo-file'), '0.1') + + def test_requires_egg_info_file(self): + requirements = requires('egginfo-file') + self.assertIsNone(requirements) + + def test_requires_egg_info(self): + deps = requires('egginfo-pkg') + assert len(deps) == 2 + assert any(dep == 'wheel >= 1.0; python_version >= "2.7"' for dep in deps) + + def test_requires_egg_info_empty(self): + fixtures.build_files( + { + 'requires.txt': '', + }, + self.site_dir.joinpath('egginfo_pkg.egg-info'), + ) + deps = requires('egginfo-pkg') + assert deps == [] + + def test_requires_dist_info(self): + deps = requires('distinfo-pkg') + assert len(deps) == 2 + assert all(deps) + assert 'wheel >= 1.0' in deps + assert "pytest; extra == 'test'" in deps + + def test_more_complex_deps_requires_text(self): + requires = textwrap.dedent( + """ + dep1 + dep2 + + [:python_version < "3"] + dep3 + + [extra1] + dep4 + dep6@ git+https://example.com/python/dep.git@v1.0.0 + + [extra2:python_version < "3"] + dep5 + """ + ) + deps = sorted(Distribution._deps_from_requires_text(requires)) + expected = [ + 'dep1', + 'dep2', + 'dep3; python_version < "3"', + 'dep4; extra == "extra1"', + 'dep5; (python_version < "3") and extra == "extra2"', + 'dep6@ git+https://example.com/python/dep.git@v1.0.0 ; extra == "extra1"', + ] + # It's important that the environment marker expression be + # wrapped in parentheses to avoid the following 'and' binding more + # tightly than some other part of the environment expression. + + assert deps == expected + + def test_as_json(self): + md = metadata('distinfo-pkg').json + assert 'name' in md + assert md['keywords'] == ['sample', 'package'] + desc = md['description'] + assert desc.startswith('Once upon a time\nThere was') + assert len(md['requires_dist']) == 2 + + def test_as_json_egg_info(self): + md = metadata('egginfo-pkg').json + assert 'name' in md + assert md['keywords'] == ['sample', 'package'] + desc = md['description'] + assert desc.startswith('Once upon a time\nThere was') + assert len(md['classifier']) == 2 + + def test_as_json_odd_case(self): + self.make_uppercase() + md = metadata('distinfo-pkg').json + assert 'name' in md + assert len(md['requires_dist']) == 2 + assert md['keywords'] == ['SAMPLE', 'PACKAGE'] + + +class LegacyDots(fixtures.DistInfoPkgWithDotLegacy, unittest.TestCase): + def test_name_normalization(self): + names = 'pkg.dot', 'pkg_dot', 'pkg-dot', 'pkg..dot', 'Pkg.Dot' + for name in names: + with self.subTest(name): + assert distribution(name).metadata['Name'] == 'pkg.dot' + + def test_name_normalization_versionless_egg_info(self): + names = 'pkg.lot', 'pkg_lot', 'pkg-lot', 'pkg..lot', 'Pkg.Lot' + for name in names: + with self.subTest(name): + assert distribution(name).metadata['Name'] == 'pkg.lot' + + +class OffSysPathTests(fixtures.DistInfoPkgOffPath, unittest.TestCase): + def test_find_distributions_specified_path(self): + dists = Distribution.discover(path=[str(self.site_dir)]) + assert any(dist.metadata['Name'] == 'distinfo-pkg' for dist in dists) + + def test_distribution_at_pathlib(self): + """Demonstrate how to load metadata direct from a directory.""" + dist_info_path = self.site_dir / 'distinfo_pkg-1.0.0.dist-info' + dist = Distribution.at(dist_info_path) + assert dist.version == '1.0.0' + + def test_distribution_at_str(self): + dist_info_path = self.site_dir / 'distinfo_pkg-1.0.0.dist-info' + dist = Distribution.at(str(dist_info_path)) + assert dist.version == '1.0.0' + + +class InvalidateCache(unittest.TestCase): + def test_invalidate_cache(self): + # No externally observable behavior, but ensures test coverage... + importlib.invalidate_caches() diff --git a/stdlib/test/test_importlib/metadata/test_main.py b/stdlib/test/test_importlib/metadata/test_main.py new file mode 100644 index 000000000..e4218076f --- /dev/null +++ b/stdlib/test/test_importlib/metadata/test_main.py @@ -0,0 +1,468 @@ +import re +import pickle +import unittest +import warnings +import importlib +import importlib.metadata +import contextlib +from test.support import os_helper + +try: + import pyfakefs.fake_filesystem_unittest as ffs +except ImportError: + from .stubs import fake_filesystem_unittest as ffs + +from . import fixtures +from ._context import suppress +from ._path import Symlink +from importlib.metadata import ( + Distribution, + EntryPoint, + PackageNotFoundError, + _unique, + distributions, + entry_points, + metadata, + packages_distributions, + version, +) + + +@contextlib.contextmanager +def suppress_known_deprecation(): + with warnings.catch_warnings(record=True) as ctx: + warnings.simplefilter('default', category=DeprecationWarning) + yield ctx + + +class BasicTests(fixtures.DistInfoPkg, unittest.TestCase): + version_pattern = r'\d+\.\d+(\.\d)?' + + def test_retrieves_version_of_self(self): + dist = Distribution.from_name('distinfo-pkg') + assert isinstance(dist.version, str) + assert re.match(self.version_pattern, dist.version) + + def test_for_name_does_not_exist(self): + with self.assertRaises(PackageNotFoundError): + Distribution.from_name('does-not-exist') + + def test_package_not_found_mentions_metadata(self): + """ + When a package is not found, that could indicate that the + package is not installed or that it is installed without + metadata. Ensure the exception mentions metadata to help + guide users toward the cause. See #124. + """ + with self.assertRaises(PackageNotFoundError) as ctx: + Distribution.from_name('does-not-exist') + + assert "metadata" in str(ctx.exception) + + # expected to fail until ABC is enforced + @suppress(AssertionError) + @suppress_known_deprecation() + def test_abc_enforced(self): + with self.assertRaises(TypeError): + type('DistributionSubclass', (Distribution,), {})() + + @fixtures.parameterize( + dict(name=None), + dict(name=''), + ) + def test_invalid_inputs_to_from_name(self, name): + with self.assertRaises(Exception): + Distribution.from_name(name) + + +class ImportTests(fixtures.DistInfoPkg, unittest.TestCase): + def test_import_nonexistent_module(self): + # Ensure that the MetadataPathFinder does not crash an import of a + # non-existent module. + with self.assertRaises(ImportError): + importlib.import_module('does_not_exist') + + def test_resolve(self): + ep = entry_points(group='entries')['main'] + self.assertEqual(ep.load().__name__, "main") + + def test_entrypoint_with_colon_in_name(self): + ep = entry_points(group='entries')['ns:sub'] + self.assertEqual(ep.value, 'mod:main') + + def test_resolve_without_attr(self): + ep = EntryPoint( + name='ep', + value='importlib.metadata', + group='grp', + ) + assert ep.load() is importlib.metadata + + +class NameNormalizationTests(fixtures.OnSysPath, fixtures.SiteDir, unittest.TestCase): + @staticmethod + def make_pkg(name): + """ + Create minimal metadata for a dist-info package with + the indicated name on the file system. + """ + return { + f'{name}.dist-info': { + 'METADATA': 'VERSION: 1.0\n', + }, + } + + def test_dashes_in_dist_name_found_as_underscores(self): + """ + For a package with a dash in the name, the dist-info metadata + uses underscores in the name. Ensure the metadata loads. + """ + fixtures.build_files(self.make_pkg('my_pkg'), self.site_dir) + assert version('my-pkg') == '1.0' + + def test_dist_name_found_as_any_case(self): + """ + Ensure the metadata loads when queried with any case. + """ + pkg_name = 'CherryPy' + fixtures.build_files(self.make_pkg(pkg_name), self.site_dir) + assert version(pkg_name) == '1.0' + assert version(pkg_name.lower()) == '1.0' + assert version(pkg_name.upper()) == '1.0' + + def test_unique_distributions(self): + """ + Two distributions varying only by non-normalized name on + the file system should resolve as the same. + """ + fixtures.build_files(self.make_pkg('abc'), self.site_dir) + before = list(_unique(distributions())) + + alt_site_dir = self.fixtures.enter_context(fixtures.tmp_path()) + self.fixtures.enter_context(self.add_sys_path(alt_site_dir)) + fixtures.build_files(self.make_pkg('ABC'), alt_site_dir) + after = list(_unique(distributions())) + + assert len(after) == len(before) + + +class NonASCIITests(fixtures.OnSysPath, fixtures.SiteDir, unittest.TestCase): + @staticmethod + def pkg_with_non_ascii_description(site_dir): + """ + Create minimal metadata for a package with non-ASCII in + the description. + """ + contents = { + 'portend.dist-info': { + 'METADATA': 'Description: pôrˈtend', + }, + } + fixtures.build_files(contents, site_dir) + return 'portend' + + @staticmethod + def pkg_with_non_ascii_description_egg_info(site_dir): + """ + Create minimal metadata for an egg-info package with + non-ASCII in the description. + """ + contents = { + 'portend.dist-info': { + 'METADATA': """ + Name: portend + + pôrˈtend""", + }, + } + fixtures.build_files(contents, site_dir) + return 'portend' + + def test_metadata_loads(self): + pkg_name = self.pkg_with_non_ascii_description(self.site_dir) + meta = metadata(pkg_name) + assert meta['Description'] == 'pôrˈtend' + + def test_metadata_loads_egg_info(self): + pkg_name = self.pkg_with_non_ascii_description_egg_info(self.site_dir) + meta = metadata(pkg_name) + assert meta['Description'] == 'pôrˈtend' + + +class DiscoveryTests( + fixtures.EggInfoPkg, + fixtures.EggInfoPkgPipInstalledNoToplevel, + fixtures.EggInfoPkgPipInstalledNoModules, + fixtures.EggInfoPkgSourcesFallback, + fixtures.DistInfoPkg, + unittest.TestCase, +): + def test_package_discovery(self): + dists = list(distributions()) + assert all(isinstance(dist, Distribution) for dist in dists) + assert any(dist.metadata['Name'] == 'egginfo-pkg' for dist in dists) + assert any(dist.metadata['Name'] == 'egg_with_module-pkg' for dist in dists) + assert any(dist.metadata['Name'] == 'egg_with_no_modules-pkg' for dist in dists) + assert any(dist.metadata['Name'] == 'sources_fallback-pkg' for dist in dists) + assert any(dist.metadata['Name'] == 'distinfo-pkg' for dist in dists) + + def test_invalid_usage(self): + with self.assertRaises(ValueError): + list(distributions(context='something', name='else')) + + def test_interleaved_discovery(self): + """ + Ensure interleaved searches are safe. + + When the search is cached, it is possible for searches to be + interleaved, so make sure those use-cases are safe. + + Ref #293 + """ + dists = distributions() + next(dists) + version('egginfo-pkg') + next(dists) + + +class DirectoryTest(fixtures.OnSysPath, fixtures.SiteDir, unittest.TestCase): + def test_egg_info(self): + # make an `EGG-INFO` directory that's unrelated + self.site_dir.joinpath('EGG-INFO').mkdir() + # used to crash with `IsADirectoryError` + with self.assertRaises(PackageNotFoundError): + version('unknown-package') + + def test_egg(self): + egg = self.site_dir.joinpath('foo-3.6.egg') + egg.mkdir() + with self.add_sys_path(egg): + with self.assertRaises(PackageNotFoundError): + version('foo') + + +class MissingSysPath(fixtures.OnSysPath, unittest.TestCase): + site_dir = '/does-not-exist' + + def test_discovery(self): + """ + Discovering distributions should succeed even if + there is an invalid path on sys.path. + """ + importlib.metadata.distributions() + + +class InaccessibleSysPath(fixtures.OnSysPath, ffs.TestCase): + site_dir = '/access-denied' + + def setUp(self): + super().setUp() + self.setUpPyfakefs() + self.fs.create_dir(self.site_dir, perm_bits=000) + + def test_discovery(self): + """ + Discovering distributions should succeed even if + there is an invalid path on sys.path. + """ + list(importlib.metadata.distributions()) + + +class TestEntryPoints(unittest.TestCase): + def __init__(self, *args): + super().__init__(*args) + self.ep = importlib.metadata.EntryPoint( + name='name', value='value', group='group' + ) + + def test_entry_point_pickleable(self): + revived = pickle.loads(pickle.dumps(self.ep)) + assert revived == self.ep + + def test_positional_args(self): + """ + Capture legacy (namedtuple) construction, discouraged. + """ + EntryPoint('name', 'value', 'group') + + def test_immutable(self): + """EntryPoints should be immutable""" + with self.assertRaises(AttributeError): + self.ep.name = 'badactor' + + def test_repr(self): + assert 'EntryPoint' in repr(self.ep) + assert 'name=' in repr(self.ep) + assert "'name'" in repr(self.ep) + + def test_hashable(self): + """EntryPoints should be hashable""" + hash(self.ep) + + def test_module(self): + assert self.ep.module == 'value' + + def test_attr(self): + assert self.ep.attr is None + + def test_sortable(self): + """ + EntryPoint objects are sortable, but result is undefined. + """ + sorted([ + EntryPoint(name='b', value='val', group='group'), + EntryPoint(name='a', value='val', group='group'), + ]) + + +class FileSystem( + fixtures.OnSysPath, fixtures.SiteDir, fixtures.FileBuilder, unittest.TestCase +): + def test_unicode_dir_on_sys_path(self): + """ + Ensure a Unicode subdirectory of a directory on sys.path + does not crash. + """ + fixtures.build_files( + {self.unicode_filename(): {}}, + prefix=self.site_dir, + ) + list(distributions()) + + +class PackagesDistributionsPrebuiltTest(fixtures.ZipFixtures, unittest.TestCase): + def test_packages_distributions_example(self): + self._fixture_on_path('example-21.12-py3-none-any.whl') + assert packages_distributions()['example'] == ['example'] + + def test_packages_distributions_example2(self): + """ + Test packages_distributions on a wheel built + by trampolim. + """ + self._fixture_on_path('example2-1.0.0-py3-none-any.whl') + assert packages_distributions()['example2'] == ['example2'] + + +class PackagesDistributionsTest( + fixtures.OnSysPath, fixtures.SiteDir, unittest.TestCase +): + def test_packages_distributions_neither_toplevel_nor_files(self): + """ + Test a package built without 'top-level.txt' or a file list. + """ + fixtures.build_files( + { + 'trim_example-1.0.0.dist-info': { + 'METADATA': """ + Name: trim_example + Version: 1.0.0 + """, + } + }, + prefix=self.site_dir, + ) + packages_distributions() + + def test_packages_distributions_all_module_types(self): + """ + Test top-level modules detected on a package without 'top-level.txt'. + """ + suffixes = importlib.machinery.all_suffixes() + metadata = dict( + METADATA=""" + Name: all_distributions + Version: 1.0.0 + """, + ) + files = { + 'all_distributions-1.0.0.dist-info': metadata, + } + for i, suffix in enumerate(suffixes): + files.update({ + f'importable-name {i}{suffix}': '', + f'in_namespace_{i}': { + f'mod{suffix}': '', + }, + f'in_package_{i}': { + '__init__.py': '', + f'mod{suffix}': '', + }, + }) + metadata.update(RECORD=fixtures.build_record(files)) + fixtures.build_files(files, prefix=self.site_dir) + + distributions = packages_distributions() + + for i in range(len(suffixes)): + assert distributions[f'importable-name {i}'] == ['all_distributions'] + assert distributions[f'in_namespace_{i}'] == ['all_distributions'] + assert distributions[f'in_package_{i}'] == ['all_distributions'] + + assert not any(name.endswith('.dist-info') for name in distributions) + + @os_helper.skip_unless_symlink + def test_packages_distributions_symlinked_top_level(self) -> None: + """ + Distribution is resolvable from a simple top-level symlink in RECORD. + See #452. + """ + + files: fixtures.FilesSpec = { + "symlinked_pkg-1.0.0.dist-info": { + "METADATA": """ + Name: symlinked-pkg + Version: 1.0.0 + """, + "RECORD": "symlinked,,\n", + }, + ".symlink.target": {}, + "symlinked": Symlink(".symlink.target"), + } + + fixtures.build_files(files, self.site_dir) + assert packages_distributions()['symlinked'] == ['symlinked-pkg'] + + +class PackagesDistributionsEggTest( + fixtures.EggInfoPkg, + fixtures.EggInfoPkgPipInstalledNoToplevel, + fixtures.EggInfoPkgPipInstalledNoModules, + fixtures.EggInfoPkgSourcesFallback, + unittest.TestCase, +): + def test_packages_distributions_on_eggs(self): + """ + Test old-style egg packages with a variation of 'top_level.txt', + 'SOURCES.txt', and 'installed-files.txt', available. + """ + distributions = packages_distributions() + + def import_names_from_package(package_name): + return { + import_name + for import_name, package_names in distributions.items() + if package_name in package_names + } + + # egginfo-pkg declares one import ('mod') via top_level.txt + assert import_names_from_package('egginfo-pkg') == {'mod'} + + # egg_with_module-pkg has one import ('egg_with_module') inferred from + # installed-files.txt (top_level.txt is missing) + assert import_names_from_package('egg_with_module-pkg') == {'egg_with_module'} + + # egg_with_no_modules-pkg should not be associated with any import names + # (top_level.txt is empty, and installed-files.txt has no .py files) + assert import_names_from_package('egg_with_no_modules-pkg') == set() + + # sources_fallback-pkg has one import ('sources_fallback') inferred from + # SOURCES.txt (top_level.txt and installed-files.txt is missing) + assert import_names_from_package('sources_fallback-pkg') == {'sources_fallback'} + + +class EditableDistributionTest(fixtures.DistInfoPkgEditable, unittest.TestCase): + def test_origin(self): + dist = Distribution.from_name('distinfo-pkg') + assert dist.origin.url.endswith('.whl') + assert dist.origin.archive_info.hashes.sha256 diff --git a/stdlib/test/test_importlib/metadata/test_zip.py b/stdlib/test/test_importlib/metadata/test_zip.py new file mode 100644 index 000000000..276f6288c --- /dev/null +++ b/stdlib/test/test_importlib/metadata/test_zip.py @@ -0,0 +1,62 @@ +import sys +import unittest + +from . import fixtures +from importlib.metadata import ( + PackageNotFoundError, + distribution, + distributions, + entry_points, + files, + version, +) + + +class TestZip(fixtures.ZipFixtures, unittest.TestCase): + def setUp(self): + super().setUp() + self._fixture_on_path('example-21.12-py3-none-any.whl') + + def test_zip_version(self): + self.assertEqual(version('example'), '21.12') + + def test_zip_version_does_not_match(self): + with self.assertRaises(PackageNotFoundError): + version('definitely-not-installed') + + def test_zip_entry_points(self): + scripts = entry_points(group='console_scripts') + entry_point = scripts['example'] + self.assertEqual(entry_point.value, 'example:main') + entry_point = scripts['Example'] + self.assertEqual(entry_point.value, 'example:main') + + def test_missing_metadata(self): + self.assertIsNone(distribution('example').read_text('does not exist')) + + def test_case_insensitive(self): + self.assertEqual(version('Example'), '21.12') + + def test_files(self): + for file in files('example'): + path = str(file.dist.locate_file(file)) + assert '.whl/' in path, path + + def test_one_distribution(self): + dists = list(distributions(path=sys.path[:1])) + assert len(dists) == 1 + + +class TestEgg(TestZip): + def setUp(self): + super().setUp() + self._fixture_on_path('example-21.12-py3.6.egg') + + def test_files(self): + for file in files('example'): + path = str(file.dist.locate_file(file)) + assert '.egg/' in path, path + + def test_normalized_name(self): + dist = distribution('example') + assert dist._normalized_name == 'example' diff --git a/stdlib/test/test_importlib/namespace_pkgs/both_portions/foo/one.py b/stdlib/test/test_importlib/namespace_pkgs/both_portions/foo/one.py new file mode 100644 index 000000000..3080f6f8f --- /dev/null +++ b/stdlib/test/test_importlib/namespace_pkgs/both_portions/foo/one.py @@ -0,0 +1 @@ +attr = 'both_portions foo one' diff --git a/stdlib/test/test_importlib/namespace_pkgs/both_portions/foo/two.py b/stdlib/test/test_importlib/namespace_pkgs/both_portions/foo/two.py new file mode 100644 index 000000000..4131d3d4b --- /dev/null +++ b/stdlib/test/test_importlib/namespace_pkgs/both_portions/foo/two.py @@ -0,0 +1 @@ +attr = 'both_portions foo two' diff --git a/stdlib/test/test_importlib/namespace_pkgs/missing_directory.zip b/stdlib/test/test_importlib/namespace_pkgs/missing_directory.zip new file mode 100644 index 000000000..836a9106b Binary files /dev/null and b/stdlib/test/test_importlib/namespace_pkgs/missing_directory.zip differ diff --git a/stdlib/test/test_importlib/namespace_pkgs/module_and_namespace_package/a_test.py b/stdlib/test/test_importlib/namespace_pkgs/module_and_namespace_package/a_test.py new file mode 100644 index 000000000..43cbedbbd --- /dev/null +++ b/stdlib/test/test_importlib/namespace_pkgs/module_and_namespace_package/a_test.py @@ -0,0 +1 @@ +attr = 'in module' diff --git a/stdlib/test/test_importlib/namespace_pkgs/module_and_namespace_package/a_test/empty b/stdlib/test/test_importlib/namespace_pkgs/module_and_namespace_package/a_test/empty new file mode 100644 index 000000000..e69de29bb diff --git a/stdlib/test/test_importlib/namespace_pkgs/nested_portion1.zip b/stdlib/test/test_importlib/namespace_pkgs/nested_portion1.zip new file mode 100644 index 000000000..8d22406f2 Binary files /dev/null and b/stdlib/test/test_importlib/namespace_pkgs/nested_portion1.zip differ diff --git a/stdlib/test/test_importlib/namespace_pkgs/not_a_namespace_pkg/foo/__init__.py b/stdlib/test/test_importlib/namespace_pkgs/not_a_namespace_pkg/foo/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/stdlib/test/test_importlib/namespace_pkgs/not_a_namespace_pkg/foo/one.py b/stdlib/test/test_importlib/namespace_pkgs/not_a_namespace_pkg/foo/one.py new file mode 100644 index 000000000..d8f5c831f --- /dev/null +++ b/stdlib/test/test_importlib/namespace_pkgs/not_a_namespace_pkg/foo/one.py @@ -0,0 +1 @@ +attr = 'portion1 foo one' diff --git a/stdlib/test/test_importlib/namespace_pkgs/portion1/foo/one.py b/stdlib/test/test_importlib/namespace_pkgs/portion1/foo/one.py new file mode 100644 index 000000000..d8f5c831f --- /dev/null +++ b/stdlib/test/test_importlib/namespace_pkgs/portion1/foo/one.py @@ -0,0 +1 @@ +attr = 'portion1 foo one' diff --git a/stdlib/test/test_importlib/namespace_pkgs/portion2/foo/two.py b/stdlib/test/test_importlib/namespace_pkgs/portion2/foo/two.py new file mode 100644 index 000000000..d092e1e99 --- /dev/null +++ b/stdlib/test/test_importlib/namespace_pkgs/portion2/foo/two.py @@ -0,0 +1 @@ +attr = 'portion2 foo two' diff --git a/stdlib/test/test_importlib/namespace_pkgs/project1/parent/child/one.py b/stdlib/test/test_importlib/namespace_pkgs/project1/parent/child/one.py new file mode 100644 index 000000000..2776fcdfd --- /dev/null +++ b/stdlib/test/test_importlib/namespace_pkgs/project1/parent/child/one.py @@ -0,0 +1 @@ +attr = 'parent child one' diff --git a/stdlib/test/test_importlib/namespace_pkgs/project2/parent/child/two.py b/stdlib/test/test_importlib/namespace_pkgs/project2/parent/child/two.py new file mode 100644 index 000000000..8b037bcb0 --- /dev/null +++ b/stdlib/test/test_importlib/namespace_pkgs/project2/parent/child/two.py @@ -0,0 +1 @@ +attr = 'parent child two' diff --git a/stdlib/test/test_importlib/namespace_pkgs/project3/parent/child/three.py b/stdlib/test/test_importlib/namespace_pkgs/project3/parent/child/three.py new file mode 100644 index 000000000..f8abfe1c1 --- /dev/null +++ b/stdlib/test/test_importlib/namespace_pkgs/project3/parent/child/three.py @@ -0,0 +1 @@ +attr = 'parent child three' diff --git a/stdlib/test/test_importlib/namespace_pkgs/top_level_portion1.zip b/stdlib/test/test_importlib/namespace_pkgs/top_level_portion1.zip new file mode 100644 index 000000000..3b866c914 Binary files /dev/null and b/stdlib/test/test_importlib/namespace_pkgs/top_level_portion1.zip differ diff --git a/stdlib/test/test_importlib/partial/cfimport.py b/stdlib/test/test_importlib/partial/cfimport.py new file mode 100644 index 000000000..c92d2fe1d --- /dev/null +++ b/stdlib/test/test_importlib/partial/cfimport.py @@ -0,0 +1,38 @@ +import os +import sys +import threading +import traceback + + +NLOOPS = 50 +NTHREADS = 30 + + +def t1(): + try: + from concurrent.futures import ThreadPoolExecutor + except Exception: + traceback.print_exc() + os._exit(1) + +def t2(): + try: + from concurrent.futures.thread import ThreadPoolExecutor + except Exception: + traceback.print_exc() + os._exit(1) + +def main(): + for j in range(NLOOPS): + threads = [] + for i in range(NTHREADS): + threads.append(threading.Thread(target=t2 if i % 1 else t1)) + for thread in threads: + thread.start() + for thread in threads: + thread.join() + sys.modules.pop('concurrent.futures', None) + sys.modules.pop('concurrent.futures.thread', None) + +if __name__ == "__main__": + main() diff --git a/stdlib/test/test_importlib/partial/pool_in_threads.py b/stdlib/test/test_importlib/partial/pool_in_threads.py new file mode 100644 index 000000000..faa7867b8 --- /dev/null +++ b/stdlib/test/test_importlib/partial/pool_in_threads.py @@ -0,0 +1,27 @@ +import multiprocessing +import os +import threading +import traceback + + +def t(): + try: + with multiprocessing.Pool(1): + pass + except Exception: + traceback.print_exc() + os._exit(1) + + +def main(): + threads = [] + for i in range(20): + threads.append(threading.Thread(target=t)) + for thread in threads: + thread.start() + for thread in threads: + thread.join() + + +if __name__ == "__main__": + main() diff --git a/stdlib/test/test_importlib/resources/__init__.py b/stdlib/test/test_importlib/resources/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/stdlib/test/test_importlib/resources/_path.py b/stdlib/test/test_importlib/resources/_path.py new file mode 100644 index 000000000..b144628cb --- /dev/null +++ b/stdlib/test/test_importlib/resources/_path.py @@ -0,0 +1,94 @@ +import pathlib +import functools + +from typing import Dict, Union +from typing import runtime_checkable +from typing import Protocol + + +#### +# from jaraco.path 3.7.1 + + +class Symlink(str): + """ + A string indicating the target of a symlink. + """ + + +FilesSpec = Dict[str, Union[str, bytes, Symlink, 'FilesSpec']] + + +@runtime_checkable +class TreeMaker(Protocol): + def __truediv__(self, *args, **kwargs): ... # pragma: no cover + + def mkdir(self, **kwargs): ... # pragma: no cover + + def write_text(self, content, **kwargs): ... # pragma: no cover + + def write_bytes(self, content): ... # pragma: no cover + + def symlink_to(self, target): ... # pragma: no cover + + +def _ensure_tree_maker(obj: Union[str, TreeMaker]) -> TreeMaker: + return obj if isinstance(obj, TreeMaker) else pathlib.Path(obj) # type: ignore[return-value] + + +def build( + spec: FilesSpec, + prefix: Union[str, TreeMaker] = pathlib.Path(), # type: ignore[assignment] +): + """ + Build a set of files/directories, as described by the spec. + + Each key represents a pathname, and the value represents + the content. Content may be a nested directory. + + >>> spec = { + ... 'README.txt': "A README file", + ... "foo": { + ... "__init__.py": "", + ... "bar": { + ... "__init__.py": "", + ... }, + ... "baz.py": "# Some code", + ... "bar.py": Symlink("baz.py"), + ... }, + ... "bing": Symlink("foo"), + ... } + >>> target = getfixture('tmp_path') + >>> build(spec, target) + >>> target.joinpath('foo/baz.py').read_text(encoding='utf-8') + '# Some code' + >>> target.joinpath('bing/bar.py').read_text(encoding='utf-8') + '# Some code' + """ + for name, contents in spec.items(): + create(contents, _ensure_tree_maker(prefix) / name) + + +@functools.singledispatch +def create(content: Union[str, bytes, FilesSpec], path): + path.mkdir(exist_ok=True) + build(content, prefix=path) # type: ignore[arg-type] + + +@create.register +def _(content: bytes, path): + path.write_bytes(content) + + +@create.register +def _(content: str, path): + path.write_text(content, encoding='utf-8') + + +@create.register +def _(content: Symlink, path): + path.symlink_to(content) + + +# end from jaraco.path +#### diff --git a/stdlib/test/test_importlib/resources/test_compatibilty_files.py b/stdlib/test/test_importlib/resources/test_compatibilty_files.py new file mode 100644 index 000000000..bcf608d9e --- /dev/null +++ b/stdlib/test/test_importlib/resources/test_compatibilty_files.py @@ -0,0 +1,104 @@ +import io +import unittest + +from importlib import resources + +from importlib.resources._adapters import ( + CompatibilityFiles, + wrap_spec, +) + +from . import util + + +class CompatibilityFilesTests(unittest.TestCase): + @property + def package(self): + bytes_data = io.BytesIO(b'Hello, world!') + return util.create_package( + file=bytes_data, + path='some_path', + contents=('a', 'b', 'c'), + ) + + @property + def files(self): + return resources.files(self.package) + + def test_spec_path_iter(self): + self.assertEqual( + sorted(path.name for path in self.files.iterdir()), + ['a', 'b', 'c'], + ) + + def test_child_path_iter(self): + self.assertEqual(list((self.files / 'a').iterdir()), []) + + def test_orphan_path_iter(self): + self.assertEqual(list((self.files / 'a' / 'a').iterdir()), []) + self.assertEqual(list((self.files / 'a' / 'a' / 'a').iterdir()), []) + + def test_spec_path_is(self): + self.assertFalse(self.files.is_file()) + self.assertFalse(self.files.is_dir()) + + def test_child_path_is(self): + self.assertTrue((self.files / 'a').is_file()) + self.assertFalse((self.files / 'a').is_dir()) + + def test_orphan_path_is(self): + self.assertFalse((self.files / 'a' / 'a').is_file()) + self.assertFalse((self.files / 'a' / 'a').is_dir()) + self.assertFalse((self.files / 'a' / 'a' / 'a').is_file()) + self.assertFalse((self.files / 'a' / 'a' / 'a').is_dir()) + + def test_spec_path_name(self): + self.assertEqual(self.files.name, 'testingpackage') + + def test_child_path_name(self): + self.assertEqual((self.files / 'a').name, 'a') + + def test_orphan_path_name(self): + self.assertEqual((self.files / 'a' / 'b').name, 'b') + self.assertEqual((self.files / 'a' / 'b' / 'c').name, 'c') + + def test_spec_path_open(self): + self.assertEqual(self.files.read_bytes(), b'Hello, world!') + self.assertEqual(self.files.read_text(encoding='utf-8'), 'Hello, world!') + + def test_child_path_open(self): + self.assertEqual((self.files / 'a').read_bytes(), b'Hello, world!') + self.assertEqual( + (self.files / 'a').read_text(encoding='utf-8'), 'Hello, world!' + ) + + def test_orphan_path_open(self): + with self.assertRaises(FileNotFoundError): + (self.files / 'a' / 'b').read_bytes() + with self.assertRaises(FileNotFoundError): + (self.files / 'a' / 'b' / 'c').read_bytes() + + def test_open_invalid_mode(self): + with self.assertRaises(ValueError): + self.files.open('0') + + def test_orphan_path_invalid(self): + with self.assertRaises(ValueError): + CompatibilityFiles.OrphanPath() + + def test_wrap_spec(self): + spec = wrap_spec(self.package) + self.assertIsInstance(spec.loader.get_resource_reader(None), CompatibilityFiles) + + +class CompatibilityFilesNoReaderTests(unittest.TestCase): + @property + def package(self): + return util.create_package_from_loader(None) + + @property + def files(self): + return resources.files(self.package) + + def test_spec_path_joinpath(self): + self.assertIsInstance(self.files / 'a', CompatibilityFiles.OrphanPath) diff --git a/stdlib/test/test_importlib/resources/test_contents.py b/stdlib/test/test_importlib/resources/test_contents.py new file mode 100644 index 000000000..4e4e0e9c3 --- /dev/null +++ b/stdlib/test/test_importlib/resources/test_contents.py @@ -0,0 +1,38 @@ +import unittest +from importlib import resources + +from . import util + + +class ContentsTests: + expected = { + '__init__.py', + 'binary.file', + 'subdirectory', + 'utf-16.file', + 'utf-8.file', + } + + def test_contents(self): + contents = {path.name for path in resources.files(self.data).iterdir()} + assert self.expected <= contents + + +class ContentsDiskTests(ContentsTests, util.DiskSetup, unittest.TestCase): + pass + + +class ContentsZipTests(ContentsTests, util.ZipSetup, unittest.TestCase): + pass + + +class ContentsNamespaceTests(ContentsTests, util.DiskSetup, unittest.TestCase): + MODULE = 'namespacedata01' + + expected = { + # no __init__ because of namespace design + 'binary.file', + 'subdirectory', + 'utf-16.file', + 'utf-8.file', + } diff --git a/stdlib/test/test_importlib/resources/test_custom.py b/stdlib/test/test_importlib/resources/test_custom.py new file mode 100644 index 000000000..640f90fc0 --- /dev/null +++ b/stdlib/test/test_importlib/resources/test_custom.py @@ -0,0 +1,48 @@ +import unittest +import contextlib +import pathlib + +from test.support import os_helper + +from importlib import resources +from importlib.resources import abc +from importlib.resources.abc import TraversableResources, ResourceReader +from . import util + + +class SimpleLoader: + """ + A simple loader that only implements a resource reader. + """ + + def __init__(self, reader: ResourceReader): + self.reader = reader + + def get_resource_reader(self, package): + return self.reader + + +class MagicResources(TraversableResources): + """ + Magically returns the resources at path. + """ + + def __init__(self, path: pathlib.Path): + self.path = path + + def files(self): + return self.path + + +class CustomTraversableResourcesTests(unittest.TestCase): + def setUp(self): + self.fixtures = contextlib.ExitStack() + self.addCleanup(self.fixtures.close) + + def test_custom_loader(self): + temp_dir = pathlib.Path(self.fixtures.enter_context(os_helper.temp_dir())) + loader = SimpleLoader(MagicResources(temp_dir)) + pkg = util.create_package_from_loader(loader) + files = resources.files(pkg) + assert isinstance(files, abc.Traversable) + assert list(files.iterdir()) == [] diff --git a/stdlib/test/test_importlib/resources/test_files.py b/stdlib/test/test_importlib/resources/test_files.py new file mode 100644 index 000000000..3ce44999f --- /dev/null +++ b/stdlib/test/test_importlib/resources/test_files.py @@ -0,0 +1,191 @@ +import pathlib +import py_compile +import textwrap +import unittest +import warnings +import importlib +import contextlib + +from importlib import resources +from importlib.resources.abc import Traversable +from . import util +from test.support import os_helper, import_helper + + +@contextlib.contextmanager +def suppress_known_deprecation(): + with warnings.catch_warnings(record=True) as ctx: + warnings.simplefilter('default', category=DeprecationWarning) + yield ctx + + +class FilesTests: + def test_read_bytes(self): + files = resources.files(self.data) + actual = files.joinpath('utf-8.file').read_bytes() + assert actual == b'Hello, UTF-8 world!\n' + + def test_read_text(self): + files = resources.files(self.data) + actual = files.joinpath('utf-8.file').read_text(encoding='utf-8') + assert actual == 'Hello, UTF-8 world!\n' + + def test_traversable(self): + assert isinstance(resources.files(self.data), Traversable) + + def test_joinpath_with_multiple_args(self): + files = resources.files(self.data) + binfile = files.joinpath('subdirectory', 'binary.file') + self.assertTrue(binfile.is_file()) + + def test_old_parameter(self): + """ + Files used to take a 'package' parameter. Make sure anyone + passing by name is still supported. + """ + with suppress_known_deprecation(): + resources.files(package=self.data) + + +class OpenDiskTests(FilesTests, util.DiskSetup, unittest.TestCase): + pass + + +class OpenZipTests(FilesTests, util.ZipSetup, unittest.TestCase): + pass + + +class OpenNamespaceTests(FilesTests, util.DiskSetup, unittest.TestCase): + MODULE = 'namespacedata01' + + def test_non_paths_in_dunder_path(self): + """ + Non-path items in a namespace package's ``__path__`` are ignored. + + As reported in python/importlib_resources#311, some tools + like Setuptools, when creating editable packages, will inject + non-paths into a namespace package's ``__path__``, a + sentinel like + ``__editable__.sample_namespace-1.0.finder.__path_hook__`` + to cause the ``PathEntryFinder`` to be called when searching + for packages. In that case, resources should still be loadable. + """ + import namespacedata01 + + namespacedata01.__path__.append( + '__editable__.sample_namespace-1.0.finder.__path_hook__' + ) + + resources.files(namespacedata01) + + +class OpenNamespaceZipTests(FilesTests, util.ZipSetup, unittest.TestCase): + ZIP_MODULE = 'namespacedata01' + + +class DirectSpec: + """ + Override behavior of ModuleSetup to write a full spec directly. + """ + + MODULE = 'unused' + + def load_fixture(self, name): + self.tree_on_path(self.spec) + + +class ModulesFiles: + spec = { + 'mod.py': '', + 'res.txt': 'resources are the best', + } + + def test_module_resources(self): + """ + A module can have resources found adjacent to the module. + """ + import mod # type: ignore[import-not-found] + + actual = resources.files(mod).joinpath('res.txt').read_text(encoding='utf-8') + assert actual == self.spec['res.txt'] + + +class ModuleFilesDiskTests(DirectSpec, util.DiskSetup, ModulesFiles, unittest.TestCase): + pass + + +class ModuleFilesZipTests(DirectSpec, util.ZipSetup, ModulesFiles, unittest.TestCase): + pass + + +class ImplicitContextFiles: + set_val = textwrap.dedent( + f""" + import {resources.__name__} as res + val = res.files().joinpath('res.txt').read_text(encoding='utf-8') + """ + ) + spec = { + 'somepkg': { + '__init__.py': set_val, + 'submod.py': set_val, + 'res.txt': 'resources are the best', + }, + 'frozenpkg': { + '__init__.py': set_val.replace(resources.__name__, 'c_resources'), + 'res.txt': 'resources are the best', + }, + } + + def test_implicit_files_package(self): + """ + Without any parameter, files() will infer the location as the caller. + """ + assert importlib.import_module('somepkg').val == 'resources are the best' + + def test_implicit_files_submodule(self): + """ + Without any parameter, files() will infer the location as the caller. + """ + assert importlib.import_module('somepkg.submod').val == 'resources are the best' + + def _compile_importlib(self): + """ + Make a compiled-only copy of the importlib resources package. + + Currently only code is copied, as importlib resources doesn't itself + have any resources. + """ + bin_site = self.fixtures.enter_context(os_helper.temp_dir()) + c_resources = pathlib.Path(bin_site, 'c_resources') + sources = pathlib.Path(resources.__file__).parent + + for source_path in sources.glob('**/*.py'): + c_path = c_resources.joinpath(source_path.relative_to(sources)).with_suffix('.pyc') + py_compile.compile(source_path, c_path) + self.fixtures.enter_context(import_helper.DirsOnSysPath(bin_site)) + + def test_implicit_files_with_compiled_importlib(self): + """ + Caller detection works for compiled-only resources module. + + python/cpython#123085 + """ + self._compile_importlib() + assert importlib.import_module('frozenpkg').val == 'resources are the best' + + +class ImplicitContextFilesDiskTests( + DirectSpec, util.DiskSetup, ImplicitContextFiles, unittest.TestCase +): + pass + + +class ImplicitContextFilesZipTests( + DirectSpec, util.ZipSetup, ImplicitContextFiles, unittest.TestCase +): + pass + + +if __name__ == '__main__': + unittest.main() diff --git a/stdlib/test/test_importlib/resources/test_functional.py b/stdlib/test/test_importlib/resources/test_functional.py new file mode 100644 index 000000000..e8d25fa4d --- /dev/null +++ b/stdlib/test/test_importlib/resources/test_functional.py @@ -0,0 +1,249 @@ +import unittest +import os +import importlib + +from test.support import warnings_helper + +from importlib import resources + +from . import util + +# Since the functional API forwards to Traversable, we only test +# filesystem resources here -- not zip files, namespace packages etc. +# We do test for two kinds of Anchor, though. + + +class StringAnchorMixin: + anchor01 = 'data01' + anchor02 = 'data02' + + +class ModuleAnchorMixin: + @property + def anchor01(self): + return importlib.import_module('data01') + + @property + def anchor02(self): + return importlib.import_module('data02') + + +class FunctionalAPIBase(util.DiskSetup): + def setUp(self): + super().setUp() + self.load_fixture('data02') + + def _gen_resourcetxt_path_parts(self): + """Yield various names of a text file in anchor02, each in a subTest""" + for path_parts in ( + ('subdirectory', 'subsubdir', 'resource.txt'), + ('subdirectory/subsubdir/resource.txt',), + ('subdirectory/subsubdir', 'resource.txt'), + ): + with self.subTest(path_parts=path_parts): + yield path_parts + + def test_read_text(self): + self.assertEqual( + resources.read_text(self.anchor01, 'utf-8.file'), + 'Hello, UTF-8 world!\n', + ) + self.assertEqual( + resources.read_text( + self.anchor02, + 'subdirectory', + 'subsubdir', + 'resource.txt', + encoding='utf-8', + ), + 'a resource', + ) + for path_parts in self._gen_resourcetxt_path_parts(): + self.assertEqual( + resources.read_text( + self.anchor02, + *path_parts, + encoding='utf-8', + ), + 'a resource', + ) + # Use generic OSError, since e.g. attempting to read a directory can + # fail with PermissionError rather than IsADirectoryError + with self.assertRaises(OSError): + resources.read_text(self.anchor01) + with self.assertRaises(OSError): + resources.read_text(self.anchor01, 'no-such-file') + with self.assertRaises(UnicodeDecodeError): + resources.read_text(self.anchor01, 'utf-16.file') + self.assertEqual( + resources.read_text( + self.anchor01, + 'binary.file', + encoding='latin1', + ), + '\x00\x01\x02\x03', + ) + self.assertEndsWith( # ignore the BOM + resources.read_text( + self.anchor01, + 'utf-16.file', + errors='backslashreplace', + ), + 'Hello, UTF-16 world!\n'.encode('utf-16-le').decode( + errors='backslashreplace', + ), + ) + + def test_read_binary(self): + self.assertEqual( + resources.read_binary(self.anchor01, 'utf-8.file'), + b'Hello, UTF-8 world!\n', + ) + for path_parts in self._gen_resourcetxt_path_parts(): + self.assertEqual( + resources.read_binary(self.anchor02, *path_parts), + b'a resource', + ) + + def test_open_text(self): + with resources.open_text(self.anchor01, 'utf-8.file') as f: + self.assertEqual(f.read(), 'Hello, UTF-8 world!\n') + for path_parts in self._gen_resourcetxt_path_parts(): + with resources.open_text( + self.anchor02, + *path_parts, + encoding='utf-8', + ) as f: + self.assertEqual(f.read(), 'a resource') + # Use generic OSError, since e.g. attempting to read a directory can + # fail with PermissionError rather than IsADirectoryError + with self.assertRaises(OSError): + resources.open_text(self.anchor01) + with self.assertRaises(OSError): + resources.open_text(self.anchor01, 'no-such-file') + with resources.open_text(self.anchor01, 'utf-16.file') as f: + with self.assertRaises(UnicodeDecodeError): + f.read() + with resources.open_text( + self.anchor01, + 'binary.file', + encoding='latin1', + ) as f: + self.assertEqual(f.read(), '\x00\x01\x02\x03') + with resources.open_text( + self.anchor01, + 'utf-16.file', + errors='backslashreplace', + ) as f: + self.assertEndsWith( # ignore the BOM + f.read(), + 'Hello, UTF-16 world!\n'.encode('utf-16-le').decode( + errors='backslashreplace', + ), + ) + + def test_open_binary(self): + with resources.open_binary(self.anchor01, 'utf-8.file') as f: + self.assertEqual(f.read(), b'Hello, UTF-8 world!\n') + for path_parts in self._gen_resourcetxt_path_parts(): + with resources.open_binary( + self.anchor02, + *path_parts, + ) as f: + self.assertEqual(f.read(), b'a resource') + + def test_path(self): + with resources.path(self.anchor01, 'utf-8.file') as path: + with open(str(path), encoding='utf-8') as f: + self.assertEqual(f.read(), 'Hello, UTF-8 world!\n') + with resources.path(self.anchor01) as path: + with open(os.path.join(path, 'utf-8.file'), encoding='utf-8') as f: + self.assertEqual(f.read(), 'Hello, UTF-8 world!\n') + + def test_is_resource(self): + is_resource = resources.is_resource + self.assertTrue(is_resource(self.anchor01, 'utf-8.file')) + self.assertFalse(is_resource(self.anchor01, 'no_such_file')) + self.assertFalse(is_resource(self.anchor01)) + self.assertFalse(is_resource(self.anchor01, 'subdirectory')) + for path_parts in self._gen_resourcetxt_path_parts(): + self.assertTrue(is_resource(self.anchor02, *path_parts)) + + def test_contents(self): + with warnings_helper.check_warnings((".*contents.*", DeprecationWarning)): + c = resources.contents(self.anchor01) + self.assertGreaterEqual( + set(c), + {'utf-8.file', 'utf-16.file', 'binary.file', 'subdirectory'}, + ) + with self.assertRaises(OSError), warnings_helper.check_warnings(( + ".*contents.*", + DeprecationWarning, + )): + list(resources.contents(self.anchor01, 'utf-8.file')) + + for path_parts in self._gen_resourcetxt_path_parts(): + with self.assertRaises(OSError), warnings_helper.check_warnings(( + ".*contents.*", + DeprecationWarning, + )): + list(resources.contents(self.anchor01, *path_parts)) + with warnings_helper.check_warnings((".*contents.*", DeprecationWarning)): + c = resources.contents(self.anchor01, 'subdirectory') + self.assertGreaterEqual( + set(c), + {'binary.file'}, + ) + + @warnings_helper.ignore_warnings(category=DeprecationWarning) + def test_common_errors(self): + for func in ( + resources.read_text, + resources.read_binary, + resources.open_text, + resources.open_binary, + resources.path, + resources.is_resource, + resources.contents, + ): + with self.subTest(func=func): + # Rejecting None anchor + with self.assertRaises(TypeError): + func(None) + # Rejecting invalid anchor type + with self.assertRaises((TypeError, AttributeError)): + func(1234) + # Unknown module + with self.assertRaises(ModuleNotFoundError): + func('$missing module$') + + def test_text_errors(self): + for func in ( + resources.read_text, + resources.open_text, + ): + with self.subTest(func=func): + # Multiple path arguments need explicit encoding argument. + with self.assertRaises(TypeError): + func( + self.anchor02, + 'subdirectory', + 'subsubdir', + 'resource.txt', + ) + + +class FunctionalAPITest_StringAnchor( + StringAnchorMixin, + FunctionalAPIBase, + unittest.TestCase, +): + pass + + +class FunctionalAPITest_ModuleAnchor( + ModuleAnchorMixin, + FunctionalAPIBase, + unittest.TestCase, +): + pass diff --git a/stdlib/test/test_importlib/resources/test_open.py b/stdlib/test/test_importlib/resources/test_open.py new file mode 100644 index 000000000..8c00378ad --- /dev/null +++ b/stdlib/test/test_importlib/resources/test_open.py @@ -0,0 +1,84 @@ +import unittest + +from importlib import resources +from . import util + + +class CommonBinaryTests(util.CommonTests, unittest.TestCase): + def execute(self, package, path): + target = resources.files(package).joinpath(path) + with target.open('rb'): + pass + + +class CommonTextTests(util.CommonTests, unittest.TestCase): + def execute(self, package, path): + target = resources.files(package).joinpath(path) + with target.open(encoding='utf-8'): + pass + + +class OpenTests: + def test_open_binary(self): + target = resources.files(self.data) / 'binary.file' + with target.open('rb') as fp: + result = fp.read() + self.assertEqual(result, bytes(range(4))) + + def test_open_text_default_encoding(self): + target = resources.files(self.data) / 'utf-8.file' + with target.open(encoding='utf-8') as fp: + result = fp.read() + self.assertEqual(result, 'Hello, UTF-8 world!\n') + + def test_open_text_given_encoding(self): + target = resources.files(self.data) / 'utf-16.file' + with target.open(encoding='utf-16', errors='strict') as fp: + result = fp.read() + self.assertEqual(result, 'Hello, UTF-16 world!\n') + + def test_open_text_with_errors(self): + """ + Raises UnicodeError without the 'errors' argument. + """ + target = resources.files(self.data) / 'utf-16.file' + with target.open(encoding='utf-8', errors='strict') as fp: + self.assertRaises(UnicodeError, fp.read) + with target.open(encoding='utf-8', errors='ignore') as fp: + result = fp.read() + self.assertEqual( + result, + 'H\x00e\x00l\x00l\x00o\x00,\x00 ' + '\x00U\x00T\x00F\x00-\x001\x006\x00 ' + '\x00w\x00o\x00r\x00l\x00d\x00!\x00\n\x00', + ) + + def test_open_binary_FileNotFoundError(self): + target = resources.files(self.data) / 'does-not-exist' + with self.assertRaises(FileNotFoundError): + target.open('rb') + + def test_open_text_FileNotFoundError(self): + target = resources.files(self.data) / 'does-not-exist' + with self.assertRaises(FileNotFoundError): + target.open(encoding='utf-8') + + +class OpenDiskTests(OpenTests, util.DiskSetup, unittest.TestCase): + pass + + +class OpenDiskNamespaceTests(OpenTests, util.DiskSetup, unittest.TestCase): + MODULE = 'namespacedata01' + + +class OpenZipTests(OpenTests, util.ZipSetup, unittest.TestCase): + pass + + +class OpenNamespaceZipTests(OpenTests, util.ZipSetup, unittest.TestCase): + MODULE = 'namespacedata01' + + +if __name__ == '__main__': + unittest.main() diff --git a/stdlib/test/test_importlib/resources/test_path.py b/stdlib/test/test_importlib/resources/test_path.py new file mode 100644 index 000000000..903911f57 --- /dev/null +++ b/stdlib/test/test_importlib/resources/test_path.py @@ -0,0 +1,60 @@ +import io +import pathlib +import unittest + +from importlib import resources +from . import util + + +class CommonTests(util.CommonTests, unittest.TestCase): + def execute(self, package, path): + with resources.as_file(resources.files(package).joinpath(path)): + pass + + +class PathTests: + def test_reading(self): + """ + Path should be readable and a pathlib.Path instance. + """ + target = resources.files(self.data) / 'utf-8.file' + with resources.as_file(target) as path: + self.assertIsInstance(path, pathlib.Path) + self.assertEndsWith(path.name, "utf-8.file") + self.assertEqual('Hello, UTF-8 world!\n', path.read_text(encoding='utf-8')) + + +class PathDiskTests(PathTests, util.DiskSetup, unittest.TestCase): + def test_natural_path(self): + # Guarantee the internal implementation detail that + # file-system-backed resources do not get the tempdir + # treatment. + target = resources.files(self.data) / 'utf-8.file' + with resources.as_file(target) as path: + assert 'data' in str(path) + + +class PathMemoryTests(PathTests, unittest.TestCase): + def setUp(self): + file = io.BytesIO(b'Hello, UTF-8 world!\n') + self.addCleanup(file.close) + self.data = util.create_package( + file=file, path=FileNotFoundError("package exists only in memory") + ) + self.data.__spec__.origin = None + self.data.__spec__.has_location = False + + +class PathZipTests(PathTests, util.ZipSetup, unittest.TestCase): + def test_remove_in_context_manager(self): + """ + It is not an error if the file that was temporarily stashed on the + file system is removed inside the `with` stanza. + """ + target = resources.files(self.data) / 'utf-8.file' + with resources.as_file(target) as path: + path.unlink() + + +if __name__ == '__main__': + unittest.main() diff --git a/stdlib/test/test_importlib/resources/test_read.py b/stdlib/test/test_importlib/resources/test_read.py new file mode 100644 index 000000000..59c237d96 --- /dev/null +++ b/stdlib/test/test_importlib/resources/test_read.py @@ -0,0 +1,93 @@ +import unittest + +from importlib import import_module, resources + +from . import util + + +class CommonBinaryTests(util.CommonTests, unittest.TestCase): + def execute(self, package, path): + resources.files(package).joinpath(path).read_bytes() + + +class CommonTextTests(util.CommonTests, unittest.TestCase): + def execute(self, package, path): + resources.files(package).joinpath(path).read_text(encoding='utf-8') + + +class ReadTests: + def test_read_bytes(self): + result = resources.files(self.data).joinpath('binary.file').read_bytes() + self.assertEqual(result, bytes(range(4))) + + def test_read_text_default_encoding(self): + result = ( + resources.files(self.data) + .joinpath('utf-8.file') + .read_text(encoding='utf-8') + ) + self.assertEqual(result, 'Hello, UTF-8 world!\n') + + def test_read_text_given_encoding(self): + result = ( + resources.files(self.data) + .joinpath('utf-16.file') + .read_text(encoding='utf-16') + ) + self.assertEqual(result, 'Hello, UTF-16 world!\n') + + def test_read_text_with_errors(self): + """ + Raises UnicodeError without the 'errors' argument. + """ + target = resources.files(self.data) / 'utf-16.file' + self.assertRaises(UnicodeError, target.read_text, encoding='utf-8') + result = target.read_text(encoding='utf-8', errors='ignore') + self.assertEqual( + result, + 'H\x00e\x00l\x00l\x00o\x00,\x00 ' + '\x00U\x00T\x00F\x00-\x001\x006\x00 ' + '\x00w\x00o\x00r\x00l\x00d\x00!\x00\n\x00', + ) + + +class ReadDiskTests(ReadTests, util.DiskSetup, unittest.TestCase): + pass + + +class ReadZipTests(ReadTests, util.ZipSetup, unittest.TestCase): + def test_read_submodule_resource(self): + submodule = import_module('data01.subdirectory') + result = resources.files(submodule).joinpath('binary.file').read_bytes() + self.assertEqual(result, bytes(range(4, 8))) + + def test_read_submodule_resource_by_name(self): + result = ( + resources.files('data01.subdirectory').joinpath('binary.file').read_bytes() + ) + self.assertEqual(result, bytes(range(4, 8))) + + +class ReadNamespaceTests(ReadTests, util.DiskSetup, unittest.TestCase): + MODULE = 'namespacedata01' + + +class ReadNamespaceZipTests(ReadTests, util.ZipSetup, unittest.TestCase): + MODULE = 'namespacedata01' + + def test_read_submodule_resource(self): + submodule = import_module('namespacedata01.subdirectory') + result = resources.files(submodule).joinpath('binary.file').read_bytes() + self.assertEqual(result, bytes(range(12, 16))) + + def test_read_submodule_resource_by_name(self): + result = ( + resources.files('namespacedata01.subdirectory') + .joinpath('binary.file') + .read_bytes() + ) + self.assertEqual(result, bytes(range(12, 16))) + + +if __name__ == '__main__': + unittest.main() diff --git a/stdlib/test/test_importlib/resources/test_reader.py b/stdlib/test/test_importlib/resources/test_reader.py new file mode 100644 index 000000000..ed5693ab4 --- /dev/null +++ b/stdlib/test/test_importlib/resources/test_reader.py @@ -0,0 +1,137 @@ +import os.path +import pathlib +import unittest + +from importlib import import_module +from importlib.readers import MultiplexedPath, NamespaceReader + +from . import util + + +class MultiplexedPathTest(util.DiskSetup, unittest.TestCase): + MODULE = 'namespacedata01' + + def setUp(self): + super().setUp() + self.folder = pathlib.Path(self.data.__path__[0]) + self.data01 = pathlib.Path(self.load_fixture('data01').__file__).parent + self.data02 = pathlib.Path(self.load_fixture('data02').__file__).parent + + def test_init_no_paths(self): + with self.assertRaises(FileNotFoundError): + MultiplexedPath() + + def test_init_file(self): + with self.assertRaises(NotADirectoryError): + MultiplexedPath(self.folder / 'binary.file') + + def test_iterdir(self): + contents = {path.name for path in MultiplexedPath(self.folder).iterdir()} + try: + contents.remove('__pycache__') + except (KeyError, ValueError): + pass + self.assertEqual( + contents, {'subdirectory', 'binary.file', 'utf-16.file', 'utf-8.file'} + ) + + def test_iterdir_duplicate(self): + contents = { + path.name for path in MultiplexedPath(self.folder, self.data01).iterdir() + } + for remove in ('__pycache__', '__init__.pyc'): + try: + contents.remove(remove) + except (KeyError, ValueError): + pass + self.assertEqual( + contents, + {'__init__.py', 'binary.file', 'subdirectory', 'utf-16.file', 'utf-8.file'}, + ) + + def test_is_dir(self): + self.assertEqual(MultiplexedPath(self.folder).is_dir(), True) + + def test_is_file(self): + self.assertEqual(MultiplexedPath(self.folder).is_file(), False) + + def test_open_file(self): + path = MultiplexedPath(self.folder) + with self.assertRaises(FileNotFoundError): + path.read_bytes() + with self.assertRaises(FileNotFoundError): + path.read_text() + with self.assertRaises(FileNotFoundError): + path.open() + + def test_join_path(self): + prefix = str(self.folder.parent) + path = MultiplexedPath(self.folder, self.data01) + self.assertEqual( + str(path.joinpath('binary.file'))[len(prefix) + 1 :], + os.path.join('namespacedata01', 'binary.file'), + ) + sub = path.joinpath('subdirectory') + assert isinstance(sub, MultiplexedPath) + assert 'namespacedata01' in str(sub) + assert 'data01' in str(sub) + self.assertEqual( + str(path.joinpath('imaginary'))[len(prefix) + 1 :], + os.path.join('namespacedata01', 'imaginary'), + ) + self.assertEqual(path.joinpath(), path) + + def test_join_path_compound(self): + path = MultiplexedPath(self.folder) + assert not path.joinpath('imaginary/foo.py').exists() + + def test_join_path_common_subdir(self): + prefix = str(self.data02.parent) + path = MultiplexedPath(self.data01, self.data02) + self.assertIsInstance(path.joinpath('subdirectory'), MultiplexedPath) + self.assertEqual( + str(path.joinpath('subdirectory', 'subsubdir'))[len(prefix) + 1 :], + os.path.join('data02', 'subdirectory', 'subsubdir'), + ) + + def test_repr(self): + self.assertEqual( + repr(MultiplexedPath(self.folder)), + f"MultiplexedPath('{self.folder}')", + ) + + def test_name(self): + self.assertEqual( + MultiplexedPath(self.folder).name, + os.path.basename(self.folder), + ) + + +class NamespaceReaderTest(util.DiskSetup, unittest.TestCase): + MODULE = 'namespacedata01' + + def test_init_error(self): + with self.assertRaises(ValueError): + NamespaceReader(['path1', 'path2']) + + def test_resource_path(self): + namespacedata01 = import_module('namespacedata01') + reader = NamespaceReader(namespacedata01.__spec__.submodule_search_locations) + + root = self.data.__path__[0] + self.assertEqual( + reader.resource_path('binary.file'), os.path.join(root, 'binary.file') + ) + self.assertEqual( + reader.resource_path('imaginary'), os.path.join(root, 'imaginary') + ) + + def test_files(self): + reader = NamespaceReader(self.data.__spec__.submodule_search_locations) + root = self.data.__path__[0] + self.assertIsInstance(reader.files(), MultiplexedPath) + self.assertEqual(repr(reader.files()), f"MultiplexedPath('{root}')") + + +if __name__ == '__main__': + unittest.main() diff --git a/stdlib/test/test_importlib/resources/test_resource.py b/stdlib/test/test_importlib/resources/test_resource.py new file mode 100644 index 000000000..fcede14b8 --- /dev/null +++ b/stdlib/test/test_importlib/resources/test_resource.py @@ -0,0 +1,236 @@ +import unittest + +from . import util +from importlib import resources, import_module + + +class ResourceTests: + # Subclasses are expected to set the `data` attribute. + + def test_is_file_exists(self): + target = resources.files(self.data) / 'binary.file' + self.assertTrue(target.is_file()) + + def test_is_file_missing(self): + target = resources.files(self.data) / 'not-a-file' + self.assertFalse(target.is_file()) + + def test_is_dir(self): + target = resources.files(self.data) / 'subdirectory' + self.assertFalse(target.is_file()) + self.assertTrue(target.is_dir()) + + +class ResourceDiskTests(ResourceTests, util.DiskSetup, unittest.TestCase): + pass + + +class ResourceZipTests(ResourceTests, util.ZipSetup, unittest.TestCase): + pass + + +def names(traversable): + return {item.name for item in traversable.iterdir()} + + +class ResourceLoaderTests(util.DiskSetup, unittest.TestCase): + def test_resource_contents(self): + package = util.create_package( + file=self.data, path=self.data.__file__, contents=['A', 'B', 'C'] + ) + self.assertEqual(names(resources.files(package)), {'A', 'B', 'C'}) + + def test_is_file(self): + package = util.create_package( + file=self.data, + path=self.data.__file__, + contents=['A', 'B', 'C', 'D/E', 'D/F'], + ) + self.assertTrue(resources.files(package).joinpath('B').is_file()) + + def test_is_dir(self): + package = util.create_package( + file=self.data, + path=self.data.__file__, + contents=['A', 'B', 'C', 'D/E', 'D/F'], + ) + self.assertTrue(resources.files(package).joinpath('D').is_dir()) + + def test_resource_missing(self): + package = util.create_package( + file=self.data, + path=self.data.__file__, + contents=['A', 'B', 'C', 'D/E', 'D/F'], + ) + self.assertFalse(resources.files(package).joinpath('Z').is_file()) + + +class ResourceCornerCaseTests(util.DiskSetup, unittest.TestCase): + def test_package_has_no_reader_fallback(self): + """ + Test odd ball packages which: + # 1. Do not have a ResourceReader as a loader + # 2. Are not on the file system + # 3. Are not in a zip file + """ + module = util.create_package( + file=self.data, path=self.data.__file__, contents=['A', 'B', 'C'] + ) + # Give the module a dummy loader. + module.__loader__ = object() + # Give the module a dummy origin. + module.__file__ = '/path/which/shall/not/be/named' + module.__spec__.loader = module.__loader__ + module.__spec__.origin = module.__file__ + self.assertFalse(resources.files(module).joinpath('A').is_file()) + + +class ResourceFromZipsTest01(util.ZipSetup, unittest.TestCase): + def test_is_submodule_resource(self): + submodule = import_module('data01.subdirectory') + self.assertTrue(resources.files(submodule).joinpath('binary.file').is_file()) + + def test_read_submodule_resource_by_name(self): + self.assertTrue( + resources.files('data01.subdirectory').joinpath('binary.file').is_file() + ) + + def test_submodule_contents(self): + submodule = import_module('data01.subdirectory') + self.assertEqual( + names(resources.files(submodule)), {'__init__.py', 'binary.file'} + ) + + def test_submodule_contents_by_name(self): + self.assertEqual( + names(resources.files('data01.subdirectory')), + {'__init__.py', 'binary.file'}, + ) + + def test_as_file_directory(self): + with resources.as_file(resources.files('data01')) as data: + assert data.name == 'data01' + assert data.is_dir() + assert data.joinpath('subdirectory').is_dir() + assert len(list(data.iterdir())) + assert not data.parent.exists() + + +class ResourceFromZipsTest02(util.ZipSetup, unittest.TestCase): + MODULE = 'data02' + + def test_unrelated_contents(self): + """ + Test thata zip with two unrelated subpackages return + distinct resources. Ref python/importlib_resources#44. + """ + self.assertEqual( + names(resources.files('data02.one')), + {'__init__.py', 'resource1.txt'}, + ) + self.assertEqual( + names(resources.files('data02.two')), + {'__init__.py', 'resource2.txt'}, + ) + + +class DeletingZipsTest(util.ZipSetup, unittest.TestCase): + """Having accessed resources in a zip file should not keep an open + reference to the zip. + """ + + def test_iterdir_does_not_keep_open(self): + [item.name for item in resources.files('data01').iterdir()] + + def test_is_file_does_not_keep_open(self): + resources.files('data01').joinpath('binary.file').is_file() + + def test_is_file_failure_does_not_keep_open(self): + resources.files('data01').joinpath('not-present').is_file() + + @unittest.skip("Desired but not supported.") + def test_as_file_does_not_keep_open(self): # pragma: no cover + resources.as_file(resources.files('data01') / 'binary.file') + + def test_entered_path_does_not_keep_open(self): + """ + Mimic what certifi does on import to make its bundle + available for the process duration. + """ + resources.as_file(resources.files('data01') / 'binary.file').__enter__() + + def test_read_binary_does_not_keep_open(self): + resources.files('data01').joinpath('binary.file').read_bytes() + + def test_read_text_does_not_keep_open(self): + resources.files('data01').joinpath('utf-8.file').read_text(encoding='utf-8') + + +class ResourceFromNamespaceTests: + def test_is_submodule_resource(self): + self.assertTrue( + resources.files(import_module('namespacedata01')) + .joinpath('binary.file') + .is_file() + ) + + def test_read_submodule_resource_by_name(self): + self.assertTrue( + resources.files('namespacedata01').joinpath('binary.file').is_file() + ) + + def test_submodule_contents(self): + contents = names(resources.files(import_module('namespacedata01'))) + try: + contents.remove('__pycache__') + except KeyError: + pass + self.assertEqual( + contents, {'subdirectory', 'binary.file', 'utf-8.file', 'utf-16.file'} + ) + + def test_submodule_contents_by_name(self): + contents = names(resources.files('namespacedata01')) + try: + contents.remove('__pycache__') + except KeyError: + pass + self.assertEqual( + contents, {'subdirectory', 'binary.file', 'utf-8.file', 'utf-16.file'} + ) + + def test_submodule_sub_contents(self): + contents = names(resources.files(import_module('namespacedata01.subdirectory'))) + try: + contents.remove('__pycache__') + except KeyError: + pass + self.assertEqual(contents, {'binary.file'}) + + def test_submodule_sub_contents_by_name(self): + contents = names(resources.files('namespacedata01.subdirectory')) + try: + contents.remove('__pycache__') + except KeyError: + pass + self.assertEqual(contents, {'binary.file'}) + + +class ResourceFromNamespaceDiskTests( + util.DiskSetup, + ResourceFromNamespaceTests, + unittest.TestCase, +): + MODULE = 'namespacedata01' + + +class ResourceFromNamespaceZipTests( + util.ZipSetup, + ResourceFromNamespaceTests, + unittest.TestCase, +): + MODULE = 'namespacedata01' + + +if __name__ == '__main__': + unittest.main() diff --git a/stdlib/test/test_importlib/resources/util.py b/stdlib/test/test_importlib/resources/util.py new file mode 100644 index 000000000..e2d995f59 --- /dev/null +++ b/stdlib/test/test_importlib/resources/util.py @@ -0,0 +1,206 @@ +import abc +import importlib +import io +import sys +import types +import pathlib +import contextlib + +from importlib.resources.abc import ResourceReader +from test.support import import_helper, os_helper +from . import zip as zip_ +from . import _path + + +from importlib.machinery import ModuleSpec + + +class Reader(ResourceReader): + def __init__(self, **kwargs): + vars(self).update(kwargs) + + def get_resource_reader(self, package): + return self + + def open_resource(self, path): + self._path = path + if isinstance(self.file, Exception): + raise self.file + return self.file + + def resource_path(self, path_): + self._path = path_ + if isinstance(self.path, Exception): + raise self.path + return self.path + + def is_resource(self, path_): + self._path = path_ + if isinstance(self.path, Exception): + raise self.path + + def part(entry): + return entry.split('/') + + return any( + len(parts) == 1 and parts[0] == path_ for parts in map(part, self._contents) + ) + + def contents(self): + if isinstance(self.path, Exception): + raise self.path + yield from self._contents + + +def create_package_from_loader(loader, is_package=True): + name = 'testingpackage' + module = types.ModuleType(name) + spec = ModuleSpec(name, loader, origin='does-not-exist', is_package=is_package) + module.__spec__ = spec + module.__loader__ = loader + return module + + +def create_package(file=None, path=None, is_package=True, contents=()): + return create_package_from_loader( + Reader(file=file, path=path, _contents=contents), + is_package, + ) + + +class CommonTestsBase(metaclass=abc.ABCMeta): + """ + Tests shared by test_open, test_path, and test_read. + """ + + @abc.abstractmethod + def execute(self, package, path): + """ + Call the pertinent legacy API function (e.g. open_text, path) + on package and path. + """ + + def test_package_name(self): + """ + Passing in the package name should succeed. + """ + self.execute(self.data.__name__, 'utf-8.file') + + def test_package_object(self): + """ + Passing in the package itself should succeed. + """ + self.execute(self.data, 'utf-8.file') + + def test_string_path(self): + """ + Passing in a string for the path should succeed. + """ + path = 'utf-8.file' + self.execute(self.data, path) + + def test_pathlib_path(self): + """ + Passing in a pathlib.PurePath object for the path should succeed. + """ + path = pathlib.PurePath('utf-8.file') + self.execute(self.data, path) + + def test_importing_module_as_side_effect(self): + """ + The anchor package can already be imported. + """ + del sys.modules[self.data.__name__] + self.execute(self.data.__name__, 'utf-8.file') + + def test_missing_path(self): + """ + Attempting to open or read or request the path for a + non-existent path should succeed if open_resource + can return a viable data stream. + """ + bytes_data = io.BytesIO(b'Hello, world!') + package = create_package(file=bytes_data, path=FileNotFoundError()) + self.execute(package, 'utf-8.file') + self.assertEqual(package.__loader__._path, 'utf-8.file') + + def test_extant_path(self): + # Attempting to open or read or request the path when the + # path does exist should still succeed. Does not assert + # anything about the result. + bytes_data = io.BytesIO(b'Hello, world!') + # any path that exists + path = __file__ + package = create_package(file=bytes_data, path=path) + self.execute(package, 'utf-8.file') + self.assertEqual(package.__loader__._path, 'utf-8.file') + + def test_useless_loader(self): + package = create_package(file=FileNotFoundError(), path=FileNotFoundError()) + with self.assertRaises(FileNotFoundError): + self.execute(package, 'utf-8.file') + + +fixtures = dict( + data01={ + '__init__.py': '', + 'binary.file': bytes(range(4)), + 'utf-16.file': '\ufeffHello, UTF-16 world!\n'.encode('utf-16-le'), + 'utf-8.file': 'Hello, UTF-8 world!\n'.encode('utf-8'), + 'subdirectory': { + '__init__.py': '', + 'binary.file': bytes(range(4, 8)), + }, + }, + data02={ + '__init__.py': '', + 'one': {'__init__.py': '', 'resource1.txt': 'one resource'}, + 'two': {'__init__.py': '', 'resource2.txt': 'two resource'}, + 'subdirectory': {'subsubdir': {'resource.txt': 'a resource'}}, + }, + namespacedata01={ + 'binary.file': bytes(range(4)), + 'utf-16.file': '\ufeffHello, UTF-16 world!\n'.encode('utf-16-le'), + 'utf-8.file': 'Hello, UTF-8 world!\n'.encode('utf-8'), + 'subdirectory': { + 'binary.file': bytes(range(12, 16)), + }, + }, +) + + +class ModuleSetup: + def setUp(self): + self.fixtures = contextlib.ExitStack() + self.addCleanup(self.fixtures.close) + + self.fixtures.enter_context(import_helper.isolated_modules()) + self.data = self.load_fixture(self.MODULE) + + def load_fixture(self, module): + self.tree_on_path({module: fixtures[module]}) + return importlib.import_module(module) + + +class ZipSetup(ModuleSetup): + MODULE = 'data01' + + def tree_on_path(self, spec): + temp_dir = self.fixtures.enter_context(os_helper.temp_dir()) + modules = pathlib.Path(temp_dir) / 'zipped modules.zip' + self.fixtures.enter_context( + import_helper.DirsOnSysPath(str(zip_.make_zip_file(spec, modules))) + ) + + +class DiskSetup(ModuleSetup): + MODULE = 'data01' + + def tree_on_path(self, spec): + temp_dir = self.fixtures.enter_context(os_helper.temp_dir()) + _path.build(spec, pathlib.Path(temp_dir)) + self.fixtures.enter_context(import_helper.DirsOnSysPath(temp_dir)) + + +class CommonTests(DiskSetup, CommonTestsBase): + pass diff --git a/stdlib/test/test_importlib/resources/zip.py b/stdlib/test/test_importlib/resources/zip.py new file mode 100755 index 000000000..fc453f020 --- /dev/null +++ b/stdlib/test/test_importlib/resources/zip.py @@ -0,0 +1,24 @@ +""" +Generate zip test data files. +""" + +import zipfile + + +def make_zip_file(tree, dst): + """ + Zip the files in tree into a new zipfile at dst. + """ + with zipfile.ZipFile(dst, 'w') as zf: + for name, contents in walk(tree): + zf.writestr(name, contents) + zipfile._path.CompleteDirs.inject(zf) + return dst + + +def walk(tree, prefix=''): + for name, contents in tree.items(): + if isinstance(contents, dict): + yield from walk(contents, prefix=f'{prefix}{name}/') + else: + yield f'{prefix}{name}', contents diff --git a/stdlib/test/test_importlib/source/__init__.py b/stdlib/test/test_importlib/source/__init__.py new file mode 100644 index 000000000..4b16ecc31 --- /dev/null +++ b/stdlib/test/test_importlib/source/__init__.py @@ -0,0 +1,5 @@ +import os +from test.support import load_package_tests + +def load_tests(*args): + return load_package_tests(os.path.dirname(__file__), *args) diff --git a/stdlib/test/test_importlib/source/__main__.py b/stdlib/test/test_importlib/source/__main__.py new file mode 100644 index 000000000..40a23a297 --- /dev/null +++ b/stdlib/test/test_importlib/source/__main__.py @@ -0,0 +1,4 @@ +from . import load_tests +import unittest + +unittest.main() diff --git a/stdlib/test/test_importlib/source/test_case_sensitivity.py b/stdlib/test/test_importlib/source/test_case_sensitivity.py new file mode 100644 index 000000000..e52829e62 --- /dev/null +++ b/stdlib/test/test_importlib/source/test_case_sensitivity.py @@ -0,0 +1,78 @@ +"""Test case-sensitivity (PEP 235).""" +import sys + +from test.test_importlib import util + +importlib = util.import_importlib('importlib') +machinery = util.import_importlib('importlib.machinery') + +import os +from test.support import os_helper +import unittest + + +@util.case_insensitive_tests +class CaseSensitivityTest(util.CASEOKTestBase): + + """PEP 235 dictates that on case-preserving, case-insensitive file systems + that imports are case-sensitive unless the PYTHONCASEOK environment + variable is set.""" + + name = 'MoDuLe' + assert name != name.lower() + + def finder(self, path): + return self.machinery.FileFinder(path, + (self.machinery.SourceFileLoader, + self.machinery.SOURCE_SUFFIXES), + (self.machinery.SourcelessFileLoader, + self.machinery.BYTECODE_SUFFIXES)) + + def sensitivity_test(self): + """Look for a module with matching and non-matching sensitivity.""" + sensitive_pkg = 'sensitive.{0}'.format(self.name) + insensitive_pkg = 'insensitive.{0}'.format(self.name.lower()) + context = util.create_modules(insensitive_pkg, sensitive_pkg) + with context as mapping: + sensitive_path = os.path.join(mapping['.root'], 'sensitive') + insensitive_path = os.path.join(mapping['.root'], 'insensitive') + sensitive_finder = self.finder(sensitive_path) + insensitive_finder = self.finder(insensitive_path) + return self.find(sensitive_finder), self.find(insensitive_finder) + + @unittest.skipIf(sys.flags.ignore_environment, 'ignore_environment flag was set') + def test_sensitive(self): + with os_helper.EnvironmentVarGuard() as env: + env.unset('PYTHONCASEOK') + self.caseok_env_changed(should_exist=False) + sensitive, insensitive = self.sensitivity_test() + self.assertIsNotNone(sensitive) + self.assertIn(self.name, sensitive.get_filename(self.name)) + self.assertIsNone(insensitive) + + @unittest.skipIf(sys.flags.ignore_environment, 'ignore_environment flag was set') + def test_insensitive(self): + with os_helper.EnvironmentVarGuard() as env: + env.set('PYTHONCASEOK', '1') + self.caseok_env_changed(should_exist=True) + sensitive, insensitive = self.sensitivity_test() + self.assertIsNotNone(sensitive) + self.assertIn(self.name, sensitive.get_filename(self.name)) + self.assertIsNotNone(insensitive) + self.assertIn(self.name, insensitive.get_filename(self.name)) + + +class CaseSensitivityTestPEP451(CaseSensitivityTest): + def find(self, finder): + found = finder.find_spec(self.name) + return found.loader if found is not None else found + + +(Frozen_CaseSensitivityTestPEP451, + Source_CaseSensitivityTestPEP451 + ) = util.test_both(CaseSensitivityTestPEP451, importlib=importlib, + machinery=machinery) + + +if __name__ == '__main__': + unittest.main() diff --git a/stdlib/test/test_importlib/source/test_file_loader.py b/stdlib/test/test_importlib/source/test_file_loader.py new file mode 100644 index 000000000..f35adec1a --- /dev/null +++ b/stdlib/test/test_importlib/source/test_file_loader.py @@ -0,0 +1,795 @@ +from test.test_importlib import abc, util + +importlib = util.import_importlib('importlib') +importlib_abc = util.import_importlib('importlib.abc') +machinery = util.import_importlib('importlib.machinery') +importlib_util = util.import_importlib('importlib.util') + +import errno +import marshal +import os +import py_compile +import shutil +import stat +import sys +import types +import unittest +import warnings + +from test.support.import_helper import make_legacy_pyc, unload + +from test.test_py_compile import without_source_date_epoch +from test.test_py_compile import SourceDateEpochTestMeta + + +class SimpleTest(abc.LoaderTests): + + """Should have no issue importing a source module [basic]. And if there is + a syntax error, it should raise a SyntaxError [syntax error]. + + """ + + def setUp(self): + self.name = 'spam' + self.filepath = os.path.join('ham', self.name + '.py') + self.loader = self.machinery.SourceFileLoader(self.name, self.filepath) + + def test_load_module_API(self): + class Tester(self.abc.FileLoader): + def get_source(self, _): return 'attr = 42' + def is_package(self, _): return False + + loader = Tester('blah', 'blah.py') + self.addCleanup(unload, 'blah') + with warnings.catch_warnings(): + warnings.simplefilter('ignore', DeprecationWarning) + module = loader.load_module() # Should not raise an exception. + + def test_get_filename_API(self): + # If fullname is not set then assume self.path is desired. + class Tester(self.abc.FileLoader): + def get_code(self, _): pass + def get_source(self, _): pass + def is_package(self, _): pass + + path = 'some_path' + name = 'some_name' + loader = Tester(name, path) + self.assertEqual(path, loader.get_filename(name)) + self.assertEqual(path, loader.get_filename()) + self.assertEqual(path, loader.get_filename(None)) + with self.assertRaises(ImportError): + loader.get_filename(name + 'XXX') + + def test_equality(self): + other = self.machinery.SourceFileLoader(self.name, self.filepath) + self.assertEqual(self.loader, other) + + def test_inequality(self): + other = self.machinery.SourceFileLoader('_' + self.name, self.filepath) + self.assertNotEqual(self.loader, other) + + # [basic] + def test_module(self): + with util.create_modules('_temp') as mapping: + loader = self.machinery.SourceFileLoader('_temp', mapping['_temp']) + with warnings.catch_warnings(): + warnings.simplefilter('ignore', DeprecationWarning) + module = loader.load_module('_temp') + self.assertIn('_temp', sys.modules) + check = {'__name__': '_temp', '__file__': mapping['_temp'], + '__package__': ''} + for attr, value in check.items(): + self.assertEqual(getattr(module, attr), value) + + def test_package(self): + with util.create_modules('_pkg.__init__') as mapping: + loader = self.machinery.SourceFileLoader('_pkg', + mapping['_pkg.__init__']) + with warnings.catch_warnings(): + warnings.simplefilter('ignore', DeprecationWarning) + module = loader.load_module('_pkg') + self.assertIn('_pkg', sys.modules) + check = {'__name__': '_pkg', '__file__': mapping['_pkg.__init__'], + '__path__': [os.path.dirname(mapping['_pkg.__init__'])], + '__package__': '_pkg'} + for attr, value in check.items(): + self.assertEqual(getattr(module, attr), value) + + + def test_lacking_parent(self): + with util.create_modules('_pkg.__init__', '_pkg.mod')as mapping: + loader = self.machinery.SourceFileLoader('_pkg.mod', + mapping['_pkg.mod']) + with warnings.catch_warnings(): + warnings.simplefilter('ignore', DeprecationWarning) + module = loader.load_module('_pkg.mod') + self.assertIn('_pkg.mod', sys.modules) + check = {'__name__': '_pkg.mod', '__file__': mapping['_pkg.mod'], + '__package__': '_pkg'} + for attr, value in check.items(): + self.assertEqual(getattr(module, attr), value) + + def fake_mtime(self, fxn): + """Fake mtime to always be higher than expected.""" + return lambda name: fxn(name) + 1 + + def test_module_reuse(self): + with util.create_modules('_temp') as mapping: + loader = self.machinery.SourceFileLoader('_temp', mapping['_temp']) + with warnings.catch_warnings(): + warnings.simplefilter('ignore', DeprecationWarning) + module = loader.load_module('_temp') + module_id = id(module) + module_dict_id = id(module.__dict__) + with open(mapping['_temp'], 'w', encoding='utf-8') as file: + file.write("testing_var = 42\n") + with warnings.catch_warnings(): + warnings.simplefilter('ignore', DeprecationWarning) + module = loader.load_module('_temp') + self.assertIn('testing_var', module.__dict__, + "'testing_var' not in " + "{0}".format(list(module.__dict__.keys()))) + self.assertEqual(module, sys.modules['_temp']) + self.assertEqual(id(module), module_id) + self.assertEqual(id(module.__dict__), module_dict_id) + + def test_state_after_failure(self): + # A failed reload should leave the original module intact. + attributes = ('__file__', '__path__', '__package__') + value = '' + name = '_temp' + with util.create_modules(name) as mapping: + orig_module = types.ModuleType(name) + for attr in attributes: + setattr(orig_module, attr, value) + with open(mapping[name], 'w', encoding='utf-8') as file: + file.write('+++ bad syntax +++') + loader = self.machinery.SourceFileLoader('_temp', mapping['_temp']) + with self.assertRaises(SyntaxError): + loader.exec_module(orig_module) + for attr in attributes: + self.assertEqual(getattr(orig_module, attr), value) + with self.assertRaises(SyntaxError): + with warnings.catch_warnings(): + warnings.simplefilter('ignore', DeprecationWarning) + loader.load_module(name) + for attr in attributes: + self.assertEqual(getattr(orig_module, attr), value) + + # [syntax error] + def test_bad_syntax(self): + with util.create_modules('_temp') as mapping: + with open(mapping['_temp'], 'w', encoding='utf-8') as file: + file.write('=') + loader = self.machinery.SourceFileLoader('_temp', mapping['_temp']) + with self.assertRaises(SyntaxError): + with warnings.catch_warnings(): + warnings.simplefilter('ignore', DeprecationWarning) + loader.load_module('_temp') + self.assertNotIn('_temp', sys.modules) + + def test_file_from_empty_string_dir(self): + # Loading a module found from an empty string entry on sys.path should + # not only work, but keep all attributes relative. + file_path = '_temp.py' + with open(file_path, 'w', encoding='utf-8') as file: + file.write("# test file for importlib") + try: + with util.uncache('_temp'): + loader = self.machinery.SourceFileLoader('_temp', file_path) + with warnings.catch_warnings(): + warnings.simplefilter('ignore', DeprecationWarning) + mod = loader.load_module('_temp') + self.assertEqual(file_path, mod.__file__) + self.assertEqual(self.util.cache_from_source(file_path), + mod.__cached__) + finally: + os.unlink(file_path) + pycache = os.path.dirname(self.util.cache_from_source(file_path)) + if os.path.exists(pycache): + shutil.rmtree(pycache) + + @util.writes_bytecode_files + def test_timestamp_overflow(self): + # When a modification timestamp is larger than 2**32, it should be + # truncated rather than raise an OverflowError. + with util.create_modules('_temp') as mapping: + source = mapping['_temp'] + compiled = self.util.cache_from_source(source) + with open(source, 'w', encoding='utf-8') as f: + f.write("x = 5") + try: + os.utime(source, (2 ** 33 - 5, 2 ** 33 - 5)) + except OverflowError: + self.skipTest("cannot set modification time to large integer") + except OSError as e: + if e.errno != getattr(errno, 'EOVERFLOW', None): + raise + self.skipTest("cannot set modification time to large integer ({})".format(e)) + loader = self.machinery.SourceFileLoader('_temp', mapping['_temp']) + # PEP 451 + module = types.ModuleType('_temp') + module.__spec__ = self.util.spec_from_loader('_temp', loader) + loader.exec_module(module) + self.assertEqual(module.x, 5) + self.assertTrue(os.path.exists(compiled)) + os.unlink(compiled) + # PEP 302 + with warnings.catch_warnings(): + warnings.simplefilter('ignore', DeprecationWarning) + mod = loader.load_module('_temp') + # Sanity checks. + self.assertEqual(mod.__cached__, compiled) + self.assertEqual(mod.x, 5) + # The pyc file was created. + self.assertTrue(os.path.exists(compiled)) + + def test_unloadable(self): + loader = self.machinery.SourceFileLoader('good name', {}) + module = types.ModuleType('bad name') + module.__spec__ = self.machinery.ModuleSpec('bad name', loader) + with self.assertRaises(ImportError): + loader.exec_module(module) + with self.assertRaises(ImportError): + with warnings.catch_warnings(): + warnings.simplefilter('ignore', DeprecationWarning) + loader.load_module('bad name') + + @util.writes_bytecode_files + def test_checked_hash_based_pyc(self): + with util.create_modules('_temp') as mapping: + source = mapping['_temp'] + pyc = self.util.cache_from_source(source) + with open(source, 'wb') as fp: + fp.write(b'state = "old"') + os.utime(source, (50, 50)) + py_compile.compile( + source, + invalidation_mode=py_compile.PycInvalidationMode.CHECKED_HASH, + ) + loader = self.machinery.SourceFileLoader('_temp', source) + mod = types.ModuleType('_temp') + mod.__spec__ = self.util.spec_from_loader('_temp', loader) + loader.exec_module(mod) + self.assertEqual(mod.state, 'old') + # Write a new source with the same mtime and size as before. + with open(source, 'wb') as fp: + fp.write(b'state = "new"') + os.utime(source, (50, 50)) + loader.exec_module(mod) + self.assertEqual(mod.state, 'new') + with open(pyc, 'rb') as fp: + data = fp.read() + self.assertEqual(int.from_bytes(data[4:8], 'little'), 0b11) + self.assertEqual( + self.util.source_hash(b'state = "new"'), + data[8:16], + ) + + @util.writes_bytecode_files + def test_overridden_checked_hash_based_pyc(self): + with util.create_modules('_temp') as mapping, \ + unittest.mock.patch('_imp.check_hash_based_pycs', 'never'): + source = mapping['_temp'] + pyc = self.util.cache_from_source(source) + with open(source, 'wb') as fp: + fp.write(b'state = "old"') + os.utime(source, (50, 50)) + py_compile.compile( + source, + invalidation_mode=py_compile.PycInvalidationMode.CHECKED_HASH, + ) + loader = self.machinery.SourceFileLoader('_temp', source) + mod = types.ModuleType('_temp') + mod.__spec__ = self.util.spec_from_loader('_temp', loader) + loader.exec_module(mod) + self.assertEqual(mod.state, 'old') + # Write a new source with the same mtime and size as before. + with open(source, 'wb') as fp: + fp.write(b'state = "new"') + os.utime(source, (50, 50)) + loader.exec_module(mod) + self.assertEqual(mod.state, 'old') + + @util.writes_bytecode_files + def test_unchecked_hash_based_pyc(self): + with util.create_modules('_temp') as mapping: + source = mapping['_temp'] + pyc = self.util.cache_from_source(source) + with open(source, 'wb') as fp: + fp.write(b'state = "old"') + os.utime(source, (50, 50)) + py_compile.compile( + source, + invalidation_mode=py_compile.PycInvalidationMode.UNCHECKED_HASH, + ) + loader = self.machinery.SourceFileLoader('_temp', source) + mod = types.ModuleType('_temp') + mod.__spec__ = self.util.spec_from_loader('_temp', loader) + loader.exec_module(mod) + self.assertEqual(mod.state, 'old') + # Update the source file, which should be ignored. + with open(source, 'wb') as fp: + fp.write(b'state = "new"') + loader.exec_module(mod) + self.assertEqual(mod.state, 'old') + with open(pyc, 'rb') as fp: + data = fp.read() + self.assertEqual(int.from_bytes(data[4:8], 'little'), 0b1) + self.assertEqual( + self.util.source_hash(b'state = "old"'), + data[8:16], + ) + + @util.writes_bytecode_files + def test_overridden_unchecked_hash_based_pyc(self): + with util.create_modules('_temp') as mapping, \ + unittest.mock.patch('_imp.check_hash_based_pycs', 'always'): + source = mapping['_temp'] + pyc = self.util.cache_from_source(source) + with open(source, 'wb') as fp: + fp.write(b'state = "old"') + os.utime(source, (50, 50)) + py_compile.compile( + source, + invalidation_mode=py_compile.PycInvalidationMode.UNCHECKED_HASH, + ) + loader = self.machinery.SourceFileLoader('_temp', source) + mod = types.ModuleType('_temp') + mod.__spec__ = self.util.spec_from_loader('_temp', loader) + loader.exec_module(mod) + self.assertEqual(mod.state, 'old') + # Update the source file, which should be ignored. + with open(source, 'wb') as fp: + fp.write(b'state = "new"') + loader.exec_module(mod) + self.assertEqual(mod.state, 'new') + with open(pyc, 'rb') as fp: + data = fp.read() + self.assertEqual(int.from_bytes(data[4:8], 'little'), 0b1) + self.assertEqual( + self.util.source_hash(b'state = "new"'), + data[8:16], + ) + + +(Frozen_SimpleTest, + Source_SimpleTest + ) = util.test_both(SimpleTest, importlib=importlib, machinery=machinery, + abc=importlib_abc, util=importlib_util) + + +class SourceDateEpochTestMeta(SourceDateEpochTestMeta, + type(Source_SimpleTest)): + pass + + +class SourceDateEpoch_SimpleTest(Source_SimpleTest, + metaclass=SourceDateEpochTestMeta, + source_date_epoch=True): + pass + + +class BadBytecodeTest: + + def import_(self, file, module_name): + raise NotImplementedError + + def manipulate_bytecode(self, + name, mapping, manipulator, *, + del_source=False, + invalidation_mode=py_compile.PycInvalidationMode.TIMESTAMP): + """Manipulate the bytecode of a module by passing it into a callable + that returns what to use as the new bytecode.""" + try: + del sys.modules['_temp'] + except KeyError: + pass + py_compile.compile(mapping[name], invalidation_mode=invalidation_mode) + if not del_source: + bytecode_path = self.util.cache_from_source(mapping[name]) + else: + os.unlink(mapping[name]) + bytecode_path = make_legacy_pyc(mapping[name]) + if manipulator: + with open(bytecode_path, 'rb') as file: + bc = file.read() + new_bc = manipulator(bc) + with open(bytecode_path, 'wb') as file: + if new_bc is not None: + file.write(new_bc) + return bytecode_path + + def _test_empty_file(self, test, *, del_source=False): + with util.create_modules('_temp') as mapping: + bc_path = self.manipulate_bytecode('_temp', mapping, + lambda bc: b'', + del_source=del_source) + test('_temp', mapping, bc_path) + + @util.writes_bytecode_files + def _test_partial_magic(self, test, *, del_source=False): + # When their are less than 4 bytes to a .pyc, regenerate it if + # possible, else raise ImportError. + with util.create_modules('_temp') as mapping: + bc_path = self.manipulate_bytecode('_temp', mapping, + lambda bc: bc[:3], + del_source=del_source) + test('_temp', mapping, bc_path) + + def _test_magic_only(self, test, *, del_source=False): + with util.create_modules('_temp') as mapping: + bc_path = self.manipulate_bytecode('_temp', mapping, + lambda bc: bc[:4], + del_source=del_source) + test('_temp', mapping, bc_path) + + def _test_partial_flags(self, test, *, del_source=False): + with util.create_modules('_temp') as mapping: + bc_path = self.manipulate_bytecode('_temp', mapping, + lambda bc: bc[:7], + del_source=del_source) + test('_temp', mapping, bc_path) + + def _test_partial_hash(self, test, *, del_source=False): + with util.create_modules('_temp') as mapping: + bc_path = self.manipulate_bytecode( + '_temp', + mapping, + lambda bc: bc[:13], + del_source=del_source, + invalidation_mode=py_compile.PycInvalidationMode.CHECKED_HASH, + ) + test('_temp', mapping, bc_path) + with util.create_modules('_temp') as mapping: + bc_path = self.manipulate_bytecode( + '_temp', + mapping, + lambda bc: bc[:13], + del_source=del_source, + invalidation_mode=py_compile.PycInvalidationMode.UNCHECKED_HASH, + ) + test('_temp', mapping, bc_path) + + def _test_partial_timestamp(self, test, *, del_source=False): + with util.create_modules('_temp') as mapping: + bc_path = self.manipulate_bytecode('_temp', mapping, + lambda bc: bc[:11], + del_source=del_source) + test('_temp', mapping, bc_path) + + def _test_partial_size(self, test, *, del_source=False): + with util.create_modules('_temp') as mapping: + bc_path = self.manipulate_bytecode('_temp', mapping, + lambda bc: bc[:15], + del_source=del_source) + test('_temp', mapping, bc_path) + + def _test_no_marshal(self, *, del_source=False): + with util.create_modules('_temp') as mapping: + bc_path = self.manipulate_bytecode('_temp', mapping, + lambda bc: bc[:16], + del_source=del_source) + file_path = mapping['_temp'] if not del_source else bc_path + with self.assertRaises(EOFError): + self.import_(file_path, '_temp') + + def _test_non_code_marshal(self, *, del_source=False): + with util.create_modules('_temp') as mapping: + bytecode_path = self.manipulate_bytecode('_temp', mapping, + lambda bc: bc[:16] + marshal.dumps(b'abcd'), + del_source=del_source) + file_path = mapping['_temp'] if not del_source else bytecode_path + with self.assertRaises(ImportError) as cm: + self.import_(file_path, '_temp') + self.assertEqual(cm.exception.name, '_temp') + self.assertEqual(cm.exception.path, bytecode_path) + + def _test_bad_marshal(self, *, del_source=False): + with util.create_modules('_temp') as mapping: + bytecode_path = self.manipulate_bytecode('_temp', mapping, + lambda bc: bc[:16] + b'', + del_source=del_source) + file_path = mapping['_temp'] if not del_source else bytecode_path + with self.assertRaises(EOFError): + self.import_(file_path, '_temp') + + def _test_bad_magic(self, test, *, del_source=False): + with util.create_modules('_temp') as mapping: + bc_path = self.manipulate_bytecode('_temp', mapping, + lambda bc: b'\x00\x00\x00\x00' + bc[4:]) + test('_temp', mapping, bc_path) + + +class BadBytecodeTestPEP451(BadBytecodeTest): + + def import_(self, file, module_name): + loader = self.loader(module_name, file) + module = types.ModuleType(module_name) + module.__spec__ = self.util.spec_from_loader(module_name, loader) + loader.exec_module(module) + + +class BadBytecodeTestPEP302(BadBytecodeTest): + + def import_(self, file, module_name): + loader = self.loader(module_name, file) + with warnings.catch_warnings(): + warnings.simplefilter('ignore', DeprecationWarning) + module = loader.load_module(module_name) + self.assertIn(module_name, sys.modules) + + +class SourceLoaderBadBytecodeTest: + + @classmethod + def setUpClass(cls): + cls.loader = cls.machinery.SourceFileLoader + + @util.writes_bytecode_files + def test_empty_file(self): + # When a .pyc is empty, regenerate it if possible, else raise + # ImportError. + def test(name, mapping, bytecode_path): + self.import_(mapping[name], name) + with open(bytecode_path, 'rb') as file: + self.assertGreater(len(file.read()), 16) + + self._test_empty_file(test) + + def test_partial_magic(self): + def test(name, mapping, bytecode_path): + self.import_(mapping[name], name) + with open(bytecode_path, 'rb') as file: + self.assertGreater(len(file.read()), 16) + + self._test_partial_magic(test) + + @util.writes_bytecode_files + def test_magic_only(self): + # When there is only the magic number, regenerate the .pyc if possible, + # else raise EOFError. + def test(name, mapping, bytecode_path): + self.import_(mapping[name], name) + with open(bytecode_path, 'rb') as file: + self.assertGreater(len(file.read()), 16) + + self._test_magic_only(test) + + @util.writes_bytecode_files + def test_bad_magic(self): + # When the magic number is different, the bytecode should be + # regenerated. + def test(name, mapping, bytecode_path): + self.import_(mapping[name], name) + with open(bytecode_path, 'rb') as bytecode_file: + self.assertEqual(bytecode_file.read(4), + self.util.MAGIC_NUMBER) + + self._test_bad_magic(test) + + @util.writes_bytecode_files + def test_partial_timestamp(self): + # When the timestamp is partial, regenerate the .pyc, else + # raise EOFError. + def test(name, mapping, bc_path): + self.import_(mapping[name], name) + with open(bc_path, 'rb') as file: + self.assertGreater(len(file.read()), 16) + + self._test_partial_timestamp(test) + + @util.writes_bytecode_files + def test_partial_flags(self): + # When the flags is partial, regenerate the .pyc, else raise EOFError. + def test(name, mapping, bc_path): + self.import_(mapping[name], name) + with open(bc_path, 'rb') as file: + self.assertGreater(len(file.read()), 16) + + self._test_partial_flags(test) + + @util.writes_bytecode_files + def test_partial_hash(self): + # When the hash is partial, regenerate the .pyc, else raise EOFError. + def test(name, mapping, bc_path): + self.import_(mapping[name], name) + with open(bc_path, 'rb') as file: + self.assertGreater(len(file.read()), 16) + + self._test_partial_hash(test) + + @util.writes_bytecode_files + def test_partial_size(self): + # When the size is partial, regenerate the .pyc, else + # raise EOFError. + def test(name, mapping, bc_path): + self.import_(mapping[name], name) + with open(bc_path, 'rb') as file: + self.assertGreater(len(file.read()), 16) + + self._test_partial_size(test) + + @util.writes_bytecode_files + def test_no_marshal(self): + # When there is only the magic number and timestamp, raise EOFError. + self._test_no_marshal() + + @util.writes_bytecode_files + def test_non_code_marshal(self): + self._test_non_code_marshal() + # XXX ImportError when sourceless + + # [bad marshal] + @util.writes_bytecode_files + def test_bad_marshal(self): + # Bad marshal data should raise a ValueError. + self._test_bad_marshal() + + # [bad timestamp] + @util.writes_bytecode_files + @without_source_date_epoch + def test_old_timestamp(self): + # When the timestamp is older than the source, bytecode should be + # regenerated. + zeros = b'\x00\x00\x00\x00' + with util.create_modules('_temp') as mapping: + py_compile.compile(mapping['_temp']) + bytecode_path = self.util.cache_from_source(mapping['_temp']) + with open(bytecode_path, 'r+b') as bytecode_file: + bytecode_file.seek(8) + bytecode_file.write(zeros) + self.import_(mapping['_temp'], '_temp') + source_mtime = os.path.getmtime(mapping['_temp']) + source_timestamp = self.importlib._pack_uint32(source_mtime) + with open(bytecode_path, 'rb') as bytecode_file: + bytecode_file.seek(8) + self.assertEqual(bytecode_file.read(4), source_timestamp) + + # [bytecode read-only] + @util.writes_bytecode_files + def test_read_only_bytecode(self): + # When bytecode is read-only but should be rewritten, fail silently. + with util.create_modules('_temp') as mapping: + # Create bytecode that will need to be re-created. + py_compile.compile(mapping['_temp']) + bytecode_path = self.util.cache_from_source(mapping['_temp']) + with open(bytecode_path, 'r+b') as bytecode_file: + bytecode_file.seek(0) + bytecode_file.write(b'\x00\x00\x00\x00') + # Make the bytecode read-only. + os.chmod(bytecode_path, + stat.S_IRUSR | stat.S_IRGRP | stat.S_IROTH) + try: + # Should not raise OSError! + self.import_(mapping['_temp'], '_temp') + finally: + # Make writable for eventual clean-up. + os.chmod(bytecode_path, stat.S_IWUSR) + + +class SourceLoaderBadBytecodeTestPEP451( + SourceLoaderBadBytecodeTest, BadBytecodeTestPEP451): + pass + + +(Frozen_SourceBadBytecodePEP451, + Source_SourceBadBytecodePEP451 + ) = util.test_both(SourceLoaderBadBytecodeTestPEP451, importlib=importlib, + machinery=machinery, abc=importlib_abc, + util=importlib_util) + + +class SourceLoaderBadBytecodeTestPEP302( + SourceLoaderBadBytecodeTest, BadBytecodeTestPEP302): + pass + + +(Frozen_SourceBadBytecodePEP302, + Source_SourceBadBytecodePEP302 + ) = util.test_both(SourceLoaderBadBytecodeTestPEP302, importlib=importlib, + machinery=machinery, abc=importlib_abc, + util=importlib_util) + + +class SourcelessLoaderBadBytecodeTest: + + @classmethod + def setUpClass(cls): + cls.loader = cls.machinery.SourcelessFileLoader + + def test_empty_file(self): + def test(name, mapping, bytecode_path): + with self.assertRaises(ImportError) as cm: + self.import_(bytecode_path, name) + self.assertEqual(cm.exception.name, name) + self.assertEqual(cm.exception.path, bytecode_path) + + self._test_empty_file(test, del_source=True) + + def test_partial_magic(self): + def test(name, mapping, bytecode_path): + with self.assertRaises(ImportError) as cm: + self.import_(bytecode_path, name) + self.assertEqual(cm.exception.name, name) + self.assertEqual(cm.exception.path, bytecode_path) + self._test_partial_magic(test, del_source=True) + + def test_magic_only(self): + def test(name, mapping, bytecode_path): + with self.assertRaises(EOFError): + self.import_(bytecode_path, name) + + self._test_magic_only(test, del_source=True) + + def test_bad_magic(self): + def test(name, mapping, bytecode_path): + with self.assertRaises(ImportError) as cm: + self.import_(bytecode_path, name) + self.assertEqual(cm.exception.name, name) + self.assertEqual(cm.exception.path, bytecode_path) + + self._test_bad_magic(test, del_source=True) + + def test_partial_timestamp(self): + def test(name, mapping, bytecode_path): + with self.assertRaises(EOFError): + self.import_(bytecode_path, name) + + self._test_partial_timestamp(test, del_source=True) + + def test_partial_flags(self): + def test(name, mapping, bytecode_path): + with self.assertRaises(EOFError): + self.import_(bytecode_path, name) + + self._test_partial_flags(test, del_source=True) + + def test_partial_hash(self): + def test(name, mapping, bytecode_path): + with self.assertRaises(EOFError): + self.import_(bytecode_path, name) + + self._test_partial_hash(test, del_source=True) + + def test_partial_size(self): + def test(name, mapping, bytecode_path): + with self.assertRaises(EOFError): + self.import_(bytecode_path, name) + + self._test_partial_size(test, del_source=True) + + def test_no_marshal(self): + self._test_no_marshal(del_source=True) + + def test_non_code_marshal(self): + self._test_non_code_marshal(del_source=True) + + +class SourcelessLoaderBadBytecodeTestPEP451(SourcelessLoaderBadBytecodeTest, + BadBytecodeTestPEP451): + pass + + +(Frozen_SourcelessBadBytecodePEP451, + Source_SourcelessBadBytecodePEP451 + ) = util.test_both(SourcelessLoaderBadBytecodeTestPEP451, importlib=importlib, + machinery=machinery, abc=importlib_abc, + util=importlib_util) + + +class SourcelessLoaderBadBytecodeTestPEP302(SourcelessLoaderBadBytecodeTest, + BadBytecodeTestPEP302): + pass + + +(Frozen_SourcelessBadBytecodePEP302, + Source_SourcelessBadBytecodePEP302 + ) = util.test_both(SourcelessLoaderBadBytecodeTestPEP302, importlib=importlib, + machinery=machinery, abc=importlib_abc, + util=importlib_util) + + +if __name__ == '__main__': + unittest.main() diff --git a/stdlib/test/test_importlib/source/test_finder.py b/stdlib/test/test_importlib/source/test_finder.py new file mode 100644 index 000000000..4de736a6b --- /dev/null +++ b/stdlib/test/test_importlib/source/test_finder.py @@ -0,0 +1,212 @@ +from test.test_importlib import abc, util + +machinery = util.import_importlib('importlib.machinery') + +import errno +import os +import py_compile +import stat +import sys +import tempfile +from test.support.import_helper import make_legacy_pyc +import unittest + + +class FinderTests(abc.FinderTests): + + """For a top-level module, it should just be found directly in the + directory being searched. This is true for a directory with source + [top-level source], bytecode [top-level bc], or both [top-level both]. + There is also the possibility that it is a package [top-level package], in + which case there will be a directory with the module name and an + __init__.py file. If there is a directory without an __init__.py an + ImportWarning is returned [empty dir]. + + For sub-modules and sub-packages, the same happens as above but only use + the tail end of the name [sub module] [sub package] [sub empty]. + + When there is a conflict between a package and module having the same name + in the same directory, the package wins out [package over module]. This is + so that imports of modules within the package can occur rather than trigger + an import error. + + When there is a package and module with the same name, always pick the + package over the module [package over module]. This is so that imports from + the package have the possibility of succeeding. + + """ + + def get_finder(self, root): + loader_details = [(self.machinery.SourceFileLoader, + self.machinery.SOURCE_SUFFIXES), + (self.machinery.SourcelessFileLoader, + self.machinery.BYTECODE_SUFFIXES)] + return self.machinery.FileFinder(root, *loader_details) + + def import_(self, root, module): + finder = self.get_finder(root) + return self._find(finder, module, loader_only=True) + + def run_test(self, test, create=None, *, compile_=None, unlink=None): + """Test the finding of 'test' with the creation of modules listed in + 'create'. + + Any names listed in 'compile_' are byte-compiled. Modules + listed in 'unlink' have their source files deleted. + + """ + if create is None: + create = {test} + with util.create_modules(*create) as mapping: + if compile_: + for name in compile_: + py_compile.compile(mapping[name]) + if unlink: + for name in unlink: + os.unlink(mapping[name]) + try: + make_legacy_pyc(mapping[name]) + except OSError as error: + # Some tests do not set compile_=True so the source + # module will not get compiled and there will be no + # PEP 3147 pyc file to rename. + if error.errno != errno.ENOENT: + raise + loader = self.import_(mapping['.root'], test) + self.assertHasAttr(loader, 'load_module') + return loader + + def test_module(self): + # [top-level source] + self.run_test('top_level') + # [top-level bc] + self.run_test('top_level', compile_={'top_level'}, + unlink={'top_level'}) + # [top-level both] + self.run_test('top_level', compile_={'top_level'}) + + # [top-level package] + def test_package(self): + # Source. + self.run_test('pkg', {'pkg.__init__'}) + # Bytecode. + self.run_test('pkg', {'pkg.__init__'}, compile_={'pkg.__init__'}, + unlink={'pkg.__init__'}) + # Both. + self.run_test('pkg', {'pkg.__init__'}, compile_={'pkg.__init__'}) + + # [sub module] + def test_module_in_package(self): + with util.create_modules('pkg.__init__', 'pkg.sub') as mapping: + pkg_dir = os.path.dirname(mapping['pkg.__init__']) + loader = self.import_(pkg_dir, 'pkg.sub') + self.assertHasAttr(loader, 'load_module') + + # [sub package] + def test_package_in_package(self): + context = util.create_modules('pkg.__init__', 'pkg.sub.__init__') + with context as mapping: + pkg_dir = os.path.dirname(mapping['pkg.__init__']) + loader = self.import_(pkg_dir, 'pkg.sub') + self.assertHasAttr(loader, 'load_module') + + # [package over modules] + def test_package_over_module(self): + name = '_temp' + loader = self.run_test(name, {'{0}.__init__'.format(name), name}) + self.assertIn('__init__', loader.get_filename(name)) + + def test_failure(self): + with util.create_modules('blah') as mapping: + nothing = self.import_(mapping['.root'], 'sdfsadsadf') + self.assertEqual(nothing, self.NOT_FOUND) + + def test_empty_string_for_dir(self): + # The empty string from sys.path means to search in the cwd. + finder = self.machinery.FileFinder('', (self.machinery.SourceFileLoader, + self.machinery.SOURCE_SUFFIXES)) + with open('mod.py', 'w', encoding='utf-8') as file: + file.write("# test file for importlib") + try: + loader = self._find(finder, 'mod', loader_only=True) + self.assertHasAttr(loader, 'load_module') + finally: + os.unlink('mod.py') + + def test_invalidate_caches(self): + # invalidate_caches() should reset the mtime. + finder = self.machinery.FileFinder('', (self.machinery.SourceFileLoader, + self.machinery.SOURCE_SUFFIXES)) + finder._path_mtime = 42 + finder.invalidate_caches() + self.assertEqual(finder._path_mtime, -1) + + # Regression test for http://bugs.python.org/issue14846 + def test_dir_removal_handling(self): + mod = 'mod' + with util.create_modules(mod) as mapping: + finder = self.get_finder(mapping['.root']) + found = self._find(finder, 'mod', loader_only=True) + self.assertIsNotNone(found) + found = self._find(finder, 'mod', loader_only=True) + self.assertEqual(found, self.NOT_FOUND) + + @unittest.skipUnless(sys.platform != 'win32', + 'os.chmod() does not support the needed arguments under Windows') + def test_no_read_directory(self): + # Issue #16730 + tempdir = tempfile.TemporaryDirectory() + self.enterContext(tempdir) + # Since we muck with the permissions, we want to set them back to + # their original values to make sure the directory can be properly + # cleaned up. + original_mode = os.stat(tempdir.name).st_mode + self.addCleanup(os.chmod, tempdir.name, original_mode) + os.chmod(tempdir.name, stat.S_IWUSR | stat.S_IXUSR) + finder = self.get_finder(tempdir.name) + found = self._find(finder, 'doesnotexist') + self.assertEqual(found, self.NOT_FOUND) + + def test_ignore_file(self): + # If a directory got changed to a file from underneath us, then don't + # worry about looking for submodules. + with tempfile.NamedTemporaryFile() as file_obj: + finder = self.get_finder(file_obj.name) + found = self._find(finder, 'doesnotexist') + self.assertEqual(found, self.NOT_FOUND) + + +class FinderTestsPEP451(FinderTests): + + NOT_FOUND = None + + def _find(self, finder, name, loader_only=False): + spec = finder.find_spec(name) + return spec.loader if spec is not None else spec + + +(Frozen_FinderTestsPEP451, + Source_FinderTestsPEP451 + ) = util.test_both(FinderTestsPEP451, machinery=machinery) + + +class FinderTestsPEP420(FinderTests): + + NOT_FOUND = (None, []) + + def _find(self, finder, name, loader_only=False): + spec = finder.find_spec(name) + if spec is None: + return self.NOT_FOUND + if loader_only: + return spec.loader + return spec.loader, spec.submodule_search_locations + + +(Frozen_FinderTestsPEP420, + Source_FinderTestsPEP420 + ) = util.test_both(FinderTestsPEP420, machinery=machinery) + + +if __name__ == '__main__': + unittest.main() diff --git a/stdlib/test/test_importlib/source/test_path_hook.py b/stdlib/test/test_importlib/source/test_path_hook.py new file mode 100644 index 000000000..6e1c23e6a --- /dev/null +++ b/stdlib/test/test_importlib/source/test_path_hook.py @@ -0,0 +1,32 @@ +from test.test_importlib import util + +machinery = util.import_importlib('importlib.machinery') + +import unittest + + +class PathHookTest: + + """Test the path hook for source.""" + + def path_hook(self): + return self.machinery.FileFinder.path_hook((self.machinery.SourceFileLoader, + self.machinery.SOURCE_SUFFIXES)) + + def test_success(self): + with util.create_modules('dummy') as mapping: + self.assertHasAttr(self.path_hook()(mapping['.root']), + 'find_spec') + + def test_empty_string(self): + # The empty string represents the cwd. + self.assertHasAttr(self.path_hook()(''), 'find_spec') + + +(Frozen_PathHookTest, + Source_PathHooktest + ) = util.test_both(PathHookTest, machinery=machinery) + + +if __name__ == '__main__': + unittest.main() diff --git a/stdlib/test/test_importlib/source/test_source_encoding.py b/stdlib/test/test_importlib/source/test_source_encoding.py new file mode 100644 index 000000000..c09c9aa12 --- /dev/null +++ b/stdlib/test/test_importlib/source/test_source_encoding.py @@ -0,0 +1,175 @@ +from test.test_importlib import util + +machinery = util.import_importlib('importlib.machinery') + +import codecs +import importlib.util +import re +import types +# Because sys.path gets essentially blanked, need to have unicodedata already +# imported for the parser to use. +import unicodedata +import unittest +import warnings + + +CODING_RE = re.compile(r'^[ \t\f]*#.*?coding[:=][ \t]*([-\w.]+)', re.ASCII) + + +class EncodingTest: + + """PEP 3120 makes UTF-8 the default encoding for source code + [default encoding]. + + PEP 263 specifies how that can change on a per-file basis. Either the first + or second line can contain the encoding line [encoding first line] + [encoding second line]. If the file has the BOM marker it is considered UTF-8 + implicitly [BOM]. If any encoding is specified it must be UTF-8, else it is + an error [BOM and utf-8][BOM conflict]. + + """ + + variable = '\u00fc' + character = '\u00c9' + source_line = "{0} = '{1}'\n".format(variable, character) + module_name = '_temp' + + def run_test(self, source): + with util.create_modules(self.module_name) as mapping: + with open(mapping[self.module_name], 'wb') as file: + file.write(source) + loader = self.machinery.SourceFileLoader(self.module_name, + mapping[self.module_name]) + return self.load(loader) + + def create_source(self, encoding): + encoding_line = "# coding={0}".format(encoding) + assert CODING_RE.match(encoding_line) + source_lines = [encoding_line.encode('utf-8')] + source_lines.append(self.source_line.encode(encoding)) + return b'\n'.join(source_lines) + + def test_non_obvious_encoding(self): + # Make sure that an encoding that has never been a standard one for + # Python works. + encoding_line = "# coding=koi8-r" + assert CODING_RE.match(encoding_line) + source = "{0}\na=42\n".format(encoding_line).encode("koi8-r") + self.run_test(source) + + # [default encoding] + def test_default_encoding(self): + self.run_test(self.source_line.encode('utf-8')) + + # [encoding first line] + def test_encoding_on_first_line(self): + encoding = 'Latin-1' + source = self.create_source(encoding) + self.run_test(source) + + # [encoding second line] + def test_encoding_on_second_line(self): + source = b"#/usr/bin/python\n" + self.create_source('Latin-1') + self.run_test(source) + + # [BOM] + def test_bom(self): + self.run_test(codecs.BOM_UTF8 + self.source_line.encode('utf-8')) + + # [BOM and utf-8] + def test_bom_and_utf_8(self): + source = codecs.BOM_UTF8 + self.create_source('utf-8') + self.run_test(source) + + # [BOM conflict] + def test_bom_conflict(self): + source = codecs.BOM_UTF8 + self.create_source('latin-1') + with self.assertRaises(SyntaxError): + self.run_test(source) + + +class EncodingTestPEP451(EncodingTest): + + def load(self, loader): + module = types.ModuleType(self.module_name) + module.__spec__ = importlib.util.spec_from_loader(self.module_name, loader) + loader.exec_module(module) + return module + + +(Frozen_EncodingTestPEP451, + Source_EncodingTestPEP451 + ) = util.test_both(EncodingTestPEP451, machinery=machinery) + + +class EncodingTestPEP302(EncodingTest): + + def load(self, loader): + with warnings.catch_warnings(): + warnings.simplefilter('ignore', DeprecationWarning) + return loader.load_module(self.module_name) + + +(Frozen_EncodingTestPEP302, + Source_EncodingTestPEP302 + ) = util.test_both(EncodingTestPEP302, machinery=machinery) + + +class LineEndingTest: + + r"""Source written with the three types of line endings (\n, \r\n, \r) + need to be readable [cr][crlf][lf].""" + + def run_test(self, line_ending): + module_name = '_temp' + source_lines = [b"a = 42", b"b = -13", b''] + source = line_ending.join(source_lines) + with util.create_modules(module_name) as mapping: + with open(mapping[module_name], 'wb') as file: + file.write(source) + loader = self.machinery.SourceFileLoader(module_name, + mapping[module_name]) + return self.load(loader, module_name) + + # [cr] + def test_cr(self): + self.run_test(b'\r') + + # [crlf] + def test_crlf(self): + self.run_test(b'\r\n') + + # [lf] + def test_lf(self): + self.run_test(b'\n') + + +class LineEndingTestPEP451(LineEndingTest): + + def load(self, loader, module_name): + module = types.ModuleType(module_name) + module.__spec__ = importlib.util.spec_from_loader(module_name, loader) + loader.exec_module(module) + return module + + +(Frozen_LineEndingTestPEP451, + Source_LineEndingTestPEP451 + ) = util.test_both(LineEndingTestPEP451, machinery=machinery) + + +class LineEndingTestPEP302(LineEndingTest): + + def load(self, loader, module_name): + with warnings.catch_warnings(): + warnings.simplefilter('ignore', DeprecationWarning) + return loader.load_module(module_name) + + +(Frozen_LineEndingTestPEP302, + Source_LineEndingTestPEP302 + ) = util.test_both(LineEndingTestPEP302, machinery=machinery) + + +if __name__ == '__main__': + unittest.main() diff --git a/stdlib/test/test_importlib/test_abc.py b/stdlib/test/test_importlib/test_abc.py new file mode 100644 index 000000000..dd943210f --- /dev/null +++ b/stdlib/test/test_importlib/test_abc.py @@ -0,0 +1,943 @@ +import io +import marshal +import os +import sys +from test.support import import_helper +import types +import unittest +from unittest import mock +import warnings + +from test.test_importlib import util as test_util + +init = test_util.import_importlib('importlib') +abc = test_util.import_importlib('importlib.abc') +machinery = test_util.import_importlib('importlib.machinery') +util = test_util.import_importlib('importlib.util') + + +##### Inheritance ############################################################## +class InheritanceTests: + + """Test that the specified class is a subclass/superclass of the expected + classes.""" + + subclasses = [] + superclasses = [] + + def setUp(self): + self.superclasses = [getattr(self.abc, class_name) + for class_name in self.superclass_names] + if hasattr(self, 'subclass_names'): + # Because test.support.import_fresh_module() creates a new + # importlib._bootstrap per module, inheritance checks fail when + # checking across module boundaries (i.e. the _bootstrap in abc is + # not the same as the one in machinery). That means stealing one of + # the modules from the other to make sure the same instance is used. + machinery = self.abc.machinery + self.subclasses = [getattr(machinery, class_name) + for class_name in self.subclass_names] + assert self.subclasses or self.superclasses, self.__class__ + self.__test = getattr(self.abc, self._NAME) + + def test_subclasses(self): + # Test that the expected subclasses inherit. + for subclass in self.subclasses: + self.assertIsSubclass(subclass, self.__test) + + def test_superclasses(self): + # Test that the class inherits from the expected superclasses. + for superclass in self.superclasses: + self.assertIsSubclass(self.__test, superclass) + + +class MetaPathFinder(InheritanceTests): + superclass_names = [] + subclass_names = ['BuiltinImporter', 'FrozenImporter', 'PathFinder', + 'WindowsRegistryFinder'] + + +(Frozen_MetaPathFinderInheritanceTests, + Source_MetaPathFinderInheritanceTests + ) = test_util.test_both(MetaPathFinder, abc=abc) + + +class PathEntryFinder(InheritanceTests): + superclass_names = [] + subclass_names = ['FileFinder'] + + +(Frozen_PathEntryFinderInheritanceTests, + Source_PathEntryFinderInheritanceTests + ) = test_util.test_both(PathEntryFinder, abc=abc) + + +class ResourceLoader(InheritanceTests): + superclass_names = ['Loader'] + + +(Frozen_ResourceLoaderInheritanceTests, + Source_ResourceLoaderInheritanceTests + ) = test_util.test_both(ResourceLoader, abc=abc) + + +class InspectLoader(InheritanceTests): + superclass_names = ['Loader'] + subclass_names = ['BuiltinImporter', 'FrozenImporter', 'ExtensionFileLoader'] + + +(Frozen_InspectLoaderInheritanceTests, + Source_InspectLoaderInheritanceTests + ) = test_util.test_both(InspectLoader, abc=abc) + + +class ExecutionLoader(InheritanceTests): + superclass_names = ['InspectLoader'] + subclass_names = ['ExtensionFileLoader'] + + +(Frozen_ExecutionLoaderInheritanceTests, + Source_ExecutionLoaderInheritanceTests + ) = test_util.test_both(ExecutionLoader, abc=abc) + + +class FileLoader(InheritanceTests): + superclass_names = ['ResourceLoader', 'ExecutionLoader'] + subclass_names = ['SourceFileLoader', 'SourcelessFileLoader'] + + +(Frozen_FileLoaderInheritanceTests, + Source_FileLoaderInheritanceTests + ) = test_util.test_both(FileLoader, abc=abc) + + +class SourceLoader(InheritanceTests): + superclass_names = ['ResourceLoader', 'ExecutionLoader'] + subclass_names = ['SourceFileLoader'] + + +(Frozen_SourceLoaderInheritanceTests, + Source_SourceLoaderInheritanceTests + ) = test_util.test_both(SourceLoader, abc=abc) + + +##### Default return values #################################################### + +def make_abc_subclasses(base_class, name=None, inst=False, **kwargs): + if name is None: + name = base_class.__name__ + base = {kind: getattr(splitabc, name) + for kind, splitabc in abc.items()} + return {cls._KIND: cls() if inst else cls + for cls in test_util.split_frozen(base_class, base, **kwargs)} + + +class ABCTestHarness: + + @property + def ins(self): + # Lazily set ins on the class. + cls = self.SPLIT[self._KIND] + ins = cls() + self.__class__.ins = ins + return ins + + +class MetaPathFinder: + + pass + + +class MetaPathFinderDefaultsTests(ABCTestHarness): + + SPLIT = make_abc_subclasses(MetaPathFinder) + + def test_invalidate_caches(self): + # Calling the method is a no-op. + self.ins.invalidate_caches() + + +(Frozen_MPFDefaultTests, + Source_MPFDefaultTests + ) = test_util.test_both(MetaPathFinderDefaultsTests) + + +class PathEntryFinder: + + pass + + +class PathEntryFinderDefaultsTests(ABCTestHarness): + + SPLIT = make_abc_subclasses(PathEntryFinder) + + def test_invalidate_caches(self): + # Should be a no-op. + self.ins.invalidate_caches() + + +(Frozen_PEFDefaultTests, + Source_PEFDefaultTests + ) = test_util.test_both(PathEntryFinderDefaultsTests) + + +class Loader: + + pass + + +class LoaderDefaultsTests(ABCTestHarness): + + SPLIT = make_abc_subclasses(Loader) + + def test_create_module(self): + spec = 'a spec' + self.assertIsNone(self.ins.create_module(spec)) + + def test_load_module(self): + with self.assertRaises(ImportError): + self.ins.load_module('something') + + def test_module_repr(self): + mod = types.ModuleType('blah') + with warnings.catch_warnings(): + warnings.simplefilter("ignore", DeprecationWarning) + original_repr = repr(mod) + mod.__loader__ = self.ins + # Should still return a proper repr. + self.assertTrue(repr(mod)) + + +(Frozen_LDefaultTests, + SourceLDefaultTests + ) = test_util.test_both(LoaderDefaultsTests) + + +class ResourceLoader(Loader): + + def get_data(self, path): + return super().get_data(path) + + +class ResourceLoaderDefaultsTests(ABCTestHarness): + + SPLIT = make_abc_subclasses(ResourceLoader) + + def test_get_data(self): + with self.assertRaises(IOError): + self.ins.get_data('/some/path') + + +(Frozen_RLDefaultTests, + Source_RLDefaultTests + ) = test_util.test_both(ResourceLoaderDefaultsTests) + + +class InspectLoader(Loader): + + def is_package(self, fullname): + return super().is_package(fullname) + + def get_source(self, fullname): + return super().get_source(fullname) + + +SPLIT_IL = make_abc_subclasses(InspectLoader) + + +class InspectLoaderDefaultsTests(ABCTestHarness): + + SPLIT = SPLIT_IL + + def test_is_package(self): + with self.assertRaises(ImportError): + self.ins.is_package('blah') + + def test_get_source(self): + with self.assertRaises(ImportError): + self.ins.get_source('blah') + + +(Frozen_ILDefaultTests, + Source_ILDefaultTests + ) = test_util.test_both(InspectLoaderDefaultsTests) + + +class ExecutionLoader(InspectLoader): + + def get_filename(self, fullname): + return super().get_filename(fullname) + + +SPLIT_EL = make_abc_subclasses(ExecutionLoader) + + +class ExecutionLoaderDefaultsTests(ABCTestHarness): + + SPLIT = SPLIT_EL + + def test_get_filename(self): + with self.assertRaises(ImportError): + self.ins.get_filename('blah') + + +(Frozen_ELDefaultTests, + Source_ELDefaultsTests + ) = test_util.test_both(InspectLoaderDefaultsTests) + + +class ResourceReader: + + def open_resource(self, *args, **kwargs): + return super().open_resource(*args, **kwargs) + + def resource_path(self, *args, **kwargs): + return super().resource_path(*args, **kwargs) + + def is_resource(self, *args, **kwargs): + return super().is_resource(*args, **kwargs) + + def contents(self, *args, **kwargs): + return super().contents(*args, **kwargs) + + +##### MetaPathFinder concrete methods ########################################## +class MetaPathFinderFindModuleTests: + + @classmethod + def finder(cls, spec): + class MetaPathSpecFinder(cls.abc.MetaPathFinder): + + def find_spec(self, fullname, path, target=None): + self.called_for = fullname, path + return spec + + return MetaPathSpecFinder() + + def test_find_spec_with_explicit_target(self): + loader = object() + spec = self.util.spec_from_loader('blah', loader) + finder = self.finder(spec) + found = finder.find_spec('blah', 'blah', None) + self.assertEqual(found, spec) + + def test_no_spec(self): + finder = self.finder(None) + path = ['a', 'b', 'c'] + name = 'blah' + found = finder.find_spec(name, path, None) + self.assertIsNone(found) + self.assertEqual(name, finder.called_for[0]) + self.assertEqual(path, finder.called_for[1]) + + def test_spec(self): + loader = object() + spec = self.util.spec_from_loader('blah', loader) + finder = self.finder(spec) + found = finder.find_spec('blah', None) + self.assertIs(found, spec) + + +(Frozen_MPFFindModuleTests, + Source_MPFFindModuleTests + ) = test_util.test_both(MetaPathFinderFindModuleTests, abc=abc, util=util) + + +##### Loader concrete methods ################################################## +class LoaderLoadModuleTests: + + def loader(self): + class SpecLoader(self.abc.Loader): + found = None + def exec_module(self, module): + self.found = module + + def is_package(self, fullname): + """Force some non-default module state to be set.""" + return True + + return SpecLoader() + + def test_fresh(self): + with warnings.catch_warnings(): + warnings.simplefilter("ignore", DeprecationWarning) + loader = self.loader() + name = 'blah' + with test_util.uncache(name): + loader.load_module(name) + module = loader.found + self.assertIs(sys.modules[name], module) + self.assertEqual(loader, module.__loader__) + self.assertEqual(loader, module.__spec__.loader) + self.assertEqual(name, module.__name__) + self.assertEqual(name, module.__spec__.name) + self.assertIsNotNone(module.__path__) + self.assertIsNotNone(module.__path__, + module.__spec__.submodule_search_locations) + + def test_reload(self): + with warnings.catch_warnings(): + warnings.simplefilter("ignore", DeprecationWarning) + name = 'blah' + loader = self.loader() + module = types.ModuleType(name) + module.__spec__ = self.util.spec_from_loader(name, loader) + module.__loader__ = loader + with test_util.uncache(name): + sys.modules[name] = module + loader.load_module(name) + found = loader.found + self.assertIs(found, sys.modules[name]) + self.assertIs(module, sys.modules[name]) + + +(Frozen_LoaderLoadModuleTests, + Source_LoaderLoadModuleTests + ) = test_util.test_both(LoaderLoadModuleTests, abc=abc, util=util) + + +##### InspectLoader concrete methods ########################################### +class InspectLoaderSourceToCodeTests: + + def source_to_module(self, data, path=None): + """Help with source_to_code() tests.""" + module = types.ModuleType('blah') + loader = self.InspectLoaderSubclass() + if path is None: + code = loader.source_to_code(data) + else: + code = loader.source_to_code(data, path) + exec(code, module.__dict__) + return module + + def test_source_to_code_source(self): + # Since compile() can handle strings, so should source_to_code(). + source = 'attr = 42' + module = self.source_to_module(source) + self.assertHasAttr(module, 'attr') + self.assertEqual(module.attr, 42) + + def test_source_to_code_bytes(self): + # Since compile() can handle bytes, so should source_to_code(). + source = b'attr = 42' + module = self.source_to_module(source) + self.assertHasAttr(module, 'attr') + self.assertEqual(module.attr, 42) + + def test_source_to_code_path(self): + # Specifying a path should set it for the code object. + path = 'path/to/somewhere' + loader = self.InspectLoaderSubclass() + code = loader.source_to_code('', path) + self.assertEqual(code.co_filename, path) + + def test_source_to_code_no_path(self): + # Not setting a path should still work and be set to since that + # is a pre-existing practice as a default to compile(). + loader = self.InspectLoaderSubclass() + code = loader.source_to_code('') + self.assertEqual(code.co_filename, '') + + +(Frozen_ILSourceToCodeTests, + Source_ILSourceToCodeTests + ) = test_util.test_both(InspectLoaderSourceToCodeTests, + InspectLoaderSubclass=SPLIT_IL) + + +class InspectLoaderGetCodeTests: + + def test_get_code(self): + # Test success. + module = types.ModuleType('blah') + with mock.patch.object(self.InspectLoaderSubclass, 'get_source') as mocked: + mocked.return_value = 'attr = 42' + loader = self.InspectLoaderSubclass() + code = loader.get_code('blah') + exec(code, module.__dict__) + self.assertEqual(module.attr, 42) + + def test_get_code_source_is_None(self): + # If get_source() is None then this should be None. + with mock.patch.object(self.InspectLoaderSubclass, 'get_source') as mocked: + mocked.return_value = None + loader = self.InspectLoaderSubclass() + code = loader.get_code('blah') + self.assertIsNone(code) + + def test_get_code_source_not_found(self): + # If there is no source then there is no code object. + loader = self.InspectLoaderSubclass() + with self.assertRaises(ImportError): + loader.get_code('blah') + + +(Frozen_ILGetCodeTests, + Source_ILGetCodeTests + ) = test_util.test_both(InspectLoaderGetCodeTests, + InspectLoaderSubclass=SPLIT_IL) + + +class InspectLoaderLoadModuleTests: + + """Test InspectLoader.load_module().""" + + module_name = 'blah' + + def setUp(self): + import_helper.unload(self.module_name) + self.addCleanup(import_helper.unload, self.module_name) + + def load(self, loader): + spec = self.util.spec_from_loader(self.module_name, loader) + with warnings.catch_warnings(): + warnings.simplefilter('ignore', DeprecationWarning) + return self.init._bootstrap._load_unlocked(spec) + + def mock_get_code(self): + return mock.patch.object(self.InspectLoaderSubclass, 'get_code') + + def test_get_code_ImportError(self): + # If get_code() raises ImportError, it should propagate. + with self.mock_get_code() as mocked_get_code: + mocked_get_code.side_effect = ImportError + with self.assertRaises(ImportError): + loader = self.InspectLoaderSubclass() + self.load(loader) + + def test_get_code_None(self): + # If get_code() returns None, raise ImportError. + with self.mock_get_code() as mocked_get_code: + mocked_get_code.return_value = None + with self.assertRaises(ImportError): + loader = self.InspectLoaderSubclass() + self.load(loader) + + def test_module_returned(self): + # The loaded module should be returned. + code = compile('attr = 42', '', 'exec') + with self.mock_get_code() as mocked_get_code: + mocked_get_code.return_value = code + loader = self.InspectLoaderSubclass() + module = self.load(loader) + self.assertEqual(module, sys.modules[self.module_name]) + + +(Frozen_ILLoadModuleTests, + Source_ILLoadModuleTests + ) = test_util.test_both(InspectLoaderLoadModuleTests, + InspectLoaderSubclass=SPLIT_IL, + init=init, + util=util) + + +##### ExecutionLoader concrete methods ######################################### +class ExecutionLoaderGetCodeTests: + + def mock_methods(self, *, get_source=False, get_filename=False): + source_mock_context, filename_mock_context = None, None + if get_source: + source_mock_context = mock.patch.object(self.ExecutionLoaderSubclass, + 'get_source') + if get_filename: + filename_mock_context = mock.patch.object(self.ExecutionLoaderSubclass, + 'get_filename') + return source_mock_context, filename_mock_context + + def test_get_code(self): + path = 'blah.py' + source_mock_context, filename_mock_context = self.mock_methods( + get_source=True, get_filename=True) + with source_mock_context as source_mock, filename_mock_context as name_mock: + source_mock.return_value = 'attr = 42' + name_mock.return_value = path + loader = self.ExecutionLoaderSubclass() + code = loader.get_code('blah') + self.assertEqual(code.co_filename, path) + module = types.ModuleType('blah') + exec(code, module.__dict__) + self.assertEqual(module.attr, 42) + + def test_get_code_source_is_None(self): + # If get_source() is None then this should be None. + source_mock_context, _ = self.mock_methods(get_source=True) + with source_mock_context as mocked: + mocked.return_value = None + loader = self.ExecutionLoaderSubclass() + code = loader.get_code('blah') + self.assertIsNone(code) + + def test_get_code_source_not_found(self): + # If there is no source then there is no code object. + loader = self.ExecutionLoaderSubclass() + with self.assertRaises(ImportError): + loader.get_code('blah') + + def test_get_code_no_path(self): + # If get_filename() raises ImportError then simply skip setting the path + # on the code object. + source_mock_context, filename_mock_context = self.mock_methods( + get_source=True, get_filename=True) + with source_mock_context as source_mock, filename_mock_context as name_mock: + source_mock.return_value = 'attr = 42' + name_mock.side_effect = ImportError + loader = self.ExecutionLoaderSubclass() + code = loader.get_code('blah') + self.assertEqual(code.co_filename, '') + module = types.ModuleType('blah') + exec(code, module.__dict__) + self.assertEqual(module.attr, 42) + + +(Frozen_ELGetCodeTests, + Source_ELGetCodeTests + ) = test_util.test_both(ExecutionLoaderGetCodeTests, + ExecutionLoaderSubclass=SPLIT_EL) + + +##### SourceLoader concrete methods ############################################ +class SourceOnlyLoader: + + # Globals that should be defined for all modules. + source = (b"_ = '::'.join([__name__, __file__, __cached__, __package__, " + b"repr(__loader__)])") + + def __init__(self, path): + self.path = path + + def get_data(self, path): + if path != self.path: + raise IOError + return self.source + + def get_filename(self, fullname): + return self.path + + +SPLIT_SOL = make_abc_subclasses(SourceOnlyLoader, 'SourceLoader') + + +class SourceLoader(SourceOnlyLoader): + + source_mtime = 1 + + def __init__(self, path, magic=None): + super().__init__(path) + self.bytecode_path = self.util.cache_from_source(self.path) + self.source_size = len(self.source) + if magic is None: + magic = self.util.MAGIC_NUMBER + data = bytearray(magic) + data.extend(self.init._pack_uint32(0)) + data.extend(self.init._pack_uint32(self.source_mtime)) + data.extend(self.init._pack_uint32(self.source_size)) + code_object = compile(self.source, self.path, 'exec', + dont_inherit=True) + data.extend(marshal.dumps(code_object)) + self.bytecode = bytes(data) + self.written = {} + + def get_data(self, path): + if path == self.path: + return super().get_data(path) + elif path == self.bytecode_path: + return self.bytecode + else: + raise OSError + + def path_stats(self, path): + if path != self.path: + raise IOError + return {'mtime': self.source_mtime, 'size': self.source_size} + + def set_data(self, path, data): + self.written[path] = bytes(data) + return path == self.bytecode_path + + +SPLIT_SL = make_abc_subclasses(SourceLoader, util=util, init=init) + + +class SourceLoaderTestHarness: + + def setUp(self, *, is_package=True, **kwargs): + self.package = 'pkg' + if is_package: + self.path = os.path.join(self.package, '__init__.py') + self.name = self.package + else: + module_name = 'mod' + self.path = os.path.join(self.package, '.'.join(['mod', 'py'])) + self.name = '.'.join([self.package, module_name]) + self.cached = self.util.cache_from_source(self.path) + self.loader = self.loader_mock(self.path, **kwargs) + + def verify_module(self, module): + self.assertEqual(module.__name__, self.name) + self.assertEqual(module.__file__, self.path) + self.assertEqual(module.__cached__, self.cached) + self.assertEqual(module.__package__, self.package) + self.assertEqual(module.__loader__, self.loader) + values = module._.split('::') + self.assertEqual(values[0], self.name) + self.assertEqual(values[1], self.path) + self.assertEqual(values[2], self.cached) + self.assertEqual(values[3], self.package) + self.assertEqual(values[4], repr(self.loader)) + + def verify_code(self, code_object): + module = types.ModuleType(self.name) + module.__file__ = self.path + module.__cached__ = self.cached + module.__package__ = self.package + module.__loader__ = self.loader + module.__path__ = [] + exec(code_object, module.__dict__) + self.verify_module(module) + + +class SourceOnlyLoaderTests(SourceLoaderTestHarness): + """Test importlib.abc.SourceLoader for source-only loading.""" + + def test_get_source(self): + # Verify the source code is returned as a string. + # If an OSError is raised by get_data then raise ImportError. + expected_source = self.loader.source.decode('utf-8') + self.assertEqual(self.loader.get_source(self.name), expected_source) + def raise_OSError(path): + raise OSError + self.loader.get_data = raise_OSError + with self.assertRaises(ImportError) as cm: + self.loader.get_source(self.name) + self.assertEqual(cm.exception.name, self.name) + + def test_is_package(self): + # Properly detect when loading a package. + self.setUp(is_package=False) + self.assertFalse(self.loader.is_package(self.name)) + self.setUp(is_package=True) + self.assertTrue(self.loader.is_package(self.name)) + self.assertFalse(self.loader.is_package(self.name + '.__init__')) + + def test_get_code(self): + # Verify the code object is created. + code_object = self.loader.get_code(self.name) + self.verify_code(code_object) + + def test_source_to_code(self): + # Verify the compiled code object. + code = self.loader.source_to_code(self.loader.source, self.path) + self.verify_code(code) + + def test_load_module(self): + # Loading a module should set __name__, __loader__, __package__, + # __path__ (for packages), __file__, and __cached__. + # The module should also be put into sys.modules. + with warnings.catch_warnings(): + warnings.simplefilter("ignore", ImportWarning) + with test_util.uncache(self.name): + with warnings.catch_warnings(): + warnings.simplefilter('ignore', DeprecationWarning) + module = self.loader.load_module(self.name) + self.verify_module(module) + self.assertEqual(module.__path__, [os.path.dirname(self.path)]) + self.assertIn(self.name, sys.modules) + + def test_package_settings(self): + # __package__ needs to be set, while __path__ is set on if the module + # is a package. + # Testing the values for a package are covered by test_load_module. + with warnings.catch_warnings(): + warnings.simplefilter("ignore", ImportWarning) + self.setUp(is_package=False) + with test_util.uncache(self.name): + with warnings.catch_warnings(): + warnings.simplefilter('ignore', DeprecationWarning) + module = self.loader.load_module(self.name) + self.verify_module(module) + self.assertNotHasAttr(module, '__path__') + + def test_get_source_encoding(self): + # Source is considered encoded in UTF-8 by default unless otherwise + # specified by an encoding line. + source = "_ = 'ü'" + self.loader.source = source.encode('utf-8') + returned_source = self.loader.get_source(self.name) + self.assertEqual(returned_source, source) + source = "# coding: latin-1\n_ = ü" + self.loader.source = source.encode('latin-1') + returned_source = self.loader.get_source(self.name) + self.assertEqual(returned_source, source) + + +(Frozen_SourceOnlyLoaderTests, + Source_SourceOnlyLoaderTests + ) = test_util.test_both(SourceOnlyLoaderTests, util=util, + loader_mock=SPLIT_SOL) + + +@unittest.skipIf(sys.dont_write_bytecode, "sys.dont_write_bytecode is true") +class SourceLoaderBytecodeTests(SourceLoaderTestHarness): + + """Test importlib.abc.SourceLoader's use of bytecode. + + Source-only testing handled by SourceOnlyLoaderTests. + + """ + + def verify_code(self, code_object, *, bytecode_written=False): + super().verify_code(code_object) + if bytecode_written: + self.assertIn(self.cached, self.loader.written) + data = bytearray(self.util.MAGIC_NUMBER) + data.extend(self.init._pack_uint32(0)) + data.extend(self.init._pack_uint32(self.loader.source_mtime)) + data.extend(self.init._pack_uint32(self.loader.source_size)) + # Make sure there's > 1 reference to code_object so that the + # marshaled representation below matches the cached representation + l = [code_object] + data.extend(marshal.dumps(code_object)) + self.assertEqual(self.loader.written[self.cached], bytes(data)) + + def test_code_with_everything(self): + # When everything should work. + code_object = self.loader.get_code(self.name) + self.verify_code(code_object) + + def test_no_bytecode(self): + # If no bytecode exists then move on to the source. + self.loader.bytecode_path = "" + # Sanity check + with self.assertRaises(OSError): + bytecode_path = self.util.cache_from_source(self.path) + self.loader.get_data(bytecode_path) + code_object = self.loader.get_code(self.name) + self.verify_code(code_object, bytecode_written=True) + + def test_code_bad_timestamp(self): + # Bytecode is only used when the timestamp matches the source EXACTLY. + for source_mtime in (0, 2): + assert source_mtime != self.loader.source_mtime + original = self.loader.source_mtime + self.loader.source_mtime = source_mtime + # If bytecode is used then EOFError would be raised by marshal. + self.loader.bytecode = self.loader.bytecode[8:] + code_object = self.loader.get_code(self.name) + self.verify_code(code_object, bytecode_written=True) + self.loader.source_mtime = original + + def test_code_bad_magic(self): + # Skip over bytecode with a bad magic number. + self.setUp(magic=b'0000') + # If bytecode is used then EOFError would be raised by marshal. + self.loader.bytecode = self.loader.bytecode[8:] + code_object = self.loader.get_code(self.name) + self.verify_code(code_object, bytecode_written=True) + + def test_dont_write_bytecode(self): + # Bytecode is not written if sys.dont_write_bytecode is true. + # Can assume it is false already thanks to the skipIf class decorator. + try: + sys.dont_write_bytecode = True + self.loader.bytecode_path = "" + code_object = self.loader.get_code(self.name) + self.assertNotIn(self.cached, self.loader.written) + finally: + sys.dont_write_bytecode = False + + def test_no_set_data(self): + # If set_data is not defined, one can still read bytecode. + self.setUp(magic=b'0000') + original_set_data = self.loader.__class__.mro()[1].set_data + try: + del self.loader.__class__.mro()[1].set_data + code_object = self.loader.get_code(self.name) + self.verify_code(code_object) + finally: + self.loader.__class__.mro()[1].set_data = original_set_data + + def test_set_data_raises_exceptions(self): + # Raising NotImplementedError or OSError is okay for set_data. + def raise_exception(exc): + def closure(*args, **kwargs): + raise exc + return closure + + self.setUp(magic=b'0000') + self.loader.set_data = raise_exception(NotImplementedError) + code_object = self.loader.get_code(self.name) + self.verify_code(code_object) + + +(Frozen_SLBytecodeTests, + SourceSLBytecodeTests + ) = test_util.test_both(SourceLoaderBytecodeTests, init=init, util=util, + loader_mock=SPLIT_SL) + + +class SourceLoaderGetSourceTests: + + """Tests for importlib.abc.SourceLoader.get_source().""" + + def test_default_encoding(self): + # Should have no problems with UTF-8 text. + name = 'mod' + mock = self.SourceOnlyLoaderMock('mod.file') + source = 'x = "ü"' + mock.source = source.encode('utf-8') + returned_source = mock.get_source(name) + self.assertEqual(returned_source, source) + + def test_decoded_source(self): + # Decoding should work. + name = 'mod' + mock = self.SourceOnlyLoaderMock("mod.file") + source = "# coding: Latin-1\nx='ü'" + assert source.encode('latin-1') != source.encode('utf-8') + mock.source = source.encode('latin-1') + returned_source = mock.get_source(name) + self.assertEqual(returned_source, source) + + def test_universal_newlines(self): + # PEP 302 says universal newlines should be used. + name = 'mod' + mock = self.SourceOnlyLoaderMock('mod.file') + source = "x = 42\r\ny = -13\r\n" + mock.source = source.encode('utf-8') + expect = io.IncrementalNewlineDecoder(None, True).decode(source) + self.assertEqual(mock.get_source(name), expect) + + +(Frozen_SourceOnlyLoaderGetSourceTests, + Source_SourceOnlyLoaderGetSourceTests + ) = test_util.test_both(SourceLoaderGetSourceTests, + SourceOnlyLoaderMock=SPLIT_SOL) + + +class SourceLoaderDeprecationWarningsTests(unittest.TestCase): + """Tests SourceLoader deprecation warnings.""" + + def test_deprecated_path_mtime(self): + from importlib.abc import SourceLoader + class DummySourceLoader(SourceLoader): + def get_data(self, path): + return b'' + + def get_filename(self, fullname): + return 'foo.py' + + def path_stats(self, path): + return {'mtime': 1} + + loader = DummySourceLoader() + + with self.assertWarnsRegex( + DeprecationWarning, + r"SourceLoader\.path_mtime is deprecated in favour of " + r"SourceLoader\.path_stats\(\)\." + ): + loader.path_mtime('foo.py') + + +if __name__ == '__main__': + unittest.main() diff --git a/stdlib/test/test_importlib/test_api.py b/stdlib/test/test_importlib/test_api.py new file mode 100644 index 000000000..1bc531a2f --- /dev/null +++ b/stdlib/test/test_importlib/test_api.py @@ -0,0 +1,508 @@ +from test.test_importlib import util as test_util + +init = test_util.import_importlib('importlib') +util = test_util.import_importlib('importlib.util') +machinery = test_util.import_importlib('importlib.machinery') + +import os.path +import sys +from test import support +from test.support import import_helper +from test.support import os_helper +import traceback +import types +import unittest + + +class ImportModuleTests: + + """Test importlib.import_module.""" + + def test_module_import(self): + # Test importing a top-level module. + with test_util.mock_spec('top_level') as mock: + with test_util.import_state(meta_path=[mock]): + module = self.init.import_module('top_level') + self.assertEqual(module.__name__, 'top_level') + + def test_absolute_package_import(self): + # Test importing a module from a package with an absolute name. + pkg_name = 'pkg' + pkg_long_name = '{0}.__init__'.format(pkg_name) + name = '{0}.mod'.format(pkg_name) + with test_util.mock_spec(pkg_long_name, name) as mock: + with test_util.import_state(meta_path=[mock]): + module = self.init.import_module(name) + self.assertEqual(module.__name__, name) + + def test_shallow_relative_package_import(self): + # Test importing a module from a package through a relative import. + pkg_name = 'pkg' + pkg_long_name = '{0}.__init__'.format(pkg_name) + module_name = 'mod' + absolute_name = '{0}.{1}'.format(pkg_name, module_name) + relative_name = '.{0}'.format(module_name) + with test_util.mock_spec(pkg_long_name, absolute_name) as mock: + with test_util.import_state(meta_path=[mock]): + self.init.import_module(pkg_name) + module = self.init.import_module(relative_name, pkg_name) + self.assertEqual(module.__name__, absolute_name) + + def test_deep_relative_package_import(self): + modules = ['a.__init__', 'a.b.__init__', 'a.c'] + with test_util.mock_spec(*modules) as mock: + with test_util.import_state(meta_path=[mock]): + self.init.import_module('a') + self.init.import_module('a.b') + module = self.init.import_module('..c', 'a.b') + self.assertEqual(module.__name__, 'a.c') + + def test_absolute_import_with_package(self): + # Test importing a module from a package with an absolute name with + # the 'package' argument given. + pkg_name = 'pkg' + pkg_long_name = '{0}.__init__'.format(pkg_name) + name = '{0}.mod'.format(pkg_name) + with test_util.mock_spec(pkg_long_name, name) as mock: + with test_util.import_state(meta_path=[mock]): + self.init.import_module(pkg_name) + module = self.init.import_module(name, pkg_name) + self.assertEqual(module.__name__, name) + + def test_relative_import_wo_package(self): + # Relative imports cannot happen without the 'package' argument being + # set. + with self.assertRaises(TypeError): + self.init.import_module('.support') + + + def test_loaded_once(self): + # Issue #13591: Modules should only be loaded once when + # initializing the parent package attempts to import the + # module currently being imported. + b_load_count = 0 + def load_a(): + self.init.import_module('a.b') + def load_b(): + nonlocal b_load_count + b_load_count += 1 + code = {'a': load_a, 'a.b': load_b} + modules = ['a.__init__', 'a.b'] + with test_util.mock_spec(*modules, module_code=code) as mock: + with test_util.import_state(meta_path=[mock]): + self.init.import_module('a.b') + self.assertEqual(b_load_count, 1) + + +(Frozen_ImportModuleTests, + Source_ImportModuleTests + ) = test_util.test_both( + ImportModuleTests, init=init, util=util, machinery=machinery) + + +class FindLoaderTests: + + FakeMetaFinder = None + + def test_sys_modules(self): + # If a module with __spec__.loader is in sys.modules, then return it. + name = 'some_mod' + with test_util.uncache(name): + module = types.ModuleType(name) + loader = 'a loader!' + module.__spec__ = self.machinery.ModuleSpec(name, loader) + sys.modules[name] = module + spec = self.util.find_spec(name) + self.assertIsNotNone(spec) + self.assertEqual(spec.loader, loader) + + def test_sys_modules_loader_is_None(self): + # If sys.modules[name].__spec__.loader is None, raise ValueError. + name = 'some_mod' + with test_util.uncache(name): + module = types.ModuleType(name) + module.__loader__ = None + sys.modules[name] = module + with self.assertRaises(ValueError): + self.util.find_spec(name) + + def test_sys_modules_loader_is_not_set(self): + # Should raise ValueError + # Issue #17099 + name = 'some_mod' + with test_util.uncache(name): + module = types.ModuleType(name) + try: + del module.__spec__.loader + except AttributeError: + pass + sys.modules[name] = module + with self.assertRaises(ValueError): + self.util.find_spec(name) + + def test_success(self): + # Return the loader found on sys.meta_path. + name = 'some_mod' + with test_util.uncache(name): + with test_util.import_state(meta_path=[self.FakeMetaFinder]): + spec = self.util.find_spec(name) + self.assertEqual((name, (name, None)), (spec.name, spec.loader)) + + def test_success_path(self): + # Searching on a path should work. + name = 'some_mod' + path = 'path to some place' + with test_util.uncache(name): + with test_util.import_state(meta_path=[self.FakeMetaFinder]): + spec = self.util.find_spec(name, path) + self.assertEqual(name, spec.name) + + def test_nothing(self): + # None is returned upon failure to find a loader. + self.assertIsNone(self.util.find_spec('nevergoingtofindthismodule')) + + +class FindLoaderPEP451Tests(FindLoaderTests): + + class FakeMetaFinder: + @staticmethod + def find_spec(name, path=None, target=None): + return machinery['Source'].ModuleSpec(name, (name, path)) + + +(Frozen_FindLoaderPEP451Tests, + Source_FindLoaderPEP451Tests + ) = test_util.test_both( + FindLoaderPEP451Tests, init=init, util=util, machinery=machinery) + + +class ReloadTests: + + def test_reload_modules(self): + for mod in ('tokenize', 'time', 'marshal'): + with self.subTest(module=mod): + with import_helper.CleanImport(mod): + module = self.init.import_module(mod) + self.init.reload(module) + + def test_module_replaced(self): + def code(): + import sys + module = type(sys)('top_level') + module.spam = 3 + sys.modules['top_level'] = module + mock = test_util.mock_spec('top_level', + module_code={'top_level': code}) + with mock: + with test_util.import_state(meta_path=[mock]): + module = self.init.import_module('top_level') + reloaded = self.init.reload(module) + actual = sys.modules['top_level'] + self.assertEqual(actual.spam, 3) + self.assertEqual(reloaded.spam, 3) + + def test_reload_missing_loader(self): + with import_helper.CleanImport('types'): + import types + loader = types.__loader__ + del types.__loader__ + reloaded = self.init.reload(types) + + self.assertIs(reloaded, types) + self.assertIs(sys.modules['types'], types) + self.assertEqual(reloaded.__loader__.path, loader.path) + + def test_reload_loader_replaced(self): + with import_helper.CleanImport('types'): + import types + types.__loader__ = None + self.init.invalidate_caches() + reloaded = self.init.reload(types) + + self.assertIsNot(reloaded.__loader__, None) + self.assertIs(reloaded, types) + self.assertIs(sys.modules['types'], types) + + def test_reload_location_changed(self): + name = 'spam' + with os_helper.temp_cwd(None) as cwd: + with test_util.uncache('spam'): + with import_helper.DirsOnSysPath(cwd): + # Start as a plain module. + self.init.invalidate_caches() + path = os.path.join(cwd, name + '.py') + cached = self.util.cache_from_source(path) + expected = {'__name__': name, + '__package__': '', + '__file__': path, + '__cached__': cached, + '__doc__': None, + } + os_helper.create_empty_file(path) + module = self.init.import_module(name) + ns = vars(module).copy() + loader = ns.pop('__loader__') + spec = ns.pop('__spec__') + ns.pop('__builtins__', None) # An implementation detail. + self.assertEqual(spec.name, name) + self.assertEqual(spec.loader, loader) + self.assertEqual(loader.path, path) + self.assertEqual(ns, expected) + + # Change to a package. + self.init.invalidate_caches() + init_path = os.path.join(cwd, name, '__init__.py') + cached = self.util.cache_from_source(init_path) + expected = {'__name__': name, + '__package__': name, + '__file__': init_path, + '__cached__': cached, + '__path__': [os.path.dirname(init_path)], + '__doc__': None, + } + os.mkdir(name) + os.rename(path, init_path) + reloaded = self.init.reload(module) + ns = vars(reloaded).copy() + loader = ns.pop('__loader__') + spec = ns.pop('__spec__') + ns.pop('__builtins__', None) # An implementation detail. + self.assertEqual(spec.name, name) + self.assertEqual(spec.loader, loader) + self.assertIs(reloaded, module) + self.assertEqual(loader.path, init_path) + self.maxDiff = None + self.assertEqual(ns, expected) + + def test_reload_namespace_changed(self): + name = 'spam' + with os_helper.temp_cwd(None) as cwd: + with test_util.uncache('spam'): + with test_util.import_state(path=[cwd]): + self.init._bootstrap_external._install(self.init._bootstrap) + # Start as a namespace package. + self.init.invalidate_caches() + bad_path = os.path.join(cwd, name, '__init.py') + cached = self.util.cache_from_source(bad_path) + expected = {'__name__': name, + '__package__': name, + '__doc__': None, + '__file__': None, + } + os.mkdir(name) + with open(bad_path, 'w', encoding='utf-8') as init_file: + init_file.write('eggs = None') + module = self.init.import_module(name) + ns = vars(module).copy() + loader = ns.pop('__loader__') + path = ns.pop('__path__') + spec = ns.pop('__spec__') + ns.pop('__builtins__', None) # An implementation detail. + self.assertEqual(spec.name, name) + self.assertIsNotNone(spec.loader) + self.assertIsNotNone(loader) + self.assertEqual(spec.loader, loader) + self.assertEqual(set(path), + set([os.path.dirname(bad_path)])) + with self.assertRaises(AttributeError): + # a NamespaceLoader + loader.path + self.assertEqual(ns, expected) + + # Change to a regular package. + self.init.invalidate_caches() + init_path = os.path.join(cwd, name, '__init__.py') + cached = self.util.cache_from_source(init_path) + expected = {'__name__': name, + '__package__': name, + '__file__': init_path, + '__cached__': cached, + '__path__': [os.path.dirname(init_path)], + '__doc__': None, + 'eggs': None, + } + os.rename(bad_path, init_path) + reloaded = self.init.reload(module) + ns = vars(reloaded).copy() + loader = ns.pop('__loader__') + spec = ns.pop('__spec__') + ns.pop('__builtins__', None) # An implementation detail. + self.assertEqual(spec.name, name) + self.assertEqual(spec.loader, loader) + self.assertIs(reloaded, module) + self.assertEqual(loader.path, init_path) + self.assertEqual(ns, expected) + + def test_reload_submodule(self): + # See #19851. + name = 'spam' + subname = 'ham' + with test_util.temp_module(name, pkg=True) as pkg_dir: + fullname, _ = test_util.submodule(name, subname, pkg_dir) + ham = self.init.import_module(fullname) + reloaded = self.init.reload(ham) + self.assertIs(reloaded, ham) + + def test_module_missing_spec(self): + #Test that reload() throws ModuleNotFounderror when reloading + # a module whose missing a spec. (bpo-29851) + name = 'spam' + with test_util.uncache(name): + module = sys.modules[name] = types.ModuleType(name) + # Sanity check by attempting an import. + module = self.init.import_module(name) + self.assertIsNone(module.__spec__) + with self.assertRaises(ModuleNotFoundError): + self.init.reload(module) + + def test_reload_traceback_with_non_str(self): + # gh-125519 + with support.captured_stdout() as stdout: + try: + self.init.reload("typing") + except TypeError as exc: + traceback.print_exception(exc, file=stdout) + else: + self.fail("Expected TypeError to be raised") + printed_traceback = stdout.getvalue() + self.assertIn("TypeError", printed_traceback) + self.assertNotIn("AttributeError", printed_traceback) + self.assertNotIn("module.__spec__.name", printed_traceback) + + +(Frozen_ReloadTests, + Source_ReloadTests + ) = test_util.test_both( + ReloadTests, init=init, util=util, machinery=machinery) + + +class InvalidateCacheTests: + + def test_method_called(self): + # If defined the method should be called. + class InvalidatingNullFinder: + def __init__(self, *ignored): + self.called = False + def invalidate_caches(self): + self.called = True + + key = os.path.abspath('gobledeegook') + meta_ins = InvalidatingNullFinder() + path_ins = InvalidatingNullFinder() + sys.meta_path.insert(0, meta_ins) + self.addCleanup(lambda: sys.path_importer_cache.__delitem__(key)) + sys.path_importer_cache[key] = path_ins + self.addCleanup(lambda: sys.meta_path.remove(meta_ins)) + self.init.invalidate_caches() + self.assertTrue(meta_ins.called) + self.assertTrue(path_ins.called) + + def test_method_lacking(self): + # There should be no issues if the method is not defined. + key = 'gobbledeegook' + sys.path_importer_cache[key] = None + self.addCleanup(lambda: sys.path_importer_cache.pop(key, None)) + self.init.invalidate_caches() # Shouldn't trigger an exception. + + +(Frozen_InvalidateCacheTests, + Source_InvalidateCacheTests + ) = test_util.test_both( + InvalidateCacheTests, init=init, util=util, machinery=machinery) + + +class FrozenImportlibTests(unittest.TestCase): + + def test_no_frozen_importlib(self): + # Should be able to import w/o _frozen_importlib being defined. + # Can't do an isinstance() check since separate copies of importlib + # may have been used for import, so just check the name is not for the + # frozen loader. + source_init = init['Source'] + self.assertNotEqual(source_init.__loader__.__class__.__name__, + 'FrozenImporter') + + +class StartupTests: + + def test_everyone_has___loader__(self): + # Issue #17098: all modules should have __loader__ defined. + for name, module in sys.modules.items(): + if isinstance(module, types.ModuleType): + with self.subTest(name=name): + self.assertHasAttr(module, '__loader__') + if self.machinery.BuiltinImporter.find_spec(name): + self.assertIsNot(module.__loader__, None) + elif self.machinery.FrozenImporter.find_spec(name): + self.assertIsNot(module.__loader__, None) + + def test_everyone_has___spec__(self): + for name, module in sys.modules.items(): + if isinstance(module, types.ModuleType): + with self.subTest(name=name): + self.assertHasAttr(module, '__spec__') + if self.machinery.BuiltinImporter.find_spec(name): + self.assertIsNot(module.__spec__, None) + elif self.machinery.FrozenImporter.find_spec(name): + self.assertIsNot(module.__spec__, None) + + +(Frozen_StartupTests, + Source_StartupTests + ) = test_util.test_both(StartupTests, machinery=machinery) + + +class TestModuleAll(unittest.TestCase): + def test_machinery(self): + extra = ( + # from importlib._bootstrap and importlib._bootstrap_external + 'AppleFrameworkLoader', + 'BYTECODE_SUFFIXES', + 'BuiltinImporter', + 'DEBUG_BYTECODE_SUFFIXES', + 'EXTENSION_SUFFIXES', + 'ExtensionFileLoader', + 'FileFinder', + 'FrozenImporter', + 'ModuleSpec', + 'NamespaceLoader', + 'OPTIMIZED_BYTECODE_SUFFIXES', + 'PathFinder', + 'SOURCE_SUFFIXES', + 'SourceFileLoader', + 'SourcelessFileLoader', + 'WindowsRegistryFinder', + ) + support.check__all__(self, machinery['Source'], extra=extra) + + def test_util(self): + extra = ( + # from importlib.abc, importlib._bootstrap + # and importlib._bootstrap_external + 'Loader', + 'MAGIC_NUMBER', + 'cache_from_source', + 'decode_source', + 'module_from_spec', + 'source_from_cache', + 'spec_from_file_location', + 'spec_from_loader', + ) + support.check__all__(self, util['Source'], extra=extra) + + +class TestDeprecations(unittest.TestCase): + def test_machinery_deprecated_attributes(self): + from importlib import machinery + attributes = ( + 'DEBUG_BYTECODE_SUFFIXES', + 'OPTIMIZED_BYTECODE_SUFFIXES', + ) + for attr in attributes: + with self.subTest(attr=attr): + with self.assertWarns(DeprecationWarning): + getattr(machinery, attr) + + +if __name__ == '__main__': + unittest.main() diff --git a/stdlib/test/test_importlib/test_lazy.py b/stdlib/test/test_importlib/test_lazy.py new file mode 100644 index 000000000..e48fad889 --- /dev/null +++ b/stdlib/test/test_importlib/test_lazy.py @@ -0,0 +1,229 @@ +import importlib +from importlib import abc +from importlib import util +import sys +import time +import threading +import types +import unittest + +from test.support import threading_helper +from test.test_importlib import util as test_util + + +class CollectInit: + + def __init__(self, *args, **kwargs): + self.args = args + self.kwargs = kwargs + + def exec_module(self, module): + return self + + +class LazyLoaderFactoryTests(unittest.TestCase): + + def test_init(self): + factory = util.LazyLoader.factory(CollectInit) + # E.g. what importlib.machinery.FileFinder instantiates loaders with + # plus keyword arguments. + lazy_loader = factory('module name', 'module path', kw='kw') + loader = lazy_loader.loader + self.assertEqual(('module name', 'module path'), loader.args) + self.assertEqual({'kw': 'kw'}, loader.kwargs) + + def test_validation(self): + # No exec_module(), no lazy loading. + with self.assertRaises(TypeError): + util.LazyLoader.factory(object) + + +class TestingImporter(abc.MetaPathFinder, abc.Loader): + + module_name = 'lazy_loader_test' + mutated_name = 'changed' + loaded = None + load_count = 0 + source_code = 'attr = 42; __name__ = {!r}'.format(mutated_name) + + def find_spec(self, name, path, target=None): + if name != self.module_name: + return None + return util.spec_from_loader(name, util.LazyLoader(self)) + + def exec_module(self, module): + time.sleep(0.01) # Simulate a slow load. + exec(self.source_code, module.__dict__) + self.loaded = module + self.load_count += 1 + + +class LazyLoaderTests(unittest.TestCase): + + def test_init(self): + with self.assertRaises(TypeError): + # Classes that don't define exec_module() trigger TypeError. + util.LazyLoader(object) + + def new_module(self, source_code=None, loader=None): + if loader is None: + loader = TestingImporter() + if source_code is not None: + loader.source_code = source_code + spec = util.spec_from_loader(TestingImporter.module_name, + util.LazyLoader(loader)) + module = spec.loader.create_module(spec) + if module is None: + module = types.ModuleType(TestingImporter.module_name) + module.__spec__ = spec + module.__loader__ = spec.loader + spec.loader.exec_module(module) + # Module is now lazy. + self.assertIsNone(loader.loaded) + return module + + def test_e2e(self): + # End-to-end test to verify the load is in fact lazy. + importer = TestingImporter() + assert importer.loaded is None + with test_util.uncache(importer.module_name): + with test_util.import_state(meta_path=[importer]): + module = importlib.import_module(importer.module_name) + self.assertIsNone(importer.loaded) + # Trigger load. + self.assertEqual(module.__loader__, importer) + self.assertIsNotNone(importer.loaded) + self.assertEqual(module, importer.loaded) + + def test_attr_unchanged(self): + # An attribute only mutated as a side-effect of import should not be + # changed needlessly. + module = self.new_module() + self.assertEqual(TestingImporter.mutated_name, module.__name__) + + def test_new_attr(self): + # A new attribute should persist. + module = self.new_module() + module.new_attr = 42 + self.assertEqual(42, module.new_attr) + + def test_mutated_preexisting_attr(self): + # Changing an attribute that already existed on the module -- + # e.g. __name__ -- should persist. + module = self.new_module() + module.__name__ = 'bogus' + self.assertEqual('bogus', module.__name__) + + def test_mutated_attr(self): + # Changing an attribute that comes into existence after an import + # should persist. + module = self.new_module() + module.attr = 6 + self.assertEqual(6, module.attr) + + def test_delete_eventual_attr(self): + # Deleting an attribute should stay deleted. + module = self.new_module() + del module.attr + self.assertNotHasAttr(module, 'attr') + + def test_delete_preexisting_attr(self): + module = self.new_module() + del module.__name__ + self.assertNotHasAttr(module, '__name__') + + def test_module_substitution_error(self): + with test_util.uncache(TestingImporter.module_name): + fresh_module = types.ModuleType(TestingImporter.module_name) + sys.modules[TestingImporter.module_name] = fresh_module + module = self.new_module() + with self.assertRaisesRegex(ValueError, "substituted"): + module.__name__ + + def test_module_already_in_sys(self): + with test_util.uncache(TestingImporter.module_name): + module = self.new_module() + sys.modules[TestingImporter.module_name] = module + # Force the load; just care that no exception is raised. + module.__name__ + + @threading_helper.requires_working_threading() + def test_module_load_race(self): + with test_util.uncache(TestingImporter.module_name): + loader = TestingImporter() + module = self.new_module(loader=loader) + self.assertEqual(loader.load_count, 0) + + class RaisingThread(threading.Thread): + exc = None + def run(self): + try: + super().run() + except Exception as exc: + self.exc = exc + + def access_module(): + return module.attr + + threads = [] + for _ in range(2): + threads.append(thread := RaisingThread(target=access_module)) + thread.start() + + # Races could cause errors + for thread in threads: + thread.join() + self.assertIsNone(thread.exc) + + # Or multiple load attempts + self.assertEqual(loader.load_count, 1) + + def test_lazy_self_referential_modules(self): + # Directory modules with submodules that reference the parent can attempt to access + # the parent module during a load. Verify that this common pattern works with lazy loading. + # json is a good example in the stdlib. + json_modules = [name for name in sys.modules if name.startswith('json')] + with test_util.uncache(*json_modules): + # Standard lazy loading, unwrapped + spec = util.find_spec('json') + loader = util.LazyLoader(spec.loader) + spec.loader = loader + module = util.module_from_spec(spec) + sys.modules['json'] = module + loader.exec_module(module) + + # Trigger load with attribute lookup, ensure expected behavior + test_load = module.loads('{}') + self.assertEqual(test_load, {}) + + def test_lazy_module_type_override(self): + # Verify that lazy loading works with a module that modifies + # its __class__ to be a custom type. + + # Example module from PEP 726 + module = self.new_module(source_code="""\ +import sys +from types import ModuleType + +CONSTANT = 3.14 + +class ImmutableModule(ModuleType): + def __setattr__(self, name, value): + raise AttributeError('Read-only attribute!') + + def __delattr__(self, name): + raise AttributeError('Read-only attribute!') + +sys.modules[__name__].__class__ = ImmutableModule +""") + sys.modules[TestingImporter.module_name] = module + self.assertIsInstance(module, util._LazyModule) + self.assertEqual(module.CONSTANT, 3.14) + with self.assertRaises(AttributeError): + module.CONSTANT = 2.71 + with self.assertRaises(AttributeError): + del module.CONSTANT + + +if __name__ == '__main__': + unittest.main() diff --git a/stdlib/test/test_importlib/test_locks.py b/stdlib/test/test_importlib/test_locks.py new file mode 100644 index 000000000..655e5881a --- /dev/null +++ b/stdlib/test/test_importlib/test_locks.py @@ -0,0 +1,161 @@ +from test.test_importlib import util as test_util + +init = test_util.import_importlib('importlib') + +import sys +import threading +import unittest +import weakref + +from test import support +from test.support import threading_helper +from test import lock_tests + + +threading_helper.requires_working_threading(module=True) + + +class ModuleLockAsRLockTests: + locktype = classmethod(lambda cls: cls.LockType("some_lock")) + + # _is_owned() unsupported + test__is_owned = None + # acquire(blocking=False) unsupported + test_try_acquire = None + test_try_acquire_contended = None + # `with` unsupported + test_with = None + # acquire(timeout=...) unsupported + test_timeout = None + # _release_save() unsupported + test_release_save_unacquired = None + # _recursion_count() unsupported + test_recursion_count = None + # lock status in repr unsupported + test_repr = None + test_locked_repr = None + test_repr_count = None + + def tearDown(self): + for splitinit in init.values(): + splitinit._bootstrap._blocking_on.clear() + + +LOCK_TYPES = {kind: splitinit._bootstrap._ModuleLock + for kind, splitinit in init.items()} + +(Frozen_ModuleLockAsRLockTests, + Source_ModuleLockAsRLockTests + ) = test_util.test_both(ModuleLockAsRLockTests, lock_tests.RLockTests, + LockType=LOCK_TYPES) + + +class DeadlockAvoidanceTests: + + def setUp(self): + try: + self.old_switchinterval = sys.getswitchinterval() + support.setswitchinterval(0.000001) + except AttributeError: + self.old_switchinterval = None + + def tearDown(self): + if self.old_switchinterval is not None: + sys.setswitchinterval(self.old_switchinterval) + + def run_deadlock_avoidance_test(self, create_deadlock): + NLOCKS = 10 + locks = [self.LockType(str(i)) for i in range(NLOCKS)] + pairs = [(locks[i], locks[(i+1)%NLOCKS]) for i in range(NLOCKS)] + if create_deadlock: + NTHREADS = NLOCKS + else: + NTHREADS = NLOCKS - 1 + barrier = threading.Barrier(NTHREADS) + results = [] + + def _acquire(lock): + """Try to acquire the lock. Return True on success, + False on deadlock.""" + try: + lock.acquire() + except self.DeadlockError: + return False + else: + return True + + def f(): + a, b = pairs.pop() + ra = _acquire(a) + barrier.wait() + rb = _acquire(b) + results.append((ra, rb)) + if rb: + b.release() + if ra: + a.release() + with lock_tests.Bunch(f, NTHREADS): + pass + self.assertEqual(len(results), NTHREADS) + return results + + def test_deadlock(self): + results = self.run_deadlock_avoidance_test(True) + # At least one of the threads detected a potential deadlock on its + # second acquire() call. It may be several of them, because the + # deadlock avoidance mechanism is conservative. + nb_deadlocks = results.count((True, False)) + self.assertGreaterEqual(nb_deadlocks, 1) + self.assertEqual(results.count((True, True)), len(results) - nb_deadlocks) + + def test_no_deadlock(self): + results = self.run_deadlock_avoidance_test(False) + self.assertEqual(results.count((True, False)), 0) + self.assertEqual(results.count((True, True)), len(results)) + + +DEADLOCK_ERRORS = {kind: splitinit._bootstrap._DeadlockError + for kind, splitinit in init.items()} + +(Frozen_DeadlockAvoidanceTests, + Source_DeadlockAvoidanceTests + ) = test_util.test_both(DeadlockAvoidanceTests, + LockType=LOCK_TYPES, + DeadlockError=DEADLOCK_ERRORS) + + +class LifetimeTests: + + @property + def bootstrap(self): + return self.init._bootstrap + + def test_lock_lifetime(self): + name = "xyzzy" + self.assertNotIn(name, self.bootstrap._module_locks) + lock = self.bootstrap._get_module_lock(name) + self.assertIn(name, self.bootstrap._module_locks) + wr = weakref.ref(lock) + del lock + support.gc_collect() + self.assertNotIn(name, self.bootstrap._module_locks) + self.assertIsNone(wr()) + + def test_all_locks(self): + support.gc_collect() + self.assertEqual(0, len(self.bootstrap._module_locks), + self.bootstrap._module_locks) + + +(Frozen_LifetimeTests, + Source_LifetimeTests + ) = test_util.test_both(LifetimeTests, init=init) + + +def setUpModule(): + thread_info = threading_helper.threading_setup() + unittest.addModuleCleanup(threading_helper.threading_cleanup, *thread_info) + + +if __name__ == '__main__': + unittest.main() diff --git a/stdlib/test/test_importlib/test_namespace_pkgs.py b/stdlib/test/test_importlib/test_namespace_pkgs.py new file mode 100644 index 000000000..6ca0978f9 --- /dev/null +++ b/stdlib/test/test_importlib/test_namespace_pkgs.py @@ -0,0 +1,379 @@ +import contextlib +import importlib +import importlib.abc +import importlib.machinery +import os +import sys +import tempfile +import unittest + +from test.test_importlib import util + +# needed tests: +# +# need to test when nested, so that the top-level path isn't sys.path +# need to test dynamic path detection, both at top-level and nested +# with dynamic path, check when a loader is returned on path reload (that is, +# trying to switch from a namespace package to a regular package) + + +@contextlib.contextmanager +def sys_modules_context(): + """ + Make sure sys.modules is the same object and has the same content + when exiting the context as when entering. + + Similar to importlib.test.util.uncache, but doesn't require explicit + names. + """ + sys_modules_saved = sys.modules + sys_modules_copy = sys.modules.copy() + try: + yield + finally: + sys.modules = sys_modules_saved + sys.modules.clear() + sys.modules.update(sys_modules_copy) + + +@contextlib.contextmanager +def namespace_tree_context(**kwargs): + """ + Save import state and sys.modules cache and restore it on exit. + Typical usage: + + >>> with namespace_tree_context(path=['/tmp/xxyy/portion1', + ... '/tmp/xxyy/portion2']): + ... pass + """ + # use default meta_path and path_hooks unless specified otherwise + kwargs.setdefault('meta_path', sys.meta_path) + kwargs.setdefault('path_hooks', sys.path_hooks) + import_context = util.import_state(**kwargs) + with import_context, sys_modules_context(): + yield + +class NamespacePackageTest(unittest.TestCase): + """ + Subclasses should define self.root and self.paths (under that root) + to be added to sys.path. + """ + root = os.path.join(os.path.dirname(__file__), 'namespace_pkgs') + + def setUp(self): + self.resolved_paths = [ + os.path.join(self.root, path) for path in self.paths + ] + self.enterContext(namespace_tree_context(path=self.resolved_paths)) + + +class SingleNamespacePackage(NamespacePackageTest): + paths = ['portion1'] + + def test_simple_package(self): + import foo.one + self.assertEqual(foo.one.attr, 'portion1 foo one') + + def test_cant_import_other(self): + with self.assertRaises(ImportError): + import foo.two + + def test_simple_repr(self): + import foo.one + self.assertStartsWith(repr(foo), "' + + def __getattr__(self, name): + if name == 'get_filename' and self.path is not None: + return self._get_filename + if name == 'is_package': + return self._is_package + raise AttributeError(name) + + def _get_filename(self, name): + return self.path + + def _is_package(self, name): + return self.package + + def create_module(self, spec): + return None + + +class NewLoader(TestLoader): + + EGGS = 1 + + def exec_module(self, module): + module.eggs = self.EGGS + + +class ModuleSpecTests: + + def setUp(self): + self.name = 'spam' + self.path = 'spam.py' + self.cached = self.util.cache_from_source(self.path) + self.loader = TestLoader() + self.spec = self.machinery.ModuleSpec(self.name, self.loader) + self.loc_spec = self.machinery.ModuleSpec(self.name, self.loader, + origin=self.path) + self.loc_spec._set_fileattr = True + + def test_default(self): + spec = self.machinery.ModuleSpec(self.name, self.loader) + + self.assertEqual(spec.name, self.name) + self.assertEqual(spec.loader, self.loader) + self.assertIs(spec.origin, None) + self.assertIs(spec.loader_state, None) + self.assertIs(spec.submodule_search_locations, None) + self.assertIs(spec.cached, None) + self.assertFalse(spec.has_location) + + def test_default_no_loader(self): + spec = self.machinery.ModuleSpec(self.name, None) + + self.assertEqual(spec.name, self.name) + self.assertIs(spec.loader, None) + self.assertIs(spec.origin, None) + self.assertIs(spec.loader_state, None) + self.assertIs(spec.submodule_search_locations, None) + self.assertIs(spec.cached, None) + self.assertFalse(spec.has_location) + + def test_default_is_package_false(self): + spec = self.machinery.ModuleSpec(self.name, self.loader, + is_package=False) + + self.assertEqual(spec.name, self.name) + self.assertEqual(spec.loader, self.loader) + self.assertIs(spec.origin, None) + self.assertIs(spec.loader_state, None) + self.assertIs(spec.submodule_search_locations, None) + self.assertIs(spec.cached, None) + self.assertFalse(spec.has_location) + + def test_default_is_package_true(self): + spec = self.machinery.ModuleSpec(self.name, self.loader, + is_package=True) + + self.assertEqual(spec.name, self.name) + self.assertEqual(spec.loader, self.loader) + self.assertIs(spec.origin, None) + self.assertIs(spec.loader_state, None) + self.assertEqual(spec.submodule_search_locations, []) + self.assertIs(spec.cached, None) + self.assertFalse(spec.has_location) + + def test_has_location_setter(self): + spec = self.machinery.ModuleSpec(self.name, self.loader, + origin='somewhere') + self.assertFalse(spec.has_location) + spec.has_location = True + self.assertTrue(spec.has_location) + + def test_equality(self): + other = type(sys.implementation)(name=self.name, + loader=self.loader, + origin=None, + submodule_search_locations=None, + has_location=False, + cached=None, + ) + + self.assertTrue(self.spec == other) + + def test_equality_location(self): + other = type(sys.implementation)(name=self.name, + loader=self.loader, + origin=self.path, + submodule_search_locations=None, + has_location=True, + cached=self.cached, + ) + + self.assertEqual(self.loc_spec, other) + + def test_inequality(self): + other = type(sys.implementation)(name='ham', + loader=self.loader, + origin=None, + submodule_search_locations=None, + has_location=False, + cached=None, + ) + + self.assertNotEqual(self.spec, other) + + def test_inequality_incomplete(self): + other = type(sys.implementation)(name=self.name, + loader=self.loader, + ) + + self.assertNotEqual(self.spec, other) + + def test_package(self): + spec = self.machinery.ModuleSpec('spam.eggs', self.loader) + + self.assertEqual(spec.parent, 'spam') + + def test_package_is_package(self): + spec = self.machinery.ModuleSpec('spam.eggs', self.loader, + is_package=True) + + self.assertEqual(spec.parent, 'spam.eggs') + + # cached + + def test_cached_set(self): + before = self.spec.cached + self.spec.cached = 'there' + after = self.spec.cached + + self.assertIs(before, None) + self.assertEqual(after, 'there') + + def test_cached_no_origin(self): + spec = self.machinery.ModuleSpec(self.name, self.loader) + + self.assertIs(spec.cached, None) + + def test_cached_with_origin_not_location(self): + spec = self.machinery.ModuleSpec(self.name, self.loader, + origin=self.path) + + self.assertIs(spec.cached, None) + + def test_cached_source(self): + expected = self.util.cache_from_source(self.path) + + self.assertEqual(self.loc_spec.cached, expected) + + def test_cached_source_unknown_suffix(self): + self.loc_spec.origin = 'spam.spamspamspam' + + self.assertIs(self.loc_spec.cached, None) + + def test_cached_source_missing_cache_tag(self): + original = sys.implementation.cache_tag + sys.implementation.cache_tag = None + try: + cached = self.loc_spec.cached + finally: + sys.implementation.cache_tag = original + + self.assertIs(cached, None) + + def test_cached_sourceless(self): + self.loc_spec.origin = 'spam.pyc' + + self.assertEqual(self.loc_spec.cached, 'spam.pyc') + + +(Frozen_ModuleSpecTests, + Source_ModuleSpecTests + ) = test_util.test_both(ModuleSpecTests, util=util, machinery=machinery) + + +class ModuleSpecMethodsTests: + + @property + def bootstrap(self): + return self.init._bootstrap + + def setUp(self): + self.name = 'spam' + self.path = 'spam.py' + self.cached = self.util.cache_from_source(self.path) + self.loader = TestLoader() + self.spec = self.machinery.ModuleSpec(self.name, self.loader) + self.loc_spec = self.machinery.ModuleSpec(self.name, self.loader, + origin=self.path) + self.loc_spec._set_fileattr = True + + # exec() + + def test_exec(self): + self.spec.loader = NewLoader() + module = self.util.module_from_spec(self.spec) + sys.modules[self.name] = module + self.assertNotHasAttr(module, 'eggs') + self.bootstrap._exec(self.spec, module) + + self.assertEqual(module.eggs, 1) + + # load() + + def test_load(self): + self.spec.loader = NewLoader() + with CleanImport(self.spec.name): + loaded = self.bootstrap._load(self.spec) + installed = sys.modules[self.spec.name] + + self.assertEqual(loaded.eggs, 1) + self.assertIs(loaded, installed) + + def test_load_replaced(self): + replacement = object() + class ReplacingLoader(TestLoader): + def exec_module(self, module): + sys.modules[module.__name__] = replacement + self.spec.loader = ReplacingLoader() + with CleanImport(self.spec.name): + loaded = self.bootstrap._load(self.spec) + installed = sys.modules[self.spec.name] + + self.assertIs(loaded, replacement) + self.assertIs(installed, replacement) + + def test_load_failed(self): + class FailedLoader(TestLoader): + def exec_module(self, module): + raise RuntimeError + self.spec.loader = FailedLoader() + with CleanImport(self.spec.name): + with self.assertRaises(RuntimeError): + loaded = self.bootstrap._load(self.spec) + self.assertNotIn(self.spec.name, sys.modules) + + def test_load_failed_removed(self): + class FailedLoader(TestLoader): + def exec_module(self, module): + del sys.modules[module.__name__] + raise RuntimeError + self.spec.loader = FailedLoader() + with CleanImport(self.spec.name): + with self.assertRaises(RuntimeError): + loaded = self.bootstrap._load(self.spec) + self.assertNotIn(self.spec.name, sys.modules) + + def test_load_legacy_attributes_immutable(self): + module = object() + with warnings.catch_warnings(): + warnings.simplefilter("ignore", ImportWarning) + class ImmutableLoader(TestLoader): + def load_module(self, name): + sys.modules[name] = module + return module + self.spec.loader = ImmutableLoader() + with CleanImport(self.spec.name): + loaded = self.bootstrap._load(self.spec) + + self.assertIs(sys.modules[self.spec.name], module) + + # reload() + + def test_reload(self): + self.spec.loader = NewLoader() + with CleanImport(self.spec.name): + loaded = self.bootstrap._load(self.spec) + reloaded = self.bootstrap._exec(self.spec, loaded) + installed = sys.modules[self.spec.name] + + self.assertEqual(loaded.eggs, 1) + self.assertIs(reloaded, loaded) + self.assertIs(installed, loaded) + + def test_reload_modified(self): + self.spec.loader = NewLoader() + with CleanImport(self.spec.name): + loaded = self.bootstrap._load(self.spec) + loaded.eggs = 2 + reloaded = self.bootstrap._exec(self.spec, loaded) + + self.assertEqual(loaded.eggs, 1) + self.assertIs(reloaded, loaded) + + def test_reload_extra_attributes(self): + self.spec.loader = NewLoader() + with CleanImport(self.spec.name): + loaded = self.bootstrap._load(self.spec) + loaded.available = False + reloaded = self.bootstrap._exec(self.spec, loaded) + + self.assertFalse(loaded.available) + self.assertIs(reloaded, loaded) + + def test_reload_init_module_attrs(self): + self.spec.loader = NewLoader() + with CleanImport(self.spec.name): + loaded = self.bootstrap._load(self.spec) + loaded.__name__ = 'ham' + del loaded.__loader__ + del loaded.__package__ + del loaded.__spec__ + self.bootstrap._exec(self.spec, loaded) + + self.assertEqual(loaded.__name__, self.spec.name) + self.assertIs(loaded.__loader__, self.spec.loader) + self.assertEqual(loaded.__package__, self.spec.parent) + self.assertIs(loaded.__spec__, self.spec) + self.assertNotHasAttr(loaded, '__path__') + self.assertNotHasAttr(loaded, '__file__') + self.assertNotHasAttr(loaded, '__cached__') + + +(Frozen_ModuleSpecMethodsTests, + Source_ModuleSpecMethodsTests + ) = test_util.test_both(ModuleSpecMethodsTests, init=init, util=util, + machinery=machinery) + + +class FactoryTests: + + def setUp(self): + self.name = 'spam' + self.path = os.path.abspath('spam.py') + self.cached = self.util.cache_from_source(self.path) + self.loader = TestLoader() + self.fileloader = TestLoader(self.path) + self.pkgloader = TestLoader(self.path, True) + + # spec_from_loader() + + def test_spec_from_loader_default(self): + spec = self.util.spec_from_loader(self.name, self.loader) + + self.assertEqual(spec.name, self.name) + self.assertEqual(spec.loader, self.loader) + self.assertIs(spec.origin, None) + self.assertIs(spec.loader_state, None) + self.assertIs(spec.submodule_search_locations, None) + self.assertIs(spec.cached, None) + self.assertFalse(spec.has_location) + + def test_spec_from_loader_default_with_bad_is_package(self): + class Loader: + def is_package(self, name): + raise ImportError + loader = Loader() + spec = self.util.spec_from_loader(self.name, loader) + + self.assertEqual(spec.name, self.name) + self.assertEqual(spec.loader, loader) + self.assertIs(spec.origin, None) + self.assertIs(spec.loader_state, None) + self.assertIs(spec.submodule_search_locations, None) + self.assertIs(spec.cached, None) + self.assertFalse(spec.has_location) + + def test_spec_from_loader_origin(self): + origin = 'somewhere over the rainbow' + spec = self.util.spec_from_loader(self.name, self.loader, + origin=origin) + + self.assertEqual(spec.name, self.name) + self.assertEqual(spec.loader, self.loader) + self.assertIs(spec.origin, origin) + self.assertIs(spec.loader_state, None) + self.assertIs(spec.submodule_search_locations, None) + self.assertIs(spec.cached, None) + self.assertFalse(spec.has_location) + + def test_spec_from_loader_is_package_false(self): + spec = self.util.spec_from_loader(self.name, self.loader, + is_package=False) + + self.assertEqual(spec.name, self.name) + self.assertEqual(spec.loader, self.loader) + self.assertIs(spec.origin, None) + self.assertIs(spec.loader_state, None) + self.assertIs(spec.submodule_search_locations, None) + self.assertIs(spec.cached, None) + self.assertFalse(spec.has_location) + + def test_spec_from_loader_is_package_true(self): + spec = self.util.spec_from_loader(self.name, self.loader, + is_package=True) + + self.assertEqual(spec.name, self.name) + self.assertEqual(spec.loader, self.loader) + self.assertIs(spec.origin, None) + self.assertIs(spec.loader_state, None) + self.assertEqual(spec.submodule_search_locations, []) + self.assertIs(spec.cached, None) + self.assertFalse(spec.has_location) + + def test_spec_from_loader_origin_and_is_package(self): + origin = 'where the streets have no name' + spec = self.util.spec_from_loader(self.name, self.loader, + origin=origin, is_package=True) + + self.assertEqual(spec.name, self.name) + self.assertEqual(spec.loader, self.loader) + self.assertIs(spec.origin, origin) + self.assertIs(spec.loader_state, None) + self.assertEqual(spec.submodule_search_locations, []) + self.assertIs(spec.cached, None) + self.assertFalse(spec.has_location) + + def test_spec_from_loader_is_package_with_loader_false(self): + loader = TestLoader(is_package=False) + spec = self.util.spec_from_loader(self.name, loader) + + self.assertEqual(spec.name, self.name) + self.assertEqual(spec.loader, loader) + self.assertIs(spec.origin, None) + self.assertIs(spec.loader_state, None) + self.assertIs(spec.submodule_search_locations, None) + self.assertIs(spec.cached, None) + self.assertFalse(spec.has_location) + + def test_spec_from_loader_is_package_with_loader_true(self): + loader = TestLoader(is_package=True) + spec = self.util.spec_from_loader(self.name, loader) + + self.assertEqual(spec.name, self.name) + self.assertEqual(spec.loader, loader) + self.assertIs(spec.origin, None) + self.assertIs(spec.loader_state, None) + self.assertEqual(spec.submodule_search_locations, []) + self.assertIs(spec.cached, None) + self.assertFalse(spec.has_location) + + def test_spec_from_loader_default_with_file_loader(self): + spec = self.util.spec_from_loader(self.name, self.fileloader) + + self.assertEqual(spec.name, self.name) + self.assertEqual(spec.loader, self.fileloader) + self.assertEqual(spec.origin, self.path) + self.assertIs(spec.loader_state, None) + self.assertIs(spec.submodule_search_locations, None) + self.assertEqual(spec.cached, self.cached) + self.assertTrue(spec.has_location) + + def test_spec_from_loader_is_package_false_with_fileloader(self): + spec = self.util.spec_from_loader(self.name, self.fileloader, + is_package=False) + + self.assertEqual(spec.name, self.name) + self.assertEqual(spec.loader, self.fileloader) + self.assertEqual(spec.origin, self.path) + self.assertIs(spec.loader_state, None) + self.assertIs(spec.submodule_search_locations, None) + self.assertEqual(spec.cached, self.cached) + self.assertTrue(spec.has_location) + + def test_spec_from_loader_is_package_true_with_fileloader(self): + spec = self.util.spec_from_loader(self.name, self.fileloader, + is_package=True) + + self.assertEqual(spec.name, self.name) + self.assertEqual(spec.loader, self.fileloader) + self.assertEqual(spec.origin, self.path) + self.assertIs(spec.loader_state, None) + location = cwd if (cwd := os.getcwd()) != '/' else '' + self.assertEqual(spec.submodule_search_locations, [location]) + self.assertEqual(spec.cached, self.cached) + self.assertTrue(spec.has_location) + + # spec_from_file_location() + + def test_spec_from_file_location_default(self): + spec = self.util.spec_from_file_location(self.name, self.path) + + self.assertEqual(spec.name, self.name) + # Need to use a circuitous route to get at importlib.machinery to make + # sure the same class object is used in the isinstance() check as + # would have been used to create the loader. + SourceFileLoader = self.util.spec_from_file_location.__globals__['SourceFileLoader'] + self.assertIsInstance(spec.loader, SourceFileLoader) + self.assertEqual(spec.loader.name, self.name) + self.assertEqual(spec.loader.path, self.path) + self.assertEqual(spec.origin, self.path) + self.assertIs(spec.loader_state, None) + self.assertIs(spec.submodule_search_locations, None) + self.assertEqual(spec.cached, self.cached) + self.assertTrue(spec.has_location) + + def test_spec_from_file_location_path_like_arg(self): + spec = self.util.spec_from_file_location(self.name, + pathlib.PurePath(self.path)) + self.assertEqual(spec.origin, self.path) + + def test_spec_from_file_location_default_without_location(self): + spec = self.util.spec_from_file_location(self.name) + + self.assertIs(spec, None) + + def test_spec_from_file_location_default_bad_suffix(self): + spec = self.util.spec_from_file_location(self.name, 'spam.eggs') + + self.assertIs(spec, None) + + def test_spec_from_file_location_loader_no_location(self): + spec = self.util.spec_from_file_location(self.name, + loader=self.fileloader) + + self.assertEqual(spec.name, self.name) + self.assertEqual(spec.loader, self.fileloader) + self.assertEqual(spec.origin, self.path) + self.assertIs(spec.loader_state, None) + self.assertIs(spec.submodule_search_locations, None) + self.assertEqual(spec.cached, self.cached) + self.assertTrue(spec.has_location) + + def test_spec_from_file_location_loader_no_location_no_get_filename(self): + spec = self.util.spec_from_file_location(self.name, + loader=self.loader) + + self.assertEqual(spec.name, self.name) + self.assertEqual(spec.loader, self.loader) + self.assertEqual(spec.origin, '') + self.assertIs(spec.loader_state, None) + self.assertIs(spec.submodule_search_locations, None) + self.assertIs(spec.cached, None) + self.assertTrue(spec.has_location) + + def test_spec_from_file_location_loader_no_location_bad_get_filename(self): + class Loader: + def get_filename(self, name): + raise ImportError + loader = Loader() + spec = self.util.spec_from_file_location(self.name, loader=loader) + + self.assertEqual(spec.name, self.name) + self.assertEqual(spec.loader, loader) + self.assertEqual(spec.origin, '') + self.assertIs(spec.loader_state, None) + self.assertIs(spec.submodule_search_locations, None) + self.assertIs(spec.cached, None) + self.assertTrue(spec.has_location) + + def test_spec_from_file_location_smsl_none(self): + spec = self.util.spec_from_file_location(self.name, self.path, + loader=self.fileloader, + submodule_search_locations=None) + + self.assertEqual(spec.name, self.name) + self.assertEqual(spec.loader, self.fileloader) + self.assertEqual(spec.origin, self.path) + self.assertIs(spec.loader_state, None) + self.assertIs(spec.submodule_search_locations, None) + self.assertEqual(spec.cached, self.cached) + self.assertTrue(spec.has_location) + + def test_spec_from_file_location_smsl_empty(self): + spec = self.util.spec_from_file_location(self.name, self.path, + loader=self.fileloader, + submodule_search_locations=[]) + + self.assertEqual(spec.name, self.name) + self.assertEqual(spec.loader, self.fileloader) + self.assertEqual(spec.origin, self.path) + self.assertIs(spec.loader_state, None) + location = cwd if (cwd := os.getcwd()) != '/' else '' + self.assertEqual(spec.submodule_search_locations, [location]) + self.assertEqual(spec.cached, self.cached) + self.assertTrue(spec.has_location) + + def test_spec_from_file_location_smsl_not_empty(self): + spec = self.util.spec_from_file_location(self.name, self.path, + loader=self.fileloader, + submodule_search_locations=['eggs']) + + self.assertEqual(spec.name, self.name) + self.assertEqual(spec.loader, self.fileloader) + self.assertEqual(spec.origin, self.path) + self.assertIs(spec.loader_state, None) + self.assertEqual(spec.submodule_search_locations, ['eggs']) + self.assertEqual(spec.cached, self.cached) + self.assertTrue(spec.has_location) + + def test_spec_from_file_location_smsl_default(self): + spec = self.util.spec_from_file_location(self.name, self.path, + loader=self.pkgloader) + + self.assertEqual(spec.name, self.name) + self.assertEqual(spec.loader, self.pkgloader) + self.assertEqual(spec.origin, self.path) + self.assertIs(spec.loader_state, None) + location = cwd if (cwd := os.getcwd()) != '/' else '' + self.assertEqual(spec.submodule_search_locations, [location]) + self.assertEqual(spec.cached, self.cached) + self.assertTrue(spec.has_location) + + def test_spec_from_file_location_smsl_default_not_package(self): + class Loader: + def is_package(self, name): + return False + loader = Loader() + spec = self.util.spec_from_file_location(self.name, self.path, + loader=loader) + + self.assertEqual(spec.name, self.name) + self.assertEqual(spec.loader, loader) + self.assertEqual(spec.origin, self.path) + self.assertIs(spec.loader_state, None) + self.assertIs(spec.submodule_search_locations, None) + self.assertEqual(spec.cached, self.cached) + self.assertTrue(spec.has_location) + + def test_spec_from_file_location_smsl_default_no_is_package(self): + spec = self.util.spec_from_file_location(self.name, self.path, + loader=self.fileloader) + + self.assertEqual(spec.name, self.name) + self.assertEqual(spec.loader, self.fileloader) + self.assertEqual(spec.origin, self.path) + self.assertIs(spec.loader_state, None) + self.assertIs(spec.submodule_search_locations, None) + self.assertEqual(spec.cached, self.cached) + self.assertTrue(spec.has_location) + + def test_spec_from_file_location_smsl_default_bad_is_package(self): + class Loader: + def is_package(self, name): + raise ImportError + loader = Loader() + spec = self.util.spec_from_file_location(self.name, self.path, + loader=loader) + + self.assertEqual(spec.name, self.name) + self.assertEqual(spec.loader, loader) + self.assertEqual(spec.origin, self.path) + self.assertIs(spec.loader_state, None) + self.assertIs(spec.submodule_search_locations, None) + self.assertEqual(spec.cached, self.cached) + self.assertTrue(spec.has_location) + + def test_spec_from_file_location_relative_path(self): + spec = self.util.spec_from_file_location(self.name, + os.path.basename(self.path), loader=self.fileloader) + + self.assertEqual(spec.name, self.name) + self.assertEqual(spec.loader, self.fileloader) + self.assertEqual(spec.origin, self.path) + self.assertIs(spec.loader_state, None) + self.assertIs(spec.submodule_search_locations, None) + self.assertEqual(spec.cached, self.cached) + self.assertTrue(spec.has_location) + +(Frozen_FactoryTests, + Source_FactoryTests + ) = test_util.test_both(FactoryTests, util=util, machinery=machinery) + + +if __name__ == '__main__': + unittest.main() diff --git a/stdlib/test/test_importlib/test_threaded_import.py b/stdlib/test/test_importlib/test_threaded_import.py new file mode 100644 index 000000000..8b793ebf2 --- /dev/null +++ b/stdlib/test/test_importlib/test_threaded_import.py @@ -0,0 +1,340 @@ +# This is a variant of the very old (early 90's) file +# Demo/threads/bug.py. It simply provokes a number of threads into +# trying to import the same module "at the same time". +# There are no pleasant failure modes -- most likely is that Python +# complains several times about module random having no attribute +# randrange, and then Python hangs. + +import _imp as imp +import os +import importlib +import sys +import time +import shutil +import threading +import unittest +from test import support +from test.support import verbose +from test.support.import_helper import forget, mock_register_at_fork +from test.support.os_helper import (TESTFN, unlink, rmtree) +from test.support import script_helper, threading_helper + +threading_helper.requires_working_threading(module=True) + +def task(N, done, done_tasks, errors): + try: + # We don't use modulefinder but still import it in order to stress + # importing of different modules from several threads. + if len(done_tasks) % 2: + import modulefinder + import random + else: + import random + import modulefinder + # This will fail if random is not completely initialized + x = random.randrange(1, 3) + except Exception as e: + errors.append(e.with_traceback(None)) + finally: + done_tasks.append(threading.get_ident()) + finished = len(done_tasks) == N + if finished: + done.set() + +# Create a circular import structure: A -> C -> B -> D -> A +# NOTE: `time` is already loaded and therefore doesn't threaten to deadlock. + +circular_imports_modules = { + 'A': """if 1: + import time + time.sleep(%(delay)s) + x = 'a' + import C + """, + 'B': """if 1: + import time + time.sleep(%(delay)s) + x = 'b' + import D + """, + 'C': """import B""", + 'D': """import A""", +} + +class Finder: + """A dummy finder to detect concurrent access to its find_spec() + method.""" + + def __init__(self): + self.numcalls = 0 + self.x = 0 + self.lock = threading.Lock() + + def find_spec(self, name, path=None, target=None): + # Simulate some thread-unsafe behaviour. If calls to find_spec() + # are properly serialized, `x` will end up the same as `numcalls`. + # Otherwise not. + assert imp.lock_held() + with self.lock: + self.numcalls += 1 + x = self.x + time.sleep(0.01) + self.x = x + 1 + +class FlushingFinder: + """A dummy finder which flushes sys.path_importer_cache when it gets + called.""" + + def find_spec(self, name, path=None, target=None): + sys.path_importer_cache.clear() + + +class ThreadedImportTests(unittest.TestCase): + + def setUp(self): + self.old_random = sys.modules.pop('random', None) + + def tearDown(self): + # If the `random` module was already initialized, we restore the + # old module at the end so that pickling tests don't fail. + # See http://bugs.python.org/issue3657#msg110461 + if self.old_random is not None: + sys.modules['random'] = self.old_random + + @mock_register_at_fork + def check_parallel_module_init(self, mock_os): + if imp.lock_held(): + # This triggers on, e.g., from test import autotest. + raise unittest.SkipTest("can't run when import lock is held") + + done = threading.Event() + for N in (20, 50) * 3: + if verbose: + print("Trying", N, "threads ...", end=' ') + # Make sure that random and modulefinder get reimported freshly + for modname in ['random', 'modulefinder']: + try: + del sys.modules[modname] + except KeyError: + pass + errors = [] + done_tasks = [] + done.clear() + t0 = time.monotonic() + with threading_helper.start_threads( + threading.Thread(target=task, args=(N, done, done_tasks, errors,)) + for i in range(N)): + pass + completed = done.wait(10 * 60) + dt = time.monotonic() - t0 + if verbose: + print("%.1f ms" % (dt*1e3), flush=True, end=" ") + dbg_info = 'done: %s/%s' % (len(done_tasks), N) + self.assertFalse(errors, dbg_info) + self.assertTrue(completed, dbg_info) + if verbose: + print("OK.") + + @support.bigmemtest(size=50, memuse=76*2**20, dry_run=False) + def test_parallel_module_init(self, size): + self.check_parallel_module_init() + + @support.bigmemtest(size=50, memuse=76*2**20, dry_run=False) + def test_parallel_meta_path(self, size): + finder = Finder() + sys.meta_path.insert(0, finder) + try: + self.check_parallel_module_init() + self.assertGreater(finder.numcalls, 0) + self.assertEqual(finder.x, finder.numcalls) + finally: + sys.meta_path.remove(finder) + + @support.bigmemtest(size=50, memuse=76*2**20, dry_run=False) + def test_parallel_path_hooks(self, size): + # Here the Finder instance is only used to check concurrent calls + # to path_hook(). + finder = Finder() + # In order for our path hook to be called at each import, we need + # to flush the path_importer_cache, which we do by registering a + # dedicated meta_path entry. + flushing_finder = FlushingFinder() + def path_hook(path): + finder.find_spec('') + raise ImportError + sys.path_hooks.insert(0, path_hook) + sys.meta_path.append(flushing_finder) + try: + # Flush the cache a first time + flushing_finder.find_spec('') + numtests = self.check_parallel_module_init() + self.assertGreater(finder.numcalls, 0) + self.assertEqual(finder.x, finder.numcalls) + finally: + sys.meta_path.remove(flushing_finder) + sys.path_hooks.remove(path_hook) + + def test_import_hangers(self): + # In case this test is run again, make sure the helper module + # gets loaded from scratch again. + try: + del sys.modules['test.test_importlib.threaded_import_hangers'] + except KeyError: + pass + import test.test_importlib.threaded_import_hangers + self.assertFalse(test.test_importlib.threaded_import_hangers.errors) + + def test_circular_imports(self): + # The goal of this test is to exercise implementations of the import + # lock which use a per-module lock, rather than a global lock. + # In these implementations, there is a possible deadlock with + # circular imports, for example: + # - thread 1 imports A (grabbing the lock for A) which imports B + # - thread 2 imports B (grabbing the lock for B) which imports A + # Such implementations should be able to detect such situations and + # resolve them one way or the other, without freezing. + # NOTE: our test constructs a slightly less trivial import cycle, + # in order to better stress the deadlock avoidance mechanism. + delay = 0.5 + os.mkdir(TESTFN) + self.addCleanup(shutil.rmtree, TESTFN) + sys.path.insert(0, TESTFN) + self.addCleanup(sys.path.remove, TESTFN) + for name, contents in circular_imports_modules.items(): + contents = contents % {'delay': delay} + with open(os.path.join(TESTFN, name + ".py"), "wb") as f: + f.write(contents.encode('utf-8')) + self.addCleanup(forget, name) + + importlib.invalidate_caches() + results = [] + def import_ab(): + import A + results.append(getattr(A, 'x', None)) + def import_ba(): + import B + results.append(getattr(B, 'x', None)) + t1 = threading.Thread(target=import_ab) + t2 = threading.Thread(target=import_ba) + t1.start() + t2.start() + t1.join() + t2.join() + self.assertEqual(set(results), {'a', 'b'}) + + @mock_register_at_fork + def test_side_effect_import(self, mock_os): + code = """if 1: + import threading + def target(): + import random + t = threading.Thread(target=target) + t.start() + t.join() + t = None""" + sys.path.insert(0, os.curdir) + self.addCleanup(sys.path.remove, os.curdir) + filename = TESTFN + ".py" + with open(filename, "wb") as f: + f.write(code.encode('utf-8')) + self.addCleanup(unlink, filename) + self.addCleanup(forget, TESTFN) + self.addCleanup(rmtree, '__pycache__') + importlib.invalidate_caches() + with threading_helper.wait_threads_exit(): + __import__(TESTFN) + del sys.modules[TESTFN] + + @support.bigmemtest(size=1, memuse=1.8*2**30, dry_run=False) + def test_concurrent_futures_circular_import(self, size): + # Regression test for bpo-43515 + fn = os.path.join(os.path.dirname(__file__), + 'partial', 'cfimport.py') + script_helper.assert_python_ok(fn) + + @support.bigmemtest(size=1, memuse=1.8*2**30, dry_run=False) + def test_multiprocessing_pool_circular_import(self, size): + # Regression test for bpo-41567 + fn = os.path.join(os.path.dirname(__file__), + 'partial', 'pool_in_threads.py') + script_helper.assert_python_ok(fn) + + def test_import_failure_race_condition(self): + # Regression test for race condition where a thread could receive + # a partially-initialized module when another thread's import fails. + # The race occurs when: + # 1. Thread 1 starts importing, adds module to sys.modules + # 2. Thread 2 sees the module in sys.modules + # 3. Thread 1's import fails, removes module from sys.modules + # 4. Thread 2 should NOT return the stale module reference + os.mkdir(TESTFN) + self.addCleanup(shutil.rmtree, TESTFN) + sys.path.insert(0, TESTFN) + self.addCleanup(sys.path.remove, TESTFN) + + # Create a module that partially initializes then fails + modname = 'failing_import_module' + with open(os.path.join(TESTFN, modname + '.py'), 'w') as f: + f.write(''' +import time +PARTIAL_ATTR = 'initialized' +time.sleep(0.05) # Widen race window +raise RuntimeError("Intentional import failure") +''') + self.addCleanup(forget, modname) + importlib.invalidate_caches() + + errors = [] + results = [] + + def do_import(delay=0): + time.sleep(delay) + try: + mod = __import__(modname) + # If we got a module, verify it's in sys.modules + if modname not in sys.modules: + errors.append( + f"Got module {mod!r} but {modname!r} not in sys.modules" + ) + elif sys.modules[modname] is not mod: + errors.append( + f"Got different module than sys.modules[{modname!r}]" + ) + else: + results.append(('success', mod)) + except RuntimeError: + results.append(('RuntimeError',)) + except Exception as e: + errors.append(f"Unexpected exception: {e}") + + # Run multiple iterations to increase chance of hitting the race + for _ in range(10): + errors.clear() + results.clear() + if modname in sys.modules: + del sys.modules[modname] + + t1 = threading.Thread(target=do_import, args=(0,)) + t2 = threading.Thread(target=do_import, args=(0.01,)) + t1.start() + t2.start() + t1.join() + t2.join() + + # Neither thread should have errors about stale modules + self.assertEqual(errors, [], f"Race condition detected: {errors}") + + +def setUpModule(): + thread_info = threading_helper.threading_setup() + unittest.addModuleCleanup(threading_helper.threading_cleanup, *thread_info) + try: + old_switchinterval = sys.getswitchinterval() + unittest.addModuleCleanup(sys.setswitchinterval, old_switchinterval) + support.setswitchinterval(1e-5) + except AttributeError: + pass + + +if __name__ == "__main__": + unittest.main() diff --git a/stdlib/test/test_importlib/test_util.py b/stdlib/test/test_importlib/test_util.py new file mode 100644 index 000000000..8c14b9627 --- /dev/null +++ b/stdlib/test/test_importlib/test_util.py @@ -0,0 +1,861 @@ +from test.test_importlib import util + +abc = util.import_importlib('importlib.abc') +init = util.import_importlib('importlib') +machinery = util.import_importlib('importlib.machinery') +importlib_util = util.import_importlib('importlib.util') + +import importlib.util +from importlib import _bootstrap_external +import os +import pathlib +import string +import sys +from test import support +from test.support import os_helper +import textwrap +import types +import unittest +import unittest.mock +import warnings + +try: + import _testsinglephase +except ImportError: + _testsinglephase = None +try: + import _testmultiphase +except ImportError: + _testmultiphase = None +try: + import _interpreters +except ModuleNotFoundError: + _interpreters = None + + +class DecodeSourceBytesTests: + + source = "string ='ü'" + + def test_ut8_default(self): + source_bytes = self.source.encode('utf-8') + self.assertEqual(self.util.decode_source(source_bytes), self.source) + + def test_specified_encoding(self): + source = '# coding=latin-1\n' + self.source + source_bytes = source.encode('latin-1') + assert source_bytes != source.encode('utf-8') + self.assertEqual(self.util.decode_source(source_bytes), source) + + def test_universal_newlines(self): + source = '\r\n'.join([self.source, self.source]) + source_bytes = source.encode('utf-8') + self.assertEqual(self.util.decode_source(source_bytes), + '\n'.join([self.source, self.source])) + + +(Frozen_DecodeSourceBytesTests, + Source_DecodeSourceBytesTests + ) = util.test_both(DecodeSourceBytesTests, util=importlib_util) + + +class ModuleFromSpecTests: + + def test_no_create_module(self): + class Loader: + def exec_module(self, module): + pass + spec = self.machinery.ModuleSpec('test', Loader()) + with self.assertRaises(ImportError): + module = self.util.module_from_spec(spec) + + def test_create_module_returns_None(self): + class Loader(self.abc.Loader): + def create_module(self, spec): + return None + spec = self.machinery.ModuleSpec('test', Loader()) + module = self.util.module_from_spec(spec) + self.assertIsInstance(module, types.ModuleType) + self.assertEqual(module.__name__, spec.name) + + def test_create_module(self): + name = 'already set' + class CustomModule(types.ModuleType): + pass + class Loader(self.abc.Loader): + def create_module(self, spec): + module = CustomModule(spec.name) + module.__name__ = name + return module + spec = self.machinery.ModuleSpec('test', Loader()) + module = self.util.module_from_spec(spec) + self.assertIsInstance(module, CustomModule) + self.assertEqual(module.__name__, name) + + def test___name__(self): + spec = self.machinery.ModuleSpec('test', object()) + module = self.util.module_from_spec(spec) + self.assertEqual(module.__name__, spec.name) + + def test___spec__(self): + spec = self.machinery.ModuleSpec('test', object()) + module = self.util.module_from_spec(spec) + self.assertEqual(module.__spec__, spec) + + def test___loader__(self): + loader = object() + spec = self.machinery.ModuleSpec('test', loader) + module = self.util.module_from_spec(spec) + self.assertIs(module.__loader__, loader) + + def test___package__(self): + spec = self.machinery.ModuleSpec('test.pkg', object()) + module = self.util.module_from_spec(spec) + self.assertEqual(module.__package__, spec.parent) + + def test___path__(self): + spec = self.machinery.ModuleSpec('test', object(), is_package=True) + module = self.util.module_from_spec(spec) + self.assertEqual(module.__path__, spec.submodule_search_locations) + + def test___file__(self): + spec = self.machinery.ModuleSpec('test', object(), origin='some/path') + spec.has_location = True + module = self.util.module_from_spec(spec) + self.assertEqual(module.__file__, spec.origin) + + def test___cached__(self): + spec = self.machinery.ModuleSpec('test', object()) + spec.cached = 'some/path' + spec.has_location = True + module = self.util.module_from_spec(spec) + self.assertEqual(module.__cached__, spec.cached) + +(Frozen_ModuleFromSpecTests, + Source_ModuleFromSpecTests +) = util.test_both(ModuleFromSpecTests, abc=abc, machinery=machinery, + util=importlib_util) + + +class ResolveNameTests: + + """Tests importlib.util.resolve_name().""" + + def test_absolute(self): + # bacon + self.assertEqual('bacon', self.util.resolve_name('bacon', None)) + + def test_absolute_within_package(self): + # bacon in spam + self.assertEqual('bacon', self.util.resolve_name('bacon', 'spam')) + + def test_no_package(self): + # .bacon in '' + with self.assertRaises(ImportError): + self.util.resolve_name('.bacon', '') + + def test_in_package(self): + # .bacon in spam + self.assertEqual('spam.eggs.bacon', + self.util.resolve_name('.bacon', 'spam.eggs')) + + def test_other_package(self): + # ..bacon in spam.bacon + self.assertEqual('spam.bacon', + self.util.resolve_name('..bacon', 'spam.eggs')) + + def test_escape(self): + # ..bacon in spam + with self.assertRaises(ImportError): + self.util.resolve_name('..bacon', 'spam') + + +(Frozen_ResolveNameTests, + Source_ResolveNameTests + ) = util.test_both(ResolveNameTests, util=importlib_util) + + +class FindSpecTests: + + class FakeMetaFinder: + @staticmethod + def find_spec(name, path=None, target=None): return name, path, target + + def test_sys_modules(self): + name = 'some_mod' + with util.uncache(name): + module = types.ModuleType(name) + loader = 'a loader!' + spec = self.machinery.ModuleSpec(name, loader) + module.__loader__ = loader + module.__spec__ = spec + sys.modules[name] = module + found = self.util.find_spec(name) + self.assertEqual(found, spec) + + def test_sys_modules_without___loader__(self): + name = 'some_mod' + with util.uncache(name): + module = types.ModuleType(name) + del module.__loader__ + loader = 'a loader!' + spec = self.machinery.ModuleSpec(name, loader) + module.__spec__ = spec + sys.modules[name] = module + found = self.util.find_spec(name) + self.assertEqual(found, spec) + + def test_sys_modules_spec_is_None(self): + name = 'some_mod' + with util.uncache(name): + module = types.ModuleType(name) + module.__spec__ = None + sys.modules[name] = module + with self.assertRaises(ValueError): + self.util.find_spec(name) + + def test_sys_modules_loader_is_None(self): + name = 'some_mod' + with util.uncache(name): + module = types.ModuleType(name) + spec = self.machinery.ModuleSpec(name, None) + module.__spec__ = spec + sys.modules[name] = module + found = self.util.find_spec(name) + self.assertEqual(found, spec) + + def test_sys_modules_spec_is_not_set(self): + name = 'some_mod' + with util.uncache(name): + module = types.ModuleType(name) + try: + del module.__spec__ + except AttributeError: + pass + sys.modules[name] = module + with self.assertRaises(ValueError): + self.util.find_spec(name) + + def test_success(self): + name = 'some_mod' + with util.uncache(name): + with util.import_state(meta_path=[self.FakeMetaFinder]): + self.assertEqual((name, None, None), + self.util.find_spec(name)) + + def test_nothing(self): + # None is returned upon failure to find a loader. + self.assertIsNone(self.util.find_spec('nevergoingtofindthismodule')) + + def test_find_submodule(self): + name = 'spam' + subname = 'ham' + with util.temp_module(name, pkg=True) as pkg_dir: + fullname, _ = util.submodule(name, subname, pkg_dir) + spec = self.util.find_spec(fullname) + self.assertIsNot(spec, None) + self.assertIn(name, sorted(sys.modules)) + self.assertNotIn(fullname, sorted(sys.modules)) + # Ensure successive calls behave the same. + spec_again = self.util.find_spec(fullname) + self.assertEqual(spec_again, spec) + + def test_find_submodule_parent_already_imported(self): + name = 'spam' + subname = 'ham' + with util.temp_module(name, pkg=True) as pkg_dir: + self.init.import_module(name) + fullname, _ = util.submodule(name, subname, pkg_dir) + spec = self.util.find_spec(fullname) + self.assertIsNot(spec, None) + self.assertIn(name, sorted(sys.modules)) + self.assertNotIn(fullname, sorted(sys.modules)) + # Ensure successive calls behave the same. + spec_again = self.util.find_spec(fullname) + self.assertEqual(spec_again, spec) + + def test_find_relative_module(self): + name = 'spam' + subname = 'ham' + with util.temp_module(name, pkg=True) as pkg_dir: + fullname, _ = util.submodule(name, subname, pkg_dir) + relname = '.' + subname + spec = self.util.find_spec(relname, name) + self.assertIsNot(spec, None) + self.assertIn(name, sorted(sys.modules)) + self.assertNotIn(fullname, sorted(sys.modules)) + # Ensure successive calls behave the same. + spec_again = self.util.find_spec(fullname) + self.assertEqual(spec_again, spec) + + def test_find_relative_module_missing_package(self): + name = 'spam' + subname = 'ham' + with util.temp_module(name, pkg=True) as pkg_dir: + fullname, _ = util.submodule(name, subname, pkg_dir) + relname = '.' + subname + with self.assertRaises(ImportError): + self.util.find_spec(relname) + self.assertNotIn(name, sorted(sys.modules)) + self.assertNotIn(fullname, sorted(sys.modules)) + + def test_find_submodule_in_module(self): + # ModuleNotFoundError raised when a module is specified as + # a parent instead of a package. + with self.assertRaises(ModuleNotFoundError): + self.util.find_spec('module.name') + + +(Frozen_FindSpecTests, + Source_FindSpecTests + ) = util.test_both(FindSpecTests, init=init, util=importlib_util, + machinery=machinery) + + +class MagicNumberTests: + + def test_length(self): + # Should be 4 bytes. + self.assertEqual(len(self.util.MAGIC_NUMBER), 4) + + def test_incorporates_rn(self): + # The magic number uses \r\n to come out wrong when splitting on lines. + self.assertEndsWith(self.util.MAGIC_NUMBER, b'\r\n') + + +(Frozen_MagicNumberTests, + Source_MagicNumberTests + ) = util.test_both(MagicNumberTests, util=importlib_util) + + +class PEP3147Tests: + + """Tests of PEP 3147-related functions: cache_from_source and source_from_cache.""" + + tag = sys.implementation.cache_tag + + @unittest.skipIf(sys.implementation.cache_tag is None, + 'requires sys.implementation.cache_tag not be None') + def test_cache_from_source(self): + # Given the path to a .py file, return the path to its PEP 3147 + # defined .pyc file (i.e. under __pycache__). + path = os.path.join('foo', 'bar', 'baz', 'qux.py') + expect = os.path.join('foo', 'bar', 'baz', '__pycache__', + 'qux.{}.pyc'.format(self.tag)) + self.assertEqual(self.util.cache_from_source(path, optimization=''), + expect) + + def test_cache_from_source_no_cache_tag(self): + # No cache tag means NotImplementedError. + with support.swap_attr(sys.implementation, 'cache_tag', None): + with self.assertRaises(NotImplementedError): + self.util.cache_from_source('whatever.py') + + def test_cache_from_source_no_dot(self): + # Directory with a dot, filename without dot. + path = os.path.join('foo.bar', 'file') + expect = os.path.join('foo.bar', '__pycache__', + 'file{}.pyc'.format(self.tag)) + self.assertEqual(self.util.cache_from_source(path, optimization=''), + expect) + + def test_cache_from_source_debug_override(self): + # Given the path to a .py file, return the path to its PEP 3147/PEP 488 + # defined .pyc file (i.e. under __pycache__). + path = os.path.join('foo', 'bar', 'baz', 'qux.py') + with warnings.catch_warnings(): + warnings.simplefilter('ignore') + self.assertEqual(self.util.cache_from_source(path, False), + self.util.cache_from_source(path, optimization=1)) + self.assertEqual(self.util.cache_from_source(path, True), + self.util.cache_from_source(path, optimization='')) + with warnings.catch_warnings(): + warnings.simplefilter('error') + with self.assertRaises(DeprecationWarning): + self.util.cache_from_source(path, False) + with self.assertRaises(DeprecationWarning): + self.util.cache_from_source(path, True) + + def test_cache_from_source_cwd(self): + path = 'foo.py' + expect = os.path.join('__pycache__', 'foo.{}.pyc'.format(self.tag)) + self.assertEqual(self.util.cache_from_source(path, optimization=''), + expect) + + def test_cache_from_source_override(self): + # When debug_override is not None, it can be any true-ish or false-ish + # value. + path = os.path.join('foo', 'bar', 'baz.py') + # However if the bool-ishness can't be determined, the exception + # propagates. + class Bearish: + def __bool__(self): raise RuntimeError + with warnings.catch_warnings(): + warnings.simplefilter('ignore') + self.assertEqual(self.util.cache_from_source(path, []), + self.util.cache_from_source(path, optimization=1)) + self.assertEqual(self.util.cache_from_source(path, [17]), + self.util.cache_from_source(path, optimization='')) + with self.assertRaises(RuntimeError): + self.util.cache_from_source('/foo/bar/baz.py', Bearish()) + + + def test_cache_from_source_optimization_empty_string(self): + # Setting 'optimization' to '' leads to no optimization tag (PEP 488). + path = 'foo.py' + expect = os.path.join('__pycache__', 'foo.{}.pyc'.format(self.tag)) + self.assertEqual(self.util.cache_from_source(path, optimization=''), + expect) + + def test_cache_from_source_optimization_None(self): + # Setting 'optimization' to None uses the interpreter's optimization. + # (PEP 488) + path = 'foo.py' + optimization_level = sys.flags.optimize + almost_expect = os.path.join('__pycache__', 'foo.{}'.format(self.tag)) + if optimization_level == 0: + expect = almost_expect + '.pyc' + elif optimization_level <= 2: + expect = almost_expect + '.opt-{}.pyc'.format(optimization_level) + else: + msg = '{!r} is a non-standard optimization level'.format(optimization_level) + self.skipTest(msg) + self.assertEqual(self.util.cache_from_source(path, optimization=None), + expect) + + def test_cache_from_source_optimization_set(self): + # The 'optimization' parameter accepts anything that has a string repr + # that passes str.alnum(). + path = 'foo.py' + valid_characters = string.ascii_letters + string.digits + almost_expect = os.path.join('__pycache__', 'foo.{}'.format(self.tag)) + got = self.util.cache_from_source(path, optimization=valid_characters) + # Test all valid characters are accepted. + self.assertEqual(got, + almost_expect + '.opt-{}.pyc'.format(valid_characters)) + # str() should be called on argument. + self.assertEqual(self.util.cache_from_source(path, optimization=42), + almost_expect + '.opt-42.pyc') + # Invalid characters raise ValueError. + with self.assertRaises(ValueError): + self.util.cache_from_source(path, optimization='path/is/bad') + + def test_cache_from_source_debug_override_optimization_both_set(self): + # Can only set one of the optimization-related parameters. + with warnings.catch_warnings(): + warnings.simplefilter('ignore') + with self.assertRaises(TypeError): + self.util.cache_from_source('foo.py', False, optimization='') + + @unittest.skipUnless(os.sep == '\\' and os.altsep == '/', + 'test meaningful only where os.altsep is defined') + def test_sep_altsep_and_sep_cache_from_source(self): + # Windows path and PEP 3147 where sep is right of altsep. + self.assertEqual( + self.util.cache_from_source('\\foo\\bar\\baz/qux.py', optimization=''), + '\\foo\\bar\\baz\\__pycache__\\qux.{}.pyc'.format(self.tag)) + + @unittest.skipIf(sys.implementation.cache_tag is None, + 'requires sys.implementation.cache_tag not be None') + def test_cache_from_source_path_like_arg(self): + path = pathlib.PurePath('foo', 'bar', 'baz', 'qux.py') + expect = os.path.join('foo', 'bar', 'baz', '__pycache__', + 'qux.{}.pyc'.format(self.tag)) + self.assertEqual(self.util.cache_from_source(path, optimization=''), + expect) + + @unittest.skipIf(sys.implementation.cache_tag is None, + 'requires sys.implementation.cache_tag to not be None') + def test_source_from_cache(self): + # Given the path to a PEP 3147 defined .pyc file, return the path to + # its source. This tests the good path. + path = os.path.join('foo', 'bar', 'baz', '__pycache__', + 'qux.{}.pyc'.format(self.tag)) + expect = os.path.join('foo', 'bar', 'baz', 'qux.py') + self.assertEqual(self.util.source_from_cache(path), expect) + + def test_source_from_cache_no_cache_tag(self): + # If sys.implementation.cache_tag is None, raise NotImplementedError. + path = os.path.join('blah', '__pycache__', 'whatever.pyc') + with support.swap_attr(sys.implementation, 'cache_tag', None): + with self.assertRaises(NotImplementedError): + self.util.source_from_cache(path) + + def test_source_from_cache_bad_path(self): + # When the path to a pyc file is not in PEP 3147 format, a ValueError + # is raised. + self.assertRaises( + ValueError, self.util.source_from_cache, '/foo/bar/bazqux.pyc') + + def test_source_from_cache_no_slash(self): + # No slashes at all in path -> ValueError + self.assertRaises( + ValueError, self.util.source_from_cache, 'foo.cpython-32.pyc') + + def test_source_from_cache_too_few_dots(self): + # Too few dots in final path component -> ValueError + self.assertRaises( + ValueError, self.util.source_from_cache, '__pycache__/foo.pyc') + + def test_source_from_cache_too_many_dots(self): + with self.assertRaises(ValueError): + self.util.source_from_cache( + '__pycache__/foo.cpython-32.opt-1.foo.pyc') + + def test_source_from_cache_not_opt(self): + # Non-`opt-` path component -> ValueError + self.assertRaises( + ValueError, self.util.source_from_cache, + '__pycache__/foo.cpython-32.foo.pyc') + + def test_source_from_cache_no__pycache__(self): + # Another problem with the path -> ValueError + self.assertRaises( + ValueError, self.util.source_from_cache, + '/foo/bar/foo.cpython-32.foo.pyc') + + def test_source_from_cache_optimized_bytecode(self): + # Optimized bytecode is not an issue. + path = os.path.join('__pycache__', 'foo.{}.opt-1.pyc'.format(self.tag)) + self.assertEqual(self.util.source_from_cache(path), 'foo.py') + + def test_source_from_cache_missing_optimization(self): + # An empty optimization level is a no-no. + path = os.path.join('__pycache__', 'foo.{}.opt-.pyc'.format(self.tag)) + with self.assertRaises(ValueError): + self.util.source_from_cache(path) + + @unittest.skipIf(sys.implementation.cache_tag is None, + 'requires sys.implementation.cache_tag to not be None') + def test_source_from_cache_path_like_arg(self): + path = pathlib.PurePath('foo', 'bar', 'baz', '__pycache__', + 'qux.{}.pyc'.format(self.tag)) + expect = os.path.join('foo', 'bar', 'baz', 'qux.py') + self.assertEqual(self.util.source_from_cache(path), expect) + + @unittest.skipIf(sys.implementation.cache_tag is None, + 'requires sys.implementation.cache_tag to not be None') + def test_cache_from_source_respects_pycache_prefix(self): + # If pycache_prefix is set, cache_from_source will return a bytecode + # path inside that directory (in a subdirectory mirroring the .py file's + # path) rather than in a __pycache__ dir next to the py file. + pycache_prefixes = [ + os.path.join(os.path.sep, 'tmp', 'bytecode'), + os.path.join(os.path.sep, 'tmp', '\u2603'), # non-ASCII in path! + os.path.join(os.path.sep, 'tmp', 'trailing-slash') + os.path.sep, + ] + drive = '' + if os.name == 'nt': + drive = 'C:' + pycache_prefixes = [ + f'{drive}{prefix}' for prefix in pycache_prefixes] + pycache_prefixes += [r'\\?\C:\foo', r'\\localhost\c$\bar'] + for pycache_prefix in pycache_prefixes: + with self.subTest(path=pycache_prefix): + path = drive + os.path.join( + os.path.sep, 'foo', 'bar', 'baz', 'qux.py') + expect = os.path.join( + pycache_prefix, 'foo', 'bar', 'baz', + 'qux.{}.pyc'.format(self.tag)) + with util.temporary_pycache_prefix(pycache_prefix): + self.assertEqual( + self.util.cache_from_source(path, optimization=''), + expect) + + @unittest.skipIf(sys.implementation.cache_tag is None, + 'requires sys.implementation.cache_tag to not be None') + def test_cache_from_source_respects_pycache_prefix_relative(self): + # If the .py path we are given is relative, we will resolve to an + # absolute path before prefixing with pycache_prefix, to avoid any + # possible ambiguity. + pycache_prefix = os.path.join(os.path.sep, 'tmp', 'bytecode') + path = os.path.join('foo', 'bar', 'baz', 'qux.py') + root = os.path.splitdrive(os.getcwd())[0] + os.path.sep + expect = os.path.join( + pycache_prefix, + os.path.relpath(os.getcwd(), root), + 'foo', 'bar', 'baz', f'qux.{self.tag}.pyc') + with util.temporary_pycache_prefix(pycache_prefix): + self.assertEqual( + self.util.cache_from_source(path, optimization=''), + os.path.normpath(expect)) + + @unittest.skipIf(sys.implementation.cache_tag is None, + 'requires sys.implementation.cache_tag to not be None') + def test_cache_from_source_in_root_with_pycache_prefix(self): + # Regression test for gh-82916 + pycache_prefix = os.path.join(os.path.sep, 'tmp', 'bytecode') + path = 'qux.py' + expect = os.path.join(os.path.sep, 'tmp', 'bytecode', + f'qux.{self.tag}.pyc') + with util.temporary_pycache_prefix(pycache_prefix): + with os_helper.change_cwd('/'): + self.assertEqual(self.util.cache_from_source(path), expect) + + @unittest.skipIf(sys.implementation.cache_tag is None, + 'requires sys.implementation.cache_tag to not be None') + def test_source_from_cache_inside_pycache_prefix(self): + # If pycache_prefix is set and the cache path we get is inside it, + # we return an absolute path to the py file based on the remainder of + # the path within pycache_prefix. + pycache_prefix = os.path.join(os.path.sep, 'tmp', 'bytecode') + path = os.path.join(pycache_prefix, 'foo', 'bar', 'baz', + f'qux.{self.tag}.pyc') + expect = os.path.join(os.path.sep, 'foo', 'bar', 'baz', 'qux.py') + with util.temporary_pycache_prefix(pycache_prefix): + self.assertEqual(self.util.source_from_cache(path), expect) + + @unittest.skipIf(sys.implementation.cache_tag is None, + 'requires sys.implementation.cache_tag to not be None') + def test_source_from_cache_outside_pycache_prefix(self): + # If pycache_prefix is set but the cache path we get is not inside + # it, just ignore it and handle the cache path according to the default + # behavior. + pycache_prefix = os.path.join(os.path.sep, 'tmp', 'bytecode') + path = os.path.join('foo', 'bar', 'baz', '__pycache__', + f'qux.{self.tag}.pyc') + expect = os.path.join('foo', 'bar', 'baz', 'qux.py') + with util.temporary_pycache_prefix(pycache_prefix): + self.assertEqual(self.util.source_from_cache(path), expect) + + +(Frozen_PEP3147Tests, + Source_PEP3147Tests + ) = util.test_both(PEP3147Tests, util=importlib_util) + + +class MagicNumberTests(unittest.TestCase): + """ + Test release compatibility issues relating to importlib + """ + @unittest.skipUnless( + sys.version_info.releaselevel in ('candidate', 'final'), + 'only applies to candidate or final python release levels' + ) + def test_magic_number(self): + # Each python minor release should generally have a MAGIC_NUMBER + # that does not change once the release reaches candidate status. + + # Once a release reaches candidate status, the value of the constant + # EXPECTED_MAGIC_NUMBER in this test should be changed. + # This test will then check that the actual MAGIC_NUMBER matches + # the expected value for the release. + + # In exceptional cases, it may be required to change the MAGIC_NUMBER + # for a maintenance release. In this case the change should be + # discussed in python-dev. If a change is required, community + # stakeholders such as OS package maintainers must be notified + # in advance. Such exceptional releases will then require an + # adjustment to this test case. + EXPECTED_MAGIC_NUMBER = 3627 + actual = int.from_bytes(importlib.util.MAGIC_NUMBER[:2], 'little') + + msg = ( + "To avoid breaking backwards compatibility with cached bytecode " + "files that can't be automatically regenerated by the current " + "user, candidate and final releases require the current " + "importlib.util.MAGIC_NUMBER to match the expected " + "magic number in this test. Set the expected " + "magic number in this test to the current MAGIC_NUMBER to " + "continue with the release.\n\n" + "Changing the MAGIC_NUMBER for a maintenance release " + "requires discussion in python-dev and notification of " + "community stakeholders." + ) + self.assertEqual(EXPECTED_MAGIC_NUMBER, actual, msg) + + +@unittest.skipIf(_interpreters is None, 'subinterpreters required') +class IncompatibleExtensionModuleRestrictionsTests(unittest.TestCase): + + def run_with_own_gil(self, script): + interpid = _interpreters.create('isolated') + def ensure_destroyed(): + try: + _interpreters.destroy(interpid) + except _interpreters.InterpreterNotFoundError: + pass + self.addCleanup(ensure_destroyed) + excsnap = _interpreters.exec(interpid, script) + if excsnap is not None: + if excsnap.type.__name__ == 'ImportError': + raise ImportError(excsnap.msg) + + def run_with_shared_gil(self, script): + interpid = _interpreters.create('legacy') + def ensure_destroyed(): + try: + _interpreters.destroy(interpid) + except _interpreters.InterpreterNotFoundError: + pass + self.addCleanup(ensure_destroyed) + excsnap = _interpreters.exec(interpid, script) + if excsnap is not None: + if excsnap.type.__name__ == 'ImportError': + raise ImportError(excsnap.msg) + + @unittest.skipIf(_testsinglephase is None, "test requires _testsinglephase module") + # gh-117649: single-phase init modules are not currently supported in + # subinterpreters in the free-threaded build + @support.expected_failure_if_gil_disabled() + def test_single_phase_init_module(self): + script = textwrap.dedent(''' + from importlib.util import _incompatible_extension_module_restrictions + with _incompatible_extension_module_restrictions(disable_check=True): + import _testsinglephase + ''') + with self.subTest('check disabled, shared GIL'): + self.run_with_shared_gil(script) + with self.subTest('check disabled, per-interpreter GIL'): + self.run_with_own_gil(script) + + script = textwrap.dedent(f''' + from importlib.util import _incompatible_extension_module_restrictions + with _incompatible_extension_module_restrictions(disable_check=False): + import _testsinglephase + ''') + with self.subTest('check enabled, shared GIL'): + with self.assertRaises(ImportError): + self.run_with_shared_gil(script) + with self.subTest('check enabled, per-interpreter GIL'): + with self.assertRaises(ImportError): + self.run_with_own_gil(script) + + @unittest.skipIf(_testmultiphase is None, "test requires _testmultiphase module") + @support.requires_gil_enabled("gh-117649: not supported in free-threaded build") + def test_incomplete_multi_phase_init_module(self): + # Apple extensions must be distributed as frameworks. This requires + # a specialist loader. + if support.is_apple_mobile: + loader = "AppleFrameworkLoader" + else: + loader = "ExtensionFileLoader" + + prescript = textwrap.dedent(f''' + from importlib.util import spec_from_loader, module_from_spec + from importlib.machinery import {loader} + + name = '_test_shared_gil_only' + filename = {_testmultiphase.__file__!r} + loader = {loader}(name, filename) + spec = spec_from_loader(name, loader) + + ''') + + script = prescript + textwrap.dedent(''' + from importlib.util import _incompatible_extension_module_restrictions + with _incompatible_extension_module_restrictions(disable_check=True): + module = module_from_spec(spec) + loader.exec_module(module) + ''') + with self.subTest('check disabled, shared GIL'): + self.run_with_shared_gil(script) + with self.subTest('check disabled, per-interpreter GIL'): + self.run_with_own_gil(script) + + script = prescript + textwrap.dedent(''' + from importlib.util import _incompatible_extension_module_restrictions + with _incompatible_extension_module_restrictions(disable_check=False): + module = module_from_spec(spec) + loader.exec_module(module) + ''') + with self.subTest('check enabled, shared GIL'): + self.run_with_shared_gil(script) + with self.subTest('check enabled, per-interpreter GIL'): + with self.assertRaises(ImportError): + self.run_with_own_gil(script) + + @unittest.skipIf(_testmultiphase is None, "test requires _testmultiphase module") + def test_complete_multi_phase_init_module(self): + script = textwrap.dedent(''' + from importlib.util import _incompatible_extension_module_restrictions + with _incompatible_extension_module_restrictions(disable_check=True): + import _testmultiphase + ''') + with self.subTest('check disabled, shared GIL'): + self.run_with_shared_gil(script) + with self.subTest('check disabled, per-interpreter GIL'): + self.run_with_own_gil(script) + + script = textwrap.dedent(f''' + from importlib.util import _incompatible_extension_module_restrictions + with _incompatible_extension_module_restrictions(disable_check=False): + import _testmultiphase + ''') + with self.subTest('check enabled, shared GIL'): + self.run_with_shared_gil(script) + with self.subTest('check enabled, per-interpreter GIL'): + self.run_with_own_gil(script) + + +class PatchAtomicWrites: + def __init__(self, truncate_at_length, never_complete=False): + self.truncate_at_length = truncate_at_length + self.never_complete = never_complete + self.seen_write = False + self._children = [] + + def __enter__(self): + import _pyio + + oldwrite = os.write + + # Emulate an os.write that only writes partial data. + def write(fd, data): + if self.seen_write and self.never_complete: + return None + self.seen_write = True + return oldwrite(fd, data[:self.truncate_at_length]) + + # Need to patch _io to be _pyio, so that io.FileIO is affected by the + # os.write patch. + self.children = [ + support.swap_attr(_bootstrap_external, '_io', _pyio), + support.swap_attr(os, 'write', write) + ] + for child in self.children: + child.__enter__() + return self + + def __exit__(self, exc_type, exc_val, exc_tb): + for child in self.children: + child.__exit__(exc_type, exc_val, exc_tb) + + +class MiscTests(unittest.TestCase): + + def test_atomic_write_retries_incomplete_writes(self): + truncate_at_length = 100 + length = truncate_at_length * 2 + + with PatchAtomicWrites(truncate_at_length=truncate_at_length) as cm: + # Make sure we write something longer than the point where we + # truncate. + content = b'x' * length + _bootstrap_external._write_atomic(os_helper.TESTFN, content) + self.assertTrue(cm.seen_write) + + self.assertEqual(os.stat(support.os_helper.TESTFN).st_size, length) + os.unlink(support.os_helper.TESTFN) + + def test_atomic_write_errors_if_unable_to_complete(self): + truncate_at_length = 100 + + with ( + PatchAtomicWrites( + truncate_at_length=truncate_at_length, never_complete=True, + ) as cm, + self.assertRaises(OSError) + ): + # Make sure we write something longer than the point where we + # truncate. + content = b'x' * (truncate_at_length * 2) + _bootstrap_external._write_atomic(os_helper.TESTFN, content) + self.assertTrue(cm.seen_write) + + with self.assertRaises(OSError): + os.stat(support.os_helper.TESTFN) # Check that the file did not get written. + + +if __name__ == '__main__': + unittest.main() diff --git a/stdlib/test/test_importlib/test_windows.py b/stdlib/test/test_importlib/test_windows.py new file mode 100644 index 000000000..bef4fb46f --- /dev/null +++ b/stdlib/test/test_importlib/test_windows.py @@ -0,0 +1,210 @@ +from test.test_importlib import util as test_util +machinery = test_util.import_importlib('importlib.machinery') + +import os +import re +import sys +import unittest +from test import support +from test.support import import_helper +from contextlib import contextmanager +from test.test_importlib.util import temp_module + +import_helper.import_module('winreg', required_on=['win']) +from winreg import ( + CreateKey, HKEY_CURRENT_USER, + SetValue, REG_SZ, KEY_ALL_ACCESS, + EnumKey, CloseKey, DeleteKey, OpenKey +) + +def get_platform(): + # Port of distutils.util.get_platform(). + TARGET_TO_PLAT = { + 'x86' : 'win32', + 'x64' : 'win-amd64', + 'arm' : 'win-arm32', + } + if ('VSCMD_ARG_TGT_ARCH' in os.environ and + os.environ['VSCMD_ARG_TGT_ARCH'] in TARGET_TO_PLAT): + return TARGET_TO_PLAT[os.environ['VSCMD_ARG_TGT_ARCH']] + elif 'amd64' in sys.version.lower(): + return 'win-amd64' + elif '(arm)' in sys.version.lower(): + return 'win-arm32' + elif '(arm64)' in sys.version.lower(): + return 'win-arm64' + else: + return sys.platform + +def delete_registry_tree(root, subkey): + try: + hkey = OpenKey(root, subkey, access=KEY_ALL_ACCESS) + except OSError: + # subkey does not exist + return + while True: + try: + subsubkey = EnumKey(hkey, 0) + except OSError: + # no more subkeys + break + delete_registry_tree(hkey, subsubkey) + CloseKey(hkey) + DeleteKey(root, subkey) + +@contextmanager +def setup_module(machinery, name, path=None): + if machinery.WindowsRegistryFinder.DEBUG_BUILD: + root = machinery.WindowsRegistryFinder.REGISTRY_KEY_DEBUG + else: + root = machinery.WindowsRegistryFinder.REGISTRY_KEY + key = root.format(fullname=name, + sys_version='%d.%d' % sys.version_info[:2]) + base_key = "Software\\Python\\PythonCore\\{}.{}".format( + sys.version_info.major, sys.version_info.minor) + assert key.casefold().startswith(base_key.casefold()), ( + "expected key '{}' to start with '{}'".format(key, base_key)) + try: + with temp_module(name, "a = 1") as location: + try: + OpenKey(HKEY_CURRENT_USER, base_key) + if machinery.WindowsRegistryFinder.DEBUG_BUILD: + delete_key = os.path.dirname(key) + else: + delete_key = key + except OSError: + delete_key = base_key + subkey = CreateKey(HKEY_CURRENT_USER, key) + if path is None: + path = location + ".py" + SetValue(subkey, "", REG_SZ, path) + yield + finally: + if delete_key: + delete_registry_tree(HKEY_CURRENT_USER, delete_key) + + +@unittest.skipUnless(sys.platform.startswith('win'), 'requires Windows') +class WindowsRegistryFinderTests: + # The module name is process-specific, allowing for + # simultaneous runs of the same test on a single machine. + test_module = "spamham{}".format(os.getpid()) + + def test_find_spec_missing(self): + with self.assertWarnsRegex( + DeprecationWarning, + r"importlib\.machinery\.WindowsRegistryFinder is deprecated; " + r"use site configuration instead\. Future versions of Python may " + r"not enable this finder by default\." + ): + spec = self.machinery.WindowsRegistryFinder.find_spec('spam') + self.assertIsNone(spec) + + def test_module_found(self): + with setup_module(self.machinery, self.test_module): + with self.assertWarnsRegex( + DeprecationWarning, + r"importlib\.machinery\.WindowsRegistryFinder is deprecated; " + r"use site configuration instead\. Future versions of Python may " + r"not enable this finder by default\." + ): + spec = self.machinery.WindowsRegistryFinder.find_spec(self.test_module) + self.assertIsNotNone(spec) + + def test_module_not_found(self): + with setup_module(self.machinery, self.test_module, path="."): + with self.assertWarnsRegex( + DeprecationWarning, + r"importlib\.machinery\.WindowsRegistryFinder is deprecated; " + r"use site configuration instead\. Future versions of Python may " + r"not enable this finder by default\." + ): + spec = self.machinery.WindowsRegistryFinder.find_spec(self.test_module) + self.assertIsNone(spec) + + def test_raises_deprecation_warning(self): + # WindowsRegistryFinder is not meant to be instantiated, so the + # deprecation warning is raised in the 'find_spec' method instead. + with self.assertWarnsRegex( + DeprecationWarning, + r"importlib\.machinery\.WindowsRegistryFinder is deprecated; " + r"use site configuration instead\. Future versions of Python may " + r"not enable this finder by default\." + ): + self.machinery.WindowsRegistryFinder.find_spec('spam') + +(Frozen_WindowsRegistryFinderTests, + Source_WindowsRegistryFinderTests + ) = test_util.test_both(WindowsRegistryFinderTests, machinery=machinery) + +@unittest.skipUnless(sys.platform.startswith('win'), 'requires Windows') +class WindowsExtensionSuffixTests: + def test_tagged_suffix(self): + suffixes = self.machinery.EXTENSION_SUFFIXES + abi_flags = "t" if support.Py_GIL_DISABLED else "" + ver = sys.version_info + platform = re.sub('[^a-zA-Z0-9]', '_', get_platform()) + expected_tag = f".cp{ver.major}{ver.minor}{abi_flags}-{platform}.pyd" + try: + untagged_i = suffixes.index(".pyd") + except ValueError: + untagged_i = suffixes.index("_d.pyd") + expected_tag = "_d" + expected_tag + + self.assertIn(expected_tag, suffixes) + + # Ensure the tags are in the correct order. + tagged_i = suffixes.index(expected_tag) + self.assertLess(tagged_i, untagged_i) + +(Frozen_WindowsExtensionSuffixTests, + Source_WindowsExtensionSuffixTests + ) = test_util.test_both(WindowsExtensionSuffixTests, machinery=machinery) + + +@unittest.skipUnless(sys.platform.startswith('win'), 'requires Windows') +class WindowsBootstrapPathTests(unittest.TestCase): + def check_join(self, expected, *inputs): + from importlib._bootstrap_external import _path_join + actual = _path_join(*inputs) + if expected.casefold() == actual.casefold(): + return + self.assertEqual(expected, actual) + + def test_path_join(self): + self.check_join(r"C:\A\B", "C:\\", "A", "B") + self.check_join(r"C:\A\B", "D:\\", "D", "C:\\", "A", "B") + self.check_join(r"C:\A\B", "C:\\", "A", "C:B") + self.check_join(r"C:\A\B", "C:\\", "A\\B") + self.check_join(r"C:\A\B", r"C:\A\B") + + self.check_join("D:A", r"D:", "A") + self.check_join("D:A", r"C:\B\C", "D:", "A") + self.check_join("D:A", r"C:\B\C", r"D:A") + + self.check_join(r"A\B\C", "A", "B", "C") + self.check_join(r"A\B\C", "A", r"B\C") + self.check_join(r"A\B/C", "A", "B/C") + self.check_join(r"A\B\C", "A/", "B\\", "C") + + # Dots are not normalised by this function + self.check_join(r"A\../C", "A", "../C") + self.check_join(r"A.\.\B", "A.", ".", "B") + + self.check_join(r"\\Server\Share\A\B\C", r"\\Server\Share", "A", "B", "C") + self.check_join(r"\\Server\Share\A\B\C", r"\\Server\Share", "D", r"\A", "B", "C") + self.check_join(r"\\Server\Share\A\B\C", r"\\Server2\Share2", "D", + r"\\Server\Share", "A", "B", "C") + self.check_join(r"\\Server\Share\A\B\C", r"\\Server", r"\Share", "A", "B", "C") + self.check_join(r"\\Server\Share", r"\\Server\Share") + self.check_join(r"\\Server\Share\\", r"\\Server\Share\\") + + # Handle edge cases with empty segments + self.check_join("C:\\A", "C:/A", "") + self.check_join("C:\\", "C:/", "") + self.check_join("C:", "C:", "") + self.check_join("//Server/Share\\", "//Server/Share/", "") + self.check_join("//Server/Share\\", "//Server/Share", "") + +if __name__ == '__main__': + unittest.main() diff --git a/stdlib/test/test_importlib/threaded_import_hangers.py b/stdlib/test/test_importlib/threaded_import_hangers.py new file mode 100644 index 000000000..5484e60a0 --- /dev/null +++ b/stdlib/test/test_importlib/threaded_import_hangers.py @@ -0,0 +1,45 @@ +# This is a helper module for test_threaded_import. The test imports this +# module, and this module tries to run various Python library functions in +# their own thread, as a side effect of being imported. If the spawned +# thread doesn't complete in TIMEOUT seconds, an "appeared to hang" message +# is appended to the module-global `errors` list. That list remains empty +# if (and only if) all functions tested complete. + +TIMEOUT = 10 + +import threading + +import tempfile +import os.path + +errors = [] + +# This class merely runs a function in its own thread T. The thread importing +# this module holds the import lock, so if the function called by T tries +# to do its own imports it will block waiting for this module's import +# to complete. +class Worker(threading.Thread): + def __init__(self, function, args): + threading.Thread.__init__(self) + self.function = function + self.args = args + + def run(self): + self.function(*self.args) + +for name, func, args in [ + # Bug 147376: TemporaryFile hung on Windows, starting in Python 2.4. + ("tempfile.TemporaryFile", lambda: tempfile.TemporaryFile().close(), ()), + + # The real cause for bug 147376: ntpath.abspath() caused the hang. + ("os.path.abspath", os.path.abspath, ('.',)), + ]: + + try: + t = Worker(func, args) + t.start() + t.join(TIMEOUT) + if t.is_alive(): + errors.append("%s appeared to hang" % name) + finally: + del t diff --git a/stdlib/test/test_importlib/util.py b/stdlib/test/test_importlib/util.py new file mode 100644 index 000000000..bd64b03b7 --- /dev/null +++ b/stdlib/test/test_importlib/util.py @@ -0,0 +1,402 @@ +import builtins +import contextlib +import errno +import functools +from importlib import machinery, util, invalidate_caches +import marshal +import os +import os.path +from test import support +from test.support import import_helper +from test.support import is_apple_mobile +from test.support import os_helper +import unittest +import sys +import tempfile +import types + +import_helper.import_module("_testmultiphase") + + +BUILTINS = types.SimpleNamespace() +BUILTINS.good_name = None +BUILTINS.bad_name = None +if 'errno' in sys.builtin_module_names: + BUILTINS.good_name = 'errno' +if 'importlib' not in sys.builtin_module_names: + BUILTINS.bad_name = 'importlib' + +if support.is_wasi: + # dlopen() is a shim for WASI as of WASI SDK which fails by default. + # We don't provide an implementation, so tests will fail. + # But we also don't want to turn off dynamic loading for those that provide + # a working implementation. + def _extension_details(): + global EXTENSIONS + EXTENSIONS = None +else: + EXTENSIONS = types.SimpleNamespace() + EXTENSIONS.path = None + EXTENSIONS.ext = None + EXTENSIONS.filename = None + EXTENSIONS.file_path = None + EXTENSIONS.name = '_testsinglephase' + + def _extension_details(): + global EXTENSIONS + for path in sys.path: + for ext in machinery.EXTENSION_SUFFIXES: + # Apple mobile platforms mechanically load .so files, + # but the findable files are labelled .fwork + if is_apple_mobile: + ext = ext.replace(".so", ".fwork") + + filename = EXTENSIONS.name + ext + file_path = os.path.join(path, filename) + if os.path.exists(file_path): + EXTENSIONS.path = path + EXTENSIONS.ext = ext + EXTENSIONS.filename = filename + EXTENSIONS.file_path = file_path + return + +_extension_details() + + +def import_importlib(module_name): + """Import a module from importlib both w/ and w/o _frozen_importlib.""" + fresh = ('importlib',) if '.' in module_name else () + frozen = import_helper.import_fresh_module(module_name) + source = import_helper.import_fresh_module(module_name, fresh=fresh, + blocked=('_frozen_importlib', '_frozen_importlib_external')) + return {'Frozen': frozen, 'Source': source} + + +def specialize_class(cls, kind, base=None, **kwargs): + # XXX Support passing in submodule names--load (and cache) them? + # That would clean up the test modules a bit more. + if base is None: + base = unittest.TestCase + elif not isinstance(base, type): + base = base[kind] + name = '{}_{}'.format(kind, cls.__name__) + bases = (cls, base) + specialized = types.new_class(name, bases) + specialized.__module__ = cls.__module__ + specialized._NAME = cls.__name__ + specialized._KIND = kind + for attr, values in kwargs.items(): + value = values[kind] + setattr(specialized, attr, value) + return specialized + + +def split_frozen(cls, base=None, **kwargs): + frozen = specialize_class(cls, 'Frozen', base, **kwargs) + source = specialize_class(cls, 'Source', base, **kwargs) + return frozen, source + + +def test_both(test_class, base=None, **kwargs): + return split_frozen(test_class, base, **kwargs) + + +CASE_INSENSITIVE_FS = True +# Windows is the only OS that is *always* case-insensitive +# (OS X *can* be case-sensitive). +if sys.platform not in ('win32', 'cygwin'): + changed_name = __file__.upper() + if changed_name == __file__: + changed_name = __file__.lower() + if not os.path.exists(changed_name): + CASE_INSENSITIVE_FS = False + +source_importlib = import_importlib('importlib')['Source'] +__import__ = {'Frozen': staticmethod(builtins.__import__), + 'Source': staticmethod(source_importlib.__import__)} + + +def case_insensitive_tests(test): + """Class decorator that nullifies tests requiring a case-insensitive + file system.""" + return unittest.skipIf(not CASE_INSENSITIVE_FS, + "requires a case-insensitive filesystem")(test) + + +def submodule(parent, name, pkg_dir, content=''): + path = os.path.join(pkg_dir, name + '.py') + with open(path, 'w', encoding='utf-8') as subfile: + subfile.write(content) + return '{}.{}'.format(parent, name), path + + +def get_code_from_pyc(pyc_path): + """Reads a pyc file and returns the unmarshalled code object within. + + No header validation is performed. + """ + with open(pyc_path, 'rb') as pyc_f: + pyc_f.seek(16) + return marshal.load(pyc_f) + + +@contextlib.contextmanager +def uncache(*names): + """Uncache a module from sys.modules. + + A basic sanity check is performed to prevent uncaching modules that either + cannot/shouldn't be uncached. + + """ + for name in names: + if name in ('sys', 'marshal'): + raise ValueError("cannot uncache {}".format(name)) + try: + del sys.modules[name] + except KeyError: + pass + try: + yield + finally: + for name in names: + try: + del sys.modules[name] + except KeyError: + pass + + +@contextlib.contextmanager +def temp_module(name, content='', *, pkg=False): + conflicts = [n for n in sys.modules if n.partition('.')[0] == name] + with os_helper.temp_cwd(None) as cwd: + with uncache(name, *conflicts): + with import_helper.DirsOnSysPath(cwd): + invalidate_caches() + + location = os.path.join(cwd, name) + if pkg: + modpath = os.path.join(location, '__init__.py') + os.mkdir(name) + else: + modpath = location + '.py' + if content is None: + # Make sure the module file gets created. + content = '' + if content is not None: + # not a namespace package + with open(modpath, 'w', encoding='utf-8') as modfile: + modfile.write(content) + yield location + + +@contextlib.contextmanager +def import_state(**kwargs): + """Context manager to manage the various importers and stored state in the + sys module. + + The 'modules' attribute is not supported as the interpreter state stores a + pointer to the dict that the interpreter uses internally; + reassigning to sys.modules does not have the desired effect. + + """ + originals = {} + try: + for attr, default in (('meta_path', []), ('path', []), + ('path_hooks', []), + ('path_importer_cache', {})): + originals[attr] = getattr(sys, attr) + if attr in kwargs: + new_value = kwargs[attr] + del kwargs[attr] + else: + new_value = default + setattr(sys, attr, new_value) + if len(kwargs): + raise ValueError('unrecognized arguments: {}'.format(kwargs)) + yield + finally: + for attr, value in originals.items(): + setattr(sys, attr, value) + + +class _ImporterMock: + + """Base class to help with creating importer mocks.""" + + def __init__(self, *names, module_code={}): + self.modules = {} + self.module_code = {} + for name in names: + if not name.endswith('.__init__'): + import_name = name + else: + import_name = name[:-len('.__init__')] + if '.' not in name: + package = None + elif import_name == name: + package = name.rsplit('.', 1)[0] + else: + package = import_name + module = types.ModuleType(import_name) + module.__loader__ = self + module.__file__ = '' + module.__package__ = package + module.attr = name + if import_name != name: + module.__path__ = [''] + self.modules[import_name] = module + if import_name in module_code: + self.module_code[import_name] = module_code[import_name] + + def __getitem__(self, name): + return self.modules[name] + + def __enter__(self): + self._uncache = uncache(*self.modules.keys()) + self._uncache.__enter__() + return self + + def __exit__(self, *exc_info): + self._uncache.__exit__(None, None, None) + + +class mock_spec(_ImporterMock): + + """Importer mock using PEP 451 APIs.""" + + def find_spec(self, fullname, path=None, parent=None): + try: + module = self.modules[fullname] + except KeyError: + return None + spec = util.spec_from_file_location( + fullname, module.__file__, loader=self, + submodule_search_locations=getattr(module, '__path__', None)) + return spec + + def create_module(self, spec): + if spec.name not in self.modules: + raise ImportError + return self.modules[spec.name] + + def exec_module(self, module): + try: + self.module_code[module.__spec__.name]() + except KeyError: + pass + + +def writes_bytecode_files(fxn): + """Decorator to protect sys.dont_write_bytecode from mutation and to skip + tests that require it to be set to False.""" + if sys.dont_write_bytecode: + return unittest.skip("relies on writing bytecode")(fxn) + @functools.wraps(fxn) + def wrapper(*args, **kwargs): + original = sys.dont_write_bytecode + sys.dont_write_bytecode = False + try: + to_return = fxn(*args, **kwargs) + finally: + sys.dont_write_bytecode = original + return to_return + return wrapper + + +def ensure_bytecode_path(bytecode_path): + """Ensure that the __pycache__ directory for PEP 3147 pyc file exists. + + :param bytecode_path: File system path to PEP 3147 pyc file. + """ + try: + os.mkdir(os.path.dirname(bytecode_path)) + except OSError as error: + if error.errno != errno.EEXIST: + raise + + +@contextlib.contextmanager +def temporary_pycache_prefix(prefix): + """Adjust and restore sys.pycache_prefix.""" + _orig_prefix = sys.pycache_prefix + sys.pycache_prefix = prefix + try: + yield + finally: + sys.pycache_prefix = _orig_prefix + + +@contextlib.contextmanager +def create_modules(*names): + """Temporarily create each named module with an attribute (named 'attr') + that contains the name passed into the context manager that caused the + creation of the module. + + All files are created in a temporary directory returned by + tempfile.mkdtemp(). This directory is inserted at the beginning of + sys.path. When the context manager exits all created files (source and + bytecode) are explicitly deleted. + + No magic is performed when creating packages! This means that if you create + a module within a package you must also create the package's __init__ as + well. + + """ + source = 'attr = {0!r}' + created_paths = [] + mapping = {} + state_manager = None + uncache_manager = None + try: + temp_dir = tempfile.mkdtemp() + mapping['.root'] = temp_dir + import_names = set() + for name in names: + if not name.endswith('__init__'): + import_name = name + else: + import_name = name[:-len('.__init__')] + import_names.add(import_name) + if import_name in sys.modules: + del sys.modules[import_name] + name_parts = name.split('.') + file_path = temp_dir + for directory in name_parts[:-1]: + file_path = os.path.join(file_path, directory) + if not os.path.exists(file_path): + os.mkdir(file_path) + created_paths.append(file_path) + file_path = os.path.join(file_path, name_parts[-1] + '.py') + with open(file_path, 'w', encoding='utf-8') as file: + file.write(source.format(name)) + created_paths.append(file_path) + mapping[name] = file_path + uncache_manager = uncache(*import_names) + uncache_manager.__enter__() + state_manager = import_state(path=[temp_dir]) + state_manager.__enter__() + yield mapping + finally: + if state_manager is not None: + state_manager.__exit__(None, None, None) + if uncache_manager is not None: + uncache_manager.__exit__(None, None, None) + os_helper.rmtree(temp_dir) + + +def mock_path_hook(*entries, importer): + """A mock sys.path_hooks entry.""" + def hook(entry): + if entry not in entries: + raise ImportError + return importer + return hook + + +class CASEOKTestBase: + + def caseok_env_changed(self, *, should_exist): + possibilities = b'PYTHONCASEOK', 'PYTHONCASEOK' + if any(x in self.importlib._bootstrap_external._os.environ + for x in possibilities) != should_exist: + self.skipTest('os.environ changes not reflected in _os.environ') diff --git a/stdlib/test/test_module/__init__.py b/stdlib/test/test_module/__init__.py new file mode 100644 index 000000000..22132b01c --- /dev/null +++ b/stdlib/test/test_module/__init__.py @@ -0,0 +1,402 @@ +# Test the module type +import importlib.machinery +import unittest +import weakref +from test.support import gc_collect +from test.support import import_helper +from test.support.script_helper import assert_python_ok + +import sys +ModuleType = type(sys) + + +class FullLoader: + pass + + +class BareLoader: + pass + + +class ModuleTests(unittest.TestCase): + def test_uninitialized(self): + # An uninitialized module has no __dict__ or __name__, + # and __doc__ is None + foo = ModuleType.__new__(ModuleType) + self.assertTrue(isinstance(foo.__dict__, dict)) + self.assertEqual(dir(foo), []) + try: + s = foo.__name__ + self.fail("__name__ = %s" % repr(s)) + except AttributeError: + pass + self.assertEqual(foo.__doc__, ModuleType.__doc__ or '') + + def test_uninitialized_missing_getattr(self): + # Issue 8297 + # test the text in the AttributeError of an uninitialized module + foo = ModuleType.__new__(ModuleType) + self.assertRaisesRegex( + AttributeError, "module has no attribute 'not_here'", + getattr, foo, "not_here") + + def test_missing_getattr(self): + # Issue 8297 + # test the text in the AttributeError + foo = ModuleType("foo") + self.assertRaisesRegex( + AttributeError, "module 'foo' has no attribute 'not_here'", + getattr, foo, "not_here") + + def test_no_docstring(self): + # Regularly initialized module, no docstring + foo = ModuleType("foo") + self.assertEqual(foo.__name__, "foo") + self.assertEqual(foo.__doc__, None) + self.assertIs(foo.__loader__, None) + self.assertIs(foo.__package__, None) + self.assertIs(foo.__spec__, None) + self.assertEqual(foo.__dict__, {"__name__": "foo", "__doc__": None, + "__loader__": None, "__package__": None, + "__spec__": None}) + + def test_ascii_docstring(self): + # ASCII docstring + foo = ModuleType("foo", "foodoc") + self.assertEqual(foo.__name__, "foo") + self.assertEqual(foo.__doc__, "foodoc") + self.assertEqual(foo.__dict__, + {"__name__": "foo", "__doc__": "foodoc", + "__loader__": None, "__package__": None, + "__spec__": None}) + + def test_unicode_docstring(self): + # Unicode docstring + foo = ModuleType("foo", "foodoc\u1234") + self.assertEqual(foo.__name__, "foo") + self.assertEqual(foo.__doc__, "foodoc\u1234") + self.assertEqual(foo.__dict__, + {"__name__": "foo", "__doc__": "foodoc\u1234", + "__loader__": None, "__package__": None, + "__spec__": None}) + + def test_reinit(self): + # Reinitialization should not replace the __dict__ + foo = ModuleType("foo", "foodoc\u1234") + foo.bar = 42 + d = foo.__dict__ + foo.__init__("foo", "foodoc") + self.assertEqual(foo.__name__, "foo") + self.assertEqual(foo.__doc__, "foodoc") + self.assertEqual(foo.bar, 42) + self.assertEqual(foo.__dict__, + {"__name__": "foo", "__doc__": "foodoc", "bar": 42, + "__loader__": None, "__package__": None, "__spec__": None}) + self.assertTrue(foo.__dict__ is d) + + def test_dont_clear_dict(self): + # See issue 7140. + def f(): + foo = ModuleType("foo") + foo.bar = 4 + return foo + gc_collect() + self.assertEqual(f().__dict__["bar"], 4) + + def test_clear_dict_in_ref_cycle(self): + destroyed = [] + m = ModuleType("foo") + m.destroyed = destroyed + s = """class A: + def __init__(self, l): + self.l = l + def __del__(self): + self.l.append(1) +a = A(destroyed)""" + exec(s, m.__dict__) + del m + gc_collect() + self.assertEqual(destroyed, [1]) + + def test_weakref(self): + m = ModuleType("foo") + wr = weakref.ref(m) + self.assertIs(wr(), m) + del m + gc_collect() + self.assertIs(wr(), None) + + def test_module_getattr(self): + import test.test_module.good_getattr as gga + from test.test_module.good_getattr import test + self.assertEqual(test, "There is test") + self.assertEqual(gga.x, 1) + self.assertEqual(gga.y, 2) + with self.assertRaisesRegex(AttributeError, + "Deprecated, use whatever instead"): + gga.yolo + self.assertEqual(gga.whatever, "There is whatever") + del sys.modules['test.test_module.good_getattr'] + + def test_module_getattr_errors(self): + import test.test_module.bad_getattr as bga + from test.test_module import bad_getattr2 + self.assertEqual(bga.x, 1) + self.assertEqual(bad_getattr2.x, 1) + with self.assertRaises(TypeError): + bga.nope + with self.assertRaises(TypeError): + bad_getattr2.nope + del sys.modules['test.test_module.bad_getattr'] + if 'test.test_module.bad_getattr2' in sys.modules: + del sys.modules['test.test_module.bad_getattr2'] + + def test_module_dir(self): + import test.test_module.good_getattr as gga + self.assertEqual(dir(gga), ['a', 'b', 'c']) + del sys.modules['test.test_module.good_getattr'] + + def test_module_dir_errors(self): + import test.test_module.bad_getattr as bga + from test.test_module import bad_getattr2 + with self.assertRaises(TypeError): + dir(bga) + with self.assertRaises(TypeError): + dir(bad_getattr2) + del sys.modules['test.test_module.bad_getattr'] + if 'test.test_module.bad_getattr2' in sys.modules: + del sys.modules['test.test_module.bad_getattr2'] + + def test_module_getattr_tricky(self): + from test.test_module import bad_getattr3 + # these lookups should not crash + with self.assertRaises(AttributeError): + bad_getattr3.one + with self.assertRaises(AttributeError): + bad_getattr3.delgetattr + if 'test.test_module.bad_getattr3' in sys.modules: + del sys.modules['test.test_module.bad_getattr3'] + + def test_module_repr_minimal(self): + # reprs when modules have no __file__, __name__, or __loader__ + m = ModuleType('foo') + del m.__name__ + self.assertEqual(repr(m), "") + + def test_module_repr_with_name(self): + m = ModuleType('foo') + self.assertEqual(repr(m), "") + + def test_module_repr_with_name_and_filename(self): + m = ModuleType('foo') + m.__file__ = '/tmp/foo.py' + self.assertEqual(repr(m), "") + + def test_module_repr_with_filename_only(self): + m = ModuleType('foo') + del m.__name__ + m.__file__ = '/tmp/foo.py' + self.assertEqual(repr(m), "") + + def test_module_repr_with_loader_as_None(self): + m = ModuleType('foo') + assert m.__loader__ is None + self.assertEqual(repr(m), "") + + def test_module_repr_with_bare_loader_but_no_name(self): + m = ModuleType('foo') + del m.__name__ + # Yes, a class not an instance. + m.__loader__ = BareLoader + loader_repr = repr(BareLoader) + self.assertEqual( + repr(m), "".format(loader_repr)) + + def test_module_repr_with_full_loader_but_no_name(self): + # m.__loader__.module_repr() will fail because the module has no + # m.__name__. This exception will get suppressed and instead the + # loader's repr will be used. + m = ModuleType('foo') + del m.__name__ + # Yes, a class not an instance. + m.__loader__ = FullLoader + loader_repr = repr(FullLoader) + self.assertEqual( + repr(m), "".format(loader_repr)) + + def test_module_repr_with_bare_loader(self): + m = ModuleType('foo') + # Yes, a class not an instance. + m.__loader__ = BareLoader + module_repr = repr(BareLoader) + self.assertEqual( + repr(m), "".format(module_repr)) + + def test_module_repr_with_full_loader(self): + m = ModuleType('foo') + # Yes, a class not an instance. + m.__loader__ = FullLoader + self.assertEqual( + repr(m), f")>") + + def test_module_repr_with_bare_loader_and_filename(self): + m = ModuleType('foo') + # Yes, a class not an instance. + m.__loader__ = BareLoader + m.__file__ = '/tmp/foo.py' + self.assertEqual(repr(m), "") + + def test_module_repr_with_full_loader_and_filename(self): + m = ModuleType('foo') + # Yes, a class not an instance. + m.__loader__ = FullLoader + m.__file__ = '/tmp/foo.py' + self.assertEqual(repr(m), "") + + def test_module_repr_builtin(self): + self.assertEqual(repr(sys), "") + + def test_module_repr_source(self): + r = repr(unittest) + starts_with = "") + + def test_module_repr_with_namespace_package_and_custom_loader(self): + m = ModuleType('foo') + loader = BareLoader() + spec = importlib.machinery.ModuleSpec('foo', loader) + m.__loader__ = loader + m.__spec__ = spec + expected_repr_pattern = r"\)>" + self.assertRegex(repr(m), expected_repr_pattern) + self.assertNotIn('from', repr(m)) + + def test_module_repr_with_fake_namespace_package(self): + m = ModuleType('foo') + loader = BareLoader() + loader._path = ['spam'] + spec = importlib.machinery.ModuleSpec('foo', loader) + m.__loader__ = loader + m.__spec__ = spec + expected_repr_pattern = r"\)>" + self.assertRegex(repr(m), expected_repr_pattern) + self.assertNotIn('from', repr(m)) + + def test_module_finalization_at_shutdown(self): + # Module globals and builtins should still be available during shutdown + rc, out, err = assert_python_ok("-c", "from test.test_module import final_a") + self.assertFalse(err) + lines = out.splitlines() + self.assertEqual(set(lines), { + b"x = a", + b"x = b", + b"final_a.x = a", + b"final_b.x = b", + b"len = len", + b"shutil.rmtree = rmtree"}) + + def test_descriptor_errors_propagate(self): + class Descr: + def __get__(self, o, t): + raise RuntimeError + class M(ModuleType): + melon = Descr() + self.assertRaises(RuntimeError, getattr, M("mymod"), "melon") + + def test_lazy_create_annotations(self): + # module objects lazy create their __annotations__ dict on demand. + # the annotations dict is stored in module.__dict__. + # a freshly created module shouldn't have an annotations dict yet. + foo = ModuleType("foo") + for i in range(4): + self.assertFalse("__annotations__" in foo.__dict__) + d = foo.__annotations__ + self.assertTrue("__annotations__" in foo.__dict__) + self.assertEqual(foo.__annotations__, d) + self.assertEqual(foo.__dict__['__annotations__'], d) + if i % 2: + del foo.__annotations__ + else: + del foo.__dict__['__annotations__'] + + def test_setting_annotations(self): + foo = ModuleType("foo") + for i in range(4): + self.assertFalse("__annotations__" in foo.__dict__) + d = {'a': int} + foo.__annotations__ = d + self.assertTrue("__annotations__" in foo.__dict__) + self.assertEqual(foo.__annotations__, d) + self.assertEqual(foo.__dict__['__annotations__'], d) + if i % 2: + del foo.__annotations__ + else: + del foo.__dict__['__annotations__'] + + def test_annotations_getset_raises(self): + # double delete + foo = ModuleType("foo") + foo.__annotations__ = {} + del foo.__annotations__ + with self.assertRaises(AttributeError): + del foo.__annotations__ + + def test_annotations_are_created_correctly(self): + ann_module4 = import_helper.import_fresh_module( + 'test.typinganndata.ann_module4', + ) + self.assertFalse("__annotations__" in ann_module4.__dict__) + self.assertEqual(ann_module4.__annotations__, {"a": int, "b": str}) + self.assertTrue("__annotations__" in ann_module4.__dict__) + del ann_module4.__annotations__ + self.assertFalse("__annotations__" in ann_module4.__dict__) + + + def test_repeated_attribute_pops(self): + # Repeated accesses to module attribute will be specialized + # Check that popping the attribute doesn't break it + m = ModuleType("test") + d = m.__dict__ + count = 0 + for _ in range(100): + m.attr = 1 + count += m.attr # Might be specialized + d.pop("attr") + self.assertEqual(count, 100) + + # frozen and namespace module reprs are tested in importlib. + + def test_subclass_with_slots(self): + # In 3.11alpha this crashed, as the slots weren't NULLed. + + class ModuleWithSlots(ModuleType): + __slots__ = ("a", "b") + + def __init__(self, name): + super().__init__(name) + + m = ModuleWithSlots("name") + with self.assertRaises(AttributeError): + m.a + with self.assertRaises(AttributeError): + m.b + m.a, m.b = 1, 2 + self.assertEqual(m.a, 1) + self.assertEqual(m.b, 2) + + + +if __name__ == '__main__': + unittest.main() diff --git a/stdlib/test/test_module/bad_getattr.py b/stdlib/test/test_module/bad_getattr.py new file mode 100644 index 000000000..16f901b13 --- /dev/null +++ b/stdlib/test/test_module/bad_getattr.py @@ -0,0 +1,4 @@ +x = 1 + +__getattr__ = "Surprise!" +__dir__ = "Surprise again!" diff --git a/stdlib/test/test_module/bad_getattr2.py b/stdlib/test/test_module/bad_getattr2.py new file mode 100644 index 000000000..0a52a53b5 --- /dev/null +++ b/stdlib/test/test_module/bad_getattr2.py @@ -0,0 +1,7 @@ +def __getattr__(): + "Bad one" + +x = 1 + +def __dir__(bad_sig): + return [] diff --git a/stdlib/test/test_module/bad_getattr3.py b/stdlib/test/test_module/bad_getattr3.py new file mode 100644 index 000000000..0d5f9266c --- /dev/null +++ b/stdlib/test/test_module/bad_getattr3.py @@ -0,0 +1,5 @@ +def __getattr__(name): + if name != 'delgetattr': + raise AttributeError + del globals()['__getattr__'] + raise AttributeError diff --git a/stdlib/test/test_module/final_a.py b/stdlib/test/test_module/final_a.py new file mode 100644 index 000000000..a983f3111 --- /dev/null +++ b/stdlib/test/test_module/final_a.py @@ -0,0 +1,19 @@ +""" +Fodder for module finalization tests in test_module. +""" + +import shutil +import test.test_module.final_b + +x = 'a' + +class C: + def __del__(self): + # Inspect module globals and builtins + print("x =", x) + print("final_b.x =", test.test_module.final_b.x) + print("shutil.rmtree =", getattr(shutil.rmtree, '__name__', None)) + print("len =", getattr(len, '__name__', None)) + +c = C() +_underscored = C() diff --git a/stdlib/test/test_module/final_b.py b/stdlib/test/test_module/final_b.py new file mode 100644 index 000000000..f3e8d5594 --- /dev/null +++ b/stdlib/test/test_module/final_b.py @@ -0,0 +1,19 @@ +""" +Fodder for module finalization tests in test_module. +""" + +import shutil +import test.test_module.final_a + +x = 'b' + +class C: + def __del__(self): + # Inspect module globals and builtins + print("x =", x) + print("final_a.x =", test.test_module.final_a.x) + print("shutil.rmtree =", getattr(shutil.rmtree, '__name__', None)) + print("len =", getattr(len, '__name__', None)) + +c = C() +_underscored = C() diff --git a/stdlib/test/test_module/good_getattr.py b/stdlib/test/test_module/good_getattr.py new file mode 100644 index 000000000..7d27de626 --- /dev/null +++ b/stdlib/test/test_module/good_getattr.py @@ -0,0 +1,11 @@ +x = 1 + +def __dir__(): + return ['a', 'b', 'c'] + +def __getattr__(name): + if name == "yolo": + raise AttributeError("Deprecated, use whatever instead") + return f"There is {name}" + +y = 2 diff --git a/stdlib/test/test_py_compile.py b/stdlib/test/test_py_compile.py new file mode 100644 index 000000000..749a877d0 --- /dev/null +++ b/stdlib/test/test_py_compile.py @@ -0,0 +1,310 @@ +import functools +import importlib.util +import os +import py_compile +import shutil +import stat +import subprocess +import sys +import tempfile +import unittest + +from test import support +from test.support import os_helper, script_helper + + +def without_source_date_epoch(fxn): + """Runs function with SOURCE_DATE_EPOCH unset.""" + @functools.wraps(fxn) + def wrapper(*args, **kwargs): + with os_helper.EnvironmentVarGuard() as env: + env.unset('SOURCE_DATE_EPOCH') + return fxn(*args, **kwargs) + return wrapper + + +def with_source_date_epoch(fxn): + """Runs function with SOURCE_DATE_EPOCH set.""" + @functools.wraps(fxn) + def wrapper(*args, **kwargs): + with os_helper.EnvironmentVarGuard() as env: + env['SOURCE_DATE_EPOCH'] = '123456789' + return fxn(*args, **kwargs) + return wrapper + + +# Run tests with SOURCE_DATE_EPOCH set or unset explicitly. +class SourceDateEpochTestMeta(type(unittest.TestCase)): + def __new__(mcls, name, bases, dct, *, source_date_epoch): + cls = super().__new__(mcls, name, bases, dct) + + for attr in dir(cls): + if attr.startswith('test_'): + meth = getattr(cls, attr) + if source_date_epoch: + wrapper = with_source_date_epoch(meth) + else: + wrapper = without_source_date_epoch(meth) + setattr(cls, attr, wrapper) + + return cls + + +class PyCompileTestsBase: + + def setUp(self): + self.directory = tempfile.mkdtemp(dir=os.getcwd()) + self.source_path = os.path.join(self.directory, '_test.py') + self.pyc_path = self.source_path + 'c' + self.cache_path = importlib.util.cache_from_source(self.source_path) + self.cwd_drive = os.path.splitdrive(os.getcwd())[0] + # In these tests we compute relative paths. When using Windows, the + # current working directory path and the 'self.source_path' might be + # on different drives. Therefore we need to switch to the drive where + # the temporary source file lives. + drive = os.path.splitdrive(self.source_path)[0] + if drive: + os.chdir(drive) + with open(self.source_path, 'w') as file: + file.write('x = 123\n') + + def tearDown(self): + shutil.rmtree(self.directory) + if self.cwd_drive: + os.chdir(self.cwd_drive) + + def test_absolute_path(self): + py_compile.compile(self.source_path, self.pyc_path) + self.assertTrue(os.path.exists(self.pyc_path)) + self.assertFalse(os.path.exists(self.cache_path)) + + def test_do_not_overwrite_symlinks(self): + # In the face of a cfile argument being a symlink, bail out. + # Issue #17222 + try: + os.symlink(self.pyc_path + '.actual', self.pyc_path) + except (NotImplementedError, OSError): + self.skipTest('need to be able to create a symlink for a file') + else: + assert os.path.islink(self.pyc_path) + with self.assertRaises(FileExistsError): + py_compile.compile(self.source_path, self.pyc_path) + + @unittest.skipIf(not os.path.exists(os.devnull) or os.path.isfile(os.devnull), + 'requires os.devnull and for it to be a non-regular file') + def test_do_not_overwrite_nonregular_files(self): + # In the face of a cfile argument being a non-regular file, bail out. + # Issue #17222 + with self.assertRaises(FileExistsError): + py_compile.compile(self.source_path, os.devnull) + + def test_cache_path(self): + py_compile.compile(self.source_path) + self.assertTrue(os.path.exists(self.cache_path)) + + def test_cwd(self): + with os_helper.change_cwd(self.directory): + py_compile.compile(os.path.basename(self.source_path), + os.path.basename(self.pyc_path)) + self.assertTrue(os.path.exists(self.pyc_path)) + self.assertFalse(os.path.exists(self.cache_path)) + + def test_relative_path(self): + py_compile.compile(os.path.relpath(self.source_path), + os.path.relpath(self.pyc_path)) + self.assertTrue(os.path.exists(self.pyc_path)) + self.assertFalse(os.path.exists(self.cache_path)) + + @os_helper.skip_if_dac_override + @unittest.skipIf(os.name == 'nt', + 'cannot control directory permissions on Windows') + @os_helper.skip_unless_working_chmod + def test_exceptions_propagate(self): + # Make sure that exceptions raised thanks to issues with writing + # bytecode. + # http://bugs.python.org/issue17244 + mode = os.stat(self.directory) + os.chmod(self.directory, stat.S_IREAD) + try: + with self.assertRaises(IOError): + py_compile.compile(self.source_path, self.pyc_path) + finally: + os.chmod(self.directory, mode.st_mode) + + def test_bad_coding(self): + bad_coding = os.path.join(os.path.dirname(__file__), + 'tokenizedata', + 'bad_coding2.py') + with support.captured_stderr(): + self.assertIsNone(py_compile.compile(bad_coding, doraise=False)) + self.assertFalse(os.path.exists( + importlib.util.cache_from_source(bad_coding))) + + def test_source_date_epoch(self): + py_compile.compile(self.source_path, self.pyc_path) + self.assertTrue(os.path.exists(self.pyc_path)) + self.assertFalse(os.path.exists(self.cache_path)) + with open(self.pyc_path, 'rb') as fp: + flags = importlib._bootstrap_external._classify_pyc( + fp.read(), 'test', {}) + if os.environ.get('SOURCE_DATE_EPOCH'): + expected_flags = 0b11 + else: + expected_flags = 0b00 + + self.assertEqual(flags, expected_flags) + + @unittest.skipIf(sys.flags.optimize > 0, 'test does not work with -O') + def test_double_dot_no_clobber(self): + # http://bugs.python.org/issue22966 + # py_compile foo.bar.py -> __pycache__/foo.cpython-34.pyc + weird_path = os.path.join(self.directory, 'foo.bar.py') + cache_path = importlib.util.cache_from_source(weird_path) + pyc_path = weird_path + 'c' + head, tail = os.path.split(cache_path) + penultimate_tail = os.path.basename(head) + self.assertEqual( + os.path.join(penultimate_tail, tail), + os.path.join( + '__pycache__', + 'foo.bar.{}.pyc'.format(sys.implementation.cache_tag))) + with open(weird_path, 'w') as file: + file.write('x = 123\n') + py_compile.compile(weird_path) + self.assertTrue(os.path.exists(cache_path)) + self.assertFalse(os.path.exists(pyc_path)) + + def test_optimization_path(self): + # Specifying optimized bytecode should lead to a path reflecting that. + self.assertIn('opt-2', py_compile.compile(self.source_path, optimize=2)) + + def test_invalidation_mode(self): + py_compile.compile( + self.source_path, + invalidation_mode=py_compile.PycInvalidationMode.CHECKED_HASH, + ) + with open(self.cache_path, 'rb') as fp: + flags = importlib._bootstrap_external._classify_pyc( + fp.read(), 'test', {}) + self.assertEqual(flags, 0b11) + py_compile.compile( + self.source_path, + invalidation_mode=py_compile.PycInvalidationMode.UNCHECKED_HASH, + ) + with open(self.cache_path, 'rb') as fp: + flags = importlib._bootstrap_external._classify_pyc( + fp.read(), 'test', {}) + self.assertEqual(flags, 0b1) + + def test_quiet(self): + bad_coding = os.path.join(os.path.dirname(__file__), + 'tokenizedata', + 'bad_coding2.py') + with support.captured_stderr() as stderr: + self.assertIsNone(py_compile.compile(bad_coding, doraise=False, quiet=2)) + self.assertIsNone(py_compile.compile(bad_coding, doraise=True, quiet=2)) + self.assertEqual(stderr.getvalue(), '') + with self.assertRaises(py_compile.PyCompileError): + py_compile.compile(bad_coding, doraise=True, quiet=1) + + def test_utf7_decoded_cr_compiles(self): + with open(self.source_path, 'wb') as file: + file.write(b"#coding=U7+AA0''\n") + + pyc_path = py_compile.compile(self.source_path, self.pyc_path, doraise=True) + self.assertEqual(pyc_path, self.pyc_path) + self.assertTrue(os.path.exists(self.pyc_path)) + + +class PyCompileTestsWithSourceEpoch(PyCompileTestsBase, + unittest.TestCase, + metaclass=SourceDateEpochTestMeta, + source_date_epoch=True): + pass + + +class PyCompileTestsWithoutSourceEpoch(PyCompileTestsBase, + unittest.TestCase, + metaclass=SourceDateEpochTestMeta, + source_date_epoch=False): + pass + + +class PyCompileCLITestCase(unittest.TestCase): + + def setUp(self): + self.directory = tempfile.mkdtemp() + self.source_path = os.path.join(self.directory, '_test.py') + self.cache_path = importlib.util.cache_from_source(self.source_path, + optimization='' if __debug__ else 1) + with open(self.source_path, 'w') as file: + file.write('x = 123\n') + + def tearDown(self): + os_helper.rmtree(self.directory) + + @support.requires_subprocess() + def pycompilecmd(self, *args, **kwargs): + # assert_python_* helpers don't return proc object. We'll just use + # subprocess.run() instead of spawn_python() and its friends to test + # stdin support of the CLI. + opts = '-m' if __debug__ else '-Om' + if args and args[0] == '-' and 'input' in kwargs: + return subprocess.run([sys.executable, opts, 'py_compile', '-'], + input=kwargs['input'].encode(), + capture_output=True) + return script_helper.assert_python_ok(opts, 'py_compile', *args, **kwargs) + + def pycompilecmd_failure(self, *args): + return script_helper.assert_python_failure('-m', 'py_compile', *args) + + def test_stdin(self): + self.assertFalse(os.path.exists(self.cache_path)) + result = self.pycompilecmd('-', input=self.source_path) + self.assertEqual(result.returncode, 0) + self.assertEqual(result.stdout, b'') + self.assertEqual(result.stderr, b'') + self.assertTrue(os.path.exists(self.cache_path)) + + def test_with_files(self): + rc, stdout, stderr = self.pycompilecmd(self.source_path, self.source_path) + self.assertEqual(rc, 0) + self.assertEqual(stdout, b'') + self.assertEqual(stderr, b'') + self.assertTrue(os.path.exists(self.cache_path)) + + def test_bad_syntax(self): + bad_syntax = os.path.join(os.path.dirname(__file__), + 'tokenizedata', + 'badsyntax_3131.py') + rc, stdout, stderr = self.pycompilecmd_failure(bad_syntax) + self.assertEqual(rc, 1) + self.assertEqual(stdout, b'') + self.assertIn(b'SyntaxError', stderr) + + def test_bad_syntax_with_quiet(self): + bad_syntax = os.path.join(os.path.dirname(__file__), + 'tokenizedata', + 'badsyntax_3131.py') + rc, stdout, stderr = self.pycompilecmd_failure('-q', bad_syntax) + self.assertEqual(rc, 1) + self.assertEqual(stdout, b'') + self.assertEqual(stderr, b'') + + def test_file_not_exists(self): + should_not_exists = os.path.join(os.path.dirname(__file__), 'should_not_exists.py') + rc, stdout, stderr = self.pycompilecmd_failure(self.source_path, should_not_exists) + self.assertEqual(rc, 1) + self.assertEqual(stdout, b'') + self.assertIn(b'no such file or directory', stderr.lower()) + + def test_file_not_exists_with_quiet(self): + should_not_exists = os.path.join(os.path.dirname(__file__), 'should_not_exists.py') + rc, stdout, stderr = self.pycompilecmd_failure('-q', self.source_path, should_not_exists) + self.assertEqual(rc, 1) + self.assertEqual(stdout, b'') + self.assertEqual(stderr, b'') + + +if __name__ == "__main__": + unittest.main() diff --git a/stdlib/zipapp.py b/stdlib/zipapp.py new file mode 100644 index 000000000..7a4ef96ea --- /dev/null +++ b/stdlib/zipapp.py @@ -0,0 +1,231 @@ +import contextlib +import os +import pathlib +import shutil +import stat +import sys +import zipfile + +__all__ = ['ZipAppError', 'create_archive', 'get_interpreter'] + + +# The __main__.py used if the users specifies "-m module:fn". +# Note that this will always be written as UTF-8 (module and +# function names can be non-ASCII in Python 3). +# We add a coding cookie even though UTF-8 is the default in Python 3 +# because the resulting archive may be intended to be run under Python 2. +MAIN_TEMPLATE = """\ +# -*- coding: utf-8 -*- +import {module} +{module}.{fn}() +""" + + +# The Windows launcher defaults to UTF-8 when parsing shebang lines if the +# file has no BOM. So use UTF-8 on Windows. +# On Unix, use the filesystem encoding. +if sys.platform.startswith('win'): + shebang_encoding = 'utf-8' +else: + shebang_encoding = sys.getfilesystemencoding() + + +class ZipAppError(ValueError): + pass + + +@contextlib.contextmanager +def _maybe_open(archive, mode): + if isinstance(archive, (str, os.PathLike)): + with open(archive, mode) as f: + yield f + else: + yield archive + + +def _write_file_prefix(f, interpreter): + """Write a shebang line.""" + if interpreter: + shebang = b'#!' + interpreter.encode(shebang_encoding) + b'\n' + f.write(shebang) + + +def _copy_archive(archive, new_archive, interpreter=None): + """Copy an application archive, modifying the shebang line.""" + with _maybe_open(archive, 'rb') as src: + # Skip the shebang line from the source. + # Read 2 bytes of the source and check if they are #!. + first_2 = src.read(2) + if first_2 == b'#!': + # Discard the initial 2 bytes and the rest of the shebang line. + first_2 = b'' + src.readline() + + with _maybe_open(new_archive, 'wb') as dst: + _write_file_prefix(dst, interpreter) + # If there was no shebang, "first_2" contains the first 2 bytes + # of the source file, so write them before copying the rest + # of the file. + dst.write(first_2) + shutil.copyfileobj(src, dst) + + if interpreter and isinstance(new_archive, str): + os.chmod(new_archive, os.stat(new_archive).st_mode | stat.S_IEXEC) + + +def create_archive(source, target=None, interpreter=None, main=None, + filter=None, compressed=False): + """Create an application archive from SOURCE. + + The SOURCE can be the name of a directory, or a filename or a file-like + object referring to an existing archive. + + The content of SOURCE is packed into an application archive in TARGET, + which can be a filename or a file-like object. If SOURCE is a directory, + TARGET can be omitted and will default to the name of SOURCE with .pyz + appended. + + The created application archive will have a shebang line specifying + that it should run with INTERPRETER (there will be no shebang line if + INTERPRETER is None), and a __main__.py which runs MAIN (if MAIN is + not specified, an existing __main__.py will be used). It is an error + to specify MAIN for anything other than a directory source with no + __main__.py, and it is an error to omit MAIN if the directory has no + __main__.py. + """ + # Are we copying an existing archive? + source_is_file = False + if hasattr(source, 'read') and hasattr(source, 'readline'): + source_is_file = True + else: + source = pathlib.Path(source) + if source.is_file(): + source_is_file = True + + if source_is_file: + _copy_archive(source, target, interpreter) + return + + # We are creating a new archive from a directory. + if not source.exists(): + raise ZipAppError("Source does not exist") + has_main = (source / '__main__.py').is_file() + if main and has_main: + raise ZipAppError( + "Cannot specify entry point if the source has __main__.py") + if not (main or has_main): + raise ZipAppError("Archive has no entry point") + + main_py = None + if main: + # Check that main has the right format. + mod, sep, fn = main.partition(':') + mod_ok = all(part.isidentifier() for part in mod.split('.')) + fn_ok = all(part.isidentifier() for part in fn.split('.')) + if not (sep == ':' and mod_ok and fn_ok): + raise ZipAppError("Invalid entry point: " + main) + main_py = MAIN_TEMPLATE.format(module=mod, fn=fn) + + if target is None: + target = source.with_suffix('.pyz') + elif not hasattr(target, 'write'): + target = pathlib.Path(target) + + # Create the list of files to add to the archive now, in case + # the target is being created in the source directory - we + # don't want the target being added to itself + files_to_add = {} + for path in sorted(source.rglob('*')): + relative_path = path.relative_to(source) + if filter is None or filter(relative_path): + files_to_add[path] = relative_path + + # The target cannot be in the list of files to add. If it were, we'd + # end up overwriting the source file and writing the archive into + # itself, which is an error. We therefore check for that case and + # provide a helpful message for the user. + + # Note that we only do a simple path equality check. This won't + # catch every case, but it will catch the common case where the + # source is the CWD and the target is a file in the CWD. More + # thorough checks don't provide enough value to justify the extra + # cost. + + # If target is a file-like object, it will simply fail to compare + # equal to any of the entries in files_to_add, so there's no need + # to add a special check for that. + if target in files_to_add: + raise ZipAppError( + f"The target archive {target} overwrites one of the source files.") + + with _maybe_open(target, 'wb') as fd: + _write_file_prefix(fd, interpreter) + compression = (zipfile.ZIP_DEFLATED if compressed else + zipfile.ZIP_STORED) + with zipfile.ZipFile(fd, 'w', compression=compression) as z: + for path, relative_path in files_to_add.items(): + z.write(path, relative_path.as_posix()) + if main_py: + z.writestr('__main__.py', main_py.encode('utf-8')) + + if interpreter and not hasattr(target, 'write'): + target.chmod(target.stat().st_mode | stat.S_IEXEC) + + +def get_interpreter(archive): + with _maybe_open(archive, 'rb') as f: + if f.read(2) == b'#!': + return f.readline().strip().decode(shebang_encoding) + + +def main(args=None): + """Run the zipapp command line interface. + + The ARGS parameter lets you specify the argument list directly. + Omitting ARGS (or setting it to None) works as for argparse, using + sys.argv[1:] as the argument list. + """ + import argparse + + parser = argparse.ArgumentParser(color=True) + parser.add_argument('--output', '-o', default=None, + help="The name of the output archive. " + "Required if SOURCE is an archive.") + parser.add_argument('--python', '-p', default=None, + help="The name of the Python interpreter to use " + "(default: no shebang line).") + parser.add_argument('--main', '-m', default=None, + help="The main function of the application " + "(default: use an existing __main__.py).") + parser.add_argument('--compress', '-c', action='store_true', + help="Compress files with the deflate method. " + "Files are stored uncompressed by default.") + parser.add_argument('--info', default=False, action='store_true', + help="Display the interpreter from the archive.") + parser.add_argument('source', + help="Source directory (or existing archive).") + + args = parser.parse_args(args) + + # Handle `python -m zipapp archive.pyz --info`. + if args.info: + if not os.path.isfile(args.source): + raise SystemExit("Can only get info for an archive file") + interpreter = get_interpreter(args.source) + print("Interpreter: {}".format(interpreter or "")) + sys.exit(0) + + if os.path.isfile(args.source): + if args.output is None or (os.path.exists(args.output) and + os.path.samefile(args.source, args.output)): + raise SystemExit("In-place editing of archives is not supported") + if args.main: + raise SystemExit("Cannot change the main function when copying") + + create_archive(args.source, args.output, + interpreter=args.python, main=args.main, + compressed=args.compress) + + +if __name__ == '__main__': + main() diff --git a/stdlib/zipimport.py b/stdlib/zipimport.py new file mode 100644 index 000000000..3455bbc9f --- /dev/null +++ b/stdlib/zipimport.py @@ -0,0 +1,825 @@ +"""zipimport provides support for importing Python modules from Zip archives. + +This module exports two objects: +- zipimporter: a class; its constructor takes a path to a Zip archive. +- ZipImportError: exception raised by zipimporter objects. It's a + subclass of ImportError, so it can be caught as ImportError, too. + +It is usually not needed to use the zipimport module explicitly; it is +used by the builtin import mechanism for sys.path items that are paths +to Zip archives. +""" + +# gopy resolves imports Go-side and never freezes the bootstrap modules, +# so the frozen _frozen_importlib / _frozen_importlib_external names are +# unavailable. The vendored importlib._bootstrap and +# importlib._bootstrap_external are the same source, so import them +# directly. +from importlib import _bootstrap_external +from importlib._bootstrap_external import _unpack_uint16, _unpack_uint32, _unpack_uint64 +from importlib import _bootstrap # for _verbose_message +import _imp # for check_hash_based_pycs +import _io # for open +import marshal # for loads +import sys # for modules +import time # for mktime + +__all__ = ['ZipImportError', 'zipimporter'] + + +path_sep = _bootstrap_external.path_sep +alt_path_sep = _bootstrap_external.path_separators[1:] + + +class ZipImportError(ImportError): + pass + +# _read_directory() cache +_zip_directory_cache = {} + +_module_type = type(sys) + +END_CENTRAL_DIR_SIZE = 22 +END_CENTRAL_DIR_SIZE_64 = 56 +END_CENTRAL_DIR_LOCATOR_SIZE_64 = 20 +STRING_END_ARCHIVE = b'PK\x05\x06' # standard EOCD signature +STRING_END_LOCATOR_64 = b'PK\x06\x07' # Zip64 EOCD Locator signature +STRING_END_ZIP_64 = b'PK\x06\x06' # Zip64 EOCD signature +MAX_COMMENT_LEN = (1 << 16) - 1 +MAX_UINT32 = 0xffffffff +ZIP64_EXTRA_TAG = 0x1 + +class zipimporter(_bootstrap_external._LoaderBasics): + """zipimporter(archivepath) -> zipimporter object + + Create a new zipimporter instance. 'archivepath' must be a path to + a zipfile, or to a specific path inside a zipfile. For example, it can be + '/tmp/myimport.zip', or '/tmp/myimport.zip/mydirectory', if mydirectory is a + valid directory inside the archive. + + 'ZipImportError is raised if 'archivepath' doesn't point to a valid Zip + archive. + + The 'archive' attribute of zipimporter objects contains the name of the + zipfile targeted. + """ + + # Split the "subdirectory" from the Zip archive path, lookup a matching + # entry in sys.path_importer_cache, fetch the file directory from there + # if found, or else read it from the archive. + def __init__(self, path): + if not isinstance(path, str): + raise TypeError(f"expected str, not {type(path)!r}") + if not path: + raise ZipImportError('archive path is empty', path=path) + if alt_path_sep: + path = path.replace(alt_path_sep, path_sep) + + prefix = [] + while True: + try: + st = _bootstrap_external._path_stat(path) + except (OSError, ValueError): + # On Windows a ValueError is raised for too long paths. + # Back up one path element. + dirname, basename = _bootstrap_external._path_split(path) + if dirname == path: + raise ZipImportError('not a Zip file', path=path) + path = dirname + prefix.append(basename) + else: + # it exists + if (st.st_mode & 0o170000) != 0o100000: # stat.S_ISREG + # it's a not file + raise ZipImportError('not a Zip file', path=path) + break + + if path not in _zip_directory_cache: + _zip_directory_cache[path] = _read_directory(path) + self.archive = path + # a prefix directory following the ZIP file path. + self.prefix = _bootstrap_external._path_join(*prefix[::-1]) + if self.prefix: + self.prefix += path_sep + + + def find_spec(self, fullname, target=None): + """Create a ModuleSpec for the specified module. + + Returns None if the module cannot be found. + """ + module_info = _get_module_info(self, fullname) + if module_info is not None: + return _bootstrap.spec_from_loader(fullname, self, is_package=module_info) + else: + # Not a module or regular package. See if this is a directory, and + # therefore possibly a portion of a namespace package. + + # We're only interested in the last path component of fullname + # earlier components are recorded in self.prefix. + modpath = _get_module_path(self, fullname) + if _is_dir(self, modpath): + # This is possibly a portion of a namespace + # package. Return the string representing its path, + # without a trailing separator. + path = f'{self.archive}{path_sep}{modpath}' + spec = _bootstrap.ModuleSpec(name=fullname, loader=None, + is_package=True) + spec.submodule_search_locations.append(path) + return spec + else: + return None + + def get_code(self, fullname): + """get_code(fullname) -> code object. + + Return the code object for the specified module. Raise ZipImportError + if the module couldn't be imported. + """ + code, ispackage, modpath = _get_module_code(self, fullname) + return code + + + def get_data(self, pathname): + """get_data(pathname) -> string with file data. + + Return the data associated with 'pathname'. Raise OSError if + the file wasn't found. + """ + if alt_path_sep: + pathname = pathname.replace(alt_path_sep, path_sep) + + key = pathname + if pathname.startswith(self.archive + path_sep): + key = pathname[len(self.archive + path_sep):] + + try: + toc_entry = self._get_files()[key] + except KeyError: + raise OSError(0, '', key) + if toc_entry is None: + return b'' + return _get_data(self.archive, toc_entry) + + + # Return a string matching __file__ for the named module + def get_filename(self, fullname): + """get_filename(fullname) -> filename string. + + Return the filename for the specified module or raise ZipImportError + if it couldn't be imported. + """ + # Deciding the filename requires working out where the code + # would come from if the module was actually loaded + code, ispackage, modpath = _get_module_code(self, fullname) + return modpath + + + def get_source(self, fullname): + """get_source(fullname) -> source string. + + Return the source code for the specified module. Raise ZipImportError + if the module couldn't be found, return None if the archive does + contain the module, but has no source for it. + """ + mi = _get_module_info(self, fullname) + if mi is None: + raise ZipImportError(f"can't find module {fullname!r}", name=fullname) + + path = _get_module_path(self, fullname) + if mi: + fullpath = _bootstrap_external._path_join(path, '__init__.py') + else: + fullpath = f'{path}.py' + + try: + toc_entry = self._get_files()[fullpath] + except KeyError: + # we have the module, but no source + return None + return _get_data(self.archive, toc_entry).decode() + + + # Return a bool signifying whether the module is a package or not. + def is_package(self, fullname): + """is_package(fullname) -> bool. + + Return True if the module specified by fullname is a package. + Raise ZipImportError if the module couldn't be found. + """ + mi = _get_module_info(self, fullname) + if mi is None: + raise ZipImportError(f"can't find module {fullname!r}", name=fullname) + return mi + + + # Load and return the module named by 'fullname'. + def load_module(self, fullname): + """load_module(fullname) -> module. + + Load the module specified by 'fullname'. 'fullname' must be the + fully qualified (dotted) module name. It returns the imported + module, or raises ZipImportError if it could not be imported. + + Deprecated since Python 3.10. Use exec_module() instead. + """ + import warnings + warnings._deprecated("zipimport.zipimporter.load_module", + f"{warnings._DEPRECATED_MSG}; " + "use zipimport.zipimporter.exec_module() instead", + remove=(3, 15)) + code, ispackage, modpath = _get_module_code(self, fullname) + mod = sys.modules.get(fullname) + if mod is None or not isinstance(mod, _module_type): + mod = _module_type(fullname) + sys.modules[fullname] = mod + mod.__loader__ = self + + try: + if ispackage: + # add __path__ to the module *before* the code gets + # executed + path = _get_module_path(self, fullname) + fullpath = _bootstrap_external._path_join(self.archive, path) + mod.__path__ = [fullpath] + + if not hasattr(mod, '__builtins__'): + mod.__builtins__ = __builtins__ + _bootstrap_external._fix_up_module(mod.__dict__, fullname, modpath) + exec(code, mod.__dict__) + except: + del sys.modules[fullname] + raise + + try: + mod = sys.modules[fullname] + except KeyError: + raise ImportError(f'Loaded module {fullname!r} not found in sys.modules') + _bootstrap._verbose_message('import {} # loaded from Zip {}', fullname, modpath) + return mod + + + def get_resource_reader(self, fullname): + """Return the ResourceReader for a module in a zip file.""" + from importlib.readers import ZipReader + + return ZipReader(self, fullname) + + + def _get_files(self): + """Return the files within the archive path.""" + try: + files = _zip_directory_cache[self.archive] + except KeyError: + try: + files = _zip_directory_cache[self.archive] = _read_directory(self.archive) + except ZipImportError: + files = {} + + return files + + + def invalidate_caches(self): + """Invalidates the cache of file data of the archive path.""" + _zip_directory_cache.pop(self.archive, None) + + + def __repr__(self): + return f'' + + +# _zip_searchorder defines how we search for a module in the Zip +# archive: we first search for a package __init__, then for +# non-package .pyc, and .py entries. The .pyc entries +# are swapped by initzipimport() if we run in optimized mode. Also, +# '/' is replaced by path_sep there. +_zip_searchorder = ( + (path_sep + '__init__.pyc', True, True), + (path_sep + '__init__.py', False, True), + ('.pyc', True, False), + ('.py', False, False), +) + +# Given a module name, return the potential file path in the +# archive (without extension). +def _get_module_path(self, fullname): + return self.prefix + fullname.rpartition('.')[2] + +# Does this path represent a directory? +def _is_dir(self, path): + # See if this is a "directory". If so, it's eligible to be part + # of a namespace package. We test by seeing if the name, with an + # appended path separator, exists. + dirpath = path + path_sep + # If dirpath is present in self._get_files(), we have a directory. + return dirpath in self._get_files() + +# Return some information about a module. +def _get_module_info(self, fullname): + path = _get_module_path(self, fullname) + for suffix, isbytecode, ispackage in _zip_searchorder: + fullpath = path + suffix + if fullpath in self._get_files(): + return ispackage + return None + + +# implementation + +# _read_directory(archive) -> files dict (new reference) +# +# Given a path to a Zip archive, build a dict, mapping file names +# (local to the archive, using SEP as a separator) to toc entries. +# +# A toc_entry is a tuple: +# +# (__file__, # value to use for __file__, available for all files, +# # encoded to the filesystem encoding +# compress, # compression kind; 0 for uncompressed +# data_size, # size of compressed data on disk +# file_size, # size of decompressed data +# file_offset, # offset of file header from start of archive +# time, # mod time of file (in dos format) +# date, # mod data of file (in dos format) +# crc, # crc checksum of the data +# ) +# +# Directories can be recognized by the trailing path_sep in the name, +# data_size and file_offset are 0. +def _read_directory(archive): + try: + fp = _io.open_code(archive) + except OSError: + raise ZipImportError(f"can't open Zip file: {archive!r}", path=archive) + + with fp: + # GH-87235: On macOS all file descriptors for /dev/fd/N share the same + # file offset, reset the file offset after scanning the zipfile directory + # to not cause problems when some runs 'python3 /dev/fd/9 9= 0 and pos64+END_CENTRAL_DIR_SIZE_64+END_CENTRAL_DIR_LOCATOR_SIZE_64==pos): + # Zip64 at "correct" offset from standard EOCD + buffer = data[pos64:pos64 + END_CENTRAL_DIR_SIZE_64] + if len(buffer) != END_CENTRAL_DIR_SIZE_64: + raise ZipImportError( + f"corrupt Zip64 file: Expected {END_CENTRAL_DIR_SIZE_64} byte " + f"zip64 central directory, but read {len(buffer)} bytes.", + path=archive) + header_position = file_size - len(data) + pos64 + + central_directory_size = _unpack_uint64(buffer[40:48]) + central_directory_position = _unpack_uint64(buffer[48:56]) + num_entries = _unpack_uint64(buffer[24:32]) + elif pos >= 0: + buffer = data[pos:pos+END_CENTRAL_DIR_SIZE] + if len(buffer) != END_CENTRAL_DIR_SIZE: + raise ZipImportError(f"corrupt Zip file: {archive!r}", + path=archive) + + header_position = file_size - len(data) + pos + + # Buffer now contains a valid EOCD, and header_position gives the + # starting position of it. + central_directory_size = _unpack_uint32(buffer[12:16]) + central_directory_position = _unpack_uint32(buffer[16:20]) + num_entries = _unpack_uint16(buffer[8:10]) + + # N.b. if someday you want to prefer the standard (non-zip64) EOCD, + # you need to adjust position by 76 for arc to be 0. + else: + raise ZipImportError(f'not a Zip file: {archive!r}', + path=archive) + + # Buffer now contains a valid EOCD, and header_position gives the + # starting position of it. + # XXX: These are cursory checks but are not as exact or strict as they + # could be. Checking the arc-adjusted value is probably good too. + if header_position < central_directory_size: + raise ZipImportError(f'bad central directory size: {archive!r}', path=archive) + if header_position < central_directory_position: + raise ZipImportError(f'bad central directory offset: {archive!r}', path=archive) + header_position -= central_directory_size + # On just-a-zipfile these values are the same and arc_offset is zero; if + # the file has some bytes prepended, `arc_offset` is the number of such + # bytes. This is used for pex as well as self-extracting .exe. + arc_offset = header_position - central_directory_position + if arc_offset < 0: + raise ZipImportError(f'bad central directory size or offset: {archive!r}', path=archive) + + files = {} + # Start of Central Directory + count = 0 + try: + fp.seek(header_position) + except OSError: + raise ZipImportError(f"can't read Zip file: {archive!r}", path=archive) + while True: + buffer = fp.read(46) + if len(buffer) < 4: + raise EOFError('EOF read where not expected') + # Start of file header + if buffer[:4] != b'PK\x01\x02': + if count != num_entries: + raise ZipImportError( + f"mismatched num_entries: {count} should be {num_entries} in {archive!r}", + path=archive, + ) + break # Bad: Central Dir File Header + if len(buffer) != 46: + raise EOFError('EOF read where not expected') + flags = _unpack_uint16(buffer[8:10]) + compress = _unpack_uint16(buffer[10:12]) + time = _unpack_uint16(buffer[12:14]) + date = _unpack_uint16(buffer[14:16]) + crc = _unpack_uint32(buffer[16:20]) + data_size = _unpack_uint32(buffer[20:24]) + file_size = _unpack_uint32(buffer[24:28]) + name_size = _unpack_uint16(buffer[28:30]) + extra_size = _unpack_uint16(buffer[30:32]) + comment_size = _unpack_uint16(buffer[32:34]) + file_offset = _unpack_uint32(buffer[42:46]) + header_size = name_size + extra_size + comment_size + + try: + name = fp.read(name_size) + except OSError: + raise ZipImportError(f"can't read Zip file: {archive!r}", path=archive) + if len(name) != name_size: + raise ZipImportError(f"can't read Zip file: {archive!r}", path=archive) + # On Windows, calling fseek to skip over the fields we don't use is + # slower than reading the data because fseek flushes stdio's + # internal buffers. See issue #8745. + try: + extra_data_len = header_size - name_size + extra_data = memoryview(fp.read(extra_data_len)) + + if len(extra_data) != extra_data_len: + raise ZipImportError(f"can't read Zip file: {archive!r}", path=archive) + except OSError: + raise ZipImportError(f"can't read Zip file: {archive!r}", path=archive) + + if flags & 0x800: + # UTF-8 file names extension + name = name.decode() + else: + # Historical ZIP filename encoding + try: + name = name.decode('ascii') + except UnicodeDecodeError: + name = name.decode('latin1').translate(cp437_table) + + name = name.replace('/', path_sep) + path = _bootstrap_external._path_join(archive, name) + + # Ordering matches unpacking below. + if ( + file_size == MAX_UINT32 or + data_size == MAX_UINT32 or + file_offset == MAX_UINT32 + ): + # need to decode extra_data looking for a zip64 extra (which might not + # be present) + while extra_data: + if len(extra_data) < 4: + raise ZipImportError(f"can't read header extra: {archive!r}", path=archive) + tag = _unpack_uint16(extra_data[:2]) + size = _unpack_uint16(extra_data[2:4]) + if len(extra_data) < 4 + size: + raise ZipImportError(f"can't read header extra: {archive!r}", path=archive) + if tag == ZIP64_EXTRA_TAG: + if (len(extra_data) - 4) % 8 != 0: + raise ZipImportError(f"can't read header extra: {archive!r}", path=archive) + num_extra_values = (len(extra_data) - 4) // 8 + if num_extra_values > 3: + raise ZipImportError(f"can't read header extra: {archive!r}", path=archive) + import struct + values = list(struct.unpack_from(f"<{min(num_extra_values, 3)}Q", + extra_data, offset=4)) + + # N.b. Here be dragons: the ordering of these is different than + # the header fields, and it's really easy to get it wrong since + # naturally-occurring zips that use all 3 are >4GB + if file_size == MAX_UINT32: + file_size = values.pop(0) + if data_size == MAX_UINT32: + data_size = values.pop(0) + if file_offset == MAX_UINT32: + file_offset = values.pop(0) + + break + + # For a typical zip, this bytes-slicing only happens 2-3 times, on + # small data like timestamps and filesizes. + extra_data = extra_data[4+size:] + else: + _bootstrap._verbose_message( + "zipimport: suspected zip64 but no zip64 extra for {!r}", + path, + ) + # XXX These two statements seem swapped because `central_directory_position` + # is a position within the actual file, but `file_offset` (when compared) is + # as encoded in the entry, not adjusted for this file. + # N.b. this must be after we've potentially read the zip64 extra which can + # change `file_offset`. + if file_offset > central_directory_position: + raise ZipImportError(f'bad local header offset: {archive!r}', path=archive) + file_offset += arc_offset + + t = (path, compress, data_size, file_size, file_offset, time, date, crc) + files[name] = t + count += 1 + finally: + fp.seek(start_offset) + _bootstrap._verbose_message('zipimport: found {} names in {!r}', count, archive) + + # Add implicit directories. + count = 0 + for name in list(files): + while True: + i = name.rstrip(path_sep).rfind(path_sep) + if i < 0: + break + name = name[:i + 1] + if name in files: + break + files[name] = None + count += 1 + if count: + _bootstrap._verbose_message('zipimport: added {} implicit directories in {!r}', + count, archive) + return files + +# During bootstrap, we may need to load the encodings +# package from a ZIP file. But the cp437 encoding is implemented +# in Python in the encodings package. +# +# Break out of this dependency by using the translation table for +# the cp437 encoding. +cp437_table = ( + # ASCII part, 8 rows x 16 chars + '\x00\x01\x02\x03\x04\x05\x06\x07\x08\t\n\x0b\x0c\r\x0e\x0f' + '\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f' + ' !"#$%&\'()*+,-./' + '0123456789:;<=>?' + '@ABCDEFGHIJKLMNO' + 'PQRSTUVWXYZ[\\]^_' + '`abcdefghijklmno' + 'pqrstuvwxyz{|}~\x7f' + # non-ASCII part, 16 rows x 8 chars + '\xc7\xfc\xe9\xe2\xe4\xe0\xe5\xe7' + '\xea\xeb\xe8\xef\xee\xec\xc4\xc5' + '\xc9\xe6\xc6\xf4\xf6\xf2\xfb\xf9' + '\xff\xd6\xdc\xa2\xa3\xa5\u20a7\u0192' + '\xe1\xed\xf3\xfa\xf1\xd1\xaa\xba' + '\xbf\u2310\xac\xbd\xbc\xa1\xab\xbb' + '\u2591\u2592\u2593\u2502\u2524\u2561\u2562\u2556' + '\u2555\u2563\u2551\u2557\u255d\u255c\u255b\u2510' + '\u2514\u2534\u252c\u251c\u2500\u253c\u255e\u255f' + '\u255a\u2554\u2569\u2566\u2560\u2550\u256c\u2567' + '\u2568\u2564\u2565\u2559\u2558\u2552\u2553\u256b' + '\u256a\u2518\u250c\u2588\u2584\u258c\u2590\u2580' + '\u03b1\xdf\u0393\u03c0\u03a3\u03c3\xb5\u03c4' + '\u03a6\u0398\u03a9\u03b4\u221e\u03c6\u03b5\u2229' + '\u2261\xb1\u2265\u2264\u2320\u2321\xf7\u2248' + '\xb0\u2219\xb7\u221a\u207f\xb2\u25a0\xa0' +) + +_importing_zlib = False + +# Return the zlib.decompress function object, or NULL if zlib couldn't +# be imported. The function is cached when found, so subsequent calls +# don't import zlib again. +def _get_decompress_func(): + global _importing_zlib + if _importing_zlib: + # Someone has a zlib.py[co] in their Zip file + # let's avoid a stack overflow. + _bootstrap._verbose_message('zipimport: zlib UNAVAILABLE') + raise ZipImportError("can't decompress data; zlib not available") + + _importing_zlib = True + try: + from zlib import decompress + except Exception: + _bootstrap._verbose_message('zipimport: zlib UNAVAILABLE') + raise ZipImportError("can't decompress data; zlib not available") + finally: + _importing_zlib = False + + _bootstrap._verbose_message('zipimport: zlib available') + return decompress + +# Given a path to a Zip file and a toc_entry, return the (uncompressed) data. +def _get_data(archive, toc_entry): + datapath, compress, data_size, file_size, file_offset, time, date, crc = toc_entry + if data_size < 0: + raise ZipImportError('negative data size') + + with _io.open_code(archive) as fp: + # Check to make sure the local file header is correct + try: + fp.seek(file_offset) + except OSError: + raise ZipImportError(f"can't read Zip file: {archive!r}", path=archive) + buffer = fp.read(30) + if len(buffer) != 30: + raise EOFError('EOF read where not expected') + + if buffer[:4] != b'PK\x03\x04': + # Bad: Local File Header + raise ZipImportError(f'bad local file header: {archive!r}', path=archive) + + name_size = _unpack_uint16(buffer[26:28]) + extra_size = _unpack_uint16(buffer[28:30]) + header_size = 30 + name_size + extra_size + file_offset += header_size # Start of file data + try: + fp.seek(file_offset) + except OSError: + raise ZipImportError(f"can't read Zip file: {archive!r}", path=archive) + raw_data = fp.read(data_size) + if len(raw_data) != data_size: + raise OSError("zipimport: can't read data") + + if compress == 0: + # data is not compressed + return raw_data + + # Decompress with zlib + try: + decompress = _get_decompress_func() + except Exception: + raise ZipImportError("can't decompress data; zlib not available") + return decompress(raw_data, -15) + + +# Lenient date/time comparison function. The precision of the mtime +# in the archive is lower than the mtime stored in a .pyc: we +# must allow a difference of at most one second. +def _eq_mtime(t1, t2): + # dostime only stores even seconds, so be lenient + return abs(t1 - t2) <= 1 + + +# Given the contents of a .py[co] file, unmarshal the data +# and return the code object. Raises ImportError it the magic word doesn't +# match, or if the recorded .py[co] metadata does not match the source. +def _unmarshal_code(self, pathname, fullpath, fullname, data): + exc_details = { + 'name': fullname, + 'path': fullpath, + } + + flags = _bootstrap_external._classify_pyc(data, fullname, exc_details) + + hash_based = flags & 0b1 != 0 + if hash_based: + check_source = flags & 0b10 != 0 + if (_imp.check_hash_based_pycs != 'never' and + (check_source or _imp.check_hash_based_pycs == 'always')): + source_bytes = _get_pyc_source(self, fullpath) + if source_bytes is not None: + source_hash = _imp.source_hash( + _imp.pyc_magic_number_token, + source_bytes, + ) + + _bootstrap_external._validate_hash_pyc( + data, source_hash, fullname, exc_details) + else: + source_mtime, source_size = \ + _get_mtime_and_size_of_source(self, fullpath) + + if source_mtime: + # We don't use _bootstrap_external._validate_timestamp_pyc + # to allow for a more lenient timestamp check. + if (not _eq_mtime(_unpack_uint32(data[8:12]), source_mtime) or + _unpack_uint32(data[12:16]) != source_size): + _bootstrap._verbose_message( + f'bytecode is stale for {fullname!r}') + return None + + code = marshal.loads(data[16:]) + if not isinstance(code, _code_type): + raise TypeError(f'compiled module {pathname!r} is not a code object') + return code + +_code_type = type(_unmarshal_code.__code__) + + +# Replace any occurrences of '\r\n?' in the input string with '\n'. +# This converts DOS and Mac line endings to Unix line endings. +def _normalize_line_endings(source): + source = source.replace(b'\r\n', b'\n') + source = source.replace(b'\r', b'\n') + return source + +# Given a string buffer containing Python source code, compile it +# and return a code object. +def _compile_source(pathname, source): + source = _normalize_line_endings(source) + return compile(source, pathname, 'exec', dont_inherit=True) + +# Convert the date/time values found in the Zip archive to a value +# that's compatible with the time stamp stored in .pyc files. +def _parse_dostime(d, t): + return time.mktime(( + (d >> 9) + 1980, # bits 9..15: year + (d >> 5) & 0xF, # bits 5..8: month + d & 0x1F, # bits 0..4: day + t >> 11, # bits 11..15: hours + (t >> 5) & 0x3F, # bits 8..10: minutes + (t & 0x1F) * 2, # bits 0..7: seconds / 2 + -1, -1, -1)) + +# Given a path to a .pyc file in the archive, return the +# modification time of the matching .py file and its size, +# or (0, 0) if no source is available. +def _get_mtime_and_size_of_source(self, path): + try: + # strip 'c' or 'o' from *.py[co] + assert path[-1:] in ('c', 'o') + path = path[:-1] + toc_entry = self._get_files()[path] + # fetch the time stamp of the .py file for comparison + # with an embedded pyc time stamp + time = toc_entry[5] + date = toc_entry[6] + uncompressed_size = toc_entry[3] + return _parse_dostime(date, time), uncompressed_size + except (KeyError, IndexError, TypeError): + return 0, 0 + + +# Given a path to a .pyc file in the archive, return the +# contents of the matching .py file, or None if no source +# is available. +def _get_pyc_source(self, path): + # strip 'c' or 'o' from *.py[co] + assert path[-1:] in ('c', 'o') + path = path[:-1] + + try: + toc_entry = self._get_files()[path] + except KeyError: + return None + else: + return _get_data(self.archive, toc_entry) + + +# Get the code object associated with the module specified by +# 'fullname'. +def _get_module_code(self, fullname): + path = _get_module_path(self, fullname) + import_error = None + for suffix, isbytecode, ispackage in _zip_searchorder: + fullpath = path + suffix + _bootstrap._verbose_message('trying {}{}{}', self.archive, path_sep, fullpath, verbosity=2) + try: + toc_entry = self._get_files()[fullpath] + except KeyError: + pass + else: + modpath = toc_entry[0] + data = _get_data(self.archive, toc_entry) + code = None + if isbytecode: + try: + code = _unmarshal_code(self, modpath, fullpath, fullname, data) + except ImportError as exc: + import_error = exc + else: + code = _compile_source(modpath, data) + if code is None: + # bad magic number or non-matching mtime + # in byte code, try next + continue + modpath = toc_entry[0] + return code, ispackage, modpath + else: + if import_error: + msg = f"module load failed: {import_error}" + raise ZipImportError(msg, name=fullname) from import_error + else: + raise ZipImportError(f"can't find module {fullname!r}", name=fullname) diff --git a/stdlibinit/registry.go b/stdlibinit/registry.go index eeac2ec23..73c67c558 100644 --- a/stdlibinit/registry.go +++ b/stdlibinit/registry.go @@ -174,6 +174,20 @@ import ( // CPython: Modules/_testcapi/vectorcall.c:1 vectorcall fixtures _ "github.com/tamnd/gopy/module/_testcapi" + // Built-in module: _testmultiphase. Registers itself via + // module/_testmultiphase/module.go init(). Reproduces the PEP 489 + // multi-phase init extension's main module so test.test_importlib.util + // imports instead of raising SkipTest. + // CPython: Modules/_testmultiphase.c:447 PyInit__testmultiphase + _ "github.com/tamnd/gopy/module/_testmultiphase" + + // Built-in module: _testsinglephase. Registers itself via + // module/_testsinglephase/module.go init(). Reproduces the legacy + // single-phase init extension and the gh-144601 raise-on-init fixture + // the SubinterpImportTests drive through ExtensionFileLoader. + // CPython: Modules/_testsinglephase.c:533 PyInit__testsinglephase + _ "github.com/tamnd/gopy/module/_testsinglephase" + // Built-in module: _json. Registers itself via // module/_json/module.go init(). Accelerates json.py with // scanstring and encode_basestring helpers. @@ -311,6 +325,13 @@ import ( // CPython: Modules/_winapi.c:3023 _winapi_exec _ "github.com/tamnd/gopy/module/_winapi" + // Built-in module: winreg. Registers itself via module/winreg/ + // module.go init(). Exposes the HKEY_*/KEY_*/REG_* constants and the + // error alias that importlib._bootstrap_external imports at module top + // level on Windows. + // CPython: PC/winreg.c:2121 exec_module + _ "github.com/tamnd/gopy/module/winreg" + // Built-in module: _hashlib. Registers itself via // module/_hashlib/module.go init(). Backs Lib/hashlib.py with the // HASH object type and openssl_* convenience constructors using diff --git a/test/cpython/pyclbr_input.py b/test/cpython/pyclbr_input.py new file mode 100644 index 000000000..5535edbfa --- /dev/null +++ b/test/cpython/pyclbr_input.py @@ -0,0 +1,85 @@ +"""Test cases for test_pyclbr.py""" + +def f(): pass + +class Other(object): + @classmethod + def foo(c): pass + + def om(self): pass + +class B (object): + def bm(self): pass + +class C (B): + d = 10 + + # This one is correctly considered by both test_pyclbr.py and pyclbr.py + # as a non-method of C. + foo = Other().foo + + # This causes test_pyclbr.py to fail, but only because the + # introspection-based is_method() code in the test can't + # distinguish between this and a genuine method function like m(). + # + # The pyclbr.py module gets this right as it parses the text. + om = Other.om + f = f + + def m(self): pass + + @staticmethod + def sm(self): pass + + @classmethod + def cm(self): pass + +# Check that mangling is correctly handled + +class a: + def a(self): pass + def _(self): pass + def _a(self): pass + def __(self): pass + def ___(self): pass + def __a(self): pass + +class _: + def a(self): pass + def _(self): pass + def _a(self): pass + def __(self): pass + def ___(self): pass + def __a(self): pass + +class __: + def a(self): pass + def _(self): pass + def _a(self): pass + def __(self): pass + def ___(self): pass + def __a(self): pass + +class ___: + def a(self): pass + def _(self): pass + def _a(self): pass + def __(self): pass + def ___(self): pass + def __a(self): pass + +class _a: + def a(self): pass + def _(self): pass + def _a(self): pass + def __(self): pass + def ___(self): pass + def __a(self): pass + +class __a: + def a(self): pass + def _(self): pass + def _a(self): pass + def __(self): pass + def ___(self): pass + def __a(self): pass diff --git a/vm/builtins_hook.go b/vm/builtins_hook.go index 07aeebc1e..aab4d13f4 100644 --- a/vm/builtins_hook.go +++ b/vm/builtins_hook.go @@ -10,6 +10,7 @@ import ( "errors" "fmt" "os" + "strings" "github.com/tamnd/gopy/builtins" pyerrors "github.com/tamnd/gopy/errors" @@ -609,12 +610,13 @@ func currentEvaluator(code *objects.Code, globals, locals, closure objects.Objec // currentImporter is the hook builtins.__import__ delegates to. It // reuses vmExecutor so the import can run frozen / built-in module -// init code, then forwards to imp.ImportModuleLevel. fromlist is -// accepted for signature parity; the existing IMPORT_NAME arm -// likewise drops it pending fromlist-driven submodule discovery. +// init code. fromlist is the raw object the caller passed, threaded +// untouched into _handle_fromlist so a non-str entry raises the same +// TypeError CPython raises and a custom iterable is iterated the same +// way. // // CPython: Python/import.c:1561 PyImport_ImportModuleLevelObject -func currentImporter(name, pkgname string, level int, _ []string) (objects.Object, error) { +func currentImporter(name, pkgname string, level int, fromlist objects.Object, globals objects.Object) (objects.Object, error) { ts := currentThread() if ts == nil { ts = state.NewThread() @@ -624,13 +626,72 @@ func currentImporter(name, pkgname string, level int, _ []string) (objects.Objec // // CPython: Python/import.c:1759 import_name reads interp->builtins_module. var b objects.Object - if f := frameStackFor(ts).Top(); f != nil { - b = callerBuiltins(f) + topFrame := frameStackFor(ts).Top() + if topFrame != nil { + b = callerBuiltins(topFrame) } + + // Prefer the live Python importlib, matching CPython where the builtin + // __import__ IS PyImport_ImportModuleLevelObject. That C body resolves + // the name, drives _gcd_import / _find_and_load, and performs the + // fromlist / dotted-head selection itself. importModuleLevelObject ports + // it; the manual Go driver below only runs during early bootstrap before + // _bootstrap._install has wired the frozen importer. + // + // CPython: Python/bltinmodule.c:259 builtin___import___impl + // CPython: Python/import.c:3798 PyImport_ImportModuleLevelObject + // + // The globals handed in must be the dict the caller passed to + // __import__, because resolve_name / _calc___package__ derives the + // relative-import anchor from it. A frame-globals fallback would anchor + // a bare __import__('', {'__package__': 'pkg'}, level=2) against the + // caller's own package; a missing globals must reach _calc___package__ + // as None so it raises the same KeyError("'__name__' not in globals"). + // + // CPython: Python/import.c:3576 resolve_name + // CPython: Lib/importlib/_bootstrap.py:1349 _calc___package__ + callerGlobals := globals + if callerGlobals == nil { + callerGlobals = objects.None() + } + if mod, ok, derr := importModuleLevelObject(name, callerGlobals, fromlist, level); ok { + return mod, derr + } + exec := &vmExecutor{ts: ts, builtins: b} mod, err := imp.ImportModuleLevel(exec, name, pkgname, level) if err != nil { + // A missing module must surface as a ModuleNotFoundError whose + // `name` member is the dotted name being imported. runpy reads + // exc.name to decide whether to keep searching, so a generic Go + // error synthesized without the attribute breaks that contract. + // + // CPython: Python/import.c:1759 import_name (ModuleNotFoundError, name=) + if errors.Is(err, imp.ErrModuleNotFound) { + exc := pyerrors.MakeModuleNotFound(name) + return nil, objects.NewRaisedError(exc, err.Error()) + } return nil, err } + // _handle_fromlist / head-of-dotted-name selection, exactly like the + // IMPORT_NAME opcode path in importName: a non-empty fromlist forces + // the named submodules and returns the deepest module, while an empty + // fromlist for a dotted import returns the top-level package. + // + // CPython: Python/bltinmodule.c:259 builtin___import___impl + // CPython: Lib/importlib/_bootstrap.py:1463 _handle_fromlist + e := &evalState{ts: ts, f: topFrame} + if !isEmptyFromlist(fromlist) { + if herr := e.handleFromlist(mod, fromlist, false); herr != nil { + return nil, herr + } + return mod, nil + } + if strings.Contains(name, ".") { + top := name[:strings.IndexByte(name, '.')] + if tm, ok := imp.GetModule(top); ok { + return tm, nil + } + } return mod, nil } diff --git a/vm/builtins_hook_test.go b/vm/builtins_hook_test.go index 1e60b0096..32d66c6cc 100644 --- a/vm/builtins_hook_test.go +++ b/vm/builtins_hook_test.go @@ -82,7 +82,7 @@ func TestCurrentImporterRoutesThroughInittab(t *testing.T) { } defer imp.RemoveModule(name) - got, err := currentImporter(name, "", 0, nil) + got, err := currentImporter(name, "", 0, nil, nil) if err != nil { t.Fatalf("currentImporter: %v", err) } diff --git a/vm/copyreg_hook.go b/vm/copyreg_hook.go index ccabb6d47..21f466764 100644 --- a/vm/copyreg_hook.go +++ b/vm/copyreg_hook.go @@ -20,6 +20,52 @@ func init() { objects.BuiltinLookup = builtinLookup objects.CurrentBuiltinsHook = currentBuiltins objects.ImportModuleHook = importModuleByName + objects.ModuleReprHook = moduleReprViaImportlib +} + +// moduleReprViaImportlib renders a module's repr by calling +// importlib._bootstrap._module_repr, the same delegation CPython's C +// module_repr performs through _PyImport_ImportlibModuleRepr. Any +// failure to reach importlib falls back to the minimal Go rendering so +// repr() never raises. +// +// CPython: Python/import.c:3346 _PyImport_ImportlibModuleRepr +func moduleReprViaImportlib(m objects.Object) (string, error) { + bootstrap, ok := imp.GetModule("importlib._bootstrap") + if !ok || bootstrap == nil { + mod, err := importModuleByName("importlib._bootstrap") + if err != nil || mod == nil { + return objects.ModuleReprFallback(m) + } + var modOk bool + if bootstrap, modOk = mod.(*objects.Module); !modOk { + return objects.ModuleReprFallback(m) + } + } + // importlib._bootstrap caches the _bootstrap_external module in a + // module global, normally wired by _install_external_importers during + // the frozen bootstrap. gopy resolves imports Go-side and never runs + // that hook, so _module_repr_from_spec's isinstance(loader, + // NamespaceLoader) check would always miss. Wire the global the way + // _install_external_importers does so the namespace-package repr (and + // the other consumers of the cached module) behave like CPython. + // + // CPython: Lib/importlib/_bootstrap.py:1565 _install_external_importers + if cur, _ := bootstrap.Dict().GetItem(objects.NewStr("_bootstrap_external")); cur == nil || cur == objects.None() { + ext, err := importModuleByName("importlib._bootstrap_external") + if err == nil && ext != nil { + _ = bootstrap.Dict().SetItem(objects.NewStr("_bootstrap_external"), ext) + } + } + fn, err := bootstrap.Dict().GetItem(objects.NewStr("_module_repr")) + if err != nil || fn == nil { + return objects.ModuleReprFallback(m) + } + res, err := objects.Call(fn, objects.NewTuple([]objects.Object{m}), nil) + if err != nil { + return objects.ModuleReprFallback(m) + } + return objects.Str(res) } // importModuleByName imports an absolute module name, returning the @@ -33,6 +79,21 @@ func importModuleByName(name string) (objects.Object, error) { if mod, ok := imp.GetModule(name); ok && mod != nil { return mod, nil } + // PyImport_ImportModule drives the live importlib _gcd_import, which + // recurses parent packages and resolves namespace packages (directories + // with no __init__.py). The Go ImportModule driver below resolves only + // the named module against sys.path and does not import the parents, so + // it misses a deeply dotted namespace submodule. Prefer _gcd_import once + // _frozen_importlib is installed; fall back to the Go driver during early + // bootstrap, before importlib is live. + // + // CPython: Python/import.c:1450 PyImport_ImportModule (_gcd_import) + if frozen, ok := imp.GetModule("_frozen_importlib"); ok && frozen != nil { + gcd, err := objects.GetAttr(frozen, objects.NewStr("_gcd_import")) + if err == nil && gcd != nil { + return objects.Call(gcd, objects.NewTuple([]objects.Object{objects.NewStr(name)}), nil) + } + } ts := currentThread() if ts == nil { ts = state.NewThread() diff --git a/vm/dispatch.go b/vm/dispatch.go index f96604ae3..197b16453 100644 --- a/vm/dispatch.go +++ b/vm/dispatch.go @@ -29,8 +29,6 @@ import ( // - otherwise: the loop sets InstrPtr = next and continues. // // CPython: Python/ceval.c switch over op -// -//nolint:gocognit // mirrors CPython's ceval.c per-opcode dispatch; complexity is the surface, not algorithmic branching func (e *evalState) dispatch(op compile.Opcode, oparg uint32) (next int, err error) { // CPython: Python/ceval_macros.h:63 INSTRUCTION_STATS. Bumps the // per-opcode counter + pair counter before any specializer / fast @@ -38,34 +36,19 @@ func (e *evalState) dispatch(op compile.Opcode, oparg uint32) (next int, err err // INSTRUCTION_STATS(op) just before the TARGET label). e.recordOpcode(op) // Instrumentation routing: the common case (op is not an - // INSTRUMENTED_ variant) bails on a single [256]bool load. Only - // when op is one of the 21 INSTRUMENTED_ opcodes do we route - // through the LINE handler / PEP 669 callback fire / base-rewrite - // sequence. Pre-D1, monitor.IsInstrumented was called for every - // dispatch and burned ~6% of CPU on the tight bench just on the - // non-instrumented path. + // INSTRUMENTED_ variant) bails on a single [256]bool load inside + // applyInstrumentation. Only when op is one of the 21 INSTRUMENTED_ + // opcodes do we route through the LINE handler / PEP 669 callback fire + // / base-rewrite / EXTENDED_ARG-prefix sequence. // // CPython: Python/ceval.c TARGET(INSTRUMENTED_*) labels are // reached directly via the computed-goto table, so the // non-instrumented path costs zero. Mirrored here by the // instrumentedRewrite gate. - if instrumentedRewrite[op] { - if op == compile.INSTRUMENTED_LINE { - newOp, err := e.handleInstrumentedLine() - if err != nil { - return 0, err - } - op = newOp - if !instrumentedRewrite[op] { - goto afterInstrument - } - } - if err := e.fireInstrumented(op, oparg); err != nil { - return 0, err - } - op = instrumentedToBase[op] + op, oparg, err = e.applyInstrumentation(op, oparg) + if err != nil { + return 0, err } -afterInstrument: // Specializer routing: only Quickened code carries inline-cache // counters and specialized variants; non-Quickened code (raw // compile output before specialize.Quicken) skips the entire @@ -147,6 +130,61 @@ afterInstrument: return 0, opcodeNotImplemented(op) } +// applyInstrumentation runs the INSTRUMENTED_ routing for op (LINE handler, +// PEP 669 callback fire, base rewrite) and then resolves any EXTENDED_ARG +// prefix the rewrite exposed, returning the real opcode and accumulated arg the +// generic dispatch body should run. The non-instrumented path is a single +// [256]bool load plus the EXTENDED_ARG fast-out. +// +// CPython: Python/ceval.c TARGET(INSTRUMENTED_*) +func (e *evalState) applyInstrumentation(op compile.Opcode, oparg uint32) (compile.Opcode, uint32, error) { + if instrumentedRewrite[op] { + if op == compile.INSTRUMENTED_LINE { + newOp, err := e.handleInstrumentedLine() + if err != nil { + return 0, 0, err + } + op = newOp + if !instrumentedRewrite[op] { + return e.resolveExtendedArgPrefix(op, oparg) + } + } + if err := e.fireInstrumented(op, oparg); err != nil { + return 0, 0, err + } + op = instrumentedToBase[op] + } + return e.resolveExtendedArgPrefix(op, oparg) +} + +// resolveExtendedArgPrefix consumes an EXTENDED_ARG prefix that surfaced from +// the instrumented-line handler (the line started on a prefixed instruction) +// and returns the trailing real opcode and accumulated arg; for any other +// opcode it is a pass-through. EXTENDED_ARG never reaches here from the +// straight-line fetch path, which consumes the prefix run before dispatch. The +// trailing opcode may itself be instrumented (the jump or call the prefix feeds +// is a monitored site), and the instrumentation block already ran for the +// EXTENDED_ARG slot, so its event is fired and rebased here. This mirrors +// CPython dispatching from TARGET(EXTENDED_ARG) straight into TARGET(INSTRUMENTED_*). +// +// CPython: Python/ceval.c TARGET(EXTENDED_ARG) +func (e *evalState) resolveExtendedArgPrefix(op compile.Opcode, oparg uint32) (compile.Opcode, uint32, error) { + if op != compile.EXTENDED_ARG { + return op, oparg, nil + } + realOp, realArg, ok := e.fetchExtended(e.f.InstrPtr, oparg) + if !ok { + return 0, 0, opcodeNotImplemented(compile.EXTENDED_ARG) + } + if instrumentedRewrite[realOp] { + if err := e.fireInstrumented(realOp, realArg); err != nil { + return 0, 0, err + } + realOp = instrumentedToBase[realOp] + } + return realOp, realArg, nil +} + // opcodeNotImplemented wraps ErrNotImplemented with the offending op. func opcodeNotImplemented(op compile.Opcode) error { return ¬Implemented{op: op} diff --git a/vm/eval.go b/vm/eval.go index 7d58729d5..06a2399d2 100644 --- a/vm/eval.go +++ b/vm/eval.go @@ -17,6 +17,7 @@ import ( "github.com/tamnd/gopy/compile" "github.com/tamnd/gopy/frame" "github.com/tamnd/gopy/gil" + "github.com/tamnd/gopy/monitor" "github.com/tamnd/gopy/objects" "github.com/tamnd/gopy/stackref" "github.com/tamnd/gopy/state" @@ -474,6 +475,24 @@ func (e *evalState) advance() int { return ip + 2 } op := compile.Opcode(code[ip]) + // Under monitoring the live byte at ip may be INSTRUMENTED_LINE (a + // marker left in place while dispatch runs the hidden opcode) or an + // INSTRUMENTED_ variant. Both preserve the base opcode's inline + // cache layout, but the cache table is keyed by base opcode, so a + // jump computed off the raw instrumented byte would count zero cache + // codeunits and land one codeunit short of its target. Resolve to the + // base opcode first. + // + // CPython: the JUMPBY stride is the base arm's compile-time + // INLINE_CACHE_ENTRIES_, independent of the instrumented byte. + if op == compile.INSTRUMENTED_LINE { + if data := monitor.CoMonitoring(e.f.Code); data != nil { + if base := monitor.GetOriginalOpcode(data, ip/2); base != 0 { + op = base + } + } + } + op = monitor.DeInstrument(op) return ip + 2 + 2*compile.CacheCount(op) } diff --git a/vm/eval_dispatch_handwritten.go b/vm/eval_dispatch_handwritten.go index 092e0d5cd..224564cb6 100644 --- a/vm/eval_dispatch_handwritten.go +++ b/vm/eval_dispatch_handwritten.go @@ -217,13 +217,20 @@ func deriveGroupHere(src *pyerrors.Exception, subset []*pyerrors.Exception) *pye return pyerrors.New(src.ExcType, objects.NewTuple([]objects.Object{message, leaves})) } -// CPython: Python/bytecodes.c LOAD_CONST: (-- value) reads from frame->code->co_consts[oparg]. +// LOAD_CONST pushes a NEW (owned) reference to co_consts[oparg]: the +// const stays alive in the code object, so the stack slot must own its +// own strong reference. Otherwise an opcode that decrefs its inputs +// (CALL_KW decrefing the kwnames const, BUILD_* consuming a const, +// etc.) drives the shared const's refcount to zero and frees an object +// the code object still references. Mirrors PyStackRef_FromPyObjectNew. +// +// CPython: Python/bytecodes.c LOAD_CONST (value = PyStackRef_FromPyObjectNew(GETITEM(...))) func (e *evalState) opLOAD_CONST(oparg uint32) (next int, ok bool, err error) { co := e.f.Code if int(oparg) >= len(co.Consts) { return 0, true, fmt.Errorf("vm: LOAD_CONST index %d out of range", oparg) } - e.pushObject(e.constAt(int(oparg))) + e.push(stackref.FromObjectNew(e.constAt(int(oparg)))) return e.advance(), true, nil } diff --git a/vm/eval_helpers.go b/vm/eval_helpers.go index d5a3947b7..866844072 100644 --- a/vm/eval_helpers.go +++ b/vm/eval_helpers.go @@ -275,12 +275,24 @@ func (e *evalState) importName(name, fromlist, level objects.Object) objects.Obj mod, ierr := imp.ImportModuleLevel(exec, modname, pkgname, lvl) if ierr != nil { if errors.Is(ierr, imp.ErrModuleNotFound) { - pyerrors.SetString(e.ts, pyerrors.PyExc_ModuleNotFoundError, - fmt.Sprintf("No module named %q", modname)) + pyerrors.SetModuleNotFound(e.ts, modname) } e.pendingErr = ierr return nil } + // A non-empty fromlist drives _handle_fromlist: force-import any + // submodule named in the fromlist that is not already an attribute, + // so a later IMPORT_FROM/import_all_from finds it via plain getattr. + // CPython runs this inside __import__ before returning the module. + // + // CPython: Lib/importlib/_bootstrap.py:1463 _handle_fromlist + if !isEmptyFromlist(fromlist) { + if herr := e.handleFromlist(mod, fromlist, false); herr != nil { + e.pendingErr = herr + return nil + } + } + // When fromlist is empty (`import a.b.c`) return the top-level // package; otherwise return the deepest module so IMPORT_FROM can // extract attributes. diff --git a/vm/eval_import.go b/vm/eval_import.go index b3597f3a2..6d91e4948 100644 --- a/vm/eval_import.go +++ b/vm/eval_import.go @@ -13,6 +13,7 @@ import ( "fmt" "strings" + "github.com/tamnd/gopy/builtins" "github.com/tamnd/gopy/compile" pyerrors "github.com/tamnd/gopy/errors" "github.com/tamnd/gopy/frame" @@ -44,11 +45,11 @@ func callerBuiltins(f *frame.Frame) objects.Object { // the mapping lacks the key, and (nil, false, err) for a real failure. // // CPython: Python/ceval.c:2805 PyMapping_GetOptionalItemString(f_builtins, "__import__") -func optionalImportFunc(builtins objects.Object) (objects.Object, bool, error) { - if builtins == nil { +func optionalImportFunc(builtinsMap objects.Object) (objects.Object, bool, error) { + if builtinsMap == nil { return nil, false, nil } - return objects.MappingGetOptionalItem(builtins, objects.NewStr("__import__")) + return objects.MappingGetOptionalItem(builtinsMap, objects.NewStr("__import__")) } // isDefaultImport reports whether fn is the built-in __import__ the @@ -58,15 +59,7 @@ func optionalImportFunc(builtins objects.Object) (objects.Object, bool, error) { // // CPython: Python/ceval.c:2820 import_name (fast-path identity check) func isDefaultImport(fn objects.Object) bool { - bm, ok := imp.GetModule("builtins") - if !ok || bm == nil { - return false - } - def, err := bm.Dict().GetItem(objects.NewStr("__import__")) - if err != nil || def == nil { - return false - } - return fn == def + return builtins.DefaultImport != nil && fn == builtins.DefaultImport } // frameHasExplicitBuiltins reports whether the frame's globals carry an @@ -196,10 +189,47 @@ func (e *evalState) tryImport(op compile.Opcode, oparg uint32) (next int, ok boo } level := importLevel(levelObj) + // A relative import requires __package__ to be a string. CPython's + // _sanity_check raises TypeError before any resolution when level>0 + // and __package__ is set to a non-string (e.g. an object()). + // + // CPython: Lib/importlib/_bootstrap.py:1390 _sanity_check + if level > 0 { + if terr := checkPackageType(e.f.Globals); terr != nil { + return 0, true, terr + } + } pkgname := globalName(e.f.Globals) + // Route through the live Python importlib the way CPython's + // import_name calls the builtin __import__ (= + // _frozen_importlib.__import__). _bootstrap.__import__ runs + // _find_and_load / _handle_fromlist and returns the head of a dotted + // name for an empty fromlist, so the module pushed here is already + // the one CPython would push. Delegating keeps a single import path + // so a patched loader.exec_module fires and the traceback carries the + // frames. Only when the bootstrap is not yet + // installed (early startup) does the Go driver below run. + // + // CPython: Python/ceval.c:2898 import_name + dlocals := e.f.Locals + if dlocals == nil { + dlocals = e.f.Globals + } + if mod, ok, derr := delegateImport(modname, orNone(e.f.Globals), orNone(dlocals), orNone(fromlistObj), level); ok { + if derr != nil { + // CPython: Python/import.c:3959 import_name trims the importlib + // machinery frames off the traceback before the calling frame is + // recorded on the way out. + removeImportlibFrames(e.ts) + return 0, true, derr + } + e.pushObject(mod) + return e.advance(), true, nil + } + exec := &vmExecutor{ts: e.ts, builtins: builtinsNS} - mod, ierr := imp.ImportModuleLevel(exec, modname, pkgname, level) + mod, ierr := imp.ImportModuleLevelObject(exec, modname, pkgname, level) if ierr != nil { // Promote Go-level ErrModuleNotFound into a typed // ModuleNotFoundError so `try: ... except ImportError:` @@ -208,13 +238,39 @@ func (e *evalState) tryImport(op compile.Opcode, oparg uint32) (next int, ok boo // the import-machinery contract. // // CPython: Python/import.c:1759 import_name (sets ImportError) - if errors.Is(ierr, imp.ErrModuleNotFound) { - pyerrors.SetString(e.ts, pyerrors.PyExc_ModuleNotFoundError, - fmt.Sprintf("No module named %q", modname)) + // + // A failure raised while executing the module body (the imported + // module itself ran a failing `import`, etc.) already left the + // real exception on the thread state with its own traceback, so + // re-synthesizing here would discard it and the inner frame it + // points at. Only synthesize for a genuine lookup miss. + // + // CPython: Python/import.c:1759 import_name only sets the error + // when PyImport_ImportModuleLevelObject returns NULL without one. + if errors.Is(ierr, imp.ErrBlockedNone) { + // sys.modules[name] is None: raise the halted ModuleNotFoundError + // with name set, so `except ImportError as exc: exc.name` works. + // A blocked sentinel is always an absolute name (level 0), so + // modname is already the resolved key in sys.modules. + pyerrors.SetModuleNotFoundHalted(e.ts, modname) + } else if errors.Is(ierr, imp.ErrModuleNotFound) && !errors.Is(ierr, imp.ErrModuleExecFailed) { + pyerrors.SetModuleNotFound(e.ts, modname) } return 0, true, ierr } + // A non-empty fromlist drives _handle_fromlist: force-import any + // submodule named in the fromlist that the package does not already + // expose, so the IMPORT_FROM / import_all_from that follows resolves + // it via a plain attribute read. + // + // CPython: Lib/importlib/_bootstrap.py:1409 _handle_fromlist + if !isEmptyFromlist(fromlistObj) { + if herr := e.handleFromlist(mod, fromlistObj, false); herr != nil { + return 0, true, herr + } + } + // CPython semantics: when fromlist is None/empty (plain `import // a.b.c`), push the TOP-LEVEL package so the name `a` is bound. // When fromlist is non-empty (`from a.b import c`), push the @@ -222,7 +278,7 @@ func (e *evalState) tryImport(op compile.Opcode, oparg uint32) (next int, ok boo // // CPython: Python/bytecodes.c IMPORT_NAME comment "return the // head of the dotted name" when fromlist is empty. - result := objects.Object(mod) + result := mod if isEmptyFromlist(fromlistObj) && strings.Contains(modname, ".") { top := strings.SplitN(modname, ".", 2)[0] if tm, ok := imp.GetModule(top); ok { @@ -279,18 +335,25 @@ func (e *evalState) importStar(from objects.Object) error { var all []objects.Object skipUnder := false - // Check for __all__. - allAttr, aerr := objects.GetAttr(from, objects.NewStr("__all__")) - if aerr == nil && allAttr != nil { + // Prefer __all__; fall back to __dict__ keys (skipping leading "_"). + // Neither read force-imports anything: _handle_fromlist already + // pulled in the fromlist's submodules during IMPORT_NAME. + allAttr, allFound, aerr := getOptionalAttr(e, from, "__all__") + if aerr != nil { + return aerr + } + if allFound { items, ierr := iterToSlice(allAttr) if ierr != nil { return ierr } all = items } else { - // Fall back to __dict__ keys, skipping names starting with "_". - dictAttr, derr := objects.GetAttr(from, objects.NewStr("__dict__")) - if derr != nil || dictAttr == nil { + dictAttr, dictFound, derr := getOptionalAttr(e, from, "__dict__") + if derr != nil { + return derr + } + if !dictFound { return fmt.Errorf("ImportError: from-import-* object has no __dict__ and no __all__") } items, ierr := iterToSlice(dictAttr) @@ -302,20 +365,21 @@ func (e *evalState) importStar(from objects.Object) error { } for _, nameObj := range all { - name, nerr := objects.Str(nameObj) - if nerr != nil { - return fmt.Errorf("TypeError: 'import *' name must be str") + name, ok := nameObj.(*objects.Unicode) + if !ok { + return importStarNonStrError(from, nameObj, skipUnder) } - if skipUnder && name != "" && name[0] == '_' { + s := name.Value() + if skipUnder && s != "" && s[0] == '_' { continue } - val, verr := objects.GetAttr(from, objects.NewStr(name)) + val, verr := objects.GetAttr(from, objects.NewStr(s)) if verr != nil { return verr } - serr := dst.SetItem(objects.NewStr(name), val) - // CPython: Python/ceval.c import_star_from — always releases the - // GetAttr new-ref after SetItem takes its own. + serr := dst.SetItem(objects.NewStr(s), val) + // CPython: Python/intrinsics.c import_all_from releases the GetAttr + // new-ref after SetItem takes its own. objects.Decref(val) if serr != nil { return serr @@ -324,6 +388,28 @@ func (e *evalState) importStar(from objects.Object) error { return nil } +// importStarNonStrError builds the TypeError import_all_from raises for a +// non-string entry in __all__ (or non-string key in __dict__). When the +// module's own __name__ is not a string, the error is about __name__ +// itself. +// +// CPython: Python/intrinsics.c:77 import_all_from (non-str name branch) +func importStarNonStrError(from, name objects.Object, skipUnder bool) error { + modNameObj, err := objects.GetAttr(from, objects.NewStr("__name__")) + if err != nil { + return err + } + mn, ok := modNameObj.(*objects.Unicode) + if !ok { + return fmt.Errorf("TypeError: module __name__ must be a string, not %s", modNameObj.Type().Name) + } + key, container := "Item", "__all__" + if skipUnder { + key, container = "Key", "__dict__" + } + return fmt.Errorf("TypeError: %s in %s.%s must be str, not %s", key, mn.Value(), container, name.Type().Name) +} + // isEmptyFromlist reports whether fromlist is None, the empty tuple, or // the empty list. This mirrors CPython's check in import_name: // "if fromlist is NULL or fromlist is empty tuple, head is returned". @@ -369,6 +455,27 @@ func importLevel(obj objects.Object) int { // module path while __package__ correctly points at the parent. // // CPython: Python/import.c:1665 import_name (read __package__ first) +// checkPackageType returns a TypeError when globals carries a __package__ +// that is set (not None) but is not a string. A relative import with such a +// package is rejected before resolution. +// +// CPython: Lib/importlib/_bootstrap.py:1390 _sanity_check ("__package__ not +// set to a string") +func checkPackageType(globals objects.Object) error { + d, ok := globals.(*objects.Dict) + if !ok { + return nil + } + v, _ := d.GetItem(objects.NewStr("__package__")) + if v == nil || objects.IsNone(v) { + return nil + } + if _, isStr := v.(*objects.Unicode); !isStr { + return fmt.Errorf("TypeError: __package__ not set to a string") + } + return nil +} + func globalName(globals objects.Object) string { if globals == nil { return "" @@ -377,20 +484,45 @@ func globalName(globals objects.Object) string { if !ok { return "" } + // __package__ takes precedence and is returned verbatim, even when it + // is the empty string: an empty package with a relative import is + // exactly the "no known parent package" case resolveAbsName rejects. + // Only a missing or None __package__ falls through to derivation. + // + // CPython: Lib/importlib/_bootstrap.py:1350 _calc___package__ if v, err := d.GetItem(objects.NewStr("__package__")); err == nil && v != nil && !objects.IsNone(v) { - if s, serr := objects.Str(v); serr == nil && s != "" { + if s, serr := objects.Str(v); serr == nil { return s } } + // __spec__.parent is the next anchor when no explicit __package__ is set. + // + // CPython: Lib/importlib/_bootstrap.py:1358 _calc___package__ (spec.parent) + if v, err := d.GetItem(objects.NewStr("__spec__")); err == nil && v != nil && !objects.IsNone(v) { + if parent, perr := objects.GetAttr(v, objects.NewStr("parent")); perr == nil && parent != nil && !objects.IsNone(parent) { + if s, serr := objects.Str(parent); serr == nil { + return s + } + } + } + // Fall back to __name__. A package (one carrying __path__) anchors at + // its own name; a plain module strips its final dotted component. For + // __main__ this yields "" so a relative import raises. + // + // CPython: Lib/importlib/_bootstrap.py:1362 _calc___package__ (rpartition) v, err := d.GetItem(objects.NewStr("__name__")) if err != nil || v == nil { return "" } - if tp := v.Type(); tp.Str != nil { - s, serr := tp.Str(v) - if serr == nil { - return s - } + s, serr := objects.Str(v) + if serr != nil { + return "" + } + if hp, herr := d.GetItem(objects.NewStr("__path__")); herr == nil && hp != nil { + return s + } + if dot := strings.LastIndex(s, "."); dot >= 0 { + return s[:dot] } return "" } @@ -438,11 +570,18 @@ func isAttributeErrorMsg(err error) bool { return strings.HasPrefix(msg, "AttributeError:") } -// evalImportFrom ports _PyEval_ImportFrom. It tries to fetch `name` as -// an attribute of `v`; on miss it consults sys.modules under -// "." using the parent's __name__. As a gopy-specific -// extension (we lack importlib's _handle_fromlist plumbing), it -// force-imports the submodule when sys.modules has not cached it yet. +// errImportFromRaised is a sentinel returned by evalImportFrom after it +// has already installed a typed ImportError on the thread state. The VM +// unwind reads the thread-state exception, so the Go error only needs to +// be non-nil to signal failure. +var errImportFromRaised = errors.New("vm: import-from error raised") + +// evalImportFrom ports _PyEval_ImportFrom. It fetches `name` as an +// attribute of `v`; on miss it falls back to reading "." +// straight out of sys.modules (the circular-import path), and when that +// also misses it raises the "cannot import name X from Y (location)" +// ImportError, reproducing the stdlib-shadowing and circular-import +// message variants. // // CPython: Python/ceval.c:3154 _PyEval_ImportFrom func evalImportFrom(e *evalState, v objects.Object, name string) (objects.Object, error) { @@ -452,33 +591,208 @@ func evalImportFrom(e *evalState, v objects.Object, name string) (objects.Object return x, nil } - // Issue #17636 fallback: read parent.__name__ and look up - // "." in sys.modules. + // Issue #17636: in case this failed because of a circular relative + // import, fall back on reading the module directly from sys.modules. modNameObj, found, err := getOptionalAttr(e, v, "__name__") if err != nil { return nil, err } - if !found { - return nil, fmt.Errorf("vm: ImportError: cannot import name %q from ", name) + // CPython requires PyUnicode_Check (str or subclass); a non-str + // __name__ is treated as missing. + var modNameStr objects.Object + if found && objects.IsSubtype(modNameObj.Type(), objects.StrType()) { + modNameStr = modNameObj + } + if modNameStr != nil { + if s, ok := modNameStr.(*objects.Unicode); ok { + full := s.Value() + "." + name + if cached, ok := imp.GetModule(full); ok { + return cached, nil + } + } + } + + return nil, e.importFromError(v, name, modNameStr) +} + +// importFromError builds and raises the ImportError for a failed +// `from v import name`, porting the error block of _PyEval_ImportFrom. +// +// CPython: Python/ceval.c:3185 _PyEval_ImportFrom (error label) +func (e *evalState) importFromError(v objects.Object, name string, modNameObj objects.Object) error { + nameRepr, _ := objects.Repr(objects.NewStr(name)) + // mod_name_or_unknown is the real __name__ object when present, else a + // fresh "" str. It is the object handed to + // PySet_Contains so an unhashable __name__ raises through. + haveModName := modNameObj != nil + modNameOrUnknownObj := modNameObj + if !haveModName { + modNameOrUnknownObj = objects.NewStr("") + } + modRepr, _ := objects.Repr(modNameOrUnknownObj) + + // modName is the value forwarded as the ImportError `name` member: the + // real module name when __name__ was a string, else unset. + modName := "" + if haveModName { + if s, ok := modNameObj.(*objects.Unicode); ok { + modName = s.Value() + } } - parentName, serr := objects.Str(modNameObj) + + spec, specFound, serr := getOptionalAttr(e, v, "__spec__") if serr != nil { - return nil, fmt.Errorf("vm: ImportError: cannot import name %q from ", name) + return serr + } + if !specFound { + msg := fmt.Sprintf("cannot import name %s from %s (unknown location)", nameRepr, modRepr) + pyerrors.SetImportErrorWithNameFrom(e.ts, msg, modName, "", name) + return errImportFromRaised + } + + origin, originFound, oerr := imp.SpecFileOrigin(spec) + if oerr != nil { + return oerr } - full := parentName + "." + name - if cached, ok := imp.GetModule(full); ok { - return cached, nil + shadowing, sherr := imp.ModuleIsPossiblyShadowing(originFound, origin) + if sherr != nil { + return sherr + } + shadowingStdlib := false + if shadowing { + c, cerr := imp.StdlibModuleNamesContains(modNameOrUnknownObj) + if cerr != nil { + return cerr + } + shadowingStdlib = c } - // gopy extension: no _handle_fromlist runs during IMPORT_NAME, so - // the submodule may never have entered sys.modules. Force-import - // it here. CPython's _handle_fromlist (Lib/importlib/_bootstrap.py) - // performs the same _call_with_frames_removed(import_, ...) per - // fromlist entry. - exec := &vmExecutor{ts: e.ts, builtins: callerBuiltins(e.f)} - sub, ierr := imp.ImportModuleLevel(exec, full, "", 0) - if ierr != nil { - return nil, fmt.Errorf("vm: ImportError: cannot import name %q from %q: %w", name, parentName, ierr) + + // Fall back to __file__ for diagnostics when the spec carries no + // location origin and v is a module. + if !originFound { + if mod, ok := v.(*objects.Module); ok { + if f, ferr := mod.Dict().GetItem(objects.NewStr("__file__")); ferr == nil && f != nil { + if fs, ok := f.(*objects.Unicode); ok { + origin = fs.Value() + originFound = true + } + } + } } - return sub, nil + + var msg string + switch { + case shadowingStdlib: + originRepr, _ := objects.Repr(objects.NewStr(origin)) + msg = fmt.Sprintf("cannot import name %s from %s (consider renaming %s since it has the same name as the standard library module named %s and prevents importing that standard library module)", + nameRepr, modRepr, originRepr, modRepr) + default: + initializing, ierr := imp.SpecIsInitializing(spec) + if ierr != nil { + return ierr + } + switch { + case initializing && shadowing: + originRepr, _ := objects.Repr(objects.NewStr(origin)) + msg = fmt.Sprintf("cannot import name %s from %s (consider renaming %s if it has the same name as a library you intended to import)", + nameRepr, modRepr, originRepr) + case initializing && originFound: + msg = fmt.Sprintf("cannot import name %s from partially initialized module %s (most likely due to a circular import) (%s)", + nameRepr, modRepr, origin) + case initializing: + msg = fmt.Sprintf("cannot import name %s from partially initialized module %s (most likely due to a circular import)", + nameRepr, modRepr) + case originFound: + msg = fmt.Sprintf("cannot import name %s from %s (%s)", nameRepr, modRepr, origin) + default: + msg = fmt.Sprintf("cannot import name %s from %s (unknown location)", nameRepr, modRepr) + } + } + + originArg := "" + if originFound { + originArg = origin + } + pyerrors.SetImportErrorWithNameFrom(e.ts, msg, modName, originArg, name) + return errImportFromRaised +} + +// handleFromlist ports _handle_fromlist: for a package module (one that +// carries __path__), force-import each fromlist entry that is not already +// an attribute so a later attribute read resolves the submodule. A `*` +// entry recurses over module.__all__; a non-str entry raises TypeError. +// +// CPython: Lib/importlib/_bootstrap.py:1409 _handle_fromlist +func (e *evalState) handleFromlist(mod objects.Object, fromlist objects.Object, recursive bool) error { + // _handle_fromlist runs only for packages (hasattr(module, '__path__')). + // __import__ guards the call with the same check, and a non-module + // cached entry never carries __path__, so it no-ops here. + // + // CPython: Lib/importlib/_bootstrap.py:1503 elif hasattr(module, '__path__') + if !recursive { + if _, present, herr := getOptionalAttr(e, mod, "__path__"); herr != nil { + return herr + } else if !present { + return nil + } + } + + items, err := iterToSlice(fromlist) + if err != nil { + return err + } + modName := "" + if nm, present, _ := getOptionalAttr(e, mod, "__name__"); present { + if s, ok := nm.(*objects.Unicode); ok { + modName = s.Value() + } + } + + for _, item := range items { + x, ok := item.(*objects.Unicode) + if !ok { + where := "``from list''" + if recursive { + where = modName + ".__all__" + } + return fmt.Errorf("TypeError: Item in %s must be str, not %s", where, item.Type().Name) + } + entry := x.Value() + switch entry { + case "*": + if !recursive { + if allObj, present, _ := getOptionalAttr(e, mod, "__all__"); present && allObj != nil { + if rerr := e.handleFromlist(mod, allObj, true); rerr != nil { + return rerr + } + } + } + default: + _, present, gerr := getOptionalAttr(e, mod, entry) + if gerr != nil { + return gerr + } + if present { + continue + } + fromName := modName + "." + entry + exec := &vmExecutor{ts: e.ts, builtins: callerBuiltins(e.f)} + if _, ierr := imp.ImportModuleLevel(exec, fromName, "", 0); ierr != nil { + // Backwards-compatibility: ignore a fromlist-triggered import + // of a submodule that simply does not exist, but only when the + // miss is for exactly this submodule. + // + // CPython: Lib/importlib/_bootstrap.py:1433 except ModuleNotFoundError + if errors.Is(ierr, imp.ErrModuleNotFound) { + if _, cached := imp.GetModule(fromName); !cached { + pyerrors.Clear(e.ts) + continue + } + } + return ierr + } + } + } + return nil } diff --git a/vm/eval_import_test.go b/vm/eval_import_test.go index bdfec5b97..abe40cd37 100644 --- a/vm/eval_import_test.go +++ b/vm/eval_import_test.go @@ -217,7 +217,11 @@ func TestImportLevelHelper(t *testing.T) { } } -// TestGlobalNameHelper pins globalName for nil, non-dict, and a dict with __name__. +// TestGlobalNameHelper pins globalName for nil, non-dict, and the +// __name__ anchoring rules. globalName computes the package anchor the +// way _calc___package__ does: a plain module strips its final dotted +// component, while a package (one carrying __path__) anchors at its own +// name. func TestGlobalNameHelper(t *testing.T) { if got := globalName(nil); got != "" { t.Errorf("globalName(nil) = %q, want \"\"", got) @@ -225,9 +229,23 @@ func TestGlobalNameHelper(t *testing.T) { if got := globalName(objects.NewStr("x")); got != "" { t.Errorf("globalName(str) = %q, want \"\"", got) } - d := objects.NewDict() - _ = d.SetItem(objects.NewStr("__name__"), objects.NewStr("mypkg")) - if got := globalName(d); got != "mypkg" { - t.Errorf("globalName(dict) = %q, want \"mypkg\"", got) + // A top-level module rpartitions to the empty package. + mod := objects.NewDict() + _ = mod.SetItem(objects.NewStr("__name__"), objects.NewStr("mypkg")) + if got := globalName(mod); got != "" { + t.Errorf("globalName(module) = %q, want \"\"", got) + } + // A submodule strips its final component. + sub := objects.NewDict() + _ = sub.SetItem(objects.NewStr("__name__"), objects.NewStr("mypkg.sub")) + if got := globalName(sub); got != "mypkg" { + t.Errorf("globalName(submodule) = %q, want \"mypkg\"", got) + } + // A package (carrying __path__) anchors at its own name. + pkg := objects.NewDict() + _ = pkg.SetItem(objects.NewStr("__name__"), objects.NewStr("mypkg")) + _ = pkg.SetItem(objects.NewStr("__path__"), objects.NewList(nil)) + if got := globalName(pkg); got != "mypkg" { + t.Errorf("globalName(package) = %q, want \"mypkg\"", got) } } diff --git a/vm/eval_unwind.go b/vm/eval_unwind.go index ba40d9c85..cb424ef04 100644 --- a/vm/eval_unwind.go +++ b/vm/eval_unwind.go @@ -309,7 +309,7 @@ func buildOSErrorFromGo(err error) *pyerrors.Exception { if errno == 0 { return nil } - return pyerrors.NewOSError(int(errno), strerrorString(errno), filename, filename2) + return pyerrors.NewOSError(winerrorToErrno(int(errno)), strerrorString(errno), filename, filename2) } // strerrorString renders the errno's message the way CPython's @@ -346,26 +346,26 @@ func promoteOSErrorByErrno(typ *objects.Type, err error) *objects.Type { if errors.As(err, &pathErr) { var errno syscall.Errno if errors.As(pathErr.Err, &errno) { - return pyerrors.ErrnoSubclass(int(errno)) + return pyerrors.ErrnoSubclass(winerrorToErrno(int(errno))) } } var linkErr *os.LinkError if errors.As(err, &linkErr) { var errno syscall.Errno if errors.As(linkErr.Err, &errno) { - return pyerrors.ErrnoSubclass(int(errno)) + return pyerrors.ErrnoSubclass(winerrorToErrno(int(errno))) } } var sysErr *os.SyscallError if errors.As(err, &sysErr) { var errno syscall.Errno if errors.As(sysErr.Err, &errno) { - return pyerrors.ErrnoSubclass(int(errno)) + return pyerrors.ErrnoSubclass(winerrorToErrno(int(errno))) } } var errno syscall.Errno if errors.As(err, &errno) { - return pyerrors.ErrnoSubclass(int(errno)) + return pyerrors.ErrnoSubclass(winerrorToErrno(int(errno))) } return typ } diff --git a/vm/import_delegate.go b/vm/import_delegate.go new file mode 100644 index 000000000..45acd5441 --- /dev/null +++ b/vm/import_delegate.go @@ -0,0 +1,127 @@ +package vm + +import ( + pyerrors "github.com/tamnd/gopy/errors" + "github.com/tamnd/gopy/imp" + "github.com/tamnd/gopy/objects" + "github.com/tamnd/gopy/state" + "github.com/tamnd/gopy/traceback" +) + +// delegateImport routes an import through the live Python importlib +// machinery, the same way CPython's import_name looks up __import__ from +// the frame builtins and calls import_func(name, globals, locals, +// fromlist, level). The builtin __import__ resolves to +// _frozen_importlib.__import__ (interp->import_func, wired at bootstrap), +// which runs _find_and_load / _handle_fromlist and registers the result +// in the shared sys.modules. Delegating here keeps a single import path +// so a monkeypatched loader.exec_module fires and the traceback carries +// the frozen importlib frames. +// +// The returned ok is false when _frozen_importlib is not yet installed +// (early bootstrap, before _bootstrap._install has run), so the caller +// falls back to the Go import driver to load the bootstrap itself. +// +// CPython: Python/ceval.c:2898 import_name +// CPython: Lib/importlib/_bootstrap.py:1390 __import__ +func delegateImport(name string, globals, locals, fromlist objects.Object, level int) (objects.Object, bool, error) { + frozen, ok := imp.GetModule("_frozen_importlib") + if !ok { + return nil, false, nil + } + importFunc, err := objects.GetAttr(frozen, objects.NewStr("__import__")) + if err != nil { + return nil, false, nil //nolint:nilerr // missing __import__ means fall back to the Go driver. + } + if globals == nil { + globals = objects.None() + } + if locals == nil { + locals = objects.None() + } + if fromlist == nil { + fromlist = objects.None() + } + args := objects.NewTuple([]objects.Object{ + objects.NewStr(name), + globals, + locals, + fromlist, + objects.NewInt(int64(level)), + }) + mod, callErr := objects.Call(importFunc, args, nil) + if callErr != nil { + return nil, true, callErr + } + return mod, true, nil +} + +// importVerbose reports whether the interpreter runs with -v, which +// suppresses the importlib frame trimming so the full machinery shows. +// +// CPython: Python/import.c:3522 _PyInterpreterState_GetConfig(...)->verbose +func importVerbose() bool { + sysMod, ok := imp.GetModule("sys") + if !ok { + return false + } + flags, err := objects.GetAttr(sysMod, objects.NewStr("flags")) + if err != nil { + return false + } + v, err := objects.GetAttr(flags, objects.NewStr("verbose")) + if err != nil { + return false + } + if i, ok := v.(*objects.Int); ok { + n, _ := i.Int64() + return n != 0 + } + return false +} + +// removeImportlibFrames strips importlib frames from the traceback of the +// exception currently on the thread. If it is an ImportError, every +// importlib chunk is trimmed; otherwise only chunks that end with a call +// to _call_with_frames_removed are trimmed. Matches CPython's behavior of +// hiding the import machinery from user tracebacks. +// +// CPython: Python/import.c:3500 remove_importlib_frames +func removeImportlibFrames(ts *state.Thread) { + exc := pyerrors.Occurred(ts) + if exc == nil || importVerbose() { + return + } + const ( + removeFrames = "_call_with_frames_removed" + bootstrapFile = "" + externalFile = "" + ) + alwaysTrim := exc.ExcType != nil && objects.IsSubtype(exc.ExcType, pyerrors.PyExc_ImportError) + + // A dummy head node lets *outerLink overwrite the chain head uniformly, + // the way CPython threads prev_link/outer_link through PyObject** slots. + var dummy traceback.Traceback + dummy.Next = exc.TB + prevLink := &dummy.Next + var outerLink **traceback.Traceback + inImportlib := false + for tb := exc.TB; tb != nil; { + next := tb.Next + fn := tb.Entry.File + nowInImportlib := fn == bootstrapFile || fn == externalFile + if nowInImportlib && !inImportlib { + outerLink = prevLink + } + inImportlib = nowInImportlib + + if nowInImportlib && (alwaysTrim || tb.Entry.Name == removeFrames) { + *outerLink = next + prevLink = outerLink + } else { + prevLink = &tb.Next + } + tb = next + } + exc.TB = dummy.Next +} diff --git a/vm/import_level.go b/vm/import_level.go new file mode 100644 index 000000000..36c96137b --- /dev/null +++ b/vm/import_level.go @@ -0,0 +1,219 @@ +package vm + +import ( + "fmt" + "strings" + + pyerrors "github.com/tamnd/gopy/errors" + "github.com/tamnd/gopy/imp" + "github.com/tamnd/gopy/objects" +) + +// importModuleLevelObject ports PyImport_ImportModuleLevelObject, the C +// body behind the builtin __import__. CPython describes it as +// "importlib.__import__() & _gcd_import(), ported to C for added +// performance": it resolves the absolute name, drives the live importlib +// _gcd_import / _find_and_load to load it, then performs the fromlist / +// dotted-head selection in C rather than in _bootstrap.__import__. +// +// Routing the builtin through this port (instead of calling +// _frozen_importlib.__import__ wholesale) matters for the dotted-head +// selection: the C code slices the standalone abs_name and re-reads +// sys.modules, raising KeyError("%R not in sys.modules as expected") when +// the entry is missing. The Python mirror instead reads module.__name__, +// which raises AttributeError when a caller has stuffed a non-module +// object into sys.modules (the test_malicious_relative_import regression +// guard, gh-134100). +// +// The returned ok is false when _frozen_importlib is not installed yet +// (early bootstrap), so currentImporter falls back to the Go driver. +// +// CPython: Python/import.c:3798 PyImport_ImportModuleLevelObject +func importModuleLevelObject(name string, globals objects.Object, fromlist objects.Object, level int) (objects.Object, bool, error) { + frozen, ok := imp.GetModule("_frozen_importlib") + if !ok { + return nil, false, nil + } + + // CPython: Python/import.c:3829 resolve_name / abs_name selection. + // _gcd_import runs _sanity_check + _resolve_name itself, so we hand it + // the original (name, package, level); we recompute abs_name here only + // to slice the dotted head below, mirroring the C standalone abs_name. + var packageStr string + var module objects.Object + if level > 0 { + // _bootstrap.__import__: globals_ = globals if globals is not None + // else {}. _calc___package__ runs the __package__/__spec__/__name__ + // fallback (and its DeprecationWarning / ImportWarning / KeyError / + // TypeError) against that dict. + // + // CPython: Lib/importlib/_bootstrap.py:1487 __import__ + g := globals + if g == nil || objects.IsNone(g) { + g = objects.NewDict() + } + calc, err := objects.GetAttr(frozen, objects.NewStr("_calc___package__")) + if err != nil { + return nil, true, err + } + pkgObj, err := objects.Call(calc, objects.NewTuple([]objects.Object{g}), nil) + if err != nil { + return nil, true, err + } + if u, isStr := pkgObj.(*objects.Unicode); isStr { + packageStr = u.Value() + } + gcd, err := objects.GetAttr(frozen, objects.NewStr("_gcd_import")) + if err != nil { + return nil, true, err + } + module, err = objects.Call(gcd, objects.NewTuple([]objects.Object{ + objects.NewStr(name), pkgObj, objects.NewInt(int64(level)), + }), nil) + if err != nil { + return nil, true, err + } + } else { + // CPython: Python/import.c:3835 level == 0 requires a non-empty name. + if name == "" { + return nil, true, fmt.Errorf("ValueError: Empty module name") + } + gcd, err := objects.GetAttr(frozen, objects.NewStr("_gcd_import")) + if err != nil { + return nil, true, err + } + module, err = objects.Call(gcd, objects.NewTuple([]objects.Object{objects.NewStr(name)}), nil) + if err != nil { + return nil, true, err + } + } + + // CPython: Python/import.c:3881 has_from = PyObject_IsTrue(fromlist). + hasFrom := false + if fromlist != nil && !objects.IsNone(fromlist) { + t, err := objects.IsTruthy(fromlist) + if err != nil { + return nil, true, err + } + hasFrom = t + } + if !hasFrom { + return headSelection(name, packageStr, level, module) + } + return fromlistSelection(frozen, module, fromlist) +} + +// headSelection mirrors the !has_from branch of +// PyImport_ImportModuleLevelObject: an absolute dotted import returns the +// top-level package, while a relative dotted import re-reads the +// already-loaded head from sys.modules by slicing the resolved abs_name. +// +// CPython: Python/import.c:3887 (!has_from branch) +func headSelection(name, packageStr string, level int, module objects.Object) (objects.Object, bool, error) { + if level != 0 && name == "" { + // CPython: Python/import.c:3895 (elif !name: final_mod = mod). + return module, true, nil + } + runes := []rune(name) + dot := indexRune(runes, '.') + if dot < 0 { + // CPython: Python/import.c:3897 (no dot, simple exit). + return module, true, nil + } + if level == 0 { + // CPython: Python/import.c:3903 re-import the front absolutely. + front := string(runes[:dot]) + return importModuleLevelObject(front, objects.None(), nil, 0) + } + // CPython: Python/import.c:3912 slice abs_name to its first `dot` + // components and re-read sys.modules. abs_name is the standalone + // resolved name, not module.__name__, so a non-module sys.modules entry + // surfaces as the KeyError below rather than an AttributeError. + absName := resolveImportName(name, packageStr, level) + absRunes := []rune(absName) + cutOff := len(runes) - dot + toReturn := string(absRunes[:len(absRunes)-cutOff]) + mod, err := imp.SysModules().GetItem(objects.NewStr(toReturn)) + if err != nil || mod == nil { + // CPython: Python/import.c:3924 KeyError "%R not in sys.modules + // as expected". + r, rerr := objects.Repr(objects.NewStr(toReturn)) + if rerr != nil { + r = "'" + toReturn + "'" + } + msg := fmt.Sprintf("%s not in sys.modules as expected", r) + exc := pyerrors.New(pyerrors.PyExc_KeyError, objects.NewTuple([]objects.Object{objects.NewStr(msg)})) + return nil, true, objects.NewRaisedError(exc, "KeyError: "+msg) + } + return mod, true, nil +} + +// fromlistSelection mirrors the has_from branch: when the loaded module is +// a package (carries __path__) defer to importlib._handle_fromlist to +// force-import each requested submodule, otherwise return the module +// untouched. +// +// CPython: Python/import.c:3939 (has_from branch) +func fromlistSelection(frozen objects.Object, module objects.Object, fromlist objects.Object) (objects.Object, bool, error) { + hasPath, err := objects.HasAttrString(module, "__path__") + if err != nil { + return nil, true, err + } + if !hasPath { + return module, true, nil + } + // _bootstrap.__import__ passes _gcd_import as the import callable so + // _handle_fromlist loads `pkg.sub` by absolute name. fromlist reaches + // _handle_fromlist untouched: it iterates the object and raises the + // "Item in ``from list'' must be str" TypeError for a non-str entry, + // matching the C body which never pre-validates the elements. + // + // CPython: Lib/importlib/_bootstrap.py:1505 _handle_fromlist(module, + // fromlist, _gcd_import) + gcd, err := objects.GetAttr(frozen, objects.NewStr("_gcd_import")) + if err != nil { + return nil, true, err + } + handle, err := objects.GetAttr(frozen, objects.NewStr("_handle_fromlist")) + if err != nil { + return nil, true, err + } + res, err := objects.Call(handle, objects.NewTuple([]objects.Object{ + module, fromlist, gcd, + }), nil) + if err != nil { + return nil, true, err + } + return res, true, nil +} + +// resolveImportName mirrors _bootstrap._resolve_name: strip the trailing +// (level-1) dotted components from package, then re-attach name. It +// recomputes the abs_name that _gcd_import derived internally so +// headSelection can slice it. +// +// CPython: Lib/importlib/_bootstrap.py _resolve_name +func resolveImportName(name, pkg string, level int) string { + for i := 1; i < level; i++ { + dot := strings.LastIndexByte(pkg, '.') + if dot < 0 { + break + } + pkg = pkg[:dot] + } + if name == "" { + return pkg + } + return pkg + "." + name +} + +// indexRune returns the index of the first occurrence of r in runes, or +// -1. Matches PyUnicode_FindChar(..., direction=1) on code points. +func indexRune(runes []rune, r rune) int { + for i, c := range runes { + if c == r { + return i + } + } + return -1 +} diff --git a/vm/oserrno_other.go b/vm/oserrno_other.go new file mode 100644 index 000000000..850155120 --- /dev/null +++ b/vm/oserrno_other.go @@ -0,0 +1,7 @@ +//go:build !windows + +package vm + +// winerrorToErrno is the identity on non-Windows platforms: Go's +// syscall.Errno already carries the POSIX errno there. +func winerrorToErrno(errno int) int { return errno } diff --git a/vm/oserrno_windows.go b/vm/oserrno_windows.go new file mode 100644 index 000000000..49f755edf --- /dev/null +++ b/vm/oserrno_windows.go @@ -0,0 +1,159 @@ +//go:build windows + +package vm + +// winerrorToErrno maps a Windows system error code to the POSIX errno +// CPython stores in OSError.errno (OSError.winerror keeps the raw code). +// Go's syscall.Errno on Windows carries the raw WinAPI error, so without +// this translation `except FileNotFoundError` / `except FileExistsError` +// would never match (errnomap is keyed on POSIX errno). +// +// CPython: PC/errmap.h winerror_to_errno +func winerrorToErrno(winerror int) int { + // Unwrap FACILITY_WIN32 HRESULT errors. + if winerror&0xFFFF0000 == 0x80070000 { + winerror &= 0x0000FFFF + } + + // Winsock error codes (10000-11999) are errno values. + if winerror >= 10000 && winerror < 12000 { + switch winerror { + case 10004, // WSAEINTR + 10009, // WSAEBADF + 10013, // WSAEACCES + 10014, // WSAEFAULT + 10022, // WSAEINVAL + 10024: // WSAEMFILE + return winerror - 10000 + default: + return winerror + } + } + + switch winerror { + case 2, // ERROR_FILE_NOT_FOUND + 3, // ERROR_PATH_NOT_FOUND + 15, // ERROR_INVALID_DRIVE + 18, // ERROR_NO_MORE_FILES + 53, // ERROR_BAD_NETPATH + 67, // ERROR_BAD_NET_NAME + 161, // ERROR_BAD_PATHNAME + 206: // ERROR_FILENAME_EXCED_RANGE + return errENOENT + case 10: // ERROR_BAD_ENVIRONMENT + return errE2BIG + case 11, // ERROR_BAD_FORMAT + 188, // ERROR_INVALID_STARTING_CODESEG + 189, // ERROR_INVALID_STACKSEG + 190, // ERROR_INVALID_MODULETYPE + 191, // ERROR_INVALID_EXE_SIGNATURE + 192, // ERROR_EXE_MARKED_INVALID + 193, // ERROR_BAD_EXE_FORMAT + 194, // ERROR_ITERATED_DATA_EXCEEDS_64k + 195, // ERROR_INVALID_MINALLOCSIZE + 196, // ERROR_DYNLINK_FROM_INVALID_RING + 197, // ERROR_IOPL_NOT_ENABLED + 198, // ERROR_INVALID_SEGDPL + 199, // ERROR_AUTODATASEG_EXCEEDS_64k + 200, // ERROR_RING2SEG_MUST_BE_MOVABLE + 201, // ERROR_RELOC_CHAIN_XEEDS_SEGLIM + 202: // ERROR_INFLOOP_IN_RELOC_CHAIN + return errENOEXEC + case 6, // ERROR_INVALID_HANDLE + 114, // ERROR_INVALID_TARGET_HANDLE + 130: // ERROR_DIRECT_ACCESS_HANDLE + return errEBADF + case 128, // ERROR_WAIT_NO_CHILDREN + 129: // ERROR_CHILD_NOT_COMPLETE + return errECHILD + case 89, // ERROR_NO_PROC_SLOTS + 164, // ERROR_MAX_THRDS_REACHED + 215: // ERROR_NESTING_NOT_ALLOWED + return errEAGAIN + case 7, // ERROR_ARENA_TRASHED + 8, // ERROR_NOT_ENOUGH_MEMORY + 9, // ERROR_INVALID_BLOCK + 1816: // ERROR_NOT_ENOUGH_QUOTA + return errENOMEM + case 5, // ERROR_ACCESS_DENIED + 16, // ERROR_CURRENT_DIRECTORY + 19, // ERROR_WRITE_PROTECT + 20, // ERROR_BAD_UNIT + 21, // ERROR_NOT_READY + 22, // ERROR_BAD_COMMAND + 23, // ERROR_CRC + 24, // ERROR_BAD_LENGTH + 25, // ERROR_SEEK + 26, // ERROR_NOT_DOS_DISK + 27, // ERROR_SECTOR_NOT_FOUND + 28, // ERROR_OUT_OF_PAPER + 29, // ERROR_WRITE_FAULT + 30, // ERROR_READ_FAULT + 31, // ERROR_GEN_FAILURE + 32, // ERROR_SHARING_VIOLATION + 33, // ERROR_LOCK_VIOLATION + 34, // ERROR_WRONG_DISK + 36, // ERROR_SHARING_BUFFER_EXCEEDED + 65, // ERROR_NETWORK_ACCESS_DENIED + 82, // ERROR_CANNOT_MAKE + 83, // ERROR_FAIL_I24 + 108, // ERROR_DRIVE_LOCKED + 132, // ERROR_SEEK_ON_DEVICE + 158, // ERROR_NOT_LOCKED + 167, // ERROR_LOCK_FAILED + 35: // 35 (undefined) + return errEACCES + case 80, // ERROR_FILE_EXISTS + 183: // ERROR_ALREADY_EXISTS + return errEEXIST + case 17: // ERROR_NOT_SAME_DEVICE + return errEXDEV + case 267: // ERROR_DIRECTORY (bpo-12802) + return errENOTDIR + case 4: // ERROR_TOO_MANY_OPEN_FILES + return errEMFILE + case 112: // ERROR_DISK_FULL + return errENOSPC + case 109, // ERROR_BROKEN_PIPE + 232: // ERROR_NO_DATA (bpo-13063) + return errEPIPE + case 145: // ERROR_DIR_NOT_EMPTY + return errENOTEMPTY + case 1113: // ERROR_NO_UNICODE_TRANSLATION + return errEILSEQ + case 258: // WAIT_TIMEOUT + return errETIMEDOUT + case 1, // ERROR_INVALID_FUNCTION + 12, // ERROR_INVALID_ACCESS + 13, // ERROR_INVALID_DATA + 87, // ERROR_INVALID_PARAMETER + 131: // ERROR_NEGATIVE_SEEK + return errEINVAL + default: + return errEINVAL + } +} + +// POSIX errno values CPython maps Windows errors onto. Hard-coded to the +// standard POSIX numbers (not Go's Windows-side syscall constants, which +// are fabricated) so they line up with module/errno and errnomap. +const ( + errENOENT = 2 + errE2BIG = 7 + errENOEXEC = 8 + errEBADF = 9 + errECHILD = 10 + errEAGAIN = 11 + errENOMEM = 12 + errEACCES = 13 + errEEXIST = 17 + errEXDEV = 18 + errENOTDIR = 20 + errEINVAL = 22 + errEMFILE = 24 + errENOSPC = 28 + errEPIPE = 32 + errENOTEMPTY = 41 + errEILSEQ = 42 + errETIMEDOUT = 138 +) diff --git a/website/docs/specs/1700/1731_modules_imports_panel.md b/website/docs/specs/1700/1731_modules_imports_panel.md new file mode 100644 index 000000000..307ce5637 --- /dev/null +++ b/website/docs/specs/1700/1731_modules_imports_panel.md @@ -0,0 +1,123 @@ +--- +id: "1731" +slug: 1731 +title: "1731: Modules / imports test panel — CPython 3.14 parity port" +sidebar_label: "1731 Modules imports panel" +description: "Audit and port of the Modules/imports test panel from spec 1700 (12 files plus test_import/, test_importlib/, test_module/) against CPython 3.14 under the spec 1726 zero-skip bridge." +--- + +## Status + +Active. Branch `feat/v0.13.5-spec-modules-imports`. + +Run under the [[1726]] bridge so every `@cpython_only` test executes on gopy +instead of being skipped. "No skip" means parity with CPython: if CPython +skips a test on this platform, gopy skips it too; everything else must pass. + +## Goal + +Drive every test in the spec 1700 Modules / imports panel to CPython 3.14 +parity via faithful CPython ports. No shims, no partial slices: when a gate +lands on a subsystem, port every function in that subsystem from CPython as the +single source of truth. + +Sources of truth: `$HOME/cpython-314/`. Every cited function is read from that +tree before porting. + +## Panel + +The panel is the 12 flat files plus the three directory suites. CPython 3.14.5 +runs all of the non-interpreter files green. + +| Test | CPython 3.14.5 | gopy (audit 2026-06-16) | +| --- | --- | --- | +| `test_module/` (dir) | OK | **OK (39 tests)** | +| `test_import/` (dir) | OK | **OK (118 tests, 4 skipped)** — 3 platform skips + `test_frozen_compat` (needs a frozen `_frozen_importlib`, P7) | +| `test_importlib/` (dir) | OK | 1346 tests; 2 failures (module-lock GC lifetime, threaded circular import) + 1 error (incomplete multi-phase C-ext), 63 skipped | +| `test_modulefinder` | OK | **OK (17 tests)** | +| `test_pkg` | OK | **OK (8 tests)** | +| `test_pkgutil` | OK | **OK (21 tests)** | +| `test_pyclbr` | OK | **OK (6 tests)** | +| `test_pkgimport` | (covered by `test_import/`) | no flat file | +| `test_runpy` | OK | **OK (40 tests)** | +| `test_frozen` | OK | **OK (3/3)** — frozen test modules + override + `sys._stdlib_dir` shipped | +| `test_zipimport` | OK | **OK (91 tests, 4 skipped)** | +| `test_zipimport_support` | OK | needs vendored `test.test_doctest` (doctest) | +| `test_zipapp` | OK | **OK (35 tests)** | +| `test__interpchannels` | PEP 554 | deferred (see below) | +| `test__interpreters` | PEP 554 | deferred (`_interpreters.run_string` missing) | + +## Plan + +Phased, smallest-blast-radius first. Re-audit after each phase against +CPython 3.14.5 (counts and `-v` lists). + +- **P1 — `os.altsep` and the module namespace surface.** `os.altsep` is `None` + on POSIX; its absence blocks `test_pkgutil`, `test_zipimport`, + `test_zipimport_support`. The `test_pkg` `dir()` gap is a module-object + attribute surface issue (`__cached__`, `__doc__`, `__loader__`, `__spec__`). + Port these first. +- **P2 — pure-Python stdlib modules.** `modulefinder`, `pyclbr`, `zipapp` are + pure-Python `Lib/*.py`; vendor them and whatever import-machinery they lean + on. Confirm they run under gopy's import system. +- **P3 — frozen modules.** `test_frozen` needs `__hello__` and the frozen + module table. Port the frozen-module surface from CPython. +- **P4 — `test_runpy` residual.** Single ERROR in + `test_run_package_init_exceptions`; port the package-init exception path. +- **P5 — directory suites.** Re-audit `test_import/`, `test_importlib/`, + `test_module/` against CPython and close residuals. +- **P7 — live importlib finders (architectural).** gopy dispatches imports + Go-side: `sys.meta_path` is empty where CPython has + `[BuiltinImporter, FrozenImporter, PathFinder]`, and `importlib.machinery` + is a stub that does not re-export `PathFinder` / `FrozenImporter` / + `BuiltinImporter`. The Python finder classes in `_bootstrap.py` exist but + are not wired into `sys.meta_path`, and `_imp` is missing the functions the + full bootstrap drives (`extension_suffixes`, `find_frozen`, + `get_frozen_object`, `is_frozen_package`, `create_builtin`, `exec_builtin`, + `create_dynamic`, `exec_dynamic`, `_fix_co_filename`). This is the root of + the `test_import/`, `test_importlib/`, `test_modulefinder`, and `test_runpy` + residuals. Closing it means making the Python finders the real dispatch path + (populate `sys.meta_path`, port the `_imp` C functions, vendor the full + `_bootstrap_external.py` with `PathFinder`) instead of the Go-side shim. + This is a subsystem port on the scale of its own spec. + +## Notable fixes + +- `func_getattro` now increfs `__dict__` attribute reads + (`Objects/funcobject.c` Py_XINCREF). A list stored on a function (mock keeps + its `patchings` list this way) was emptied by `list_dealloc` after the first + read, so a shared decorator silently stopped patching across test classes. + This fixed `test_zipimport.test_checked_hash_based_change_pyc` in the + cross-class run. +- `_testcapi.config_get` / `config_getint` / `config_names` ported over a + `PyConfig_Get` spec table (`Python/initconfig.c`), fixing the two + `testTraceback` errors. +- **P6 — interpreters.** `test__interpreters` / `test__interpchannels` are + PEP 554 subinterpreters. Match CPython's behaviour: if CPython skips on this + build, gopy skips; otherwise port the `_interpreters` surface the tests reach. + +## Checklist + +- [x] P1: `os.altsep` +- [x] P1: module-object `dir()` surface (`__cached__`, `__doc__`, `__loader__`, `__spec__`) for `test_pkg` — `test_pkg` green (8 tests) +- [x] P2: vendor `modulefinder` — `test_modulefinder` green (17 tests) +- [x] P2: vendor `pyclbr` — `test_pyclbr` green (6 tests) +- [x] P2: vendor `zipapp` — `test_zipapp` green (35 tests) +- [x] P2: `test_pkgutil` green (21 tests) +- [x] `test_zipimport` green (91 tests): `func_getattro` incref + `config_get` port +- [x] `test_module/` green (39 tests) +- [x] P3: frozen `__hello__`/`__phello__` + aliases, frozen override, `sys._stdlib_dir` — `test_frozen` green (3/3) +- [x] P4: `test_runpy` green (40 tests) — package-init exception path closed +- [x] P5: `test_import/` runs all 118 tests without the threaded crash — `os.fstat`/`os.isatty` no longer borrow the fd in a finalizer-bearing `os.File` +- [x] P5: `test_import/` green — ported the single-phase extension cache (`_testsinglephase*` variants, `m_size` kinds, the extensions cache + `m_copy` reload), the gh-123950 circular import (`_testsinglephase_circular` via the `_gcd_import` import hook), and per-subinterpreter `sys.modules` isolation so the PEP 489 compat gate fires on re-import. 4 skips remain: 3 platform-specific, plus `test_frozen_compat`, which needs a frozen `_frozen_importlib` (P7) +- [x] P5: `test_module_with_large_stack` no longer flakes with `bad file descriptor` — `os.NewFile`/`os.OpenFile` arm the close finalizer on the unexported inner `*os.file`, so `SetFinalizer(f, nil)` on the outer handle was a no-op. A leaked borrowed-fd wrapper (subprocess pipes) would close a reused descriptor mid-write. `objects.ClearOSFileFinalizer` reaches the inner pointer; the `io` and `_posixsubprocess` borrows route through it +- [x] P5: re-audit `test_module/` — green (39 tests) +- [ ] P5: `test_importlib/` residuals — 1346 tests run, down to 2 failures + 1 error. The error + (`test_incomplete_multi_phase_init_module`) is the `_testmultiphase` C-ext path (P7). The two + failures are GC/threading edge cases: `test_all_locks` expects `_bootstrap._module_locks` to drain + to zero after `gc_collect()` (gopy's collector leaves the no-longer-referenced `_ModuleLock` + weakref entries live across the full import sweep, though the isolated `test_lock_lifetime` passes), + and `test_circular_imports` is a threaded-import determinism case. Both overlap the broader + weakref/GC work (see the weakref/gc panel tasks). +- [ ] P7: live importlib finders on `sys.meta_path` + `_imp` C functions (architectural) +- [ ] P6: `test__interpreters` / `test__interpchannels` parity with CPython skip/run