From 86cb889a27b43a80b2a9c12d7b3afe5796cc2fdb Mon Sep 17 00:00:00 2001 From: David Allsopp Date: Thu, 21 Nov 2024 14:57:40 +0000 Subject: [PATCH 1/4] Split imports and link_obj --- reloc.ml | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/reloc.ml b/reloc.ml index 9fcf05a..03927c6 100644 --- a/reloc.ml +++ b/reloc.ml @@ -907,7 +907,6 @@ let build_dll link_exe output_file files exts extra_args = let imports obj = let n = needed imported defined resolve_alias resolve_alternate obj in - imported_from_implib := StrSet.union !imported_from_implib (StrSet.inter n from_imports); let undefs = StrSet.diff n defined in StrSet.filter (fun s -> @@ -915,7 +914,7 @@ let build_dll link_exe output_file files exts extra_args = | Some _ -> false | None -> s <> "environ" (* special hack for Cygwin64 *) ) - undefs + undefs, StrSet.inter n from_imports in (* Second step: transitive closure, starting from given objects *) @@ -963,7 +962,7 @@ let build_dll link_exe output_file files exts extra_args = in let dll_exports = ref StrSet.empty in - let rec link_obj fn obj = + let record_exports obj = List.iter (fun sym -> if Symbol.is_defin sym && exportable sym.sym_name @@ -972,7 +971,9 @@ let build_dll link_exe output_file files exts extra_args = obj.symbols; dll_exports := List.fold_left (fun accu x -> StrSet.add x accu) - !dll_exports (collect_dllexports obj); + !dll_exports (collect_dllexports obj) + in + let rec link_obj fn obj = StrSet.iter (fun s -> if StrSet.mem s !exported then () @@ -990,15 +991,20 @@ let build_dll link_exe output_file files exts extra_args = and link_libobj (libname,objname,obj) = if Hashtbl.mem libobjects (libname,objname) then () - else (Hashtbl.replace libobjects (libname,objname) (obj,imports obj); + else (let imports, from_imports = imports obj in + Hashtbl.replace libobjects (libname,objname) (obj,imports); + imported_from_implib := StrSet.union !imported_from_implib from_imports; + record_exports obj; link_obj (Printf.sprintf "%s(%s)" libname objname) obj) in let redirect = Hashtbl.create 16 in List.iter (fun (fn, obj) -> + let imps, from_imports = imports obj in + imported_from_implib := StrSet.union !imported_from_implib from_imports; + record_exports obj; link_obj fn obj; - let imps = imports obj in if StrSet.is_empty imps then () else Hashtbl.replace redirect fn (close_obj fn imps obj); ) objs; From 4325bf1c5e837af2031fa94c8415d44c886f5d20 Mon Sep 17 00:00:00 2001 From: David Allsopp Date: Thu, 21 Nov 2024 15:33:13 +0000 Subject: [PATCH 2/4] Include default libraries in defined_in The defined_in function is used when determining which objects from the command line will actually be linked. However, this doesn't include the default libraries, which creates an issue for modules which are linked as a result of main. defined_in is augmented to include the mapping of symbols found in the default libraries, but some additional care is required to ensure that this doesn't cause imports from those to leak into the export table of the executable. --- reloc.ml | 56 ++++++++++++++++++++++++++++++++++---------------------- 1 file changed, 34 insertions(+), 22 deletions(-) diff --git a/reloc.ml b/reloc.ml index 03927c6..70c07c0 100644 --- a/reloc.ml +++ b/reloc.ml @@ -767,7 +767,7 @@ let build_dll link_exe output_file files exts extra_args = in (* Collect all the available symbols, including those defined in default libraries *) - let defined, from_imports, resolve_alias, resolve_alternate = + let defined, from_imports, resolve_alias, resolve_alternate, collected = let aliases = Hashtbl.create 16 in let alternates = Hashtbl.create 16 in let defined = ref StrSet.empty in @@ -836,14 +836,16 @@ let build_dll link_exe output_file files exts extra_args = and collect_file fn = if not (Hashtbl.mem collected (String.lowercase_ascii fn)) then begin - Hashtbl.replace collected (String.lowercase_ascii fn) (); debug 2 "** open: %s" fn; collect_defined fn (Lib.read fn) end and collect_defined fn = function - | `Obj obj -> collect_defined_obj obj + | `Obj obj -> + Hashtbl.replace collected (String.lowercase_ascii fn) [fn, obj]; + collect_defined_obj obj | `Lib (objs,imports) -> + Hashtbl.replace collected (String.lowercase_ascii fn) objs; List.iter (fun (_, obj) -> collect_defined_obj obj) objs; List.iter (fun (s,_) -> @@ -854,12 +856,7 @@ let build_dll link_exe output_file files exts extra_args = ) imports in - List.iter - (fun (fn,x) -> - Hashtbl.replace collected (String.lowercase_ascii fn) (); - collect_defined fn x - ) - files; + List.iter (fun (fn,x) -> collect_defined fn x) files; if !use_default_libs then List.iter (fun fn -> collect_file (find_file fn)) !default_libs; List.iter (fun fn -> collect_file (find_file fn)) exts; @@ -869,13 +866,13 @@ let build_dll link_exe output_file files exts extra_args = if !machine = `x64 then add_def "__ImageBase" else add_def "___ImageBase"; - !defined, !from_imports, (Hashtbl.find aliases), (Hashtbl.find alternates) + !defined, !from_imports, (Hashtbl.find aliases), (Hashtbl.find alternates), collected in (* Determine which objects from the given libraries should be linked in. First step: find the mapping (symbol -> object) for these objects. *) - let defined_in = + let defined_in, default_libs = let defined_in = Hashtbl.create 16 in let def_in_obj fn (objname, obj) = List.iter @@ -893,13 +890,21 @@ let build_dll link_exe output_file files exts extra_args = ) obj.symbols in - List.iter - (fun (fn,objs) -> - if !explain then Printf.printf "Scanning lib %s\n%!" fn; - List.iter (def_in_obj fn) objs - ) - libs; - Hashtbl.find defined_in + let scan (fn,objs) = + if !explain then Printf.printf "Scanning lib %s\n%!" fn; + List.iter (def_in_obj fn) objs + in + let default_libs = + List.fold_right + (fun fn acc -> + let fn = find_file fn in + scan (fn, Hashtbl.find collected (String.lowercase_ascii fn)); + StrSet.add fn acc) + (if !use_default_libs then !default_libs else []) + StrSet.empty + in + List.iter scan libs; + Hashtbl.find defined_in, default_libs in let imported_from_implib = ref StrSet.empty in @@ -990,20 +995,27 @@ let build_dll link_exe output_file files exts extra_args = (needed imported defined resolve_alias resolve_alternate obj) and link_libobj (libname,objname,obj) = + let default = StrSet.mem libname default_libs in if Hashtbl.mem libobjects (libname,objname) then () else (let imports, from_imports = imports obj in + let imports = if default then StrSet.empty else imports in Hashtbl.replace libobjects (libname,objname) (obj,imports); - imported_from_implib := StrSet.union !imported_from_implib from_imports; - record_exports obj; + if not default then begin + imported_from_implib := StrSet.union !imported_from_implib from_imports; + record_exports obj + end; link_obj (Printf.sprintf "%s(%s)" libname objname) obj) in let redirect = Hashtbl.create 16 in List.iter (fun (fn, obj) -> + let default = StrSet.mem fn default_libs in let imps, from_imports = imports obj in - imported_from_implib := StrSet.union !imported_from_implib from_imports; - record_exports obj; + if not default then begin + imported_from_implib := StrSet.union !imported_from_implib from_imports; + record_exports obj + end; link_obj fn obj; if StrSet.is_empty imps then () else Hashtbl.replace redirect fn (close_obj fn imps obj); From 969e466897a86dfedffdc9bd1b407ce4c87d15ad Mon Sep 17 00:00:00 2001 From: David Allsopp Date: Thu, 21 Nov 2024 20:41:43 +0000 Subject: [PATCH 3/4] Choose the correct startup object for mingw-w64 If -municode is passed to mingw-w64's gcc, then crt2u.o (which causes wmain to be selected) should be linked rather than crt2.o. In passing, the code now sniffs for -nostartfiles, which causes neither crt2.o nor crt2u.o to be considered. --- CHANGES | 2 ++ reloc.ml | 10 ++++++++-- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/CHANGES b/CHANGES index fefa9d7..f7595db 100644 --- a/CHANGES +++ b/CHANGES @@ -30,6 +30,8 @@ Version 0.44 as querying the compilers for their library search paths. Support parsing clang output. (Antonin Décimo, review by David Allsopp) +- GPR#146: For mingw-w64, select crt2u.o instead of crt2.o if -link -municode + is specified (David Allsopp) Version 0.43 - GPR#108: Add -lgcc_s to Cygwin's link libraries, upstreaming a patch from the diff --git a/reloc.ml b/reloc.ml index 70c07c0..17e748d 100644 --- a/reloc.ml +++ b/reloc.ml @@ -1423,8 +1423,14 @@ let setup_toolchain () = with Not_found -> () in default_libs := "-lmingw32" :: "-lgcc" :: !default_libs; - if !exe_mode = `EXE then default_libs := "crt2.o" :: !default_libs - else default_libs := "dllcrt2.o" :: !default_libs + if not (List.mem "-nostartfiles" !extra_args) then begin + if !exe_mode = `EXE then + if List.mem "-municode" !extra_args then + default_libs := "crt2u.o" :: !default_libs + else + default_libs := "crt2.o" :: !default_libs + else default_libs := "dllcrt2.o" :: !default_libs + end in match !toolchain with | _ when !builtin_linker -> From 1fbab57483231da60cf26f12ed99bf9cbb2ce584 Mon Sep 17 00:00:00 2001 From: David Allsopp Date: Thu, 21 Nov 2024 20:44:12 +0000 Subject: [PATCH 4/4] Consider the entrypoint when determining exports The export table is determined by looping over the objects included on the command line. If the main symbol is included in a library (e.g. libcamlrun or libasmrun) then there may not be an object on the commandline which causes it, and any transitive dependencies, to be linked. The entrypoint symbol for the Cygwin/mingw-w64/MSVC toolchains is now resolved, and may cause additional objects to be linked. --- CHANGES | 5 ++-- reloc.ml | 73 ++++++++++++++++++++++++++++++++++++++++++++++---------- 2 files changed, 63 insertions(+), 15 deletions(-) diff --git a/CHANGES b/CHANGES index f7595db..89ada64 100644 --- a/CHANGES +++ b/CHANGES @@ -30,8 +30,9 @@ Version 0.44 as querying the compilers for their library search paths. Support parsing clang output. (Antonin Décimo, review by David Allsopp) -- GPR#146: For mingw-w64, select crt2u.o instead of crt2.o if -link -municode - is specified (David Allsopp) +- GPR#146: Take the entrypoint (main, wmainCRTStartup, etc.) into account when + determining the modules which will be linked. For mingw-w64, select crt2u.o + instead of crt2.o if -link -municode is specified (David Allsopp) Version 0.43 - GPR#108: Add -lgcc_s to Cygwin's link libraries, upstreaming a patch from the diff --git a/reloc.ml b/reloc.ml index 17e748d..f1a0040 100644 --- a/reloc.ml +++ b/reloc.ml @@ -725,7 +725,7 @@ let needed imported defined resolve_alias resolve_alternate obj = StrSet.empty obj.symbols -let build_dll link_exe output_file files exts extra_args = +let build_dll link_exe output_file files exts extra_args_string = let main_pgm = link_exe <> `DLL in (* fully resolve filenames, eliminate duplicates *) @@ -1007,6 +1007,53 @@ let build_dll link_exe output_file files exts extra_args = link_obj (Printf.sprintf "%s(%s)" libname objname) obj) in + let entrypoint = + if not main_pgm then + None + else + match !toolchain with + | `CYGWIN64 -> + Some "main" + | `MINGW | `MINGW64 -> begin + let entry_point s = + String.length s > 7 && String.sub s 0 7 = "-Wl,-e," + in + try + let s = List.find entry_point !extra_args in + Some (String.sub s 7 (String.length s - 7)) + with Not_found -> + Some "mainCRTStartup" + end + | `MSVC | `MSVC64 -> begin + let entry_point s = + String.length s > 7 && String.lowercase_ascii (String.sub s 0 7) = "/entry:" + in + try + let s = List.find entry_point !extra_args in + Some (String.sub s 7 (String.length s - 7)) + with Not_found -> + if !subsystem = "windows" then + Some "WinMainCRTStartup" + else + Some "mainCRTStartup" + end + | `LIGHTLD | `GNAT | `GNAT64 -> + None + in + let () = + match entrypoint with + | None -> () + | Some entrypoint -> + try + let (libname, objname, _) as o = defined_in entrypoint in + if !explain then + Printf.printf "%s(%s) because of entrypoint %s\n%!" libname objname + entrypoint; + link_libobj o + with Not_found -> + if !explain then + Printf.printf "Entrypoint %s not found\n%!" entrypoint + in let redirect = Hashtbl.create 16 in List.iter (fun (fn, obj) -> @@ -1142,24 +1189,24 @@ let build_dll link_exe output_file files exts extra_args = being an empty file. *) let c = open_out implib in output_string c "x"; close_out c; let _impexp = add_temp (Filename.chop_suffix implib ".lib" ^ ".exp") in - let extra_args = - if !custom_crt then "/nodefaultlib:LIBCMT /nodefaultlib:MSVCRT " ^ extra_args - else "msvcrt.lib " ^ extra_args + let extra_args_string = + if !custom_crt then "/nodefaultlib:LIBCMT /nodefaultlib:MSVCRT " ^ extra_args_string + else "msvcrt.lib " ^ extra_args_string in - let extra_args = - if !machine = `x64 then (Printf.sprintf "/base:%s " !base_addr) ^ extra_args else extra_args + let extra_args_string = + if !machine = `x64 then (Printf.sprintf "/base:%s " !base_addr) ^ extra_args_string else extra_args_string in - let extra_args = + let extra_args_string = (* FlexDLL doesn't process .voltbl sections correctly, so don't allow the linker to process them. *) let command = if Sys.win32 then link ^ " /nologo /? | findstr EMITVOLATILEMETADATA > NUL" else link ^ " /nologo '/?' | grep -iq emitvolatilemetadata >/dev/null" in if Sys.command command = 0 then - "/EMITVOLATILEMETADATA:NO " ^ extra_args - else extra_args + "/EMITVOLATILEMETADATA:NO " ^ extra_args_string + else extra_args_string in (* Flexdll requires that all images (main programs and all the DLLs) are @@ -1187,7 +1234,7 @@ let build_dll link_exe output_file files exts extra_args = (Filename.quote output_file) !subsystem files descr - extra_args + extra_args_string | `CYGWIN64 -> let def_file = if main_pgm then "" @@ -1209,7 +1256,7 @@ let build_dll link_exe output_file files exts extra_args = descr files def_file - extra_args + extra_args_string | `MINGW | `MINGW64 | `GNAT | `GNAT64 -> let def_file = if main_pgm then "" @@ -1233,7 +1280,7 @@ let build_dll link_exe output_file files exts extra_args = files def_file (if !implib then "-Wl,--out-implib=" ^ Filename.quote (Filename.chop_extension output_file ^ ".a") else "") - extra_args + extra_args_string | `LIGHTLD -> no_merge_manifest := true; let ld = Option.value !Cmdline.use_linker ~default:"ld" in @@ -1246,7 +1293,7 @@ let build_dll link_exe output_file files exts extra_args = descr files (if !implib then "--out-implib " ^ Filename.quote (Filename.chop_extension output_file ^ ".a") else "") - extra_args + extra_args_string in debug ~dry_mode 1 "+ %s" cmd; if not !dry_mode then begin