From a7d49bd89f052777edb037f4cc5b05ca11e4f7fc Mon Sep 17 00:00:00 2001 From: Etienne Millon Date: Mon, 5 Feb 2024 17:05:04 +0100 Subject: [PATCH] [3.13] backport #9811 (#9924) * test: add repro for #9538 (subst on 32-bit) (#9539) Signed-off-by: Etienne Millon * refactor: detect large files in Io functions (#9828) `Io.read_all` and related functions read the contents of a file in a string, which has a size limit (`Sys.max_string_length`) and can be an issue in 32-bit systems. This makes an explicit check and raises a `Code_error` in these situations. Signed-off-by: Etienne Millon * fix(subst): ignore large files (#9811) Fixes #9538 This logs a warning for large files (>16MB on 32-bit systems). Signed-off-by: Etienne Millon --------- Signed-off-by: Etienne Millon --- bin/subst.ml | 30 +++++++++++------ doc/changes/9811.md | 1 + otherlibs/stdune/src/io.ml | 19 ++++++++--- otherlibs/stdune/src/io.mli | 11 ++++++- test/blackbox-tests/test-cases/subst/32bit.t | 32 +++++++++++++++++++ test/blackbox-tests/test-cases/subst/dune | 5 +++ .../utils/melc_stdlib_prefix.ml | 6 +++- 7 files changed, 89 insertions(+), 15 deletions(-) create mode 100644 doc/changes/9811.md create mode 100644 test/blackbox-tests/test-cases/subst/32bit.t create mode 100644 test/blackbox-tests/test-cases/subst/dune diff --git a/bin/subst.ml b/bin/subst.ml index f625ed0cc06..ddbf4f240e3 100644 --- a/bin/subst.ml +++ b/bin/subst.ml @@ -120,15 +120,27 @@ let subst_string s path ~map = ;; let subst_file path ~map = - let s = Io.read_file path in - let s = - if Path.is_root (Path.parent_exn path) && Package.is_opam_file path - then "version: \"%%" ^ "VERSION_NUM" ^ "%%\"\n" ^ s - else s - in - match subst_string s ~map path with - | None -> () - | Some s -> Io.write_file path s + match Io.with_file_in path ~f:Io.read_all_unless_large with + | Error () -> + let hints = + if Sys.word_size = 32 + then + [ Pp.textf + "Dune has been built as a 32-bit binary so the maximum size \"dune subst\" \ + can operate on is 16MiB." + ] + else [] + in + User_warning.emit ~hints [ Pp.textf "Ignoring large file: %s" (Path.to_string path) ] + | Ok s -> + let s = + if Path.is_root (Path.parent_exn path) && Package.is_opam_file path + then "version: \"%%" ^ "VERSION_NUM" ^ "%%\"\n" ^ s + else s + in + (match subst_string s ~map path with + | None -> () + | Some s -> Io.write_file path s) ;; (* Extending the Dune_project APIs, but adding capability to modify *) diff --git a/doc/changes/9811.md b/doc/changes/9811.md new file mode 100644 index 00000000000..86759bea7c9 --- /dev/null +++ b/doc/changes/9811.md @@ -0,0 +1 @@ +- subst: do not fail on 32-bit systems when large files are encountered. Just log a warning in this case. (#9811, fixes #9538, @emillon) diff --git a/otherlibs/stdune/src/io.ml b/otherlibs/stdune/src/io.ml index 950f118fcb0..135b32f7cbe 100644 --- a/otherlibs/stdune/src/io.ml +++ b/otherlibs/stdune/src/io.ml @@ -275,7 +275,7 @@ struct if r = len then Bytes.unsafe_to_string buf else Bytes.sub_string buf ~pos:0 ~len:r ;; - let read_all = + let read_all_unless_large = (* We use 65536 because that is the size of OCaml's IO buffers. *) let chunk_size = 65536 in (* Generic function for channels such that seeking is unsupported or @@ -286,7 +286,7 @@ struct loop () in try loop () with - | End_of_file -> Buffer.contents buffer + | End_of_file -> Ok (Buffer.contents buffer) in fun t -> (* Optimisation for regular files: if the channel supports seeking, we @@ -295,6 +295,7 @@ struct regular files so this optimizations seems worth it. *) match in_channel_length t with | exception Sys_error _ -> read_all_generic t (Buffer.create chunk_size) + | n when n > Sys.max_string_length -> Error () | n -> (* For some files [in_channel_length] returns an invalid value. For instance for files in /proc it returns [0] and on Windows the @@ -307,7 +308,7 @@ struct end of the file *) let s = eagerly_input_string t n in (match input_char t with - | exception End_of_file -> s + | exception End_of_file -> Ok s | c -> (* The [+ chunk_size] is to make sure there is at least [chunk_size] free space so that the first [Buffer.add_channel buffer t @@ -318,7 +319,17 @@ struct read_all_generic t buffer) ;; - let read_file ?binary fn = with_file_in fn ~f:read_all ?binary + let path_to_dyn path = String.to_dyn (Path.to_string path) + + let read_file ?binary fn = + match with_file_in fn ~f:read_all_unless_large ?binary with + | Ok x -> x + | Error () -> + Code_error.raise + "read_file: file is larger than Sys.max_string_length" + [ "fn", path_to_dyn fn ] + ;; + let lines_of_file fn = with_file_in fn ~f:input_lines ~binary:false let zero_strings_of_file fn = with_file_in fn ~f:input_zero_separated ~binary:true diff --git a/otherlibs/stdune/src/io.mli b/otherlibs/stdune/src/io.mli index 49e2e1b4eb3..91600fb89ac 100644 --- a/otherlibs/stdune/src/io.mli +++ b/otherlibs/stdune/src/io.mli @@ -9,7 +9,16 @@ val input_lines : in_channel -> string list unrelated channels because it uses a statically-allocated global buffer. *) val copy_channels : in_channel -> out_channel -> unit -val read_all : in_channel -> string +(** Try to read everything from a channel. Returns [Error ()] if the contents + are larger than [Sys.max_string_length]. This is generally a problem only + on 32-bit systems. + Overflow detection does not happen in the following cases: + - channel is not a file (for example, a pipe) + - if the detected size is unreliable (/proc) + - race condition with another process changing the size of the underlying + file. + In these cases, an exception might be raised by [Buffer] functions. *) +val read_all_unless_large : in_channel -> (string, unit) result include Io_intf.S with type path = Path.t module String_path : Io_intf.S with type path = string diff --git a/test/blackbox-tests/test-cases/subst/32bit.t b/test/blackbox-tests/test-cases/subst/32bit.t new file mode 100644 index 00000000000..2e87c6059bf --- /dev/null +++ b/test/blackbox-tests/test-cases/subst/32bit.t @@ -0,0 +1,32 @@ +When dune subst is called on a file larger than 16MiB, it should not crash. +See #9538. + + $ cat > create.ml << EOF + > let () = Unix.truncate "large.dat" 0x1_00_00_00 + > EOF + $ touch large.dat + $ ocaml unix.cma create.ml + $ rm create.ml + + $ cat > dune-project << EOF + > (lang dune 1.0) + > (name project) + > (package + > (name project)) + > EOF + +This test uses subst, which needs a git repository: + + $ git init + Initialized empty Git repository in $TESTCASE_ROOT/.git/ + $ git add dune-project + $ git add large.dat + $ git commit -m create | tail -n 3 + 2 files changed, 4 insertions(+) + create mode 100644 dune-project + create mode 100644 large.dat + + $ dune subst + Warning: Ignoring large file: large.dat + Hint: Dune has been built as a 32-bit binary so the maximum size "dune subst" + can operate on is 16MiB. diff --git a/test/blackbox-tests/test-cases/subst/dune b/test/blackbox-tests/test-cases/subst/dune new file mode 100644 index 00000000000..6b573dd370d --- /dev/null +++ b/test/blackbox-tests/test-cases/subst/dune @@ -0,0 +1,5 @@ +(cram + (applies_to 32bit) + (enabled_if + (not %{arch_sixtyfour})) + (deps %{bin:git})) diff --git a/test/blackbox-tests/utils/melc_stdlib_prefix.ml b/test/blackbox-tests/utils/melc_stdlib_prefix.ml index 235d9843a44..df6a5b37620 100644 --- a/test/blackbox-tests/utils/melc_stdlib_prefix.ml +++ b/test/blackbox-tests/utils/melc_stdlib_prefix.ml @@ -2,7 +2,11 @@ open Stdune let command cmd args = let p = Unix.open_process_args_in cmd (Array.of_list (cmd :: args)) in - let output = Io.read_all p in + let output = + match Io.read_all_unless_large p with + | Ok x -> x + | Error () -> assert false + in match Unix.close_process_in p with | WEXITED n when n = 0 -> Ok output | WEXITED n -> Error n