Skip to content

Commit

Permalink
[3.13] backport #9811 (#9924)
Browse files Browse the repository at this point in the history
* test: add repro for #9538 (subst on 32-bit) (#9539)

Signed-off-by: Etienne Millon <[email protected]>

* refactor: detect large files in Io functions (#9828)

`Io.read_all` and related functions read the contents of a file in a
string, which has a size limit (`Sys.max_string_length`) and can be an
issue in 32-bit systems. This makes an explicit check and raises a
`Code_error` in these situations.

Signed-off-by: Etienne Millon <[email protected]>

* fix(subst): ignore large files (#9811)

Fixes #9538

This logs a warning for large files (>16MB on 32-bit systems).

Signed-off-by: Etienne Millon <[email protected]>

---------

Signed-off-by: Etienne Millon <[email protected]>
  • Loading branch information
emillon authored Feb 5, 2024
1 parent febf36e commit a7d49bd
Show file tree
Hide file tree
Showing 7 changed files with 89 additions and 15 deletions.
30 changes: 21 additions & 9 deletions bin/subst.ml
Original file line number Diff line number Diff line change
Expand Up @@ -120,15 +120,27 @@ let subst_string s path ~map =
;;

let subst_file path ~map =
let s = Io.read_file path in
let s =
if Path.is_root (Path.parent_exn path) && Package.is_opam_file path
then "version: \"%%" ^ "VERSION_NUM" ^ "%%\"\n" ^ s
else s
in
match subst_string s ~map path with
| None -> ()
| Some s -> Io.write_file path s
match Io.with_file_in path ~f:Io.read_all_unless_large with
| Error () ->
let hints =
if Sys.word_size = 32
then
[ Pp.textf
"Dune has been built as a 32-bit binary so the maximum size \"dune subst\" \
can operate on is 16MiB."
]
else []
in
User_warning.emit ~hints [ Pp.textf "Ignoring large file: %s" (Path.to_string path) ]
| Ok s ->
let s =
if Path.is_root (Path.parent_exn path) && Package.is_opam_file path
then "version: \"%%" ^ "VERSION_NUM" ^ "%%\"\n" ^ s
else s
in
(match subst_string s ~map path with
| None -> ()
| Some s -> Io.write_file path s)
;;

(* Extending the Dune_project APIs, but adding capability to modify *)
Expand Down
1 change: 1 addition & 0 deletions doc/changes/9811.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
- subst: do not fail on 32-bit systems when large files are encountered. Just log a warning in this case. (#9811, fixes #9538, @emillon)
19 changes: 15 additions & 4 deletions otherlibs/stdune/src/io.ml
Original file line number Diff line number Diff line change
Expand Up @@ -275,7 +275,7 @@ struct
if r = len then Bytes.unsafe_to_string buf else Bytes.sub_string buf ~pos:0 ~len:r
;;

let read_all =
let read_all_unless_large =
(* We use 65536 because that is the size of OCaml's IO buffers. *)
let chunk_size = 65536 in
(* Generic function for channels such that seeking is unsupported or
Expand All @@ -286,7 +286,7 @@ struct
loop ()
in
try loop () with
| End_of_file -> Buffer.contents buffer
| End_of_file -> Ok (Buffer.contents buffer)
in
fun t ->
(* Optimisation for regular files: if the channel supports seeking, we
Expand All @@ -295,6 +295,7 @@ struct
regular files so this optimizations seems worth it. *)
match in_channel_length t with
| exception Sys_error _ -> read_all_generic t (Buffer.create chunk_size)
| n when n > Sys.max_string_length -> Error ()
| n ->
(* For some files [in_channel_length] returns an invalid value. For
instance for files in /proc it returns [0] and on Windows the
Expand All @@ -307,7 +308,7 @@ struct
end of the file *)
let s = eagerly_input_string t n in
(match input_char t with
| exception End_of_file -> s
| exception End_of_file -> Ok s
| c ->
(* The [+ chunk_size] is to make sure there is at least [chunk_size]
free space so that the first [Buffer.add_channel buffer t
Expand All @@ -318,7 +319,17 @@ struct
read_all_generic t buffer)
;;

let read_file ?binary fn = with_file_in fn ~f:read_all ?binary
let path_to_dyn path = String.to_dyn (Path.to_string path)

let read_file ?binary fn =
match with_file_in fn ~f:read_all_unless_large ?binary with
| Ok x -> x
| Error () ->
Code_error.raise
"read_file: file is larger than Sys.max_string_length"
[ "fn", path_to_dyn fn ]
;;

let lines_of_file fn = with_file_in fn ~f:input_lines ~binary:false
let zero_strings_of_file fn = with_file_in fn ~f:input_zero_separated ~binary:true

Expand Down
11 changes: 10 additions & 1 deletion otherlibs/stdune/src/io.mli
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,16 @@ val input_lines : in_channel -> string list
unrelated channels because it uses a statically-allocated global buffer. *)
val copy_channels : in_channel -> out_channel -> unit

val read_all : in_channel -> string
(** Try to read everything from a channel. Returns [Error ()] if the contents
are larger than [Sys.max_string_length]. This is generally a problem only
on 32-bit systems.
Overflow detection does not happen in the following cases:
- channel is not a file (for example, a pipe)
- if the detected size is unreliable (/proc)
- race condition with another process changing the size of the underlying
file.
In these cases, an exception might be raised by [Buffer] functions. *)
val read_all_unless_large : in_channel -> (string, unit) result

include Io_intf.S with type path = Path.t
module String_path : Io_intf.S with type path = string
Expand Down
32 changes: 32 additions & 0 deletions test/blackbox-tests/test-cases/subst/32bit.t
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
When dune subst is called on a file larger than 16MiB, it should not crash.
See #9538.

$ cat > create.ml << EOF
> let () = Unix.truncate "large.dat" 0x1_00_00_00
> EOF
$ touch large.dat
$ ocaml unix.cma create.ml
$ rm create.ml

$ cat > dune-project << EOF
> (lang dune 1.0)
> (name project)
> (package
> (name project))
> EOF

This test uses subst, which needs a git repository:

$ git init
Initialized empty Git repository in $TESTCASE_ROOT/.git/
$ git add dune-project
$ git add large.dat
$ git commit -m create | tail -n 3
2 files changed, 4 insertions(+)
create mode 100644 dune-project
create mode 100644 large.dat

$ dune subst
Warning: Ignoring large file: large.dat
Hint: Dune has been built as a 32-bit binary so the maximum size "dune subst"
can operate on is 16MiB.
5 changes: 5 additions & 0 deletions test/blackbox-tests/test-cases/subst/dune
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
(cram
(applies_to 32bit)
(enabled_if
(not %{arch_sixtyfour}))
(deps %{bin:git}))
6 changes: 5 additions & 1 deletion test/blackbox-tests/utils/melc_stdlib_prefix.ml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,11 @@ open Stdune

let command cmd args =
let p = Unix.open_process_args_in cmd (Array.of_list (cmd :: args)) in
let output = Io.read_all p in
let output =
match Io.read_all_unless_large p with
| Ok x -> x
| Error () -> assert false
in
match Unix.close_process_in p with
| WEXITED n when n = 0 -> Ok output
| WEXITED n -> Error n
Expand Down

0 comments on commit a7d49bd

Please sign in to comment.