From cf2f94439a67cd65d5f94fb30a07f460c39e2bc0 Mon Sep 17 00:00:00 2001 From: Xavier Leroy Date: Thu, 28 Dec 2023 18:11:38 +0100 Subject: [PATCH 1/2] Use standard hash function for `Z.hash` and add `Z.seeded_hash` For consistency with other integer types in the standard library (modules Int, Int32, Int64, Nativeint), let's use the standard hash function (`Hashtbl.hash`) for `Z.hash` instead of our variant. This is a bit slower but has several benefits (see #145): - 32/64 bit compatibility - better mixing of the bits of the result. While we're at it, add a `Z.seeded_hash` function, defined as `Hashtbl.seeded_hash`, so that the `Z` module can be used as the argument to the `Hashtbl.MakeSeeded` functor. --- caml_z.c | 5 ----- z.ml | 3 ++- z.mli | 28 +++++++++++++++++++++++----- 3 files changed, 25 insertions(+), 11 deletions(-) diff --git a/caml_z.c b/caml_z.c index 02e758d..040bb90 100644 --- a/caml_z.c +++ b/caml_z.c @@ -3350,11 +3350,6 @@ static intnat ml_z_custom_hash(value v) return acc; } -CAMLprim value ml_z_hash(value v) -{ - return Val_long(ml_z_custom_hash(v)); -} - /* serialized format: - 1-byte sign (1 for negative, 0 for positive) - 4-byte size in bytes diff --git a/z.ml b/z.ml index 975823f..58faadb 100644 --- a/z.ml +++ b/z.ml @@ -258,7 +258,8 @@ external perfect_power: t -> bool = "ml_z_perfect_power" external perfect_square: t -> bool = "ml_z_perfect_square" external probab_prime: t -> int -> int = "ml_z_probab_prime" external nextprime: t -> t = "ml_z_nextprime" -external hash: t -> int = "ml_z_hash" [@@noalloc] +let hash: t -> int = Stdlib.Hashtbl.hash +let seeded_hash: int -> t -> int = Stdlib.Hashtbl.seeded_hash external to_bits: t -> string = "ml_z_to_bits" external of_bits: string -> t = "ml_z_of_bits" diff --git a/z.mli b/z.mli index 70098ff..1794942 100644 --- a/z.mli +++ b/z.mli @@ -486,12 +486,24 @@ val is_odd: t -> bool @since 1.4 *) -external hash: t -> int = "ml_z_hash" [@@noalloc] +val hash: t -> int (** Hashes a number, producing a small 
integer. - The result is consistent with equality: if [a] = [b], then [hash a] = - [hash b]. - OCaml's generic hash function, [Hashtbl.hash], works correctly with - numbers, but {!Z.hash} is slightly faster. + The result is consistent with equality: + if [a] = [b], then [hash a] = [hash b]. + The result is the same as produced by OCaml's generic hash function, + {!Hashtbl.hash}. + Together with type {!Z.t}, the function {!Z.hash} makes it possible + to pass module {!Z} as argument to the functor {!Hashtbl.Make}. + @before 1.14 a different hash algorithm was used. +*) + +val seeded_hash: int -> t -> int +(** Like {!Z.hash}, but takes a seed as extra argument for diversification. + The result is the same as produced by OCaml's generic seeded hash function, + {!Hashtbl.seeded_hash}. + Together with type {!Z.t}, the function {!Z.seeded_hash} makes it possible + to pass module {!Z} as argument to the functor {!Hashtbl.MakeSeeded}. + @since 1.14 *) (** {1 Elementary number theory} *) @@ -717,6 +729,8 @@ val random_int: ?rng: Random.State.t -> t -> t Random numbers produced by this function are not cryptographically strong and must not be used in cryptographic or high-security contexts. See {!Z.random_int_gen} for an alternative. + + @since 1.13 *) val random_bits: ?rng: Random.State.t -> int -> t @@ -731,6 +745,8 @@ val random_bits: ?rng: Random.State.t -> int -> t Random numbers produced by this function are not cryptographically strong and must not be used in cryptographic or high-security contexts. See {!Z.random_bits_gen} for an alternative. 
+ + @since 1.13 *) val random_int_gen: fill: (bytes -> int -> int -> unit) -> t -> t @@ -751,6 +767,7 @@ val random_int_gen: fill: (bytes -> int -> int -> unit) -> t -> t << Z.random_int_gen ~fill:Cryptokit.Random.secure_rng#bytes bound >> + @since 1.13 *) val random_bits_gen: fill: (bytes -> int -> int -> unit) -> int -> t @@ -759,6 +776,7 @@ val random_bits_gen: fill: (bytes -> int -> int -> unit) -> int -> t This is a more efficient special case of {!Z.random_int_gen} when the bound is a power of two. The [fill] parameter is as described in {!Z.random_int_gen}. + @since 1.13 *) (** {1 Prefix and infix operators} *) From b9c96768b0764ab54ea04dfab1259b05731a8bad Mon Sep 17 00:00:00 2001 From: Xavier Leroy Date: Thu, 28 Dec 2023 18:15:48 +0100 Subject: [PATCH 2/2] Test the hash function with random integers and the chi2 test --- tests/Makefile | 2 +- tests/chi2.ml | 33 +++++++++++++++++++++++++-------- 2 files changed, 26 insertions(+), 9 deletions(-) diff --git a/tests/Makefile b/tests/Makefile index 760294b..f06bcb1 100644 --- a/tests/Makefile +++ b/tests/Makefile @@ -38,7 +38,7 @@ test:: ofstring.exe test:: chi2.exe @echo "Testing random number generation..." - @./chi2.exe + @if ./chi2.exe; then echo "chi2: passed"; else echo "chi2: FAILED"; exit 2; fi bench:: timings.exe ./timings.exe diff --git a/tests/chi2.ml b/tests/chi2.ml index 1241828..86a5738 100644 --- a/tests/chi2.ml +++ b/tests/chi2.ml @@ -1,12 +1,11 @@ (* Accumulate [n] samples from function [f] and check the chi-square. - Only the low 8 bits of the result of [f] are sampled. *) + Assumes [f] returns integers in the [0..255] range. *) let chisquare n f = let r = 256 in let freq = Array.make r 0 in for i = 0 to n - 1 do - let t = Z.to_int (Z.logand (f ()) (Z.of_int 0xFF)) in - freq.(t) <- freq.(t) + 1 + let t = f () in freq.(t) <- freq.(t) + 1 done; let expected = float n /. float r in let t = @@ -22,9 +21,19 @@ let chisquare n f = *) chi2 <= degfree +. 4.0 *. sqrt (2.0 *. 
degfree) +let failed = ref false + +let test_base name f = + if not (chisquare 100_000 f) then begin + Printf.printf "%s: suspicious result\n%!" name; + failed := true + end + let test name f = - if not (chisquare 100_000 f) - then Printf.printf "%s: suspicious result\n%!" name + (* Test the low 8 bits of the result of f *) + test_base name (fun () -> Z.to_int (Z.logand (f ()) (Z.of_int 0xFF))) + +let p = Z.of_string "35742549198872617291353508656626642567" let _ = test "random_bits 15 (bits 0-7)" @@ -38,6 +47,14 @@ let _ = test "random_int 2^30 (bits 21-28)" (fun () -> Z.(shift_right (random_int (shift_left one 30)) 21)); test "random_int (256 * p) / p" - (let p = Z.of_string "35742549198872617291353508656626642567" in - let bound = Z.shift_left p 8 in - fun () -> Z.(div (random_int bound) p)) + (let bound = Z.shift_left p 8 in + fun () -> Z.(div (random_int bound) p)); + (* Also test our hash function, why not? *) + test_base "hash (random_int p) (bits 0-7)" + (fun () -> Z.(hash (random_int p)) land 0xFF); + test_base "hash (random_int p) (bits 16-23)" + (fun () -> (Z.(hash (random_int p)) lsr 16) land 0xFF); + exit (if !failed then 2 else 0) + + +