From f82f0d480b778f3e3764c3624158a11311476456 Mon Sep 17 00:00:00 2001 From: Rafael Fourquet Date: Tue, 10 Oct 2023 14:10:25 +0200 Subject: [PATCH] Random: allow string seeds (#51527) We used to be able to seed RNGs with a string, but that string was interpreted as the filename containing the actual seed. This was deprecated in #21359, in order to later allow using a string seed directly, which this patch does. --------- Co-authored-by: Nathan Zimmerberg <39104088+nhz2@users.noreply.github.com> --- NEWS.md | 5 +++-- stdlib/Random/src/RNGs.jl | 30 +++++++++++++++++++++++++----- stdlib/Random/src/Xoshiro.jl | 2 +- stdlib/Random/test/runtests.jl | 25 +++++++++++++++++++++++-- 4 files changed, 52 insertions(+), 10 deletions(-) diff --git a/NEWS.md b/NEWS.md index 46ba7c4b85118..4c5aebb8f19df 100644 --- a/NEWS.md +++ b/NEWS.md @@ -60,9 +60,10 @@ Standard library changes #### Random * `rand` now supports sampling over `Tuple` types ([#35856], [#50251]). -* When seeding RNGs provided by `Random`, negative integer seeds can now be used ([#51416]). - * `rand` now supports sampling over `Pair` types ([#28705]). +* When seeding RNGs provided by `Random`, negative integer seeds can now be used ([#51416]). +* Seedable random number generators from `Random` can now be seeded by a string, e.g. + `seed!(rng, "a random seed")` ([#51527]). #### REPL diff --git a/stdlib/Random/src/RNGs.jl b/stdlib/Random/src/RNGs.jl index 8e766bfb98e53..7782de88ba537 100644 --- a/stdlib/Random/src/RNGs.jl +++ b/stdlib/Random/src/RNGs.jl @@ -83,7 +83,7 @@ MersenneTwister(seed, state::DSFMT_state) = Create a `MersenneTwister` RNG object. Different RNG objects can have their own seeds, which may be useful for generating different streams of random numbers. -The `seed` may be an integer or a vector of `UInt32` integers. +The `seed` may be an integer, a string, or a vector of `UInt32` integers. If no seed is provided, a randomly generated one is created (using entropy from the system). 
See the [`seed!`](@ref) function for reseeding an already existing `MersenneTwister` object. @@ -316,12 +316,32 @@ function hash_seed(seed::Union{AbstractArray{UInt32}, AbstractArray{UInt64}}) SHA.digest!(ctx) end +function hash_seed(str::AbstractString) + ctx = SHA.SHA2_256_CTX() + # convert to String such that `codeunits(str)` below is consistent between equal + # strings of different types + str = String(str) + SHA.update!(ctx, codeunits(str)) + # signature for strings: so far, all hash_seed functions end up hashing a multiple + # of 4 bytes of data, and add the signature (1 byte) at the end; so hash as many + # bytes as necessary to have a total number of hashed bytes equal to 0 mod 4 (padding), + # and then hash the signature 0x05; in order for strings of different lengths to have + # different hashes, padding bytes are set equal to the number of padding bytes + pad = 4 - mod(ncodeunits(str), 4) + for _=1:pad + SHA.update!(ctx, (pad % UInt8,)) + end + SHA.update!(ctx, (0x05,)) + SHA.digest!(ctx) +end + """ hash_seed(seed) -> AbstractVector{UInt8} Return a cryptographic hash of `seed` of size 256 bits (32 bytes). -`seed` can currently be of type `Union{Integer, DenseArray{UInt32}, DenseArray{UInt64}}`, +`seed` can currently be of type +`Union{Integer, AbstractString, AbstractArray{UInt32}, AbstractArray{UInt64}}`, but modules can extend this function for types they own. `hash_seed` is "injective" : if `n != m`, then `hash_seed(n) != `hash_seed(m)`. @@ -750,13 +770,13 @@ jump!(r::MersenneTwister, steps::Integer) = copy!(r, jump(r, steps)) # 3, 4: .adv_vals, .idxF (counters to reconstruct the float cache, optional if 5-6 not shown)) # 5, 6: .adv_ints, .idxI (counters to reconstruct the integer cache, optional) -Random.MersenneTwister(seed::Union{Integer,Vector{UInt32}}, advance::NTuple{6,Integer}) = +Random.MersenneTwister(seed, advance::NTuple{6,Integer}) = advance!(MersenneTwister(seed), advance...)
-Random.MersenneTwister(seed::Union{Integer,Vector{UInt32}}, advance::NTuple{4,Integer}) = +Random.MersenneTwister(seed, advance::NTuple{4,Integer}) = MersenneTwister(seed, (advance..., 0, 0)) -Random.MersenneTwister(seed::Union{Integer,Vector{UInt32}}, advance::NTuple{2,Integer}) = +Random.MersenneTwister(seed, advance::NTuple{2,Integer}) = MersenneTwister(seed, (advance..., 0, 0, 0, 0)) # advances raw state (per fill_array!) of r by n steps (Float64 values) diff --git a/stdlib/Random/src/Xoshiro.jl b/stdlib/Random/src/Xoshiro.jl index bf48fe8e7e9b4..b16668e99584b 100644 --- a/stdlib/Random/src/Xoshiro.jl +++ b/stdlib/Random/src/Xoshiro.jl @@ -4,7 +4,7 @@ # Lots of implementation is shared with TaskLocalRNG """ - Xoshiro(seed::Integer) + Xoshiro(seed::Union{Integer, AbstractString}) Xoshiro() Xoshiro256++ is a fast pseudorandom number generator described by David Blackman and diff --git a/stdlib/Random/test/runtests.jl b/stdlib/Random/test/runtests.jl index 28c1047b42f09..2ad9d38d94845 100644 --- a/stdlib/Random/test/runtests.jl +++ b/stdlib/Random/test/runtests.jl @@ -656,6 +656,7 @@ end # test that the following is not an error (#16925) @test Random.seed!(m..., typemax(UInt)) === m2 @test Random.seed!(m..., typemax(UInt128)) === m2 + @test Random.seed!(m..., "a random seed") === m2 end end @@ -710,7 +711,7 @@ end end @testset "$RNG(seed) & Random.seed!(m::$RNG, seed) produce the same stream" for RNG=(MersenneTwister,Xoshiro) - seeds = Any[0, 1, 2, 10000, 10001, rand(UInt32, 8), rand(UInt128, 3)...] + seeds = Any[0, 1, 2, 10000, 10001, rand(UInt32, 8), randstring(), randstring(), rand(UInt128, 3)...] 
if RNG == Xoshiro push!(seeds, rand(UInt64, rand(1:4))) end @@ -723,7 +724,7 @@ end end @testset "Random.seed!(seed) sets Random.GLOBAL_SEED" begin - seeds = Any[0, rand(UInt128), rand(UInt64, 4)] + seeds = Any[0, rand(UInt128), rand(UInt64, 4), randstring(20)] for seed=seeds Random.seed!(seed) @@ -940,6 +941,15 @@ end @test string(m) == "MersenneTwister(-3)" Random.seed!(m, typemin(Int8)) @test string(m) == "MersenneTwister(-128)" + + # string seeds + Random.seed!(m, "seed 1") + @test string(m) == "MersenneTwister(\"seed 1\")" + x = rand(m) + @test x == rand(MersenneTwister("seed 1")) + @test string(m) == """MersenneTwister("seed 1", (0, 1002, 0, 1))""" + # test that MersenneTwister's fancy constructors accept string seeds + @test MersenneTwister("seed 1", (0, 1002, 0, 1)) == m end @testset "RandomDevice" begin @@ -1196,6 +1206,17 @@ end hash32 = Random.hash_seed(seed32) @test Random.hash_seed(map(UInt64, seed32)) == hash32 @test hash32 ∉ keys(vseeds) + + seed_str = randstring() + seed_gstr = GenericString(seed_str) + @test Random.hash_seed(seed_str) == Random.hash_seed(seed_gstr) + string_seeds = Set{Vector{UInt8}}() + for ch = 'A':'z' + vseed = Random.hash_seed(string(ch)) + @test vseed ∉ keys(vseeds) + @test vseed ∉ string_seeds + push!(string_seeds, vseed) + end end @testset "rand(::Type{<:Pair})" begin