Skip to content

Commit 8631fcb

Browse files
rscgopherbot
authored andcommitted
math/rand/v2: add PCG-DXSM
For the original math/rand, we ported Plan 9's random number generator, which was a refinement by Ken Thompson of an algorithm by Don Mitchell and Jim Reeds, which Mitchell in turn recalls as having been derived from an algorithm by Marsaglia. At its core, it is an additive lagged Fibonacci generator (ALFG). Whatever the details of the history, this generator is nowhere near the current state of the art for simple, pseudo-random generators. This CL adds an implementation of Melissa O'Neill's PCG, specifically the variant PCG-DXSM, which she defined after writing the PCG paper and which is now the default in Numpy. The update is slightly slower (a few multiplies and adds, instead of a few adds), but the state is dramatically smaller (2 words instead of 607). The statistical output properties are better too. A followup CL will delete the old generator. PCG is the only change here, so no benchmarks should be affected. Including them anyway as further evidence for caution. goos: linux goarch: amd64 pkg: math/rand/v2 cpu: AMD Ryzen 9 7950X 16-Core Processor │ 8993506f2f.amd64 │ 01ff938549.amd64 │ │ sec/op │ sec/op vs base │ SourceUint64-32 1.325n ± 1% 1.352n ± 1% +2.00% (p=0.000 n=20) GlobalInt64-32 2.240n ± 1% 2.083n ± 0% -7.03% (p=0.000 n=20) GlobalInt64Parallel-32 0.1041n ± 1% 0.1035n ± 1% ~ (p=0.064 n=20) GlobalUint64-32 2.072n ± 3% 2.038n ± 1% ~ (p=0.089 n=20) GlobalUint64Parallel-32 0.1008n ± 1% 0.1006n ± 1% ~ (p=0.804 n=20) Int64-32 1.716n ± 1% 1.687n ± 2% ~ (p=0.045 n=20) Uint64-32 1.665n ± 1% 1.674n ± 2% ~ (p=0.878 n=20) GlobalIntN1000-32 3.335n ± 1% 3.135n ± 1% -6.00% (p=0.000 n=20) IntN1000-32 2.484n ± 1% 2.478n ± 1% ~ (p=0.085 n=20) Int64N1000-32 2.502n ± 2% 2.455n ± 1% -1.88% (p=0.002 n=20) Int64N1e8-32 2.484n ± 2% 2.467n ± 2% ~ (p=0.048 n=20) Int64N1e9-32 2.502n ± 0% 2.454n ± 1% -1.92% (p=0.000 n=20) Int64N2e9-32 2.502n ± 0% 2.482n ± 1% -0.76% (p=0.000 n=20) Int64N1e18-32 3.201n ± 1% 3.349n ± 2% +4.62% (p=0.000 n=20) Int64N2e18-32 3.504n ± 1% 3.537n ± 1% ~ (p=0.185 n=20) Int64N4e18-32 4.873n ± 1% 4.917n ± 0% +0.90% (p=0.000 n=20) Int32N1000-32 2.639n ± 1% 2.386n ± 1% -9.57% (p=0.000 n=20) Int32N1e8-32 2.686n ± 2% 2.366n ± 1% -11.91% (p=0.000 n=20) Int32N1e9-32 2.636n ± 1% 2.355n ± 2% -10.70% (p=0.000 n=20) Int32N2e9-32 2.660n ± 1% 2.371n ± 1% -10.88% (p=0.000 n=20) Float32-32 2.261n ± 1% 2.245n ± 2% ~ (p=0.752 n=20) Float64-32 2.280n ± 1% 2.235n ± 1% -1.97% (p=0.007 n=20) ExpFloat64-32 3.891n ± 1% 3.813n ± 3% ~ (p=0.087 n=20) NormFloat64-32 3.711n ± 1% 3.652n ± 2% ~ (p=0.021 n=20) Perm3-32 32.60n ± 2% 33.12n ± 3% ~ (p=0.107 n=20) Perm30-32 204.2n ± 0% 205.1n ± 1% ~ (p=0.358 n=20) Perm30ViaShuffle-32 121.7n ± 2% 110.8n ± 1% -8.96% (p=0.000 n=20) ShuffleOverhead-32 106.2n ± 2% 113.0n ± 1% +6.36% (p=0.000 n=20) Concurrent-32 2.190n ± 5% 2.100n ± 0% -4.13% (p=0.001 n=20) PCG_DXSM-32 1.490n ± 0% goos: darwin goarch: arm64 pkg: math/rand/v2 cpu: Apple M1 │ 8993506f2f.arm64 │ 01ff938549.arm64 │ │ sec/op │ sec/op vs base │ SourceUint64-8 2.271n ± 0% 2.258n ± 1% ~ (p=0.167 n=20) GlobalInt64-8 2.161n ± 1% 2.167n ± 0% ~ (p=0.693 n=20) GlobalInt64Parallel-8 0.4303n ± 0% 0.4310n ± 0% ~ (p=0.051 n=20) GlobalUint64-8 2.164n ± 1% 2.182n ± 1% ~ (p=0.042 n=20) GlobalUint64Parallel-8 0.4287n ± 0% 0.4297n ± 0% ~ (p=0.082 n=20) Int64-8 2.478n ± 1% 2.472n ± 1% ~ (p=0.151 n=20) Uint64-8 2.460n ± 1% 2.449n ± 1% ~ (p=0.013 n=20) GlobalIntN1000-8 2.814n ± 2% 2.814n ± 2% ~ (p=0.821 n=20) IntN1000-8 3.003n ± 2% 2.998n ± 2% ~ (p=0.024 n=20) Int64N1000-8 2.954n ± 0% 2.949n ± 2% ~ (p=0.192 n=20) Int64N1e8-8 2.956n ± 0% 2.953n ± 2% ~ (p=0.109 n=20) Int64N1e9-8 3.325n ± 0% 2.950n ± 0% -11.26% (p=0.000 n=20) Int64N2e9-8 2.956n ± 2% 2.946n ± 2% ~ (p=0.027 n=20) Int64N1e18-8 3.780n ± 1% 3.779n ± 1% ~ (p=0.815 n=20) Int64N2e18-8 4.385n ± 0% 4.370n ± 1% ~ (p=0.402 n=20) Int64N4e18-8 6.527n ± 0% 6.544n ± 1% ~ (p=0.140 n=20) Int32N1000-8 2.964n ± 1% 2.950n ± 0% -0.47% (p=0.002 n=20) Int32N1e8-8 2.964n ± 1% 2.950n ± 2% ~ (p=0.013 n=20) Int32N1e9-8 2.963n ± 2% 2.951n ± 2% ~ (p=0.062 n=20) Int32N2e9-8 2.961n ± 2% 2.950n ± 2% -0.37% (p=0.002 n=20) Float32-8 3.442n ± 0% 3.441n ± 0% ~ (p=0.211 n=20) Float64-8 3.442n ± 0% 3.442n ± 0% ~ (p=0.067 n=20) ExpFloat64-8 4.472n ± 0% 4.481n ± 0% +0.20% (p=0.000 n=20) NormFloat64-8 4.734n ± 0% 4.725n ± 0% -0.19% (p=0.003 n=20) Perm3-8 26.55n ± 0% 26.55n ± 0% ~ (p=0.833 n=20) Perm30-8 181.9n ± 0% 181.9n ± 0% -0.03% (p=0.004 n=20) Perm30ViaShuffle-8 143.1n ± 0% 142.9n ± 0% ~ (p=0.204 n=20) ShuffleOverhead-8 120.6n ± 1% 120.8n ± 2% ~ (p=0.102 n=20) Concurrent-8 2.357n ± 2% 2.421n ± 6% ~ (p=0.016 n=20) PCG_DXSM-8 2.531n ± 0% goos: linux goarch: 386 pkg: math/rand/v2 cpu: AMD Ryzen 9 7950X 16-Core Processor │ 8993506f2f.386 │ 01ff938549.386 │ │ sec/op │ sec/op vs base │ SourceUint64-32 2.102n ± 2% 2.069n ± 0% ~ (p=0.021 n=20) GlobalInt64-32 3.542n ± 2% 3.456n ± 1% -2.44% (p=0.001 n=20) GlobalInt64Parallel-32 0.3202n ± 0% 0.3252n ± 0% +1.56% (p=0.000 n=20) GlobalUint64-32 3.507n ± 1% 3.573n ± 1% +1.87% (p=0.000 n=20) GlobalUint64Parallel-32 0.3170n ± 1% 0.3159n ± 0% ~ (p=0.167 n=20) Int64-32 2.516n ± 1% 2.562n ± 2% ~ (p=0.016 n=20) Uint64-32 2.544n ± 1% 2.592n ± 0% +1.85% (p=0.000 n=20) GlobalIntN1000-32 6.237n ± 1% 6.266n ± 2% ~ (p=0.268 n=20) IntN1000-32 4.670n ± 2% 4.724n ± 2% ~ (p=0.644 n=20) Int64N1000-32 5.412n ± 1% 5.490n ± 2% ~ (p=0.159 n=20) Int64N1e8-32 5.414n ± 2% 5.513n ± 2% ~ (p=0.129 n=20) Int64N1e9-32 5.473n ± 1% 5.476n ± 1% ~ (p=0.723 n=20) Int64N2e9-32 5.487n ± 1% 5.501n ± 2% ~ (p=0.481 n=20) Int64N1e18-32 8.901n ± 2% 9.043n ± 2% ~ (p=0.330 n=20) Int64N2e18-32 9.521n ± 1% 9.601n ± 2% ~ (p=0.703 n=20) Int64N4e18-32 11.92n ± 1% 12.00n ± 1% ~ (p=0.489 n=20) Int32N1000-32 4.785n ± 1% 4.829n ± 2% ~ (p=0.402 n=20) Int32N1e8-32 4.748n ± 1% 4.825n ± 2% ~ (p=0.218 n=20) Int32N1e9-32 4.810n ± 1% 4.830n ± 2% ~ (p=0.794 n=20) Int32N2e9-32 4.812n ± 1% 4.750n ± 2% ~ (p=0.057 n=20) Float32-32 10.48n ± 4% 10.89n ± 4% ~ (p=0.162 n=20) Float64-32 19.79n ± 3% 19.60n ± 4% ~ (p=0.668 n=20) ExpFloat64-32 12.91n ± 3% 12.96n ± 3% ~ (p=1.000 n=20) NormFloat64-32 7.462n ± 1% 7.516n ± 1% ~ (p=0.051 n=20) Perm3-32 35.98n ± 2% 36.78n ± 2% ~ (p=0.033 n=20) Perm30-32 241.5n ± 1% 238.9n ± 2% ~ (p=0.126 n=20) Perm30ViaShuffle-32 187.3n ± 2% 189.7n ± 2% ~ (p=0.387 n=20) ShuffleOverhead-32 160.2n ± 1% 159.8n ± 1% ~ (p=0.256 n=20) Concurrent-32 3.308n ± 3% 3.286n ± 1% ~ (p=0.038 n=20) PCG_DXSM-32 7.613n ± 1% For #61716. Change-Id: Icb274ca1f782504d658305a40159b4ae6a2f3f1d Reviewed-on: https://go-review.googlesource.com/c/go/+/502505 Auto-Submit: Russ Cox <rsc@golang.org> Reviewed-by: Dmitri Shuralyov <dmitshur@google.com> LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com> Reviewed-by: Rob Pike <r@golang.org>
1 parent f2e2637 commit 8631fcb

File tree

3 files changed

+206
-0
lines changed

3 files changed

+206
-0
lines changed

api/next/61716.txt

+6
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ pkg math/rand/v2, func Int64N(int64) int64 #61716
99
pkg math/rand/v2, func IntN(int) int #61716
1010
pkg math/rand/v2, func N[$0 intType]($0) $0 #61716
1111
pkg math/rand/v2, func New(Source) *Rand #61716
12+
pkg math/rand/v2, func NewPCG(uint64, uint64) *PCG #61716
1213
pkg math/rand/v2, func NewSource(int64) Source #61716
1314
pkg math/rand/v2, func NewZipf(*Rand, float64, float64, uint64) *Zipf #61716
1415
pkg math/rand/v2, func NormFloat64() float64 #61716
@@ -19,6 +20,10 @@ pkg math/rand/v2, func Uint32N(uint32) uint32 #61716
1920
pkg math/rand/v2, func Uint64() uint64 #61716
2021
pkg math/rand/v2, func Uint64N(uint64) uint64 #61716
2122
pkg math/rand/v2, func UintN(uint) uint #61716
23+
pkg math/rand/v2, method (*PCG) MarshalBinary() ([]uint8, error) #61716
24+
pkg math/rand/v2, method (*PCG) Seed(uint64, uint64) #61716
25+
pkg math/rand/v2, method (*PCG) Uint64() uint64 #61716
26+
pkg math/rand/v2, method (*PCG) UnmarshalBinary([]uint8) error #61716
2227
pkg math/rand/v2, method (*Rand) ExpFloat64() float64 #61716
2328
pkg math/rand/v2, method (*Rand) Float32() float32 #61716
2429
pkg math/rand/v2, method (*Rand) Float64() float64 #61716
@@ -37,6 +42,7 @@ pkg math/rand/v2, method (*Rand) Uint64() uint64 #61716
3742
pkg math/rand/v2, method (*Rand) Uint64N(uint64) uint64 #61716
3843
pkg math/rand/v2, method (*Rand) UintN(uint) uint #61716
3944
pkg math/rand/v2, method (*Zipf) Uint64() uint64 #61716
45+
pkg math/rand/v2, type PCG struct #61716
4046
pkg math/rand/v2, type Rand struct #61716
4147
pkg math/rand/v2, type Source interface { Uint64 } #61716
4248
pkg math/rand/v2, type Source interface, Uint64() uint64 #61716

src/math/rand/v2/pcg.go

+121
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,121 @@
1+
// Copyright 2023 The Go Authors. All rights reserved.
2+
// Use of this source code is governed by a BSD-style
3+
// license that can be found in the LICENSE file.
4+
5+
package rand
6+
7+
import (
8+
"errors"
9+
"math/bits"
10+
)
11+
12+
// https://numpy.org/devdocs/reference/random/upgrading-pcg64.html
13+
// https://github.com/imneme/pcg-cpp/commit/871d0494ee9c9a7b7c43f753e3d8ca47c26f8005
14+
15+
// A PCG is a PCG generator with 128 bits of internal state.
16+
// A zero PCG is equivalent to NewPCG(0, 0).
17+
type PCG struct {
18+
hi uint64
19+
lo uint64
20+
}
21+
22+
// NewPCG returns a new PCG seeded with the given values.
23+
func NewPCG(seed1, seed2 uint64) *PCG {
24+
return &PCG{seed1, seed2}
25+
}
26+
27+
// Seed resets the PCG to behave the same way as NewPCG(seed1, seed2).
28+
func (p *PCG) Seed(seed1, seed2 uint64) {
29+
p.hi = seed1
30+
p.lo = seed2
31+
}
32+
33+
// binary.bigEndian.Uint64, copied to avoid dependency
34+
func beUint64(b []byte) uint64 {
35+
_ = b[7] // bounds check hint to compiler; see golang.org/issue/14808
36+
return uint64(b[7]) | uint64(b[6])<<8 | uint64(b[5])<<16 | uint64(b[4])<<24 |
37+
uint64(b[3])<<32 | uint64(b[2])<<40 | uint64(b[1])<<48 | uint64(b[0])<<56
38+
}
39+
40+
// binary.bigEndian.PutUint64, copied to avoid dependency
41+
func bePutUint64(b []byte, v uint64) {
42+
_ = b[7] // early bounds check to guarantee safety of writes below
43+
b[0] = byte(v >> 56)
44+
b[1] = byte(v >> 48)
45+
b[2] = byte(v >> 40)
46+
b[3] = byte(v >> 32)
47+
b[4] = byte(v >> 24)
48+
b[5] = byte(v >> 16)
49+
b[6] = byte(v >> 8)
50+
b[7] = byte(v)
51+
}
52+
53+
// MarshalBinary implements the encoding.BinaryMarshaler interface.
54+
func (p *PCG) MarshalBinary() ([]byte, error) {
55+
b := make([]byte, 20)
56+
copy(b, "pcg:")
57+
bePutUint64(b[4:], p.hi)
58+
bePutUint64(b[4+8:], p.lo)
59+
return b, nil
60+
}
61+
62+
var errUnmarshalPCG = errors.New("invalid PCG encoding")
63+
64+
// UnmarshalBinary implements the encoding.BinaryUnmarshaler interface.
65+
func (p *PCG) UnmarshalBinary(data []byte) error {
66+
if len(data) != 20 || string(data[:4]) != "pcg:" {
67+
return errUnmarshalPCG
68+
}
69+
p.hi = beUint64(data[4:])
70+
p.lo = beUint64(data[4+8:])
71+
return nil
72+
}
73+
74+
func (p *PCG) next() (hi, lo uint64) {
75+
// https://github.com/imneme/pcg-cpp/blob/428802d1a5/include/pcg_random.hpp#L161
76+
//
77+
// Numpy's PCG multiplies by the 64-bit value cheapMul
78+
// instead of the 128-bit value used here and in the official PCG code.
79+
// This does not seem worthwhile, at least for Go: not having any high
80+
// bits in the multiplier reduces the effect of low bits on the highest bits,
81+
// and it only saves 1 multiply out of 3.
82+
// (On 32-bit systems, it saves 1 out of 6, since Mul64 is doing 4.)
83+
const (
84+
mulHi = 2549297995355413924
85+
mulLo = 4865540595714422341
86+
incHi = 6364136223846793005
87+
incLo = 1442695040888963407
88+
)
89+
90+
// state = state * mul + inc
91+
hi, lo = bits.Mul64(p.lo, mulLo)
92+
hi += p.hi*mulLo + p.lo*mulHi
93+
lo, c := bits.Add64(lo, incLo, 0)
94+
hi, _ = bits.Add64(hi, incHi, c)
95+
p.lo = lo
96+
p.hi = hi
97+
return hi, lo
98+
}
99+
100+
// Uint64 return a uniformly-distributed random uint64 value.
101+
func (p *PCG) Uint64() uint64 {
102+
hi, lo := p.next()
103+
104+
// XSL-RR would be
105+
// hi, lo := p.next()
106+
// return bits.RotateLeft64(lo^hi, -int(hi>>58))
107+
// but Numpy uses DXSM and O'Neill suggests doing the same.
108+
// See https://github.com/golang/go/issues/21835#issuecomment-739065688
109+
// and following comments.
110+
111+
// DXSM "double xorshift multiply"
112+
// https://github.com/imneme/pcg-cpp/blob/428802d1a5/include/pcg_random.hpp#L1015
113+
114+
// https://github.com/imneme/pcg-cpp/blob/428802d1a5/include/pcg_random.hpp#L176
115+
const cheapMul = 0xda942042e4dd58b5
116+
hi ^= hi >> 32
117+
hi *= cheapMul
118+
hi ^= hi >> 48
119+
hi *= (lo | 1)
120+
return hi
121+
}

src/math/rand/v2/pcg_test.go

+79
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,79 @@
1+
// Copyright 2023 The Go Authors. All rights reserved.
2+
// Use of this source code is governed by a BSD-style
3+
// license that can be found in the LICENSE file.
4+
5+
package rand_test
6+
7+
import (
8+
. "math/rand/v2"
9+
"testing"
10+
)
11+
12+
func BenchmarkPCG_DXSM(b *testing.B) {
13+
var p PCG
14+
var t uint64
15+
for n := b.N; n > 0; n-- {
16+
t += p.Uint64()
17+
}
18+
Sink = t
19+
}
20+
21+
func TestPCGMarshal(t *testing.T) {
22+
var p PCG
23+
const (
24+
seed1 = 0x123456789abcdef0
25+
seed2 = 0xfedcba9876543210
26+
want = "pcg:\x12\x34\x56\x78\x9a\xbc\xde\xf0\xfe\xdc\xba\x98\x76\x54\x32\x10"
27+
)
28+
p.Seed(seed1, seed2)
29+
data, err := p.MarshalBinary()
30+
if string(data) != want || err != nil {
31+
t.Errorf("MarshalBinary() = %q, %v, want %q, nil", data, err, want)
32+
}
33+
34+
q := PCG{}
35+
if err := q.UnmarshalBinary([]byte(want)); err != nil {
36+
t.Fatalf("UnmarshalBinary(): %v", err)
37+
}
38+
if q != p {
39+
t.Fatalf("after round trip, q = %#x, but p = %#x", q, p)
40+
}
41+
42+
qu := q.Uint64()
43+
pu := p.Uint64()
44+
if qu != pu {
45+
t.Errorf("after round trip, q.Uint64() = %#x, but p.Uint64() = %#x", qu, pu)
46+
}
47+
}
48+
49+
func TestPCG(t *testing.T) {
50+
p := NewPCG(1, 2)
51+
want := []uint64{
52+
0xc4f5a58656eef510,
53+
0x9dcec3ad077dec6c,
54+
0xc8d04605312f8088,
55+
0xcbedc0dcb63ac19a,
56+
0x3bf98798cae97950,
57+
0xa8c6d7f8d485abc,
58+
0x7ffa3780429cd279,
59+
0x730ad2626b1c2f8e,
60+
0x21ff2330f4a0ad99,
61+
0x2f0901a1947094b0,
62+
0xa9735a3cfbe36cef,
63+
0x71ddb0a01a12c84a,
64+
0xf0e53e77a78453bb,
65+
0x1f173e9663be1e9d,
66+
0x657651da3ac4115e,
67+
0xc8987376b65a157b,
68+
0xbb17008f5fca28e7,
69+
0x8232bd645f29ed22,
70+
0x12be8f07ad14c539,
71+
0x54908a48e8e4736e,
72+
}
73+
74+
for i, x := range want {
75+
if u := p.Uint64(); u != x {
76+
t.Errorf("PCG #%d = %#x, want %#x", i, u, x)
77+
}
78+
}
79+
}

0 commit comments

Comments
 (0)