From fbecbe53b629ee928fe8bef5ee390fa90cf96794 Mon Sep 17 00:00:00 2001 From: korthaj Date: Sat, 17 Jun 2017 10:50:48 +0200 Subject: [PATCH 1/6] Mark functions available in Go 1.9 as deprecated. --- funcs.go | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/funcs.go b/funcs.go index d2e503e..be5e63d 100644 --- a/funcs.go +++ b/funcs.go @@ -14,6 +14,8 @@ const ( // LeadingZeros returns the number of leading zero bits in w; // it returns 64 when w is zero. +// +// Deprecated: In Go 1.9 this function is available in package math/bits as LeadingZeros64. func LeadingZeros(w uint64) int { // Fill word with ones on the right, e.g. 0x0000f308 -> 0x0000ffff. w |= w >> 1 @@ -27,6 +29,8 @@ func LeadingZeros(w uint64) int { // TrailingZeros returns the number of trailing zero bits in w; // it returns 64 when w is zero. +// +// Deprecated: In Go 1.9 this function is available in package math/bits as TrailingZeros64. func TrailingZeros(w uint64) int { // “Using de Bruijn Sequences to Index a 1 in a Computer Word”, // Leiserson, Prokop, and Randall, MIT, 1998. @@ -53,6 +57,8 @@ func init() { } // Count returns the number of nonzero bits in w. +// +// Deprecated: In Go 1.9 this function is available in package math/bits as OnesCount64. func Count(w uint64) int { // “Software Optimization Guide for AMD64 Processors”, Section 8.6. const maxw = 1<<64 - 1 From b291bb74957241c0dc0a11c7e3bf593952c540f0 Mon Sep 17 00:00:00 2001 From: korthaj Date: Sat, 17 Jun 2017 13:03:23 +0200 Subject: [PATCH 2/6] Use functions in Go 1.9 math/bits --- set.go | 2 + set_math_bits.go | 592 +++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 594 insertions(+) create mode 100644 set_math_bits.go diff --git a/set.go b/set.go index 8e74ffe..f0345b2 100644 --- a/set.go +++ b/set.go @@ -1,3 +1,5 @@ +// +build !go1.9 + // Package bit provides a bit array implementation // and some utility bit functions. 
// diff --git a/set_math_bits.go b/set_math_bits.go new file mode 100644 index 0000000..fb68f1b --- /dev/null +++ b/set_math_bits.go @@ -0,0 +1,592 @@ +// +build go1.9 + +// Package bit provides a bit array implementation +// and some utility bit functions. +// +// Bit functions +// +// The bit functions count leading and trailing zero bits +// and the number of non-zero bits in a 64-bit word. +// The functions use bitwise operations instead of looping +// over individual bits. This gives a considerable speedup, +// as all bits within the word are processed in parallel. +// +// Bit set +// +// A bit set, or bit array, is an efficient set data structure +// that consists of an array of 64-bit words. Because it uses +// bit-level parallelism, limits memory access, and efficiently uses +// the data cache, a bit set often outperforms other data structures. +// +// Tutorial +// +// The Basics example shows how to create, combine, compare and +// print bit sets. +// +// Primes contains a short and simple, but still efficient, +// implementation of a prime number sieve. +// +// Union is a more advanced example demonstrating how to build +// an efficient variadic Union function using the SetOr method. +// +package bit + +import ( + "math/bits" + "strconv" +) + +const ( + bpw = 64 // bits per word + maxw = 1<>shift] & (1<<(n&mask)) == 1 iff n belongs to set, + // • data[len(data)-1] != 0 if set is nonempty, + // • data[i] == 0 for all i such that len(data) ≤ i < cap(data). + data []uint64 +} + +// New creates a new set with the given elements. +// Negative numbers are not included in the set. +func New(n ...int) *Set { + if len(n) == 0 { + return new(Set) + } + max := n[0] + for _, e := range n { + if e > max { + max = e + } + } + if max < 0 { + return new(Set) + } + s := &Set{ + data: make([]uint64, max>>shift+1), + } + for _, e := range n { + if e >= 0 { + s.data[e>>shift] |= 1 << uint(e&mask) + } + } + return s +} + +// Contains tells if n is an element of the set. 
+func (s *Set) Contains(n int) bool { + if n < 0 { + return false + } + d := s.data + i := n >> shift + if i >= len(d) { + return false + } + return d[i]&(1< len(b) { + return false + } + for i := 0; i < la; i++ { + if a[i]&^b[i] != 0 { + return false + } + } + return true +} + +// Max returns the maximum element of the set; +// it panics if the set is empty. +func (s *Set) Max() int { + if len(s.data) == 0 { + panic("max not defined for empty set") + } + d := s.data + i := len(d) - 1 + return i< m, in the set, +// or -1 if there is no such element. +func (s *Set) Next(m int) int { + d := s.data + len := len(d) + if len == 0 { + return -1 + } + if m < 0 { + if d[0]&1 != 0 { + return 0 + } + m = 0 + } + i := m >> shift + if i >= len { + return -1 + } + t := 1 + uint(m&mask) + w := d[i] >> t << t // Zero out bits for numbers ≤ m. + for i < len-1 && w == 0 { + i++ + w = d[i] + } + if w == 0 { + return -1 + } + return i< max { + return max + } + i = m >> shift + t := bpw - uint(m&mask) + w := d[i] << t >> t // Zero out bits for numbers ≥ m. + for i > 0 && w == 0 { + i-- + w = d[i] + } + if w == 0 { + return -1 + } + return i<>= uint(b + 1) + for w&1 != 0 { // common case + if do(n) { + return true + } + n++ + w >>= 1 + } + } + } + return false +} + +// String returns a string representation of the set. The elements +// are listed in ascending order. Runs of at least three consecutive +// elements from a to b are given as a..b. +func (s *Set) String() string { + var buf []byte + buf = append(buf, '{') + a, b := -1, -2 // Keep track of a range a..b of elements. + s.Visit(func(n int) (skip bool) { + if n == b+1 { + b++ // Increase current range from a..b to a..b+1. + return + } + buf = appendRange(buf, a, b) + a, b = n, n // Start new range. + return + }) + buf = appendRange(buf, a, b) + if s.Size() > 0 { + buf = buf[:len(buf)-1] // Remove trailing " ". 
+ } + buf = append(buf, '}') + return string(buf) +} + +// appendRange appends either "", "a ", "a b " or "a..b " to buf. +func appendRange(buf []byte, a, b int) []byte { + switch { + case a > b: + return buf // Append nothing. + case a == b: + buf = strconv.AppendInt(buf, int64(a), 10) + case a+1 == b: + buf = strconv.AppendInt(buf, int64(a), 10) + buf = append(buf, ' ') + buf = strconv.AppendInt(buf, int64(b), 10) + default: + buf = strconv.AppendInt(buf, int64(a), 10) + buf = append(buf, ".."...) + buf = strconv.AppendInt(buf, int64(b), 10) + } + return append(buf, ' ') +} + +// Add adds n to s and returns a pointer to the updated set. +// A negative n will not be added. +func (s *Set) Add(n int) *Set { + if n < 0 { + return s + } + i := n >> shift + if i >= len(s.data) { + s.resize(i + 1) + } + s.data[i] |= 1 << uint(n&mask) + return s +} + +// Delete removes n from s and returns a pointer to the updated set. +func (s *Set) Delete(n int) *Set { + if n < 0 { + return s + } + i := n >> shift + if i >= len(s.data) { + return s + } + s.data[i] &^= 1 << uint(n&mask) + s.trim() + return s +} + +// AddRange adds all integers from m to n-1 to s +// and returns a pointer to the updated set. +// Negative numbers will not be added. +func (s *Set) AddRange(m, n int) *Set { + if n < 1 || m >= n { + return s + } + m = max(0, m) + n-- + low, high := m>>shift, n>>shift + if high >= len(s.data) { + s.resize(high + 1) + } + d := s.data + // Range fits in one word. + if low == high { + d[low] |= bitMask(m&mask, n&mask) + return s + } + // Range spans at least two words. + d[low] |= bitMask(m&mask, bpw-1) + for i := low + 1; i < high; i++ { + d[i] = maxw + } + d[high] |= bitMask(0, n&mask) + return s +} + +// DeleteRange removes all integers from m to n-1 from s +// and returns a pointer to the updated set. 
+func (s *Set) DeleteRange(m, n int) *Set { + if n < 1 || m >= n { + return s + } + m = max(0, m) + n-- + d := s.data + low, high := m>>shift, n>>shift + // Range does not intersect set. + if low >= len(d) { + return s + } + // Top of range overshoots set. + if len(d) <= high { + high = len(d) - 1 // low ≤ high still holds, since low < len(d). + n = bpw - 1 // To assure that n&mask == bpw-1 below. + } + // Range fits in one word. + if low == high { + d[low] &^= bitMask(m&mask, n&mask) + s.trim() + return s + } + // Range spans at least two words. + d[low] &^= bitMask(m&mask, bpw-1) + for i := low + 1; i < high; i++ { + d[i] = 0 + } + d[high] &^= bitMask(0, n&mask) + s.trim() + return s +} + +// And creates a new set that consists of all elements that belong +// to both s1 and s2. +func (s1 *Set) And(s2 *Set) *Set { + return new(Set).SetAnd(s1, s2) +} + +// Or creates a new set that contains all elements that belong +// to either s1 or s2. +func (s1 *Set) Or(s2 *Set) *Set { + return new(Set).SetOr(s1, s2) +} + +// Xor creates a new set that contains all elements that belong +// to either s1 or s2, but not to both. +func (s1 *Set) Xor(s2 *Set) *Set { + return new(Set).SetXor(s1, s2) +} + +// AndNot creates a new set that consists of all elements that belong +// to s1, but not to s2. +func (s1 *Set) AndNot(s2 *Set) *Set { + return new(Set).SetAndNot(s1, s2) +} + +// Set sets s to s1 and then returns a pointer to the updated set s. +func (s *Set) Set(s1 *Set) *Set { + s.realloc(len(s1.data)) + copy(s.data, s1.data) + return s +} + +// SetAnd sets s to the intersection s1 ∩ s2 and then returns a pointer to s. +func (s *Set) SetAnd(s1, s2 *Set) *Set { + a, b := s1.data, s2.data + // Find last nonzero word in result. 
+ n := min(len(a), len(b)) - 1 + for n >= 0 && a[n]&b[n] == 0 { + n-- + } + if s == s1 || s == s2 { + s.resize(n + 1) + } else { + s.realloc(n + 1) + } + for i := 0; i <= n; i++ { + s.data[i] = a[i] & b[i] + } + return s +} + +// SetAndNot sets s to the set difference s1 ∖ s2 and then returns a pointer to s. +func (s *Set) SetAndNot(s1, s2 *Set) *Set { + a, b := s1.data, s2.data + la, lb := len(a), len(b) + // Result requires len(a) words if len(a) > len(b), + // otherwise find last nonzero word in result. + n := la - 1 + if la <= lb { + for n >= 0 && a[n]&^b[n] == 0 { + n-- + } + } + if s == s1 || s == s2 { + s.resize(n + 1) + } else { + s.realloc(n + 1) + } + d := s.data + if m := lb; m <= n { + copy(d[m:n+1], a[m:n+1]) + n = m - 1 + } + for i := 0; i <= n; i++ { + d[i] = a[i] &^ b[i] + } + return s +} + +// SetOr sets s to the union s1 ∪ s2 and then returns a pointer to s. +func (s *Set) SetOr(s1, s2 *Set) *Set { + // Swap, if necessary, to make s1 shorter than s2. + if len(s1.data) > len(s2.data) { + s1, s2 = s2, s1 + } + a, b := s1.data, s2.data + la := len(a) + n := len(b) - 1 + if s == s1 || s == s2 { + s.resize(n + 1) + } else { + s.realloc(n + 1) + } + d := s.data + copy(d[la:n+1], b[la:n+1]) + for i := 0; i < la; i++ { + d[i] = a[i] | b[i] + } + return s +} + +// SetXor sets s to the symmetric difference A ∆ B = (A ∪ B) ∖ (A ∩ B) +// and then returns a pointer to s. +func (s *Set) SetXor(s1, s2 *Set) *Set { + // Swap, if necessary, to make s1 shorter than s2. + if len(s1.data) > len(s2.data) { + s1, s2 = s2, s1 + } + a, b := s1.data, s2.data + la, lb := len(a), len(b) + n := lb - 1 + if la == lb { // The only case where result may be shorter than len(b). + for n >= 0 && a[n]^b[n] == 0 { + n-- + } + if n == -1 { // No elements left. 
+ s.realloc(0) + return s + } + } + if s == s1 || s == s2 { + s.resize(n + 1) + } else { + s.realloc(n + 1) + } + d := s.data + if la <= n { + copy(d[la:n+1], b[la:n+1]) + n = la - 1 + } + for i := 0; i <= n; i++ { + d[i] = a[i] ^ b[i] + } + return s +} + +// resize changes the length of s.data to n, keeping old values. +// It preserves the invariant s.data[i] = 0, n ≤ i < cap(data). +func (s *Set) resize(n int) { + d := s.data + if s.realloc(n) { + copy(s.data, d) + } +} + +// realloc creates a slice s.data of length n, possibly zeroing out old values. +// It preserves the invariant s.data[i] = 0, n ≤ i < cap(data). +// It returns true if new memory has been allocated. +func (s *Set) realloc(n int) (didAlloc bool) { + if c := cap(s.data); c < n { + s.data = make([]uint64, n, newCap(n, c)) + return true + } + // Add zeroes if shrinking. + d := s.data + for i := len(d) - 1; i >= n; i-- { + d[i] = 0 + } + s.data = d[:n] + return false +} + +// newCap suggests a new increased capacity, favoring powers of two, +// when growing a slice to length n. The suggested capacities guarantee +// linear amortized cost for repeated memory allocations. +func newCap(n, prevCap int) int { + return max(n, nextPow2(prevCap)) +} + +// nextPow2 returns the smallest p = 1, 2, 4, ..., 2^k such that p > n, +// or MaxInt if p > MaxInt. +func nextPow2(n int) (p int) { + if n <= 0 { + return 1 + } + if k := 64 - bits.LeadingZeros64(uint64(n)); k < bitsPerWord-1 { + return 1 << uint(k) + } + return MaxInt +} + +// trim slices s.data by removing all trailing words equal to zero. +func (s *Set) trim() { + d := s.data + n := len(d) - 1 + for n >= 0 && d[n] == 0 { + n-- + } + s.data = d[:n+1] +} + +// bitMask returns a bit mask with nonzero bits from m to n, 0 ≤ m ≤ n < bpw. 
+func bitMask(m, n int) uint64 { + return maxw >> uint(bpw-1-(n-m)) << uint(m) +} + +func min(a, b int) int { + if a < b { + return a + } + return b +} + +func max(a, b int) int { + if a > b { + return a + } + return b +} From bff4427151f518c359b9003af9572d0f579d4e56 Mon Sep 17 00:00:00 2001 From: korthaj Date: Sat, 17 Jun 2017 14:13:23 +0200 Subject: [PATCH 3/6] Remove benchmarks for deprecated functions --- bench_test.go | 18 ------------------ 1 file changed, 18 deletions(-) diff --git a/bench_test.go b/bench_test.go index 5db1549..10b3008 100644 --- a/bench_test.go +++ b/bench_test.go @@ -2,24 +2,6 @@ package bit import "testing" -func BenchmarkLeadingZeros(b *testing.B) { - for i := 0; i < b.N; i++ { - LeadingZeros(0xcafecafecafecafe) - } -} - -func BenchmarkTrailingZeros(b *testing.B) { - for i := 0; i < b.N; i++ { - TrailingZeros(0xcafecafecafecafe) - } -} - -func BenchmarkCount(b *testing.B) { - for i := 0; i < b.N; i++ { - Count(0xcafecafecafecafe) - } -} - // Number of words in test set. 
const nw = 1 << 10 From d1ce7ca2000a65ab043db0643f44421be3249596 Mon Sep 17 00:00:00 2001 From: korthaj Date: Sat, 17 Jun 2017 14:35:51 +0200 Subject: [PATCH 4/6] Use Len64 from Go 1.9 --- set_math_bits.go | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/set_math_bits.go b/set_math_bits.go index fb68f1b..b23fc10 100644 --- a/set_math_bits.go +++ b/set_math_bits.go @@ -138,7 +138,7 @@ func (s *Set) Max() int { } d := s.data i := len(d) - 1 - return i< max { + if max := i< max { return max } i = m >> shift @@ -212,7 +212,7 @@ func (s *Set) Prev(m int) int { if w == 0 { return -1 } - return i< Date: Mon, 19 Jun 2017 10:18:28 +0200 Subject: [PATCH 5/6] Add godoc badge --- README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/README.md b/README.md index f6ff2bd..dd5f69e 100644 --- a/README.md +++ b/README.md @@ -2,6 +2,8 @@ ### Golang set data structure with bonus bit-twiddling functions +[![GoDoc](https://godoc.org/github.com/yourbasic/bit?status.svg)][godoc-bit] + A bit array, or bit set, is an efficient set data structure. It consists of an array that compactly stores bits and it uses bit-level parallelism to perform operations quickly. From 32a092b3594699e1c9c4e87464d6be8b4180f115 Mon Sep 17 00:00:00 2001 From: korthaj Date: Mon, 19 Jun 2017 18:59:50 +0200 Subject: [PATCH 6/6] Move godoc badge --- README.md | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/README.md b/README.md index dd5f69e..8efd166 100644 --- a/README.md +++ b/README.md @@ -1,9 +1,7 @@ -# Your basic bit +# Your basic bit [![GoDoc](https://godoc.org/github.com/yourbasic/bit?status.svg)][godoc-bit] ### Golang set data structure with bonus bit-twiddling functions -[![GoDoc](https://godoc.org/github.com/yourbasic/bit?status.svg)][godoc-bit] - A bit array, or bit set, is an efficient set data structure. It consists of an array that compactly stores bits and it uses bit-level parallelism to perform operations quickly.