Skip to content

Commit

Permalink
Adding some more fun bitmap asm code
Browse files Browse the repository at this point in the history
  • Loading branch information
BrentFarris committed Jul 3, 2024
1 parent 9d244f3 commit 49c1533
Show file tree
Hide file tree
Showing 5 changed files with 140 additions and 26 deletions.
5 changes: 4 additions & 1 deletion src/bitmap/bitmap.amd64.go
Original file line number Diff line number Diff line change
Expand Up @@ -43,4 +43,7 @@ package bitmap
func Check(b Bitmap, index int) bool

// CountASM reports the number of set bits in b. It has no Go body; the
// implementation is the ·CountASM routine in bitmap.amd64.s, which runs
// POPCNT over each byte of the slice.
//
//go:noescape
func CountASM(b Bitmap) int

// CountASMUsingTable reports the number of set bits in b. It has no Go body;
// the implementation is the ·CountASMUsingTable routine in bitmap.amd64.s,
// which looks each byte up in a 256-entry population-count table.
//
//go:noescape
func CountASMUsingTable(b Bitmap) int
108 changes: 99 additions & 9 deletions src/bitmap/bitmap.amd64.s
Original file line number Diff line number Diff line change
Expand Up @@ -54,17 +54,107 @@ TEXT ·Check(SB),NOSPLIT,$0-32
SETCS index+32(FP) // for embedded assembly in Go
RET

// func CountASM(b Bitmap) int
//
// Returns the number of set bits in b by running POPCNT over every byte.
// Stack layout (slice header followed by the int result):
//   b_base+0(FP)  address of the first byte of the slice data
//   b_len+8(FP)   slice length in bytes (64-bit)
//   ret+24(FP)    64-bit result
// NOTE(review): POPCNT is issued unconditionally; no CPU feature check is
// visible in this file -- confirm the build targets guarantee it.
TEXT ·CountASM(SB),NOSPLIT,$0-32
	MOVQ	b_base+0(FP), DX	// Head address of slice data
	MOVQ	b_len+8(FP), CX	// Byte length; read all 64 bits so large bitmaps are not truncated
	XORQ	AX, AX	// Running total, kept in a register
	TESTQ	CX, CX
	JEQ	done	// Empty bitmap: never dereference DX
count:
	MOVBLZX	(DX), R8	// Read the next byte, zero-extended
	INCQ	DX	// Advance to the next byte address
	POPCNTL	R8, R8	// Number of 1 bits in that byte
	ADDQ	R8, AX	// Accumulate
	DECQ	CX	// One fewer byte remaining
	JNE	count	// Loop until the counter reaches 0
done:
	MOVQ	AX, ret+24(FP)	// Write the whole 64-bit int result
	RET

// poptab is a 256-byte read-only lookup table: the byte at offset i holds the
// number of 1 bits in the byte value i (offset 0x00 -> 0, offset 0xFF -> 8).
// Entries are packed four per 32-bit DATA word in little-endian order, so
// $0x02010100 supplies the counts for values 0, 1, 2 and 3 (0, 1, 1, 2).
// The <> suffix keeps the symbol local to this file.
DATA poptab<>+0x00(SB)/4, $0x02010100
DATA poptab<>+0x04(SB)/4, $0x03020201
DATA poptab<>+0x08(SB)/4, $0x03020201
DATA poptab<>+0x0C(SB)/4, $0x04030302
DATA poptab<>+0x10(SB)/4, $0x03020201
DATA poptab<>+0x14(SB)/4, $0x04030302
DATA poptab<>+0x18(SB)/4, $0x04030302
DATA poptab<>+0x1C(SB)/4, $0x05040403
DATA poptab<>+0x20(SB)/4, $0x03020201
DATA poptab<>+0x24(SB)/4, $0x04030302
DATA poptab<>+0x28(SB)/4, $0x04030302
DATA poptab<>+0x2C(SB)/4, $0x05040403
DATA poptab<>+0x30(SB)/4, $0x04030302
DATA poptab<>+0x34(SB)/4, $0x05040403
DATA poptab<>+0x38(SB)/4, $0x05040403
DATA poptab<>+0x3C(SB)/4, $0x06050504
DATA poptab<>+0x40(SB)/4, $0x03020201
DATA poptab<>+0x44(SB)/4, $0x04030302
DATA poptab<>+0x48(SB)/4, $0x04030302
DATA poptab<>+0x4C(SB)/4, $0x05040403
DATA poptab<>+0x50(SB)/4, $0x04030302
DATA poptab<>+0x54(SB)/4, $0x05040403
DATA poptab<>+0x58(SB)/4, $0x05040403
DATA poptab<>+0x5C(SB)/4, $0x06050504
DATA poptab<>+0x60(SB)/4, $0x04030302
DATA poptab<>+0x64(SB)/4, $0x05040403
DATA poptab<>+0x68(SB)/4, $0x05040403
DATA poptab<>+0x6C(SB)/4, $0x06050504
DATA poptab<>+0x70(SB)/4, $0x05040403
DATA poptab<>+0x74(SB)/4, $0x06050504
DATA poptab<>+0x78(SB)/4, $0x06050504
DATA poptab<>+0x7C(SB)/4, $0x07060605
DATA poptab<>+0x80(SB)/4, $0x03020201
DATA poptab<>+0x84(SB)/4, $0x04030302
DATA poptab<>+0x88(SB)/4, $0x04030302
DATA poptab<>+0x8C(SB)/4, $0x05040403
DATA poptab<>+0x90(SB)/4, $0x04030302
DATA poptab<>+0x94(SB)/4, $0x05040403
DATA poptab<>+0x98(SB)/4, $0x05040403
DATA poptab<>+0x9C(SB)/4, $0x06050504
DATA poptab<>+0xA0(SB)/4, $0x04030302
DATA poptab<>+0xA4(SB)/4, $0x05040403
DATA poptab<>+0xA8(SB)/4, $0x05040403
DATA poptab<>+0xAC(SB)/4, $0x06050504
DATA poptab<>+0xB0(SB)/4, $0x05040403
DATA poptab<>+0xB4(SB)/4, $0x06050504
DATA poptab<>+0xB8(SB)/4, $0x06050504
DATA poptab<>+0xBC(SB)/4, $0x07060605
DATA poptab<>+0xC0(SB)/4, $0x04030302
DATA poptab<>+0xC4(SB)/4, $0x05040403
DATA poptab<>+0xC8(SB)/4, $0x05040403
DATA poptab<>+0xCC(SB)/4, $0x06050504
DATA poptab<>+0xD0(SB)/4, $0x05040403
DATA poptab<>+0xD4(SB)/4, $0x06050504
DATA poptab<>+0xD8(SB)/4, $0x06050504
DATA poptab<>+0xDC(SB)/4, $0x07060605
DATA poptab<>+0xE0(SB)/4, $0x05040403
DATA poptab<>+0xE4(SB)/4, $0x06050504
DATA poptab<>+0xE8(SB)/4, $0x06050504
DATA poptab<>+0xEC(SB)/4, $0x07060605
DATA poptab<>+0xF0(SB)/4, $0x06050504
DATA poptab<>+0xF4(SB)/4, $0x07060605
DATA poptab<>+0xF8(SB)/4, $0x07060605
DATA poptab<>+0xFC(SB)/4, $0x08070706
GLOBL poptab<>(SB), RODATA, $256

// func CountASMUsingTable(b Bitmap) int
//
// Returns the number of set bits in b by looking every byte up in poptab,
// the 256-entry per-byte population-count table defined in this file.
// Stack layout (slice header followed by the int result):
//   b_base+0(FP)  address of the first byte of the slice data
//   b_len+8(FP)   slice length in bytes (64-bit)
//   ret+24(FP)    64-bit result
TEXT ·CountASMUsingTable(SB),NOSPLIT,$0-32
	MOVQ	b_base+0(FP), DX	// Head address of slice data
	MOVQ	b_len+8(FP), CX	// Byte length; read all 64 bits so large bitmaps are not truncated
	LEAQ	poptab<>(SB), R9	// Base address of the lookup table
	XORQ	AX, AX	// Running total, kept in a register
	TESTQ	CX, CX
	JEQ	done	// Empty bitmap: never dereference DX
count:
	MOVBLZX	(DX), R8	// Next byte of the bitmap, used as the table index
	INCQ	DX	// Advance to the next byte address
	MOVBLZX	(R9)(R8*1), R10	// Table entry = number of 1 bits in that byte
	ADDQ	R10, AX	// Accumulate exactly once per byte
	DECQ	CX	// Exactly one decrement per byte consumed
	JNE	count	// Loop until the counter reaches 0
done:
	MOVQ	AX, ret+24(FP)	// Write the whole 64-bit int result
	RET
14 changes: 13 additions & 1 deletion src/bitmap/bitmap.go
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,10 @@

package bitmap

import "math"
import (
"math"
"math/bits"
)

const bitsInByte = 8

Expand Down Expand Up @@ -99,3 +102,12 @@ func (b Bitmap) CountInverse() int {
func (b Bitmap) Clear() {
clear(b)
}

// Count reports how many bits in b are set, summing the population count of
// each element in turn.
func Count(b Bitmap) int {
	total := 0
	for _, chunk := range b {
		total += bits.OnesCount(uint(chunk))
	}
	return total
}
14 changes: 3 additions & 11 deletions src/bitmap/bitmap.pure.go
Original file line number Diff line number Diff line change
Expand Up @@ -44,14 +44,6 @@ func Check(b Bitmap, index int) bool {
return (b[index/bitsInByte] & (0x01 << (index % bitsInByte))) != 0
}

// Count reports how many bits in b are set by probing every bit position
// with Check and tallying the positions that are on.
func Count(b Bitmap) int {
	total := 0
	for bit, end := 0, len(b)*bitsInByte; bit < end; bit++ {
		if !Check(b, bit) {
			continue
		}
		total++
	}
	return total
}
// CountASM has the same signature as the assembly-backed CountASM and simply
// delegates to the pure-Go Count.
func CountASM(b Bitmap) int { return Count(b) }

// CountASMUsingTable has the same signature as the assembly-backed
// CountASMUsingTable and simply delegates to the pure-Go Count.
func CountASMUsingTable(b Bitmap) int { return Count(b) }
25 changes: 21 additions & 4 deletions src/bitmap/bitmap_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -47,8 +47,8 @@ import (

func seededRandomTestSet(maxMapLen int, seed int64) []int {
rnd := rand.New(rand.NewSource(seed))
mapLen := rnd.Intn(maxMapLen)
onBits := rnd.Intn(mapLen)
mapLen := rnd.Intn(maxMapLen) + 1
onBits := rnd.Intn(mapLen) + 1
choices := make([]int, onBits)
for i := range onBits {
choices[i] = i
Expand Down Expand Up @@ -82,16 +82,22 @@ func TestCheck(t *testing.T) {
}

func TestCount(t *testing.T) {
//seed := int64(1720034999808757000)
//seed := int64(1720034181037542000)
//sets := seededRandomTestSet(64, seed)
sets, seed := randomTestSet(64)
bits := New(slices.Max(sets) + 1)
for i := range sets {
bits.Set(sets[i])
}
if Count(bits) != len(sets) {
t.Fatalf("[Go] Count was expected to be %d but was %d for seed %d", len(sets), Count(bits), seed)
}
if legacyCount(bits) != len(sets) {
t.Fatalf("[Go] Count was expected to be %d but was %d for seed %d", len(sets), legacyCount(bits), seed)
}
if Count(bits) != len(sets) {
t.Fatalf("[Asm] Count was expected to be %d but was %d for seed %d", len(sets), Count(bits), seed)
if CountASM(bits) != len(sets) {
t.Fatalf("[Asm] Count was expected to be %d but was %d for seed %d", len(sets), CountASM(bits), seed)
}
}
func BenchmarkCheckGo(b *testing.B) {
Expand Down Expand Up @@ -138,6 +144,17 @@ func BenchmarkCountGo(b *testing.B) {
}

// BenchmarkCountAmd64 times CountASM over a bitmap whose set bits come from
// seededRandomTestSet(64, 99), so every run measures the same input.
func BenchmarkCountAmd64(b *testing.B) {
	indexes := seededRandomTestSet(64, 99)
	bm := New(slices.Max(indexes) + 1)
	for _, idx := range indexes {
		bm.Set(idx)
	}
	for range b.N {
		CountASM(bm)
	}
}

func BenchmarkFastCountGo(b *testing.B) {
sets := seededRandomTestSet(64, 99)
bits := New(slices.Max(sets) + 1)
for i := range sets {
Expand Down

0 comments on commit 49c1533

Please sign in to comment.