From 0619d50f9c444db208f95dcd1ff7d44aa86f93f5 Mon Sep 17 00:00:00 2001 From: DavePearce Date: Tue, 18 Jun 2024 08:57:46 +1200 Subject: [PATCH] Initial Support for Memory Mapped Files This brings in initial support for memory mapped files, developed by Tsvetan. The goal is subsequently to explore their use to improve reading of trace files. --- go.mod | 9 +++- go.sum | 18 ++++++++ pkg/mmap/block_device.go | 99 ++++++++++++++++++++++++++++++++++++++++ pkg/mmap/file.go | 51 +++++++++++++++++++++ pkg/test/ir_test.go | 2 +- pkg/test/mmap_test.go | 75 ++++++++++++++++++++++++++++++ 6 files changed, 252 insertions(+), 2 deletions(-) create mode 100644 pkg/mmap/block_device.go create mode 100644 pkg/mmap/file.go create mode 100644 pkg/test/mmap_test.go diff --git a/go.mod b/go.mod index 86fe477..d4276ce 100644 --- a/go.mod +++ b/go.mod @@ -4,12 +4,19 @@ go 1.22.1 require ( github.com/consensys/gnark-crypto v0.12.1 + github.com/pkg/errors v0.9.1 github.com/spf13/cobra v1.8.0 + github.com/stretchr/testify v1.8.2 + golang.org/x/sys v0.9.0 ) require ( github.com/bits-and-blooms/bitset v1.7.0 // indirect + github.com/davecgh/go-spew v1.1.1 // indirect github.com/inconshreveable/mousetrap v1.1.0 // indirect + github.com/kr/text v0.2.0 // indirect + github.com/pmezard/go-difflib v1.0.0 // indirect + github.com/rogpeppe/go-internal v1.12.0 // indirect github.com/spf13/pflag v1.0.5 // indirect - golang.org/x/sys v0.9.0 // indirect + gopkg.in/yaml.v3 v3.0.1 // indirect ) diff --git a/go.sum b/go.sum index 87b6142..e79ca3b 100644 --- a/go.sum +++ b/go.sum @@ -3,23 +3,41 @@ github.com/bits-and-blooms/bitset v1.7.0/go.mod h1:gIdJ4wp64HaoK2YrL1Q5/N7Y16edY github.com/consensys/gnark-crypto v0.12.1 h1:lHH39WuuFgVHONRl3J0LRBtuYdQTumFSDtJF7HpyG8M= github.com/consensys/gnark-crypto v0.12.1/go.mod h1:v2Gy7L/4ZRosZ7Ivs+9SfUDr0f5UlG+EM5t7MPHiLuY= github.com/cpuguy83/go-md2man/v2 v2.0.3/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o= +github.com/creack/pty v1.1.9/go.mod 
h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E= +github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2s0bqwp9tc8= github.com/inconshreveable/mousetrap v1.1.0/go.mod h1:vpF70FUmC8bwa3OWnCshd2FqLfsEA9PFc4w1p2J65bw= +github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE= +github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk= +github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= +github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= github.com/leanovate/gopter v0.2.9 h1:fQjYxZaynp97ozCzfOyOuAGOU4aU/z37zf/tOujFk7c= github.com/leanovate/gopter v0.2.9/go.mod h1:U2L/78B+KVFIx2VmW6onHJQzXtFb+p5y3y2Sh+Jxxv8= +github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4= +github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/rogpeppe/go-internal v1.12.0 h1:exVL4IDcn6na9z1rAb56Vxr+CgyK3nn3O+epU5NdKM8= +github.com/rogpeppe/go-internal v1.12.0/go.mod h1:E+RYuTGaKKdloAfM02xzb0FW3Paa99yedzYV+kq4uf4= github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= github.com/spf13/cobra v1.8.0 h1:7aJaZx1B85qltLMc546zn58BxxfZdR/W22ej9CFoEf0= github.com/spf13/cobra v1.8.0/go.mod h1:WXLWApfZ71AjXPya3WOlMsY9yMs7YeiHhFVlvLyhcho= github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA= github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= +github.com/stretchr/objx v0.1.0/go.mod 
h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= +github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= +github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo= +github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= +github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= github.com/stretchr/testify v1.8.2 h1:+h33VjcLVPDHtOdpUCuF+7gSuG3yGIftsP1YvFihtJ8= github.com/stretchr/testify v1.8.2/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= golang.org/x/sys v0.9.0 h1:KS/R3tvhPqvJvwcKfnBHJwwthS11LRhmM5D59eEXa0s= golang.org/x/sys v0.9.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk= +gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q= +gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= diff --git a/pkg/mmap/block_device.go b/pkg/mmap/block_device.go new file mode 100644 index 0000000..a279747 --- /dev/null +++ b/pkg/mmap/block_device.go @@ -0,0 +1,99 @@ +package mmap + +import ( + "errors" + "io" + "runtime/debug" + "syscall" + + pkgErrors "github.com/pkg/errors" + "golang.org/x/sys/unix" +) + +// BlockDevice represents a mmap block device holding a reference to a file descriptor. +type BlockDevice struct { + FileDescriptor int + Data []byte +} + +// NewBlockDevice creates a BlockDevice from a file +// descriptor referring either to a regular file or UNIX device node. To +// speed up reads, a memory map is used. 
+func NewBlockDevice(fileDescriptor, sizeBytes int) (*BlockDevice, error) { + data, err := unix.Mmap(fileDescriptor, 0, sizeBytes, syscall.PROT_READ, syscall.MAP_SHARED) + if err != nil { + return nil, pkgErrors.Wrap(err, "failed to memory map block device") + } + + return &BlockDevice{ + FileDescriptor: fileDescriptor, + Data: data, + }, nil +} + +// ReadAt reads through the memory map at a given offset. +func (bd *BlockDevice) ReadAt(p []byte, off int64) (n int, err error) { + // Let read actions go through the memory map to prevent system + // call overhead for commonly requested objects. + if off < 0 { + return 0, syscall.EINVAL + } + + if off > int64(len(bd.Data)) { + return 0, io.EOF + } + // Install a page fault handler, so that I/O errors against the + // memory map (e.g., due to disk failure) don't cause us to + // crash. + old := debug.SetPanicOnFault(true) + defer func() { + debug.SetPanicOnFault(old) + + if recover() != nil { + err = errors.New("page fault occurred while reading from memory map") + } + }() + + n = copy(p, bd.Data[off:]) + if n < len(p) { + err = io.EOF + } + + return +} + +// WriteAt writes at a given offset. +func (bd *BlockDevice) WriteAt(p []byte, off int64) (int, error) { + // Let write actions go through the file descriptor. Doing so + // yields better performance, as writes through a memory map + // would trigger a page fault that causes data to be read. + // + // The pwrite() system call cannot return a size and error at + // the same time. If an error occurs after one or more bytes are + // written, it returns the size without an error (a "short + // write"). As WriteAt() must return an error in those cases, we + // must invoke pwrite() repeatedly. + // + // TODO: Maybe it makes sense to let unaligned writes that would + // trigger reads anyway to go through the memory map? 
+ nTotal := 0 + + for len(p) > 0 { + n, err := unix.Pwrite(bd.FileDescriptor, p, off) + nTotal += n + + if err != nil { + return nTotal, err + } + + p = p[n:] + off += int64(n) + } + + return nTotal, nil +} + +// Sync synchronizes a file's in-core state with storage device. +func (bd *BlockDevice) Sync() error { + return unix.Fsync(bd.FileDescriptor) +} diff --git a/pkg/mmap/file.go b/pkg/mmap/file.go new file mode 100644 index 0000000..059464e --- /dev/null +++ b/pkg/mmap/file.go @@ -0,0 +1,51 @@ +package mmap + +import ( + pkgErrors "github.com/pkg/errors" + "golang.org/x/sys/unix" +) + +// File represents a memory-mapped file. +type File struct { + BlockDevice *BlockDevice + SectorSizeBytes int + SectorCount int64 +} + +// NewFile constructs a new instance of File. +func NewFile(path string, minimumSizeBytes int) (*File, error) { + fd, err := unix.Open(path, unix.O_CREAT|unix.O_RDWR|unix.O_APPEND, 0666) + if err != nil { + return nil, pkgErrors.Wrapf(err, "failed to open file %#v", path) + } + + // Use the block size returned by fstat() to determine the + // sector size and the number of sectors needed to store the + // desired amount of space. 
+ var stat unix.Stat_t + if err := unix.Fstat(fd, &stat); err != nil { + return nil, pkgErrors.Wrapf(err, "failed to obtain size of file %#v", path) + } + + sectorSizeBytes := int(stat.Blksize) + sectorCount := int64((uint64(minimumSizeBytes) + uint64(stat.Blksize) - 1) / uint64(stat.Blksize)) + sizeBytes := int64(sectorSizeBytes) * sectorCount + + if err := unix.Ftruncate(fd, sizeBytes); err != nil { + return nil, pkgErrors.Wrapf(err, "failed to truncate file %#v to %d bytes", path, sizeBytes) + } + + bd, err := NewBlockDevice(fd, int(sizeBytes)) + + if err != nil { + return nil, err + } else if err := unix.Close(fd); err != nil { + return nil, err + } + + return &File{ + BlockDevice: bd, + SectorSizeBytes: sectorSizeBytes, + SectorCount: sectorCount, + }, nil +} diff --git a/pkg/test/ir_test.go b/pkg/test/ir_test.go index eef73e0..79b87a0 100644 --- a/pkg/test/ir_test.go +++ b/pkg/test/ir_test.go @@ -1,4 +1,4 @@ -package testA +package test import ( "bufio" diff --git a/pkg/test/mmap_test.go b/pkg/test/mmap_test.go new file mode 100644 index 0000000..ace1880 --- /dev/null +++ b/pkg/test/mmap_test.go @@ -0,0 +1,75 @@ +package test + +import ( + "os" + "path/filepath" + "runtime/debug" + "testing" + + "github.com/consensys/go-corset/pkg/mmap" + + "github.com/stretchr/testify/require" +) + +func Ignored_TestNewBlockDeviceFromFile(t *testing.T) { + minSizeBytes := 123456 + blockDevicePath := filepath.Join(t.TempDir(), "test_blockdevice") + + println(blockDevicePath) + + mmapFile, err := mmap.NewFile(blockDevicePath, minSizeBytes) + require.NoError(t, err) + + sectorSizeBytes := mmapFile.SectorSizeBytes + sectorCount := mmapFile.SectorCount + blockDevice := mmapFile.BlockDevice + // The sector size should be a power of two, and the number of + // sectors should be sufficient to hold the required space. 
+ require.LessOrEqual(t, 512, sectorSizeBytes) + require.Equal(t, 0, sectorSizeBytes&(sectorSizeBytes-1)) + require.Equal(t, int64((minSizeBytes+sectorSizeBytes-1)/sectorSizeBytes), sectorCount) + + // The file on disk should have a size that corresponds to the + // sector size and count. + fileInfo, err := os.Stat(blockDevicePath) + require.NoError(t, err) + require.Equal(t, int64(sectorSizeBytes)*sectorCount, fileInfo.Size()) + + // Test read, write and sync operations. + n, err := blockDevice.WriteAt([]byte("Hello"), 12345) + require.Equal(t, 5, n) + require.NoError(t, err) + + var b [16]byte + n, err = blockDevice.ReadAt(b[:], 12340) + require.Equal(t, 16, n) + require.NoError(t, err) + require.Equal(t, []byte("\x00\x00\x00\x00\x00Hello\x00\x00\x00\x00\x00\x00"), b[:]) + + require.NoError(t, mmapFile.BlockDevice.Sync()) + + // Truncating the file will cause future read access to the + // memory map underneath the BlockDevice to raise SIGBUS. This + // may also occur in case of actual I/O errors. These page + // faults should be caught properly. + // + // To be able to implement this, ReadAt() temporarily enables the + // debug.SetPanicOnFault() option. Test that the original value + // of this option is restored upon completion. + require.NoError(t, os.Truncate(blockDevicePath, 0)) + + debug.SetPanicOnFault(false) + + n, err = blockDevice.ReadAt(b[:], 12340) + require.NoError(t, err) + + require.False(t, debug.SetPanicOnFault(false)) + require.Equal(t, 0, n) + debug.SetPanicOnFault(true) + + n, err = blockDevice.ReadAt(b[:], 12340) + + require.True(t, debug.SetPanicOnFault(false)) + require.Equal(t, 0, n) + require.Error(t, err, "page fault occurred while reading from memory map") +}