Skip to content

Commit

Permalink
Make fingerprinting faster (#36073) (#36080)
Browse files Browse the repository at this point in the history
By using a buffer of the fingerprint's size. This decreases the
number of syscalls needed to read the file.

Also added a benchmark for tracking the function's performance.

(cherry picked from commit 061cb88)

Co-authored-by: Denis <denis.rechkunov@elastic.co>
  • Loading branch information
mergify[bot] and rdner authored Jul 17, 2023
1 parent 132226c commit 14aef55
Show file tree
Hide file tree
Showing 2 changed files with 33 additions and 4 deletions.
9 changes: 5 additions & 4 deletions filebeat/input/filestream/fswatch.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
package filestream

import (
"bufio"
"crypto/sha256"
"encoding/hex"
"fmt"
Expand Down Expand Up @@ -453,7 +454,6 @@ func (s *fileScanner) toFileDescriptor(it *ingestTarget) (fd loginp.FileDescript
return fd, fmt.Errorf("filesize of %q is %d bytes, expected at least %d bytes for fingerprinting", fd.Filename, fileSize, minSize)
}

h := sha256.New()
file, err := os.Open(it.originalFilename)
if err != nil {
return fd, fmt.Errorf("failed to open %q for fingerprinting: %w", it.originalFilename, err)
Expand All @@ -467,9 +467,10 @@ func (s *fileScanner) toFileDescriptor(it *ingestTarget) (fd loginp.FileDescript
}
}

r := io.LimitReader(file, s.cfg.Fingerprint.Length)
buf := make([]byte, h.BlockSize())
written, err := io.CopyBuffer(h, r, buf)
bfile := bufio.NewReaderSize(file, int(s.cfg.Fingerprint.Length))
r := io.LimitReader(bfile, s.cfg.Fingerprint.Length)
h := sha256.New()
written, err := io.Copy(h, r)
if err != nil {
return fd, fmt.Errorf("failed to compute hash for first %d bytes of %q: %w", s.cfg.Fingerprint.Length, fd.Filename, err)
}
Expand Down
28 changes: 28 additions & 0 deletions filebeat/input/filestream/fswatch_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -757,3 +757,31 @@ func filenames(m map[string]loginp.FileDescriptor) (result string) {
}
return result
}

// BenchmarkToFileDescriptor measures fileScanner.toFileDescriptor with
// fingerprinting enabled. The fixture is a 1024-byte file of 'a' characters
// and the fingerprint covers bytes [0, 1024), i.e. the whole file.
//
// Every iteration also checks the returned fingerprint against the expected
// digest, so a change that speeds the function up by breaking it fails the
// benchmark instead of reporting a better number.
func BenchmarkToFileDescriptor(b *testing.B) {
	dir := b.TempDir()
	basename := "created.log"
	filename := filepath.Join(dir, basename)
	err := os.WriteFile(filename, []byte(strings.Repeat("a", 1024)), 0777)
	require.NoError(b, err)

	s := fileScanner{
		paths: []string{filename},
		cfg: fileScannerConfig{
			Fingerprint: fingerprintConfig{
				Enabled: true,
				Offset:  0,
				Length:  1024,
			},
		},
	}

	it, err := s.getIngestTarget(filename)
	require.NoError(b, err)

	// Exclude the fixture creation and ingest-target lookup above from the
	// measurement; only the toFileDescriptor calls below should be timed.
	b.ResetTimer()

	for i := 0; i < b.N; i++ {
		fd, err := s.toFileDescriptor(&it)
		require.NoError(b, err)
		require.Equal(b, "2edc986847e209b4016e141a6dc8716d3207350f416969382d431539bf292e4a", fd.Fingerprint)
	}
}

0 comments on commit 14aef55

Please sign in to comment.