From cc5df9f0e51956e5465d9c44d949e671806cac7c Mon Sep 17 00:00:00 2001 From: Brandon Liu Date: Wed, 30 Aug 2023 18:26:17 +0800 Subject: [PATCH] add filtering by tiles --- main.go | 5 +++-- pmtiles/stats.go | 32 ++++++++++++++++++++++++++++++-- 2 files changed, 33 insertions(+), 4 deletions(-) diff --git a/main.go b/main.go index 1e66b84..c3b023a 100644 --- a/main.go +++ b/main.go @@ -61,7 +61,8 @@ var cli struct { Stats struct { Input string `arg:"" type:"existingfile"` - } `cmd:"" help:"Prints p50, p99, and max tile size stats (after compression) for each and all zoom levels."` + Filter string `help:"Limit statistics to file with list of tiles (one Z/X/Y on each line)." type:"existingfile"` + } `cmd:"" help:"Prints p50, p99, and max tile size statistics (after compression) for each zoom level."` Verify struct { Input string `arg:"" help:"Input archive." type:"existingfile"` @@ -131,7 +132,7 @@ func main() { case "extract ": logger.Fatalf("This command is not yet implemented.") case "stats ": - err := pmtiles.Stats(logger, "file:///", cli.Stats.Input) + err := pmtiles.Stats(logger, "file:///", cli.Stats.Input, cli.Stats.Filter) if err != nil { logger.Fatalf("Failed to stats archive, %v", err) } diff --git a/pmtiles/stats.go b/pmtiles/stats.go index 2e03e1a..b141272 100644 --- a/pmtiles/stats.go +++ b/pmtiles/stats.go @@ -1,20 +1,25 @@ package pmtiles import ( - "github.com/RoaringBitmap/roaring/roaring64" + "bufio" "bytes" "context" "fmt" + "github.com/RoaringBitmap/roaring/roaring64" "github.com/dustin/go-humanize" "gocloud.dev/blob" "io" "log" + "os" "sort" + "strconv" + "strings" ) // do this in an inefficent way for now, storing all entries in memory // later on, evaluate t-digest for faster rank calculation -func Stats(logger *log.Logger, bucketURL string, file string) error { +func Stats(logger *log.Logger, bucketURL string, file string, filter string) error { + ctx := context.Background() bucket, err := blob.OpenBucket(ctx, bucketURL) if err != nil { @@ -44,6 +49,25 @@ func Stats(logger *log.Logger, bucketURL string, file string) error { return fmt.Errorf("Failed to read %s, %w", file, err) } + filter_set := roaring64.New() + if filter != "" { + file, err := os.Open(filter) + if err != nil { + log.Fatalf("failed to open file: %s", err) + } + defer file.Close() + scanner := bufio.NewScanner(file) + for scanner.Scan() { + parts := strings.Split(scanner.Text(), "/") + z, _ := strconv.ParseUint(parts[0], 10, 8) + x, _ := strconv.ParseUint(parts[1], 10, 32) + y, _ := strconv.ParseUint(parts[2], 10, 32) + filter_set.Add(ZxyToId(uint8(z), uint32(x), uint32(y))) + } + } else { + filter_set.AddRange(0, ZxyToId(header.MaxZoom+1, 0, 0)) + } + total_entries := make([]EntryV3, 0) var CollectEntries func(uint64, uint64) @@ -80,6 +104,10 @@ func Stats(logger *log.Logger, bucketURL string, file string) error { if offset_set.Contains(entry.Offset) { continue } + if !filter_set.Contains(entry.TileId) { + continue + } + offset_set.Add(entry.Offset) z, _, _ := IdToZxy(entry.TileId) if _, ok := stats[z]; !ok {