Skip to content

Commit

Permalink
implement pmtiles extract [#31, #52] (#62)
Browse files Browse the repository at this point in the history
* implement pmtiles extract [#31, #52]

* Experimental CLI support for extracting a region from a larger archive, given a maxzoom and a GeoJSON multipolygon region.
* Limited to credentialed buckets or local files now, public HTTP to come later
* Limited to a single download thread
* Change directory optimization to be faster and match Java implementation, affects root/leaf sizes

* Finish initial extract [#31]

* include the DstOffset so we can multithread downloads later
* set header statistics
* implement --dry-run
* add logging messages for user feedback
  • Loading branch information
bdon authored Sep 4, 2023
1 parent 98576cc commit 9bc8c22
Show file tree
Hide file tree
Showing 7 changed files with 661 additions and 137 deletions.
20 changes: 13 additions & 7 deletions main.go
Original file line number Diff line number Diff line change
Expand Up @@ -45,12 +45,13 @@ var cli struct {
} `cmd:"" help:"Fetch one tile from a local or remote archive and output on stdout."`

Extract struct {
Input string `arg:"" help:"Input local or remote archive."`
Output string `arg:"" help:"Output archive." type:"path"`
Bucket string `help:"Remote bucket of input archive."`
Region string `help:"local GeoJSON Polygon or MultiPolygon file for area of interest." type:"existingfile"`
Maxzoom int `help:"Maximum zoom level, inclusive."`
DryRun bool `help:"Calculate tiles to extract based on header and directories, but don't download them."`
Input string `arg:"" help:"Input local or remote archive."`
Output string `arg:"" help:"Output archive." type:"path"`
Bucket string `help:"Remote bucket of input archive."`
Region string `help:"local GeoJSON Polygon or MultiPolygon file for area of interest." type:"existingfile"`
Maxzoom uint8 `help:"Maximum zoom level, inclusive."`
DryRun bool `help:"Calculate tiles to extract, but don't download them."`
Overfetch float32 `default:0.1 help:"What ratio of extra data to download to minimize # requests; 0.2 is 20%"`
} `cmd:"" help:"Create an archive from a larger archive for a subset of zoom levels or geographic region."`

Verify struct {
Expand Down Expand Up @@ -119,7 +120,10 @@ func main() {
logger.Printf("Serving %s %s on port %d with Access-Control-Allow-Origin: %s\n", cli.Serve.Bucket, cli.Serve.Path, cli.Serve.Port, cli.Serve.Cors)
logger.Fatal(http.ListenAndServe(":"+strconv.Itoa(cli.Serve.Port), nil))
case "extract <input> <output>":
logger.Fatalf("This command is not yet implemented.")
err := pmtiles.Extract(logger, cli.Extract.Bucket, cli.Extract.Input, cli.Extract.Maxzoom, cli.Extract.Region, cli.Extract.Output, cli.Extract.Overfetch, cli.Extract.DryRun)
if err != nil {
logger.Fatalf("Failed to extract, %v", err)
}
case "convert <input> <output>":
path := cli.Convert.Input
output := cli.Convert.Output
Expand Down Expand Up @@ -160,6 +164,8 @@ func main() {
logger.Fatalf("Failed to upload file, %v", err)
}
case "verify <input>":
// check clustered
// check counts (addressed tiles, tile entries, # tile contents)
logger.Fatalf("This command is not yet implemented.")
case "version":
fmt.Printf("pmtiles %s, commit %s, built at %s\n", version, commit, date)
Expand Down
4 changes: 2 additions & 2 deletions pmtiles/convert.go
Original file line number Diff line number Diff line change
Expand Up @@ -114,14 +114,14 @@ func Convert(logger *log.Logger, input string, output string, deduplicate bool,
func add_directoryv2_entries(dir DirectoryV2, entries *[]EntryV3, f *os.File) {
for zxy, rng := range dir.Entries {
tile_id := ZxyToId(zxy.Z, zxy.X, zxy.Y)
*entries = append(*entries, EntryV3{tile_id, rng.Offset, rng.Length, 1})
*entries = append(*entries, EntryV3{tile_id, rng.Offset, uint32(rng.Length), 1})
}

var unique = map[uint64]uint32{}

// uniqify the offset/length pairs
for _, rng := range dir.Leaves {
unique[rng.Offset] = rng.Length
unique[rng.Offset] = uint32(rng.Length)
}

for offset, length := range unique {
Expand Down
43 changes: 25 additions & 18 deletions pmtiles/directory.go
Original file line number Diff line number Diff line change
Expand Up @@ -313,24 +313,31 @@ func build_roots_leaves(entries []EntryV3, leaf_size int) ([]byte, []byte, int)
}

func optimize_directories(entries []EntryV3, target_root_len int) ([]byte, []byte, int) {
test_root_bytes := serialize_entries(entries)

// Case1: the entire directory fits into the target len
if len(test_root_bytes) <= target_root_len {
return test_root_bytes, make([]byte, 0), 0
} else {

// TODO: case 2: mixed tile entries/directory entries in root

// case 3: root directory is leaf pointers only
// use an iterative method, increasing the size of the leaf directory until the root fits
leaf_size := 4096
for {
root_bytes, leaves_bytes, num_leaves := build_roots_leaves(entries, leaf_size)
if len(root_bytes) <= target_root_len {
return root_bytes, leaves_bytes, num_leaves
}
leaf_size *= 2
if len(entries) < 16384 {
test_root_bytes := serialize_entries(entries)
// Case1: the entire directory fits into the target len
if len(test_root_bytes) <= target_root_len {
return test_root_bytes, make([]byte, 0), 0
}
}

// TODO: case 2: mixed tile entries/directory entries in root

// case 3: root directory is leaf pointers only
// use an iterative method, increasing the size of the leaf directory until the root fits

var leaf_size float32
leaf_size = float32(len(entries)) / 3500

if leaf_size < 4096 {
leaf_size = 4096
}

for {
root_bytes, leaves_bytes, num_leaves := build_roots_leaves(entries, int(leaf_size))
if len(root_bytes) <= target_root_len {
return root_bytes, leaves_bytes, num_leaves
}
leaf_size *= 1.2
}
}
2 changes: 1 addition & 1 deletion pmtiles/downloader.go
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ func DownloadParts(getter func (Range) []byte, ranges []Range, numThreads int) c

// a number for overhead: 0.2 is 20% overhead, 1.0 is 100% overhead
// a number for maximum chunk size: n chunks * threads is the max memory usage
// store the smallest gaps in a heap; merge ranges until the overhead budget is reached
func DownloadBatchedParts(getter func (Range) []byte, ranges []Range, overhead float32, maxSizeBytes int, numThreads int) chan []byte {
orderedOutput := make(chan []byte, 8)
return orderedOutput
Expand Down
Loading

0 comments on commit 9bc8c22

Please sign in to comment.