From ded75c7f5a5a6fe4d214fda58da20b1947fdeece Mon Sep 17 00:00:00 2001 From: Pete Gadomski Date: Fri, 31 May 2024 11:36:09 -0600 Subject: [PATCH] feat: add stac-arrow --- .github/workflows/ci.yml | 1 + Cargo.toml | 2 + README.md | 1 + stac-arrow/Cargo.toml | 23 ++++ stac-arrow/README.md | 42 +++++++ stac-arrow/data/naip.parquet | Bin 0 -> 31869 bytes stac-arrow/src/lib.rs | 222 +++++++++++++++++++++++++++++++++++ 7 files changed, 291 insertions(+) create mode 100644 stac-arrow/Cargo.toml create mode 100644 stac-arrow/README.md create mode 100644 stac-arrow/data/naip.parquet create mode 100644 stac-arrow/src/lib.rs diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index cb6dcbc36..468d0e9c2 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -24,6 +24,7 @@ jobs: - "-p stac -F reqwest" - "-p stac-api" - "-p stac -p stac-api -F geo" + - "-p stac-arrow" - "-p stac-async" - "-p stac-cli --no-default-features" - "-p stac-server --no-default-features" diff --git a/Cargo.toml b/Cargo.toml index ec37e9c39..cf6baafae 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -4,6 +4,7 @@ members = [ "stac", "pgstac", "stac-api", + "stac-arrow", "stac-async", "stac-cli", "stac-server", @@ -12,6 +13,7 @@ members = [ default-members = [ "stac", "stac-api", + "stac-arrow", "stac-async", "stac-cli", "stac-server", diff --git a/README.md b/README.md index 7a7567e3c..2f4dbe996 100644 --- a/README.md +++ b/README.md @@ -20,6 +20,7 @@ This monorepo contains several crates: | [stac](./stac/README.md) | Core data structures and synchronous I/O | [![docs.rs](https://img.shields.io/docsrs/stac?style=flat-square)](https://docs.rs/stac/latest/stac/)
[![Crates.io](https://img.shields.io/crates/v/stac?style=flat-square)](https://crates.io/crates/stac) | | [pgstac](./pgstac/README.md) | Bindings for [pgstac](https://github.com/stac-utils/pgstac) | [![docs.rs](https://img.shields.io/docsrs/pgstac?style=flat-square)](https://docs.rs/pgstac/latest/pgstac/)
[![Crates.io](https://img.shields.io/crates/v/pgstac?style=flat-square)](https://crates.io/crates/pgstac) | | [stac-api](./stac-api/README.md) | Data structures for the [STAC API](https://github.com/radiantearth/stac-api-spec) specification | [![docs.rs](https://img.shields.io/docsrs/stac-api?style=flat-square)](https://docs.rs/stac-api/latest/stac_api/)
[![Crates.io](https://img.shields.io/crates/v/stac-api?style=flat-square)](https://crates.io/crates/stac-api) | +| [stac-arrow](./stac-arrow/README.md) | Read STAC data stored in [arrow](https://arrow.apache.org/) | [![docs.rs](https://img.shields.io/docsrs/stac-arrow?style=flat-square)](https://docs.rs/stac-arrow/latest/stac_arrow/)
[![Crates.io](https://img.shields.io/crates/v/stac-arrow?style=flat-square)](https://crates.io/crates/stac-arrow) | | [stac-async](./stac-async/README.md) | Asynchronous I/O with [tokio](https://tokio.rs/) | [![docs.rs](https://img.shields.io/docsrs/stac-async?style=flat-square)](https://docs.rs/stac-async/latest/stac_async/)
[![Crates.io](https://img.shields.io/crates/v/stac-async?style=flat-square)](https://crates.io/crates/stac-async) | | [stac-cli](./stac-cli/README.md)| Command line interface | [![docs.rs](https://img.shields.io/docsrs/stac-cli?style=flat-square)](https://docs.rs/stac-cli/latest/stac_cli/)
[![Crates.io](https://img.shields.io/crates/v/stac-cli?style=flat-square)](https://crates.io/crates/stac-cli) | | [stac-server](./stac-server/README.md)| STAC API server with multiple backends | [![docs.rs](https://img.shields.io/docsrs/stac-server?style=flat-square)](https://docs.rs/stac-server/latest/stac_server/)
[![Crates.io](https://img.shields.io/crates/v/stac-server?style=flat-square)](https://crates.io/crates/stac-server) | diff --git a/stac-arrow/Cargo.toml b/stac-arrow/Cargo.toml new file mode 100644 index 000000000..c96280493 --- /dev/null +++ b/stac-arrow/Cargo.toml @@ -0,0 +1,23 @@ +[package] +name = "stac-arrow" +version = "0.1.0" +authors = ["Pete Gadomski "] +edition = "2021" +description = "Read STAC data stored in Apache Arrow" +homepage = "https://github.com/stac-utils/stac-rs" +repository = "https://github.com/stac-utils/stac-rs" +keywords = ["geospatial", "stac", "metadata", "geo", "arrow"] +categories = ["science", "data-structures"] + +[dependencies] +arrow = { version = "51", features = ["chrono-tz"] } +arrow-json = "51" +log = "0.4" +serde_json = "1" +stac = { version = "0.7", path = "../stac" } +thiserror = "1" +wkb = "0.7" + +[dev-dependencies] +parquet = "51" +stac-validate = { version = "0.1", path = "../stac-validate" } diff --git a/stac-arrow/README.md b/stac-arrow/README.md new file mode 100644 index 000000000..29a2cae6b --- /dev/null +++ b/stac-arrow/README.md @@ -0,0 +1,42 @@ +# stac-arrow + +[![GitHub Workflow Status](https://img.shields.io/github/actions/workflow/status/stac-utils/stac-rs/ci.yml?branch=main&style=for-the-badge)](https://github.com/stac-utils/stac-rs/actions/workflows/ci.yml) +[![docs.rs](https://img.shields.io/docsrs/stac-arrow?style=for-the-badge)](https://docs.rs/stac-arrow/latest/stac_arrow/) +[![Crates.io](https://img.shields.io/crates/v/stac-arrow?style=for-the-badge)](https://crates.io/crates/stac-arrow) +![Crates.io](https://img.shields.io/crates/l/stac-arrow?style=for-the-badge) +[![Contributor Covenant](https://img.shields.io/badge/Contributor%20Covenant-2.1-4baaaa.svg?style=for-the-badge)](./CODE_OF_CONDUCT) + +Read [STAC](https://stacspec.org/) data stored in [arrow](https://arrow.apache.org/). + +## Usage + +To use the library in your project: + +```toml +[dependencies] +stac-arrow = "0.1" +``` + +## Examples + +```rust +use std::fs::File; +use parquet::arrow::arrow_reader::ParquetRecordBatchReaderBuilder; + +let file = File::open("data/naip.parquet").unwrap(); +let reader = ParquetRecordBatchReaderBuilder::try_new(file) + .unwrap() + .build() + .unwrap(); +let mut items = Vec::new(); +for result in reader { + items.extend(stac_arrow::record_batch_to_items(result.unwrap()).unwrap()); +} +assert_eq!(items.len(), 5); +``` + +Please see the [documentation](https://docs.rs/stac-arrow) for more usage examples. + +## Other info + +This crate is part of the [stac-rs](https://github.com/stac-utils/stac-rs) monorepo, see its README for contributing and license information. diff --git a/stac-arrow/data/naip.parquet b/stac-arrow/data/naip.parquet new file mode 100644 index 0000000000000000000000000000000000000000..e54d47a2e59330eb21c3d47977f8707542251f3f GIT binary patch literal 31869 zcmeHw3ve6fdEhPyN+cyYv@4*HPqt~s-tr=vco8I_Se6z536dr#5~K)_w#(v00OaB! zJO~2HgH01xab1n$NnMYd#<3zlqVY|fT;t^2RAC&~^=UF5Hna9?gf!fFBO67>#iJj0G6wIHjHT)4dt z@Fk1IT;ZU_g5Yn9FBjt(L{R8Xq>9N>q&tZ~X%O#T)2jy4Ndxz}9o(x16z{voiLNToHyD>k!L(76kFzT` z0E^khx%8Z)$LexfZF8COoULchHfOimtkxd8(|t2zD8xTm z|I|3i4Jtq|vJQ}0-L1s~bnNP+4%OIra**L(x<;0`xgixJmV3rv`d$Nfau4@UyHK=h zh+9XfFP@#ol^v3N{%7Ofdw?z$OyI>%bGt?x(;1mB09x(CV{^hYU}Z`6GAU{yE`1 z+`ro0^WRe|{E>hDlTZG=o0(a?_xv{=3c~%T{^3K1op5jc=qJAYM=<`5BaA!DTsZQ4 z>YIOX?w6s}(sJ`pp3L$`PORP@I}i8Qoe8{^T;`9wbN{ZL?sr0~EnoQI-vr^Fz=8YS zrO^L)2*y8ukO11J63-lTD|$kPyX?fK;!fGJ{dd@_rv}_e|R2v@c#bl zr;hw0wE8!<9DW^ny8FS#B`4em{^7s<UUT8@37$uhkx)= zY4SYWPyN*g9#6o$!asiX5dW*t>ch@2tsc>7O-2dIefq(vEfTNy|bm-XFJuLbr zD=Oo%g;=Kqb1it3E$Uh=T-3E%x~OZlcv07qeCy`})wMSM!1aoaHz(rRbiA0a5KDhz zm+8kX+$$#TnKrcaTDtzNv90TAs}05%8%B+tBocj&z2K2NNAeWO(C|Zj zwcoUCfX|(8zkLmcxX#9=o$V+2`&^x0_{&F5SkJdG4EIFGeq$?LLG&9Uk?e9!R_Qc7 zY%rZRaBsGAAHQ+`7EtSo{@LF!e!FqhXeOfnVatVKFAyc=%bXC-#Eap4C7Mm=O2v4- zJDrN=vxV$@u^XB!;arO3e8}yVXjTwt8ZTHf;Z&}N`Q8vfy12sZS$V*M8L?ndB?WEN z&pszG=2*BG253!p?X{%Cxx-Q8|6vZXzhF4F$Q;TSn2WD5nb$B0!Nv2hF?W!b{Alz5*%xp)Ja6ytzZ(RK>$;E z$U;7ZGUiaMht`!UX!GV3Oja#FI`MNBEzWK*v@ka}woNhNoox>@kL`js?*1-0`V2J* zsmx+Qu{lFBAD@RvWH|tB_fB0hhC(j=L;n&i@0oF)wI@e~RshBDXSTxy9 z-(gMPXSr8;xCeXo_pXOfmz})vw@0C9!_}>8ybXv7=W;?S8YZO%)*pKh?qzmEsy)D( ztq^yaBCO*r$zoanRoRWbDrT9^7P-^BR}BZJBP^J{?{{RLsh>5p9BF8|<$)s&4EJr< z+6+rYqTTd#gXuF3+}VEanLbq8hB$TEhRif-Y$j~qw2UjHd_0DPCGzoj2CXw9l;YT) zN#%jjE~8aRWPlWGC}uX`7#L!j!U=M&+R?D{4g6uaw{DmDN$v<23h`p0DU}W<;?42w z!ALk0Q^k*6kSfyI%v>g%MzwaDUSv%#v)nr$zUFpC59{%njhWVKzTi`1eFGTaH9NH7% z>wB!}RhIkMVQ%$~{kYX*3;b^p3U=Us8iZ6 z+}uNMqBI4SE+=m`8_&R!juw|+F+7t7ilo$ssViP8#|y>oh>(rIQX(JkE~hfFY`M@4 z3l0l=bI7t}wc269V8P-O7Yam*X$?NsRb^ZZ&7O0rW^KSh1y+A&0jpus?J$IbB_&W3K>vTy#t{8jZ1r~%?U$Uk*O&-#}27QEDi9%XOYZn1XrTJ>1(=MzK z9Y|j+HyUY4{q~)*5%kwMu$1jvP516Gy|9CO+sB<9K#gB5g8y~MN7snkjC-ZX)4U&J z1}@0JbRbdI>~C><$ zH_rXpILciOT$jpQjGYpd*Lkl20s=nwaG>3D)Yo22K_=C1VO~=^!d!O`AjA#LMx_3Q zDVh3OQ;{=utLf{k=>?X1VUk;!M4IaWY7pr1v20wD__%$!ABZ3c#RAql|G{=u&OHP< zx5XHOJ->Ni?@(08mSVSMAq=NeRgw?tcWnFD0Yss%0n2>JGwVcner633O1Zz)^tDFQ zbB)~TAosvDO7M~~Uk1&(TKT8l#xFyGNCWwQ*>T~_W5AmfDmPOg))X<~TDUuxNgP&| zy_+p&IJ(j}MD$5-<|3)sGHtlE&ttpQ?ya_7vL1s(J z;99n%vb)vv5^H*endV=NNDsf*fAsY%*H3D72A8aw6m5Z1EYIx>2&>bT}wRSJ$ z5*JHMzC<=TOU%n9=Hi>oi*M4DMXnfoMYuL^NQA!|veLC}jZ@0f(kW~dQJR%Y z(1A8`p8AAx>_G~h$`sw0d6qSOndKh(ILp2Ido1x#nG`j)&2cd$>0B?g$erdE`LZsw zoI0t{FU3d|{Yr48QRVdSv)td^OR1JYs-f#rz2Dd_1;lH5a08jfyoFp`FCw-oy=K*Q z9pNi~upZ$`G^82of%{nQyPqU5kRb(}Emr|pmg1%< zJq2<#>i{J4;*-qjCz&?b5VG~aHT>QOpJOcio*H9GEug-6eM~R9e*V&{fvs_?4I!If zYn!)?N^OMRJe8o5+8BQCLs}DVmZYY{@Qc@(&b4r7A7Htc?k5(!ZQTAl(21_sYYw2H zQVKeG0(OF+!%Y8xezp1GzDh}1ongxThP6BPKA7j?E)N%dv>9VESs>MaF4lD3LBb76gPlaT1Zu7yX6q41?WXYcF3Tryaa?d@@s}KNk+y6J*A^mGmfACMX}ZtNC9 zr;fwr(@!D+_*=a|0T$i4laS?)()lFYRh*%~D1%C#l}bfl0UI7O{7SEMQG<}FW{~XJG_Bmpx5(Hf$R#@ZRqi{Y74^JFv zK63-p#2WsWxoM|?d2CM`^E3GO2>koOp0?Zn6pszV&yURXFVOk{W4CgE+l;fxkGc5= z;usW%wiN~$$MIl-eB8RR>Ey@0bO#)IIe$)(et#>mr$REEiz|neKoNUQ&o`J(H*lx_ zg5{q2Ix0eoV-3u@GI)hGJ_L?>JrP0@$YKo_4ma)G!|q0RZP{hG@C~-P84MO&v~2)g zc5Vec4t%o{J+civ5_}Kcu)WLs=svaYO&e#sjI;9TK@^W-_>d^RfpltHq2<6+c%*30 z#sBv&zx?@4XdSbShCOZ9d9Gz9Y$mqtH-GP1=2N!o*|z`it!qJU&wWefhu09f74zXt zVLqEr*T`j+|pv%y?{rXIeRv19ahnB+8=3DNj92?7ugCaD3!*>Lr(e2kU>sN^7}E~r+d z4ME{BdiYIm9Kyg+#%2*a9*a|>!~JaaJ*-#ZAd|XsFJ*O zEkf}f0$WLpRKuziq<~b#Z(W#3r0kD%K}&anLWHSx-~N5?{MMEF4i=}$7s3t4r$8Y zJtc=&bv*Ja9oq_}tE^b)r%M~%4m zPOoU*4Imwu(X;IBJY>RM{SzVFHk{qy-g~&Nd{?<9MB^jt?Ky;^XUf~~Je>Pi_2`+~ z-bcdRm3*AbUG>MU-1a;U<*wkPOm5pB@^Rbr5RSVtkI~+Rr&jOM10T~vpJz?2T(fC4 zyBEG*d%Jl@4!!~$E||Y`-H8tPUT1uH4i5R1(wRcX!S=gObSy#}Ty?;ss6@P@tGxps zNkBuUBnWT^k1X>=xV4h&Yz{7+=(&MNBOf z9RvwLsG&n{-7F=AVjdCJQi5{&a2F6Ha*Ps_-|K~AYjM{1UIpCB8?e?uUpt_V0(@!K zU7|AXK>(SJwV{zgDyBtE+v>2bsrAjtT6#B+7Mr@K4rioN1S`En&hY4j7Ms*shl$8? zUdV=vy*-yKOY#N~ElRmr4=44ROGZhbg=%ps?R7{MAH5W7Q4SqzQP%d=p}kz|Dd?Cf zJexx*>M)5)#GYCXRUNAEq;Ux8OqG^Fbx$2qsjFDk;+6V(d+y2lJY#f&dQ*t$|zWs>7Yi zfahw3FE_8pu5#i>i?g<`4(XK;GdYo?#Y@B>_toJgf+nn8a^N8FRj=zrK+?*|7tbd0;aoBm^#mrdLt2RheUt6|#C#!Il+ttX zD$sa3B4AKZmMg(*UWl~=j2ImHjJ8i!3UIEb-RA1=X_vm#s7#B-rNjdpToM9;r)G%tqix>oLd5K>+S2=68yei`&P!=Z4MWVN5hQ^LMm6t z!V0NIGqU=zFp?=6Ll{s1 zokgO#f$>Qn(nJcSgY7*IdoLz-IpoMS(|A^>B&c9SYVW!YUfR>0_-23^J3P@oc@94^59Ld3poiv*q3;e~7h=BfL8hldKO*`O1~1agsdG<7t^r>3n% z&v2@o7*3B>q7{G%V@8I?7Q(*i;>;w$k&O)n%F$|eY1HAaMC{p8#4(Z?b%8MC3rN@SBJgI9Rc1o&Y&baPJtp{5qaHpL zNe>ohDb7&91_JX=koH;Mpfwh>%>(V#*`O^I@f`!&-F?GpXEGARYv9A~sziJfiGY22 zY1TJw4F~;+V-BGj13rem15SXIP|qj$g1}>Xn({qZ+%mj72LqKEpeY=h$PH(#1(cnd z2ar=?*5e)m9W8>6ax)e8R6I0hoef$O{;9>DF%O>&RHIeDXE8NDSw?-8W`Z^v(T?|jViZj97 zQdDpy<354-%-Dk8anw6d9jhQbp%g>=uusp=rk9i82SNneX3v!kMTf?g!off?;vxPc(-?Hr*qPeC3IID78=77OI2aF-GwG4aY-l!z?Gbwqp=k>I ziTZpX7fnwufNg=#5?@E(wuUJ_8}Mo+{+bR5vyKsAR>#iN{uc4MO0#wWd^I3Y_=BmL z%!m+63rQ6YKhlA+0$EgMgJa;Ut`fyFos2rD&&Pt!Tnu&S8CjeS#$epB)cmxK$T$iy zFiK(opPKZ!D&bIWDUv}sxgoaM$`QL$h-4-b5&KNSZ=Fa)#W*mUqA~%VlLVjWV`6)1 zChh9McJRY$!~rp6(503q&|-D^Jzy7^m;in_U&AlV8=V^HA^mCRQZy6DhU{Q7K2aXf ze{_mRod`1!1E6myl8#Bb0(*kFLl8qeBeLzzgn$OZU%*M|U7VgcW{ncr&DbHPV65~` zRQ(IHE{q?;X^a&%AvOeg!Vz#vGCJmS72?4d>KNh6d zp0VlxME3z0Yjqx->~V~FXJFh+Vc6@pjV|zIe|5MT3Q=89|B&owxIDTrQSwv!SwK0* zfPPV?g>Y~LV*Uu_1!;A-laX{F33&(Uvd@M_sZb88*DQ=;>E{SgNr3%O^qepin z12G-_qBggpUKT=*Ss|JkBXLbvo>YIDaZw-W+&5TN`v}Gx(Wj6P(2=bipLQv07SJ`U z!7%n?Kd!lwxGo`UB%-%vO%4XWk~21!5XZbh=!A9I8ZumSY(}(en~Ih+t8+lOT{7IiZMp3*R(4Y#vDjv)~56dIdv&c$EU-=>B8)wOIf$2M^X_- zg3zlcCs?0UV}r#s&%OC(34Xk40$PN7v(W{ zC2eZ33Tr53Ej|~<}QJZLswehQOID_;|_u_hwtdE50V-pp~XZ5ud z)QRB`-y09Y`jW1JbY!X-qbJ+T%E4>oGEz4mRmBY*-_?xS?${C?8d(Z2Jy&q zwW#8FUEMJNds(Dr$0{R$9}|3*=f}$bZG+<2n6V>pi6eB-#O=Oz1lVYm{v3Xi##EEc$NdvT+WN5BLMe3Gp}01UHe zeS>>%y=u8b*96X=9FM^MA>R235FbvGw<<+=P=~_46X}^=ny2FvZfhNW#DrYH55Yf! zS7d*4sK$o^Z6u>({a8Y@>JyiZG`yq3IU!GUxZqf*> zfgR2ui=-bKp|mEb;V;imPY2-df=J(h$WOg|(UjepsC|MTSjvd@#Fr0@Q+r-dpIDDa zeB!&KV6oV$6237)6x~#(qC}#638bxn{xythp<44{7u3mBSesO#F^c#Fgh%9Q zMM?3a_(Az2@G5vqgzP5w1PIxbcf(%PGW_pFur?$!Ju?W;D#N4tq2q}>5EAv5A+p~> z?#npdBFk@(GC=^6OEQQ@2LODbKml!JzAeiaO%wQqXBRWL9t$D> z58>A?!`qyED`dR4cYS{AA9PjFv@NH7Q1sqSG z2Z4Adti+#>!lh`xBs3ks+cErb_u%Iget4>3x}k+8op4SS}6d5BfG|0={#+`Pw% z@cYQ~CLFyvc{_0Y@>JiJhv*;;}7&p_%>(%o&t_{ z%JL`^{m~>vrHJyR@fQPlaY&6+Kg7RD(xzIV`bQu%5-awh zS|1JIeCq)#_5qQ2s_8$0I#OJKm z;Uf&G@&%d_dA?DEdYA4QPk1H}Uq$AZ%1gC=AgO~G9nCj%4+uezB0h^%qSre~=*!|K z%{M4-50Os<8`qJS3-MQ~V9FcHi|nT&INpupUB@ioN_e-1eGt;|845h}#bE@%;-DKXg6lR_Oz$ zQhz2=A^U*@v8NyLxnz8FJ+9IR8Ab6)>v6IU1Cl6iT-}U-4O9Lm2$A*kNvOy30phbB z3KZ8It_k>1%Tuv$GKgjg*vAEX1)qleFO6Tf{y^4;pmq+9&BOB)s3YK+H$1b8mfdBX ze_W<_o%p5luQZN#dErB4FEoFt>>ZK1>-YHy7ZsepjCB^SUuSqw4XLYOvZPeWp6dON zz$edC=twe!+B*S#h%vXrf5>;d6ffQu&E;+j<8i!p@t8 = std::result::Result; + +/// Converts a [RecordBatch] into a vector of [Items](Item). +/// +/// # Examples +/// +/// ``` +/// use std::fs::File; +/// use parquet::arrow::arrow_reader::ParquetRecordBatchReaderBuilder; +/// +/// let file = File::open("data/naip.parquet").unwrap(); +/// let reader = ParquetRecordBatchReaderBuilder::try_new(file) +/// .unwrap() +/// .build() +/// .unwrap(); +/// let mut items = Vec::new(); +/// for result in reader { +/// items.extend(stac_arrow::record_batch_to_items(result.unwrap()).unwrap()); +/// } +/// assert_eq!(items.len(), 5); +/// ``` +pub fn record_batch_to_items(mut record_batch: RecordBatch) -> Result> { + let index = record_batch.schema().index_of("geometry")?; + let geometry = record_batch.remove_column(index); + // TODO allow for i64 offsets + let geometry = geometry + .as_binary_opt::() + .ok_or_else(|| Error::NonBinaryGeometryColumn)?; + let mut writer = ArrayWriter::new(Vec::new()); + writer.write(&record_batch.into())?; + writer.finish()?; + let items: Vec> = serde_json::from_reader(writer.into_inner().as_slice())?; + items + .into_iter() + .enumerate() + .map(|(i, item)| { + let mut item = map_to_item(item)?; + // TODO handle null geometries + item.geometry = Some((&wkb::wkb_to_geom(&mut Cursor::new(geometry.value(i)))?).into()); + Ok(item) + }) + .collect() +} + +fn map_to_item(mut map: Map) -> Result { + let _ = map.remove("type"); + let mut item = Item::new(map.remove("id").ok_or_else(|| Error::MissingField("id"))?); + if let Some(stac_extensions) = map.remove("stac_extensions") { + if let Value::Array(array) = stac_extensions { + for value in array { + if let Value::String(stac_extension) = value { + item.extensions.push(stac_extension); + } else { + log::warn!( + "stac_extension value not a string, discarding: {}", + value.to_string() + ); + } + } + } else { + log::warn!( + "stac_extensions value not an array, discarding: {}", + stac_extensions.to_string() + ); + } + } + if let Some(bbox) = map.remove("bbox") { + if let Value::Array(bbox) = bbox { + let original_length = bbox.len(); + let bbox: Vec<_> = bbox + .into_iter() + .map(|value| value.as_f64()) + .flatten() + .collect(); + if bbox.len() != original_length { + log::warn!("some bbox values were not floats, discarding") + } else if bbox.len() != 4 && bbox.len() != 6 { + log::warn!("bbox is invalid length, discarding") + } else { + item.bbox = Some(bbox); + } + } + } + item.links = serde_json::from_value( + map.remove("links") + .ok_or_else(|| Error::MissingField("links"))?, + )?; + item.assets = serde_json::from_value( + map.remove("assets") + .ok_or_else(|| Error::MissingField("assets"))?, + )?; + if let Some(collection) = map.remove("collection") { + if let Value::String(collection) = collection { + item.collection = Some(collection) + } else { + log::warn!( + "collection is not a string, discarding: {}", + collection.to_string() + ); + } + }; + item.properties = serde_json::from_value(Value::Object(map))?; + Ok(item) +} + +impl From for Error { + fn from(value: wkb::WKBReadError) -> Self { + Error::WkbRead(value) + } +} + +#[cfg(test)] +mod tests { + use parquet::arrow::arrow_reader::ParquetRecordBatchReaderBuilder; + use stac_validate::Validate; + use std::fs::File; + + #[test] + fn record_batch_to_items() { + let file = File::open("data/naip.parquet").unwrap(); + let mut reader = ParquetRecordBatchReaderBuilder::try_new(file) + .unwrap() + .build() + .unwrap(); + let items = reader + .next() + .map(|result| super::record_batch_to_items(result.unwrap()).unwrap()) + .unwrap(); + assert_eq!(items.len(), 5); + for item in items { + assert_eq!(item.extensions.len(), 2); + assert!(item.geometry.is_some()); + assert!(item.bbox.is_some()); + assert!(!item.links.is_empty()); + assert!(!item.assets.is_empty()); + assert!(item.collection.is_some()); + item.validate().unwrap(); + } + } +} + +// From https://github.com/rust-lang/cargo/issues/383#issuecomment-720873790, +// may they be forever blessed. +#[cfg(doctest)] +mod readme { + macro_rules! external_doc_test { + ($x:expr) => { + #[doc = $x] + extern "C" {} + }; + } + + external_doc_test!(include_str!("../README.md")); +}