From b3b81ce3e9f9e6f25b41f463577976628515384a Mon Sep 17 00:00:00 2001 From: Vinoo Ganesh Date: Fri, 8 Mar 2024 16:33:45 -0500 Subject: [PATCH 1/4] Update to new website --- .asf.yaml | 22 ++++++++++++- .github/workflows/deploy.yml | 10 +++--- README.md | 1 - config.toml | 25 ++++++++++----- content/en/_index.md | 31 +++++++++++++++++++ content/en/docs/Concepts/_index.md | 1 + .../File Format/Data Pages/compression.md | 1 - .../docs/File Format/Data Pages/encryption.md | 1 - content/en/docs/File Format/Types/_index.md | 1 + .../en/docs/File Format/Types/logicaltypes.md | 2 +- content/en/docs/File Format/configurations.md | 3 +- content/en/docs/File Format/metadata.md | 1 + go.mod | 5 +++ go.sum | 4 +++ 14 files changed, 90 insertions(+), 18 deletions(-) create mode 100644 content/en/_index.md create mode 100644 go.mod create mode 100644 go.sum diff --git a/.asf.yaml b/.asf.yaml index 79c2ea68..389b0c5a 100644 --- a/.asf.yaml +++ b/.asf.yaml @@ -1,6 +1,26 @@ +github: + description: "Apache Parquet" + homepage: https://parquet.apache.org/ + labels: + - parquet + - apache + - parquet-site + + enabled_merge_buttons: + merge: false + squash: true + rebase: false + + features: + wiki: false + issues: true + projects: false + collaborators: # Note: the number of collaborators is limited to 10 + - vinooganesh + staging: profile: ~ whoami: asf-staging publish: - whoami: asf-site + whoami: asf-site \ No newline at end of file diff --git a/.github/workflows/deploy.yml b/.github/workflows/deploy.yml index 9113452f..a236190e 100644 --- a/.github/workflows/deploy.yml +++ b/.github/workflows/deploy.yml @@ -6,11 +6,11 @@ on: jobs: Build_and_Deploy_Site: - runs-on: ubuntu-20.04 + runs-on: ubuntu-22.04 concurrency: group: ${{ github.workflow }}-${{ github.ref }} steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v4 with: submodules: recursive fetch-depth: 0 @@ -21,12 +21,12 @@ jobs: hugo-version: 'latest' extended: true - - uses: actions/setup-node@v2 + - uses: actions/setup-node@v4 with: - node-version: '16' + node-version: '20' - name: Cache dependencies - uses: actions/cache@v1 + uses: actions/cache@v4 with: path: ~/.npm key: ${{ runner.os }}-node-${{ hashFiles('**/package-lock.json') }} diff --git a/README.md b/README.md index cd219932..63829fcc 100644 --- a/README.md +++ b/README.md @@ -37,7 +37,6 @@ To create documentation for a new release of `parquet-mr` create a new }} + + Documentation + + + Download + +

Apache Parquet is a columnar storage format available to any project in the Hadoop ecosystem, regardless of the choice of data processing framework, data model or programming language.

+{{< blocks/link-down color="info" >}} +{{< /blocks/cover >}} + + +{{< blocks/section color="white" type="row">}} +{{% blocks/feature icon="fab fa-jira" title="File an Issue" url="https://issues.apache.org/jira/projects/PARQUET/issues" %}} +Or Search Open Issues +{{% /blocks/feature %}} + +{{% blocks/feature icon="fab fa-github" title="Contributions welcome!" url="https://github.com/apache/parquet-mr" %}} +We do a [Pull Request](https://github.com/apache/parquet-mr/pulls) contributions workflow on **GitHub**. New users are always welcome! +{{% /blocks/feature %}} + + +{{% blocks/feature icon="fab fa-twitter" title="Follow us on Twitter!" url="https://twitter.com/ApacheParquet" %}} +For announcement of latest features etc. +{{% /blocks/feature %}} + +{{% /blocks/section %}} \ No newline at end of file diff --git a/content/en/docs/Concepts/_index.md b/content/en/docs/Concepts/_index.md index ed32229b..d55a2d3d 100644 --- a/content/en/docs/Concepts/_index.md +++ b/content/en/docs/Concepts/_index.md @@ -5,6 +5,7 @@ weight: 4 description: > Glossary of relevant terminology. --- + - *Block (HDFS block)*: This means a block in HDFS and the meaning is unchanged for describing this file format. The file format is designed to work well on top of HDFS. diff --git a/content/en/docs/File Format/Data Pages/compression.md b/content/en/docs/File Format/Data Pages/compression.md index f4489835..32176121 100644 --- a/content/en/docs/File Format/Data Pages/compression.md +++ b/content/en/docs/File Format/Data Pages/compression.md @@ -3,7 +3,6 @@ title: "Compression" linkTitle: "Compression" weight: 1 --- - ## Overview Parquet allows the data block inside dictionary pages and data pages to diff --git a/content/en/docs/File Format/Data Pages/encryption.md b/content/en/docs/File Format/Data Pages/encryption.md index e9fbd0f0..1f736c50 100644 --- a/content/en/docs/File Format/Data Pages/encryption.md +++ b/content/en/docs/File Format/Data Pages/encryption.md @@ -3,7 +3,6 @@ title: "Parquet Modular Encryption" linkTitle: "Encryption" weight: 1 --- - Parquet files containing sensitive information can be protected by the modular encryption mechanism that encrypts and authenticates the file data and metadata - while allowing for a regular Parquet functionality (columnar projection, predicate pushdown, encoding diff --git a/content/en/docs/File Format/Types/_index.md b/content/en/docs/File Format/Types/_index.md index a079888b..b07dc61f 100644 --- a/content/en/docs/File Format/Types/_index.md +++ b/content/en/docs/File Format/Types/_index.md @@ -4,6 +4,7 @@ linkTitle: "Types" weight: 5 --- + The types supported by the file format are intended to be as minimal as possible, with a focus on how the types effect on disk storage. For example, 16-bit ints are not explicitly supported in the storage format since they are covered by diff --git a/content/en/docs/File Format/Types/logicaltypes.md b/content/en/docs/File Format/Types/logicaltypes.md index cd610a8d..0173b75c 100644 --- a/content/en/docs/File Format/Types/logicaltypes.md +++ b/content/en/docs/File Format/Types/logicaltypes.md @@ -10,4 +10,4 @@ of primitive types to a minimum and reuses parquet's efficient encodings. For example, strings are stored as byte arrays (binary) with a UTF8 annotation. These annotations define how to further decode and interpret the data. Annotations are stored as `LogicalType` fields in the file metadata and are -documented in LogicalTypes.md. +documented in LogicalTypes.md. \ No newline at end of file diff --git a/content/en/docs/File Format/configurations.md b/content/en/docs/File Format/configurations.md index 9e21955c..f12be5d5 100644 --- a/content/en/docs/File Format/configurations.md +++ b/content/en/docs/File Format/configurations.md @@ -5,6 +5,7 @@ weight: 5 --- ### Row Group Size + Larger row groups allow for larger column chunks which makes it possible to do larger sequential IO. Larger groups also require more buffering in the write path (or a two pass write). We recommend large row groups (512MB - 1GB). @@ -18,4 +19,4 @@ Data pages should be considered indivisible so smaller data pages allow for more fine grained reading (e.g. single row lookup). Larger page sizes incur less space overhead (less page headers) and potentially less parsing overhead (processing headers). Note: for sequential scans, it is not expected to read a page -at a time; this is not the IO chunk. We recommend 8KB for page sizes. \ No newline at end of file +at a time; this is not the IO chunk. We recommend 8KB for page sizes. diff --git a/content/en/docs/File Format/metadata.md b/content/en/docs/File Format/metadata.md index 0e5e19b5..a2eae253 100644 --- a/content/en/docs/File Format/metadata.md +++ b/content/en/docs/File Format/metadata.md @@ -6,4 +6,5 @@ weight: 5 There are three types of metadata: file metadata, column (chunk) metadata and page header metadata. All thrift structures are serialized using the TCompactProtocol. + ![File Layout](/images/FileFormat.gif) diff --git a/go.mod b/go.mod new file mode 100644 index 00000000..90cb5415 --- /dev/null +++ b/go.mod @@ -0,0 +1,5 @@ +module github.com/apache/parquet-site + +go 1.12 + +require github.com/google/docsy v0.9.1 // indirect diff --git a/go.sum b/go.sum new file mode 100644 index 00000000..645c0da3 --- /dev/null +++ b/go.sum @@ -0,0 +1,4 @@ +github.com/FortAwesome/Font-Awesome v0.0.0-20240108205627-a1232e345536/go.mod h1:IUgezN/MFpCDIlFezw3L8j83oeiIuYoj28Miwr/KUYo= +github.com/google/docsy v0.9.1 h1:+jqges1YCd+yHeuZ1BUvD8V8mEGVtPxULg5j/vaJ984= +github.com/google/docsy v0.9.1/go.mod h1:saOqKEUOn07Bc0orM/JdIF3VkOanHta9LU5Y53bwN2U= +github.com/twbs/bootstrap v5.2.3+incompatible/go.mod h1:fZTSrkpSf0/HkL0IIJzvVspTt1r9zuf7XlZau8kpcY0= From 5904d8dbf988bc7b31f6a0b397caa5d41425cfe6 Mon Sep 17 00:00:00 2001 From: Vinoo Ganesh Date: Mon, 11 Mar 2024 08:29:31 -0400 Subject: [PATCH 2/4] Adding more cleanup into the same PR i# Changes to be committed: --- .gitmodules | 4 ---- content/search.md | 5 +++++ layouts/404.html | 15 ++++++--------- package.json | 4 ++-- themes/docsy | 1 - 5 files changed, 13 insertions(+), 16 deletions(-) delete mode 100644 .gitmodules create mode 100644 content/search.md delete mode 160000 themes/docsy diff --git a/.gitmodules b/.gitmodules deleted file mode 100644 index a1524f2c..00000000 --- a/.gitmodules +++ /dev/null @@ -1,4 +0,0 @@ - -[submodule "themes/docsy"] - path = themes/docsy - url = https://github.com/google/docsy diff --git a/content/search.md b/content/search.md new file mode 100644 index 00000000..31b7cb39 --- /dev/null +++ b/content/search.md @@ -0,0 +1,5 @@ +--- +title: Search Results +layout: search + +--- \ No newline at end of file diff --git a/layouts/404.html b/layouts/404.html index 40875048..b962591d 100644 --- a/layouts/404.html +++ b/layouts/404.html @@ -1,9 +1,6 @@ -{{ define "main"}} -
-
-

Not found

-

Oops! This page doesn't exist. Try going back to our home page.

- -
-
-{{ end }} +{{ define "main" -}} +
+

Not found

+

Oops! This page doesn't exist. Try going back to the home page.

+
+{{- end }} \ No newline at end of file diff --git a/package.json b/package.json index 67e9bbdf..f81aaddb 100644 --- a/package.json +++ b/package.json @@ -17,8 +17,8 @@ }, "homepage": "https://github.com/apache/parquet-site#readme", "devDependencies": { - "autoprefixer": "^10.4.4", - "postcss": "^8.4.12", + "autoprefixer": "^10.4.17", + "postcss": "^8.4.35", "postcss-cli": "^9.1.0" } } diff --git a/themes/docsy b/themes/docsy deleted file mode 160000 index 868b7510..00000000 --- a/themes/docsy +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 868b75107c53196f25c6e57a3c704a556ef1f56e From 662980f00fb4066bf05ece12fa8a0dd1aff34c33 Mon Sep 17 00:00:00 2001 From: Vinoo Ganesh Date: Mon, 11 Mar 2024 08:59:45 -0400 Subject: [PATCH 3/4] Moving to hugo.toml --- config.toml => hugo.toml | 2 +- package.json | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) rename config.toml => hugo.toml (99%) diff --git a/config.toml b/hugo.toml similarity index 99% rename from config.toml rename to hugo.toml index 977dd3aa..0e48bbd4 100644 --- a/config.toml +++ b/hugo.toml @@ -190,4 +190,4 @@ enable = false min = "0.110.0" [[module.imports]] path = "github.com/google/docsy" - disable = false \ No newline at end of file + disable = false diff --git a/package.json b/package.json index f81aaddb..a3e84f84 100644 --- a/package.json +++ b/package.json @@ -17,7 +17,7 @@ }, "homepage": "https://github.com/apache/parquet-site#readme", "devDependencies": { - "autoprefixer": "^10.4.17", + "autoprefixer": "^10.4.18", "postcss": "^8.4.35", "postcss-cli": "^9.1.0" } From c8ee0d493b2d17b344524a790b7fc0af04f3b341 Mon Sep 17 00:00:00 2001 From: Vinoo Ganesh Date: Mon, 11 Mar 2024 09:38:46 -0400 Subject: [PATCH 4/4] Right adjust layout --- layouts/partials/navbar.html | 101 +++++++++++++++++++++++------------ 1 file changed, 66 insertions(+), 35 deletions(-) diff --git a/layouts/partials/navbar.html b/layouts/partials/navbar.html index b20aec09..c0b643a1 100644 --- a/layouts/partials/navbar.html +++ b/layouts/partials/navbar.html @@ -1,35 +1,66 @@ -{{ $cover := and (.HasShortcode "blocks/cover") (not .Site.Params.ui.navbar_translucent_over_cover_disable) }} - +{{ $cover := and + (.HasShortcode "blocks/cover") + (not .Site.Params.ui.navbar_translucent_over_cover_disable) +-}} +{{ $baseURL := urls.Parse $.Site.Params.Baseurl -}} + + \ No newline at end of file