diff --git a/.asf.yaml b/.asf.yaml index 79c2ea68..389b0c5a 100644 --- a/.asf.yaml +++ b/.asf.yaml @@ -1,6 +1,26 @@ +github: + description: "Apache Parquet" + homepage: https://parquet.apache.org/ + labels: + - parquet + - apache + - parquet-site + + enabled_merge_buttons: + merge: false + squash: true + rebase: false + + features: + wiki: false + issues: true + projects: false + collaborators: # Note: the number of collaborators is limited to 10 + - vinooganesh + staging: profile: ~ whoami: asf-staging publish: - whoami: asf-site + whoami: asf-site \ No newline at end of file diff --git a/.github/workflows/deploy.yml b/.github/workflows/deploy.yml index 9113452f..a236190e 100644 --- a/.github/workflows/deploy.yml +++ b/.github/workflows/deploy.yml @@ -6,11 +6,11 @@ on: jobs: Build_and_Deploy_Site: - runs-on: ubuntu-20.04 + runs-on: ubuntu-22.04 concurrency: group: ${{ github.workflow }}-${{ github.ref }} steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v4 with: submodules: recursive fetch-depth: 0 @@ -21,12 +21,12 @@ jobs: hugo-version: 'latest' extended: true - - uses: actions/setup-node@v2 + - uses: actions/setup-node@v4 with: - node-version: '16' + node-version: '20' - name: Cache dependencies - uses: actions/cache@v1 + uses: actions/cache@v4 with: path: ~/.npm key: ${{ runner.os }}-node-${{ hashFiles('**/package-lock.json') }} diff --git a/.gitmodules b/.gitmodules deleted file mode 100644 index a1524f2c..00000000 --- a/.gitmodules +++ /dev/null @@ -1,4 +0,0 @@ - -[submodule "themes/docsy"] - path = themes/docsy - url = https://github.com/google/docsy diff --git a/README.md b/README.md index cd219932..63829fcc 100644 --- a/README.md +++ b/README.md @@ -37,7 +37,6 @@ To create documentation for a new release of `parquet-mr` create a new }} + + Documentation + + + Download + +

Apache Parquet is a columnar storage format available to any project in the Hadoop ecosystem, regardless of the choice of data processing framework, data model or programming language.

+{{< blocks/link-down color="info" >}} +{{< /blocks/cover >}} + + +{{< blocks/section color="white" type="row">}} +{{% blocks/feature icon="fab fa-jira" title="File an Issue" url="https://issues.apache.org/jira/projects/PARQUET/issues" %}} +Or Search Open Issues +{{% /blocks/feature %}} + +{{% blocks/feature icon="fab fa-github" title="Contributions welcome!" url="https://github.com/apache/parquet-mr" %}} +We use a [Pull Request](https://github.com/apache/parquet-mr/pulls) contribution workflow on **GitHub**. New users are always welcome! +{{% /blocks/feature %}} + + +{{% blocks/feature icon="fab fa-twitter" title="Follow us on Twitter!" url="https://twitter.com/ApacheParquet" %}} +For announcements of the latest features and more. +{{% /blocks/feature %}} + +{{% /blocks/section %}} \ No newline at end of file diff --git a/content/en/docs/Concepts/_index.md b/content/en/docs/Concepts/_index.md index ed32229b..d55a2d3d 100644 --- a/content/en/docs/Concepts/_index.md +++ b/content/en/docs/Concepts/_index.md @@ -5,6 +5,7 @@ weight: 4 description: > Glossary of relevant terminology. --- + - *Block (HDFS block)*: This means a block in HDFS and the meaning is unchanged for describing this file format. The file format is designed to work well on top of HDFS. 
diff --git a/content/en/docs/File Format/Data Pages/compression.md b/content/en/docs/File Format/Data Pages/compression.md index f4489835..32176121 100644 --- a/content/en/docs/File Format/Data Pages/compression.md +++ b/content/en/docs/File Format/Data Pages/compression.md @@ -3,7 +3,6 @@ title: "Compression" linkTitle: "Compression" weight: 1 --- - ## Overview Parquet allows the data block inside dictionary pages and data pages to diff --git a/content/en/docs/File Format/Data Pages/encryption.md b/content/en/docs/File Format/Data Pages/encryption.md index e9fbd0f0..1f736c50 100644 --- a/content/en/docs/File Format/Data Pages/encryption.md +++ b/content/en/docs/File Format/Data Pages/encryption.md @@ -3,7 +3,6 @@ title: "Parquet Modular Encryption" linkTitle: "Encryption" weight: 1 --- - Parquet files containing sensitive information can be protected by the modular encryption mechanism that encrypts and authenticates the file data and metadata - while allowing for a regular Parquet functionality (columnar projection, predicate pushdown, encoding diff --git a/content/en/docs/File Format/Types/_index.md b/content/en/docs/File Format/Types/_index.md index a079888b..b07dc61f 100644 --- a/content/en/docs/File Format/Types/_index.md +++ b/content/en/docs/File Format/Types/_index.md @@ -4,6 +4,7 @@ linkTitle: "Types" weight: 5 --- + The types supported by the file format are intended to be as minimal as possible, with a focus on how the types effect on disk storage. For example, 16-bit ints are not explicitly supported in the storage format since they are covered by diff --git a/content/en/docs/File Format/Types/logicaltypes.md b/content/en/docs/File Format/Types/logicaltypes.md index cd610a8d..0173b75c 100644 --- a/content/en/docs/File Format/Types/logicaltypes.md +++ b/content/en/docs/File Format/Types/logicaltypes.md @@ -10,4 +10,4 @@ of primitive types to a minimum and reuses parquet's efficient encodings. 
For example, strings are stored as byte arrays (binary) with a UTF8 annotation. These annotations define how to further decode and interpret the data. Annotations are stored as `LogicalType` fields in the file metadata and are -documented in LogicalTypes.md. +documented in LogicalTypes.md. \ No newline at end of file diff --git a/content/en/docs/File Format/configurations.md b/content/en/docs/File Format/configurations.md index 9e21955c..f12be5d5 100644 --- a/content/en/docs/File Format/configurations.md +++ b/content/en/docs/File Format/configurations.md @@ -5,6 +5,7 @@ weight: 5 --- ### Row Group Size + Larger row groups allow for larger column chunks which makes it possible to do larger sequential IO. Larger groups also require more buffering in the write path (or a two pass write). We recommend large row groups (512MB - 1GB). @@ -18,4 +19,4 @@ Data pages should be considered indivisible so smaller data pages allow for more fine grained reading (e.g. single row lookup). Larger page sizes incur less space overhead (less page headers) and potentially less parsing overhead (processing headers). Note: for sequential scans, it is not expected to read a page -at a time; this is not the IO chunk. We recommend 8KB for page sizes. \ No newline at end of file +at a time; this is not the IO chunk. We recommend 8KB for page sizes. diff --git a/content/en/docs/File Format/metadata.md b/content/en/docs/File Format/metadata.md index 0e5e19b5..a2eae253 100644 --- a/content/en/docs/File Format/metadata.md +++ b/content/en/docs/File Format/metadata.md @@ -6,4 +6,5 @@ weight: 5 There are three types of metadata: file metadata, column (chunk) metadata and page header metadata. All thrift structures are serialized using the TCompactProtocol. 
+ ![File Layout](/images/FileFormat.gif) diff --git a/content/search.md b/content/search.md new file mode 100644 index 00000000..31b7cb39 --- /dev/null +++ b/content/search.md @@ -0,0 +1,5 @@ +--- +title: Search Results +layout: search + +--- \ No newline at end of file diff --git a/go.mod b/go.mod new file mode 100644 index 00000000..90cb5415 --- /dev/null +++ b/go.mod @@ -0,0 +1,5 @@ +module github.com/apache/parquet-site + +go 1.12 + +require github.com/google/docsy v0.9.1 // indirect diff --git a/go.sum b/go.sum new file mode 100644 index 00000000..645c0da3 --- /dev/null +++ b/go.sum @@ -0,0 +1,4 @@ +github.com/FortAwesome/Font-Awesome v0.0.0-20240108205627-a1232e345536/go.mod h1:IUgezN/MFpCDIlFezw3L8j83oeiIuYoj28Miwr/KUYo= +github.com/google/docsy v0.9.1 h1:+jqges1YCd+yHeuZ1BUvD8V8mEGVtPxULg5j/vaJ984= +github.com/google/docsy v0.9.1/go.mod h1:saOqKEUOn07Bc0orM/JdIF3VkOanHta9LU5Y53bwN2U= +github.com/twbs/bootstrap v5.2.3+incompatible/go.mod h1:fZTSrkpSf0/HkL0IIJzvVspTt1r9zuf7XlZau8kpcY0= diff --git a/config.toml b/hugo.toml similarity index 93% rename from config.toml rename to hugo.toml index 43cc577a..0e48bbd4 100644 --- a/config.toml +++ b/hugo.toml @@ -10,9 +10,6 @@ enableMissingTranslationPlaceholders = true enableRobotsTXT = true -# Base theme for website -theme = ["docsy"] - # Will give values to .Lastmod etc. enableGitInfo = true @@ -44,16 +41,17 @@ quality = 75 anchor = "smart" # Language configuration - [languages] [languages.en] -title = "Apache Parquet" languageName ="English" -contentDir = "content/en" # Weight used for sorting. weight = 1 [languages.en.params] +title = "Apache Parquet" description = "The Apache Parquet Website" +contentDir = "content/en" +# Weight used for sorting. 
+weight = 1 [markup] [markup.goldmark] @@ -98,6 +96,7 @@ url_latest_version = "https://parquet.apache.org" # Repository configuration (URLs for in-page links to opening issues and suggesting changes) github_repo = "https://github.com/apache/parquet-site" + github_branch= "production" # Comment out to disable search. @@ -118,7 +117,7 @@ prism_syntax_highlighting = false # Set to true to disable breadcrumb navigation. breadcrumb_disable = false # Set to true to disable the About link in the site footer -footer_about_disable = false +footer_about_enable = true # Set to false if you don't want to display a logo (/assets/icons/logo.svg) in the top navbar navbar_logo = true # Set to true if you don't want the top navbar to be translucent when over a `block/cover`, like on the homepage. @@ -155,7 +154,7 @@ enable = false name ="Twitter" url = "https://twitter.com/ApacheParquet" icon = "fab fa-twitter" - desc = "Follow us on Twitter to get the latest news" + desc = "Follow us on Twitter to get the latest news!" [[params.links.user]] name = "Stack Overflow" url = "https://stackoverflow.com/questions/tagged/parquet" @@ -166,7 +165,7 @@ enable = false name = "GitHub" url = "https://github.com/apache/parquet-mr" icon = "fab fa-github" - desc = "Development takes place here" + desc = "Development takes place here!" [[params.links.developer]] name = "Slack" url = "https://the-asf.slack.com/" @@ -182,3 +181,13 @@ enable = false url = "https://issues.apache.org/jira/projects/PARQUET/issues" icon = "fas fa-bug" desc = "File/Track Open Bugs" + +[module] + # Uncomment the next line to build and serve using local docsy clone declared in the named Hugo workspace: + # workspace = "docsy.work" + [module.hugoVersion] + extended = true + min = "0.110.0" + [[module.imports]] + path = "github.com/google/docsy" + disable = false diff --git a/layouts/404.html b/layouts/404.html index 40875048..b962591d 100644 --- a/layouts/404.html +++ b/layouts/404.html @@ -1,9 +1,6 @@ -{{ define "main"}} -
-
-

Not found

-

Oops! This page doesn't exist. Try going back to our home page.

- -
-
-{{ end }} +{{ define "main" -}} +
+

Not found

+

Oops! This page doesn't exist. Try going back to the home page.

+
+{{- end }} \ No newline at end of file diff --git a/layouts/partials/navbar.html b/layouts/partials/navbar.html index b20aec09..c0b643a1 100644 --- a/layouts/partials/navbar.html +++ b/layouts/partials/navbar.html @@ -1,35 +1,66 @@ -{{ $cover := and (.HasShortcode "blocks/cover") (not .Site.Params.ui.navbar_translucent_over_cover_disable) }} - +{{ $cover := and + (.HasShortcode "blocks/cover") + (not .Site.Params.ui.navbar_translucent_over_cover_disable) +-}} +{{ $baseURL := urls.Parse $.Site.Params.Baseurl -}} + + \ No newline at end of file diff --git a/package.json b/package.json index 67e9bbdf..a3e84f84 100644 --- a/package.json +++ b/package.json @@ -17,8 +17,8 @@ }, "homepage": "https://github.com/apache/parquet-site#readme", "devDependencies": { - "autoprefixer": "^10.4.4", - "postcss": "^8.4.12", + "autoprefixer": "^10.4.18", + "postcss": "^8.4.35", "postcss-cli": "^9.1.0" } } diff --git a/themes/docsy b/themes/docsy deleted file mode 160000 index 868b7510..00000000 --- a/themes/docsy +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 868b75107c53196f25c6e57a3c704a556ef1f56e