diff --git a/Dockerfile b/Dockerfile
index 8ed0bdd0..f57b4625 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -6,6 +6,21 @@ FROM alpine:3.10
RUN apk update
RUN apk add git
+# The BusyBox utilities bundled with the Alpine base image
+# are quite limited, so install fuller featured tools:
+#   bash      - entrypoint.sh runs under bash
+#   coreutils - fuller featured shell commands, such as sort
+#   findutils - full featured find
+#   gawk      - GNU awk
+# A single RUN keeps them in one layer, and --no-cache
+# fetches a fresh package index without storing it.
+# bash-doc and bash-completion are left out because a
+# non-interactive container has no use for them.
+RUN apk add --no-cache bash \
+    coreutils \
+    findutils \
+    gawk
+
COPY LICENSE README.md /
COPY entrypoint.sh /entrypoint.sh
ENTRYPOINT ["/entrypoint.sh"]
diff --git a/README.md b/README.md
index 21c0e64d..518fba3a 100644
--- a/README.md
+++ b/README.md
@@ -1,4 +1,4 @@
-# Generate Sitemap
+# generate-sitemap
[![build](https://github.com/cicirello/generate-sitemap/workflows/build/badge.svg)](https://github.com/cicirello/generate-sitemap/actions?query=workflow%3Abuild)
[![GitHub](https://img.shields.io/github/license/cicirello/generate-sitemap)](https://github.com/cicirello/generate-sitemap/blob/master/LICENSE)
@@ -11,7 +11,14 @@ html as well as pdf files in the sitemap, and has inputs to
control the included file types (defaults include both html
and pdf files in the sitemap). It skips over html files that
contain `<meta name="robots" content="noindex">`. It otherwise
-does not currently attempt to respect a robots.txt file.
+does not currently attempt to respect a robots.txt file. The
+sitemap entries are sorted in a consistent order. Specifically,
+all html pages appear prior to all URLs to pdf files (if pdfs
+are included). The html pages are then first sorted by depth
+in the directory structure (i.e., pages at the website root
+appear first, etc.), and then pages at the same depth are sorted
+alphabetically. URLs to pdf files are sorted in the same manner
+as the html pages.
It is designed to be used in combination with other GitHub
Actions. For example, it does not commit and push the generated
@@ -101,7 +108,7 @@ file in the root of the repository. After completion, it then
simply echoes the outputs.
```yml
-name: Generate API sitemap
+name: Generate xml sitemap
on:
push:
@@ -119,7 +126,7 @@ jobs:
fetch-depth: 0
- name: Generate the sitemap
id: sitemap
- uses: cicirello/generate-sitemap@v1.0.0
+ uses: cicirello/generate-sitemap@v1.1.0
with:
base-url-path: https://THE.URL.TO.YOUR.PAGE/
- name: Output stats
@@ -155,7 +162,7 @@ jobs:
fetch-depth: 0
- name: Generate the sitemap
id: sitemap
- uses: cicirello/generate-sitemap@v1.0.0
+ uses: cicirello/generate-sitemap@v1.1.0
with:
base-url-path: https://THE.URL.TO.YOUR.PAGE/
path-to-root: docs
@@ -178,7 +185,7 @@ then the `peter-evans/create-pull-request` monitors for changes, and
if the sitemap changed will create a pull request.
```yml
-name: Generate API sitemap
+name: Generate xml sitemap
on:
push:
@@ -196,7 +203,7 @@ jobs:
fetch-depth: 0
- name: Generate the sitemap
id: sitemap
- uses: cicirello/generate-sitemap@v1.0.0
+ uses: cicirello/generate-sitemap@v1.1.0
with:
base-url-path: https://THE.URL.TO.YOUR.PAGE/
- name: Create Pull Request
diff --git a/entrypoint.sh b/entrypoint.sh
index 365ee3b7..9601fbb1 100755
--- a/entrypoint.sh
+++ b/entrypoint.sh
@@ -1,4 +1,4 @@
-#!/bin/sh -l
+#!/bin/bash -l
websiteRoot=$1
baseUrl=$2
@@ -11,12 +11,9 @@ skipCount=0
function formatSitemapEntry {
if [ "$sitemapFormat" == "xml" ]; then
- lastModDate=${3/ /T}
- lastModDate=${lastModDate/ /}
- lastModDate="${lastModDate:0:22}:${lastModDate:22:2}"
echo "" >> sitemap.xml
echo "$2${1%index.html}" >> sitemap.xml
- echo "$lastModDate" >> sitemap.xml
+ echo "$3" >> sitemap.xml
echo "" >> sitemap.xml
else
echo "$2${1/%\/index.html/\/}" >> sitemap.txt
@@ -35,20 +32,20 @@ else
fi
if [ "$includeHTML" == "true" ]; then
- for i in $(find . \( -name '*.html' -o -name '*.htm' \) -type f); do
- if [ "0" == $(grep -i -c -E "