Commit

Testing modified action
moe-ad committed Nov 11, 2024
1 parent 5f66425 commit fab166a
Showing 4 changed files with 568 additions and 140 deletions.
296 changes: 156 additions & 140 deletions .github/workflows/hello-world.yml
@@ -50,164 +50,180 @@
# path: src/sample_file.txt
# #path: ./output_files/sample_file.txt

name: Update index.html in GH Pages and generate global sitemap
# name: Update index.html in GH Pages and generate global sitemap

on:
push:
branches:
- main
# on: push

permissions: write-all
# permissions: write-all

jobs:
# jobs:

run-global-sitemap-script-and-update-gh-pages:
# run-global-sitemap-script-and-update-gh-pages:
# runs-on: ubuntu-latest

# steps:

# - name: Install Python
# uses: actions/setup-python@v5
# with:
# python-version: '3.13'

# - name: Install dependencies
# shell: bash
# run: |

# python -m pip install --upgrade pip
# pip install requests

# - name: Write script
# shell: bash
# run: |

# cat > /tmp/catsitemap.py << "EOF"

# import re
# import xml.etree.ElementTree as ET
# import requests
# from xml.dom import minidom

# def extract_urls_and_headers(url: str) -> tuple:
# """This function extracts projects names and sitemap.xml urls for each project

# Args:
# url (str): link to the .rst file of the PyAnsys documentation landing page

# Returns:
# tuple: a tuple containing a list of project names and a list of URLs to the projects' sitemap.xml files
# """
# try:
# response = requests.get(url, timeout=10)
# except requests.exceptions.Timeout:
# print("Timed out while trying to get request")
# raise

# content = response.text

# # Extract section headers and URLs (modify regex based on your needs)
# project_names = [project_name.strip() for project_name in re.findall(r'\.\. grid-item-card:: ([\w\s-]+)', content)]
# urls = re.findall(r':link: (https://[\w./-]+)', content)

# # Modify URLs
# updated_urls = [re.match(r"^(https:\/\/[^\/]+)", url).group(1) + "/sitemap.xml" for url in urls]

# # Filter out non-existent URLs
# valid_project_names = []
# valid_urls = []
# for index, url in enumerate(updated_urls):
# if requests.get(url, timeout=10).status_code == 404:
# continue
# else:
# valid_project_names.append(project_names[index])
# valid_urls.append(url)

# return valid_project_names, valid_urls

# def generate_sitemap_index(url: str) -> None:
# """This function generates a sitemap_index.xml file indexing other sitemap.xml files

# Args:
# url (str): link to the .rst file of the PyAnsys documentation landing page
# """

# # Create the root element with namespace
# sitemap_index = ET.Element("sitemapindex", xmlns="http://www.sitemaps.org/schemas/sitemap/0.9")

# # Build the list of urls
# urls = extract_urls_and_headers(url)[1]

# # Create sitemap elements for each URL
# for url in urls:
# sitemap = ET.SubElement(sitemap_index, "sitemap")
# loc = ET.SubElement(sitemap, "loc")
# loc.text = url

# # Format XML with indentation
# rough_string = ET.tostring(sitemap_index, 'utf-8')
# reparsed = minidom.parseString(rough_string)
# pretty_xml = reparsed.toprettyxml(indent=" ")

# # Create the tree and write to XML file
# with open("globalsitemap.xml", "w") as f:
# f.write(pretty_xml)


# # URL of the .rst
# URL = "https://docs.pyansys.com/version/dev/_sources/index.rst.txt"
# generate_sitemap_index(URL)
# EOF

# - name: Run above script
# working-directory: /tmp
# shell: bash
# run: |
# python catsitemap.py

# - name: Checkout repository
# uses: actions/checkout@v4
# #with:
# #ref: gh-pages

# #- name: Replace 'version/stable' with 'version/dev' in index.html
# #run: |
# # Replace landing page with the dev version
# #cp version/dev/index.html index.html
# #sed -i 's/href="\([^:"]*\)"/href="version\/dev\/\1"/g' index.html
# #sed -i 's/src="\([^:"]*\)"/src="version\/dev\/\1"/g' index.html
# # Replace "version/stable" with "version/dev" in the sitemap.xml
# #sed -i 's/version\/stable/version\/dev/g' sitemap.xml

# - name: Copy globalsitemap.xml to root of gh-pages
# shell: bash
# run: |
# mv /tmp/globalsitemap.xml /home/runner/work/ci_cd/ci_cd/

# - name: "Commit changes"
# uses: EndBug/add-and-commit@v9
# with:
# default_author: github_actions
# message: "testing action script, especially that file is added"
name: Custom GH Pages Post-processing

on: push
# workflow_dispatch:
# workflow_call:

env:
MAIN_PYTHON_VERSION: '3.13'

jobs:
update-gh-pages:
runs-on: ubuntu-latest

steps:

- name: Install Python
uses: actions/setup-python@v5
with:
python-version: '3.13'
python-version: ${{ env.MAIN_PYTHON_VERSION }}

- name: Install dependencies
- name: Install Python dependencies
shell: bash
run: |
python -m pip install --upgrade pip
pip install requests
python -m pip install requests
- name: Write script
shell: bash
run: |
cat > /tmp/catsitemap.py << "EOF"
import re
import os
import xml.etree.ElementTree as ET
import requests
from xml.dom import minidom
def download_file(url: str, folder_path: str, file_name: str) -> None:
"""_summary_
python -m pip install requests
Args:
url (str): _description_
folder_path (str): _description_
file_name (str): _description_.
"""
# Full path for the downloaded file
file_path = os.path.join(folder_path, file_name)
# Send the request
try:
response = requests.get(url, stream=True, timeout=10)
except requests.exceptions.Timeout:
print("Timed out while sending GET request")
raise
# Write the file content to the specified location
with open(file_path, mode='wb') as file:
for chunk in response.iter_content(chunk_size=8192):
file.write(chunk)
def extract_urls_and_headers(url: str) -> tuple:
"""This function extracts projects names and sitemap.xml urls for each project
Args:
url (str): link to the .rst file of the PyAnsys documentation landing page
Returns:
tuple: a tuple containing a list of project names and a list of URLs to the projects' sitemap.xml files
"""
try:
response = requests.get(url, timeout=10)
except requests.exceptions.Timeout:
print("Timed out while trying to get request")
raise
content = response.text
# Extract section headers and URLs (modify regex based on your needs)
project_names = [project_name.strip() for project_name in re.findall(r'\.\. grid-item-card:: ([\w\s-]+)', content)]
urls = re.findall(r':link: (https://[\w./-]+)', content)
# Modify URLs
updated_urls = [re.match(r"^(https:\/\/[^\/]+)", url).group(1) + "/sitemap.xml" for url in urls]
# Filter out non-existent URLs
valid_project_names = []
valid_urls = []
for index, url in enumerate(updated_urls):
if requests.get(url, timeout=10).status_code == 404:
continue
else:
valid_project_names.append(project_names[index])
valid_urls.append(url)
return valid_project_names, valid_urls
def generate_sitemap_index(project_names: list, folder_path: str, file_name: str) -> None:
"""This function generates a sitemap_index.xml file indexing other sitemap.xml files
Args:
url (list): list of the urls pointing to the location of the sitemaps
folder_path (str): _description_
file_name (str): _description_.
"""
# Create the root element with namespace
sitemap_index = ET.Element("sitemapindex", xmlns="http://www.sitemaps.org/schemas/sitemap/0.9")
# Create sitemap elements for each URL
for project in project_names:
# Modify the url to point to the correct gh-pages directory
modified_url = f"https://docs.pyansys.com/sitemap/{project.lower().replace(" ", "")}_sitemap.xml"
sitemap = ET.SubElement(sitemap_index, "sitemap")
loc = ET.SubElement(sitemap, "loc")
loc.text = modified_url
# Format XML with indentation
rough_string = ET.tostring(sitemap_index, 'utf-8')
reparsed = minidom.parseString(rough_string)
pretty_xml = reparsed.toprettyxml(indent=" ")
# Create the tree and write to XML file
file_path = os.path.join(folder_path, file_name)
with open(file_path, "w") as f:
f.write(pretty_xml)
# Inputs
folder_path = './sitemaps'
URL = "https://docs.pyansys.com/version/dev/_sources/index.rst.txt"
# Create folder
os.makedirs(folder_path, exist_ok=True)
# Get actual valid URLS and corresponding project names
project_names, project_urls = extract_urls_and_headers(URL)
# Generate global sitemaps
file_name = "globalsitemap.xml"
generate_sitemap_index(project_names, folder_path, file_name)
for index, url in enumerate(project_urls):
file_name = project_names[index].lower().replace(" ", "") + '_sitemap.xml'
download_file(url, folder_path, file_name)
EOF
- name: Checkout repository main branch
uses: actions/checkout@v4
with:
ref: main

- name: Run above script
- name: Copy tools folder to /tmp and run sitemap script
working-directory: /tmp
shell: bash
run: |
python catsitemap.py
cp -r /home/runner/work/ci_cd/ci_cd/tools/ .
python ./tools/catsitemap.py
- name: Checkout repository
- name: Checkout repository test-branch branch
uses: actions/checkout@v4
with:
ref: test-branch
@@ -221,7 +237,7 @@ jobs:
# Replace "version/stable" with "version/dev" in the sitemap.xml
#sed -i 's/version\/stable/version\/dev/g' sitemap.xml

- name: Copy globalsitemap.xml to root of gh-pages
- name: Move sitemaps/ to test-branch root
shell: bash
run: |
mv /tmp/sitemaps/ /home/runner/work/ci_cd/ci_cd/
@@ -230,4 +246,4 @@
uses: EndBug/add-and-commit@v9
with:
default_author: github_actions
message: "testing action script, especially that file is added"
message: "update sitemaps folder"