# link-checker.yml (repository forked from wso2/docs-is)
# Workflow header: display name, triggers, and workflow-wide environment.
name: Broken Link Checker

on:
  # Schedule to run once daily at 02:30 UTC.
  schedule:
    - cron: '30 2 * * *'
  # Manual trigger; the site to check is supplied through the siteUrl input.
  workflow_dispatch:
    inputs:
      siteUrl:
        description: 'Site URL to Check'
        required: true
        default: 'https://is.docs.wso2.com/en/next/'

env:
  # Fallback site URL, used by the job when the siteUrl input is empty
  # (for example on scheduled runs, which carry no inputs).
  DEFAULT_URL: 'https://is.docs.wso2.com/en/next/'
jobs:
  # Crawls the site with LinkChecker, reduces the HTML report to the tables
  # that mention the "page not found" URL, uploads the reduced report as an
  # artifact, and posts a summary card to Google Chat.
  linkchecker:
    # Scheduled runs are limited to the upstream repository. This has to be a
    # job-level condition: a step that runs `exit 0` only ends that one step,
    # and every following step would still execute on forks.
    if: github.event_name != 'schedule' || github.repository == 'wso2/docs-is'
    runs-on: ubuntu-latest
    steps:
      - name: Check out repository
        uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.x'

      - name: Install dependencies
        run: pip install linkchecker beautifulsoup4

      - name: Run Link Checker
        env:
          # Passed through the environment (not expanded inside the script) so
          # that the manually supplied input cannot inject shell commands.
          SITE_URL: ${{ github.event.inputs.siteUrl || env.DEFAULT_URL }}
        # Writes linkchecker-out.html, which the next step reads.
        run: linkchecker -F html "$SITE_URL" --threads=100
        # A non-zero exit from linkchecker must not stop the job; the report
        # still has to be filtered, uploaded, and announced.
        continue-on-error: true

      - name: Filter HTML for specific string and preserve page structure
        id: filter_html
        env:
          # The URL that was actually checked, shown in the report heading.
          SITE_URL: ${{ github.event.inputs.siteUrl || env.DEFAULT_URL }}
        run: |
          # The heredoc delimiter is quoted so the shell leaves the Python
          # source untouched.
          cat > filter_script.py <<'PY'
          import os

          from bs4 import BeautifulSoup

          # Report the site that was checked; fall back to the default site.
          website_url = os.environ.get('SITE_URL') or 'https://is.docs.wso2.com/en/next/'
          # A table whose markup contains this URL is counted as a broken link.
          string_to_filter = 'https://is.docs.wso2.com/en/latest/page-not-found'

          with open('linkchecker-out.html', 'r') as file:
              soup = BeautifulSoup(file, 'html.parser')

          final_soup = BeautifulSoup('<!DOCTYPE html><html><head></head><body></body></html>', 'html.parser')
          # Move the children of the original <head> (styles, title, ...) into
          # the new one. Appending the <head> element itself would nest a
          # <head> inside a <head>, and would raise if the report had none.
          if soup.head is not None:
              for node in list(soup.head.contents):
                  final_soup.head.append(node)

          # Add custom header, broken link count and spacing
          header_tag = final_soup.new_tag('h1')
          header_tag.string = 'Broken Link Checker - ' + website_url
          final_soup.body.append(header_tag)

          # Select the matching tables before any of them is moved.
          tables = soup.find_all('table')
          broken_tables = [table for table in tables if string_to_filter in str(table)]
          broken_link_count = len(broken_tables)
          with open('broken_link_count.txt', 'w') as count_file:
              count_file.write(str(broken_link_count))

          count_tag = final_soup.new_tag('p')
          count_tag.string = 'Number of broken links found: ' + str(broken_link_count)
          final_soup.body.append(count_tag)
          final_soup.body.append(final_soup.new_tag('br'))

          # Append filtered tables
          for table in broken_tables:
              final_soup.body.append(table)
              final_soup.body.append(final_soup.new_tag('br'))
          final_soup.body.append(final_soup.new_tag('br'))

          with open('broken_links_report_IS.html', 'w') as file:
              file.write(str(final_soup))
          PY
          python filter_script.py
          # Expose the count to later steps as env.broken_links_count.
          BROKEN_LINKS_COUNT=$(cat broken_link_count.txt)
          echo "broken_links_count=$BROKEN_LINKS_COUNT" >> "$GITHUB_ENV"

      - name: Upload HTML as Artifact
        # v3 of this action is deprecated; v4 accepts the same name/path inputs.
        uses: actions/upload-artifact@v4
        with:
          name: broken_links_report_IS
          path: broken_links_report_IS.html

      - name: Notify Google Chat
        env:
          WEBHOOK_URL: ${{ secrets.GOOGLE_CHAT_WEBHOOK_URL }}
          SITE_URL: ${{ github.event.inputs.siteUrl || env.DEFAULT_URL }}
          THREAD_NAME: 'BrokenLinkCheckerWSO2IS'
          BROKEN_LINKS_COUNT: ${{ env.broken_links_count }}
        run: |
          # Without the secret (for example in a fork) the request URL would be
          # malformed, so skip the notification instead of failing on curl.
          if [ -z "$WEBHOOK_URL" ]; then
            echo "GOOGLE_CHAT_WEBHOOK_URL is not set; skipping notification."
            exit 0
          fi
          # Unquoted heredoc: the shell expands SITE_URL, BROKEN_LINKS_COUNT and
          # the default GITHUB_* variables into the JSON payload.
          MESSAGE_JSON=$(cat <<EOF
          {
            "cards": [
              {
                "header": {
                  "title": "Broken Link Checker Report - WSO2 IS",
                  "subtitle": "${SITE_URL}",
                  "imageUrl": "https://biq.cloud/wp-content/uploads/2021/01/broken-link-building.gif"
                },
                "sections": [
                  {
                    "widgets": [
                      {
                        "keyValue": {
                          "topLabel": "Report Details",
                          "content": "Broken Links Found: ${BROKEN_LINKS_COUNT}"
                        }
                      },
                      {
                        "buttons": [
                          {
                            "textButton": {
                              "text": "VIEW REPORT",
                              "onClick": {
                                "openLink": {
                                  "url": "https://github.com/${GITHUB_REPOSITORY}/actions/runs/${GITHUB_RUN_ID}"
                                }
                              }
                            }
                          }
                        ]
                      }
                    ]
                  }
                ]
              }
            ]
          }
          EOF
          )
          # The threading parameters are appended with "&", so the webhook URL
          # is expected to already carry a query string.
          # --fail turns an HTTP error response into a failed step.
          curl --fail --silent --show-error -X POST \
            -H 'Content-Type: application/json' \
            -d "$MESSAGE_JSON" \
            "$WEBHOOK_URL&threadKey=$THREAD_NAME&messageReplyOption=REPLY_MESSAGE_FALLBACK_TO_NEW_THREAD"