-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathmd-links-to-csv.py
32 lines (28 loc) · 1.29 KB
/
md-links-to-csv.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
import os
import re
import csv
import sys
# Directory tree that is searched recursively for Markdown files.
root_dir = './docs'
# CSV file that receives one (file, link) row per hyperlink found.
output_file = 'links.csv'

# Matches inline Markdown links of the form ``[text](target)`` and captures
# the target.  Both parts are non-greedy and ``.`` does not cross newlines,
# so a link must sit on a single line.  Image links (``![alt](src)``) match
# as well because they contain the same bracket/parenthesis shape.
_LINK_PATTERN = re.compile(r'\[.*?\]\((.*?)\)')


def _extract_links(markdown_text):
    """Return the target of every inline link in *markdown_text*, in order."""
    return _LINK_PATTERN.findall(markdown_text)


def _iter_markdown_files(top):
    """Yield the path of every ``.md`` file below *top* in a stable order."""
    for subdir, dirs, files in os.walk(top):
        # Sorting ``dirs`` in place steers os.walk's descent order, and
        # sorting ``files`` fixes the order within a directory, so the
        # resulting CSV is reproducible across runs and filesystems.
        dirs.sort()
        for name in sorted(files):
            # Only process Markdown files
            if name.endswith('.md'):
                yield os.path.join(subdir, name)


def main(search_root=root_dir, csv_path=output_file):
    """Collect every Markdown hyperlink below *search_root* into a CSV file.

    The CSV at *csv_path* is (re)created with a ``File,Link`` header followed
    by one row per link.  Progress is reported on stderr and every link is
    echoed to stdout as ``LINK: <target>``.

    Args:
        search_root: Directory to walk recursively.  A missing directory
            yields no files, so only the header row is written.
        csv_path: Destination CSV file; overwritten if it already exists.

    Raises:
        OSError: If the CSV or a Markdown file cannot be opened.
        UnicodeDecodeError: If a Markdown file is not valid UTF-8.
    """
    # The output is opened once for the whole run instead of being reopened
    # in append mode for every link.  UTF-8 is requested explicitly so the
    # result does not depend on the platform's locale encoding.
    with open(csv_path, mode='w', newline='', encoding='utf-8') as csvfile:
        csv_writer = csv.writer(csvfile, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
        csv_writer.writerow(['File', 'Link'])
        for file_path in _iter_markdown_files(search_root):
            sys.stderr.write("Processing file: {}\n".format(file_path))
            with open(file_path, 'r', encoding='utf-8') as md_file:
                md_content = md_file.read()
            # The Markdown file is already closed here; only its text is needed.
            for link in _extract_links(md_content):
                print('LINK:', link)
                csv_writer.writerow([file_path, link])


if __name__ == '__main__':
    main()