# md_convert.py - embed the images referenced by a Markdown file as base64 data URIs.
import re
import base64
import requests
import sys
# Markdown image syntax: ![alt-text](image-url) or ![alt-text](image-url "title")
_MARKDOWN_IMAGE_RE = re.compile(r"!\[(.*?)\]\((.*?)\)")
# Splits a Markdown link target into the URL and an optional quoted title.
_MARKDOWN_TITLE_RE = re.compile(r'(\S+)(\s+(?:"[^"]*"|\'[^\']*\'))\s*$')
# HTML <img> tag up to and including its src value; the closing quote must
# match the opening one so src='...' and src="..." are both handled.
_IMG_TAG_RE = re.compile(r'(<img\b[^>]*?\ssrc\s*=\s*)(["\'])(.*?)\2', re.IGNORECASE)


def _download_image(url, timeout=30):
    """Download *url* with ``requests`` and return ``(raw_bytes, content_type)``.

    ``content_type`` is ``None`` when the server sends no Content-Type header.
    Raises ``requests.RequestException`` on connection errors, timeouts and
    non-success HTTP status codes.
    """
    # A timeout keeps one unresponsive host from hanging the whole conversion.
    response = requests.get(url, timeout=timeout)
    response.raise_for_status()
    return response.content, response.headers.get('Content-Type')


def convert_images_to_base64(markdown_file, output_file, fetch=None):
    """Rewrite a Markdown file so that its images are embedded as data URIs.

    Both Markdown images (``![alt](url)``, optionally with a quoted title) and
    HTML ``<img ... src="url">`` tags (single or double quoted) are processed.
    Each distinct URL is fetched once. Images that are already ``data:`` URIs
    are left alone, and an image that cannot be fetched is reported on stdout
    and left unchanged so that the rest of the document is still converted.

    Args:
        markdown_file: Path of the UTF-8 Markdown file to read.
        output_file: Path the converted document is written to (UTF-8).
        fetch: Optional callable ``fetch(url) -> (bytes, mime_type_or_None)``
            used to obtain image data. Defaults to an HTTP download through
            ``requests`` with a timeout.

    Returns:
        None. The result is written to ``output_file``.
    """
    if fetch is None:
        fetch = _download_image

    with open(markdown_file, 'r', encoding='utf-8') as file:
        content = file.read()

    # URL -> data URI, or None when fetching failed. Caching avoids downloading
    # (and reporting) the same image once per occurrence.
    data_uris = {}

    def to_data_uri(img_url):
        """Return the data URI for *img_url*, or None if it must stay as is."""
        if img_url.lower().startswith('data:'):
            return None  # Already embedded; nothing to download.
        if img_url not in data_uris:
            try:
                img_data, mime_type = fetch(img_url)
                # Drop Content-Type parameters such as "; charset=utf-8" and
                # fall back to PNG when the type is missing.
                mime_type = (mime_type or '').split(';', 1)[0].strip() or 'image/png'
                base64_data = base64.b64encode(img_data).decode('utf-8')
                data_uris[img_url] = f"data:{mime_type};base64,{base64_data}"
            except Exception as e:
                # Deliberate best-effort boundary: one bad image (network error,
                # bad URL, misbehaving custom fetcher) must not abort the run.
                print(f"Failed to process image {img_url}: {e}")
                data_uris[img_url] = None
        return data_uris[img_url]

    def replace_markdown(match):
        alt_text, target = match.group(1), match.group(2)
        titled = _MARKDOWN_TITLE_RE.match(target)
        if titled:
            img_url, title = titled.group(1), titled.group(2)
        else:
            img_url, title = target, ''
        data_uri = to_data_uri(img_url)
        if data_uri is None:
            return match.group(0)
        return f"![{alt_text}]({data_uri}{title})"

    def replace_img_tag(match):
        prefix, quote, img_url = match.group(1), match.group(2), match.group(3)
        data_uri = to_data_uri(img_url)
        if data_uri is None:
            return match.group(0)
        # Re-emit the original quote character so the tag stays well-formed.
        return f"{prefix}{quote}{data_uri}{quote}"

    # Substituting at the matched positions (rather than str.replace on a
    # rebuilt string) guarantees every matched image is actually rewritten.
    content = _MARKDOWN_IMAGE_RE.sub(replace_markdown, content)
    content = _IMG_TAG_RE.sub(replace_img_tag, content)

    # Write the updated content to a new file
    with open(output_file, 'w', encoding='utf-8') as file:
        file.write(content)
    print(f"Updated markdown saved to {output_file}")
if __name__ == "__main__":
    # Command-line entry point: the first argument is the Markdown file to
    # convert; the output path is that same path with "_output.md" appended.
    if not sys.argv[1:]:
        print("Usage: python md_convert.py input.md")
        sys.exit(1)
    in_file = sys.argv[1]
    convert_images_to_base64(
        markdown_file=in_file,
        output_file=f"{in_file}_output.md",
    )