-
Notifications
You must be signed in to change notification settings - Fork 0
/
main.py
228 lines (186 loc) · 7.27 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
#!/usr/bin/env python3
"""Convert your Obsidian vault to Org-roam compatible org-files."""
# For command-line argument parsing
import argparse
# Multiprocessing
import multiprocessing as mp
# Traverse all files
import os
# Regular expressions, for finding when the YAML begins and ends in markdown
import re
# Convert from markdown to org
import pypandoc
# For yaml in markdown files
import yaml
def get_yaml_data(file_path):
    """Get the YAML front matter from a Markdown file.

    Only a ``---`` delimited block at the top of the file (optionally
    preceded by whitespace) is treated as front matter, so a pair of ``---``
    horizontal rules further down in the note body is never parsed as
    metadata.

    Args:
        file_path: Path of the Markdown file to read.

    Returns:
        The object produced by ``yaml.load`` for the front-matter text.

    Raises:
        ValueError: If the file does not begin with a front-matter block.
    """
    # Read as UTF-8 explicitly instead of relying on the platform default.
    with open(file_path, "r", encoding="utf-8") as file:
        content = file.read()
    # re.match anchors at the start of the file; the group is the YAML text
    # between the opening and the closing ``---`` line.
    front_matter = re.match(r"\s*---[ \t]*\n(.*?)\n---", content, re.DOTALL)
    if front_matter is None:
        raise ValueError(f"No YAML front matter found in {file_path}")
    # NOTE(review): FullLoader is not meant for untrusted input; consider
    # yaml.safe_load if vault files can come from an untrusted source.
    return yaml.load(front_matter.group(1), Loader=yaml.FullLoader)
def get_org_roam_file_name(yaml_data, file_name):
    """Build the org-roam file name for a note.

    The result has the form ``<created timestamp>-<slug>.org`` where the slug
    is the lower-cased file name with spaces replaced by underscores and the
    Markdown extension removed.

    Args:
        yaml_data: Front-matter mapping; ``yaml_data["created"]`` must offer
            ``strftime`` (e.g. ``datetime.date`` or ``datetime.datetime``).
        file_name: Name of the Markdown file (``.md`` or ``.markdown``).

    Returns:
        The file name to use for the converted org file.
    """
    timestamp = yaml_data["created"].strftime("%Y%m%d%H%M%S")
    # Strip only a trailing Markdown extension. A plain replace(".md", "")
    # would also delete ".md" in the middle of a name and would leave
    # ".markdown" files with a double extension.
    stem, extension = os.path.splitext(file_name)
    if extension.lower() not in (".md", ".markdown"):
        stem = file_name
    slug = stem.replace(" ", "_").lower()
    return f"{timestamp}-{slug}.org"
def construct_header(yaml):
    """Construct the property drawer placed at the top of the org file.

    Args:
        yaml: Front-matter mapping of the note. ``yaml["title"]`` is
            required; ``yaml["aliases"]`` is optional and may be ``None``,
            a single string or a list.

    Returns:
        A ``:PROPERTIES:`` ... ``:END:`` drawer (terminated by a newline)
        holding the note ``:ID:`` and its ``:ROAM_ALIASES:``.
    """
    aliases = yaml.get("aliases")
    if isinstance(aliases, str):
        # A scalar ``aliases: foo`` entry is a single alias.
        aliases = [aliases]
    # No aliases, or a list with an empty (None) entry, yields an empty
    # ROAM_ALIASES value.
    if aliases and None not in aliases:
        alias = get_aliases_roam_format(aliases)
    else:
        alias = ""

    note_id = yaml["title"].lower().replace(" ", "-")
    # Remove a literal ".md" suffix only. str.rstrip(".md") would strip any
    # trailing '.', 'm' or 'd' characters ("random" -> "rando") and produce
    # IDs that no longer match the ids generated for links.
    if note_id.endswith(".md"):
        note_id = note_id[: -len(".md")]

    return ":PROPERTIES:\n:ID: {}\n:ROAM_ALIASES: {}\n:END:\n".format(
        note_id, alias
    )
def convert_to_org(file_path, output_path):
    """Run pandoc on a markdown file, write the org result and report it.

    The conversion is delegated to ``pypandoc.convert_file`` with line
    wrapping disabled; afterwards ``<input> -> <output>`` is printed.
    """
    pandoc_options = ["--wrap=none"]
    pypandoc.convert_file(
        file_path,
        "org",
        outputfile=output_path,
        extra_args=pandoc_options,
    )
    progress_line = " -> ".join((file_path, output_path))
    print(progress_line)
def convert_dict_to_string(alias_dict):
    """Render a mapping as comma-separated, double-quoted ``key: value`` items."""
    quoted_pairs = []
    for name, content in alias_dict.items():
        quoted_pairs.append('"{}: {}"'.format(name, content))
    return ", ".join(quoted_pairs)
def get_aliases_roam_format(aliases):
    """Create the string used for the ``:ROAM_ALIASES:`` property.

    Each alias is rendered as follows and the results are joined by spaces:

    * a dictionary is turned into text by ``convert_dict_to_string``;
    * any other value is converted with ``str``; it is wrapped in double
      quotes when it contains a space and left bare otherwise.

    Args:
        aliases: Iterable of alias values taken from the front matter.

    Returns:
        The space-separated alias string (empty for no aliases).
    """
    formatted_aliases = []
    for alias in aliases:
        if isinstance(alias, dict):
            formatted_aliases.append(convert_dict_to_string(alias))
            continue
        # YAML can yield non-string scalars (e.g. ``2023`` or ``true``);
        # the membership test below needs a string.
        text = str(alias)
        formatted_aliases.append(f'"{text}"' if " " in text else text)
    # The former debug print of the intermediate list was dropped.
    return " ".join(formatted_aliases)
def add_math(org_file):
    """Add ``#+STARTUP: latexpreview`` to the org_file header.

    The line is inserted directly after the first line that starts with
    ``:END:``. When no such line exists the file content is left as it was.

    Args:
        org_file: Path of the org file to modify in place.
    """
    # Read and write as UTF-8 explicitly instead of the platform default.
    with open(org_file, "r", encoding="utf-8") as f:
        lines = f.readlines()

    # Compute the new content before reopening the file for writing, so the
    # file is not truncated while the content is still being prepared.
    for i, line in enumerate(lines):
        if line.startswith(":END:"):
            if not line.endswith("\n"):
                # ``:END:`` is the unterminated last line; terminate it so
                # the inserted keyword starts on its own line.
                lines[i] = line + "\n"
            lines.insert(i + 1, "#+STARTUP: latexpreview\n")
            break

    with open(org_file, "w", encoding="utf-8") as f:
        f.writelines(lines)
def add_tags(org_file, yaml):
    """Add a ``#+filetags:`` line to the org_file header.

    The line is inserted directly after the first line that starts with
    ``:END:``. Nothing is touched when there are no usable tags: ``yaml`` is
    empty or ``None``, the ``tags`` entry is missing, empty or ``None``, or
    the tag list contains an empty (``None``) item.

    Args:
        org_file: Path of the org file to modify in place.
        yaml: Front-matter mapping of the note (or ``None``). ``tags`` may
            be a list or a single string of comma/space separated tags.
    """
    tags = yaml.get("tags") if yaml else None
    if isinstance(tags, str):
        # A scalar entry such as ``tags: a, b`` holds one or more tags.
        tags = tags.replace(",", " ").split()
    if not tags or None in tags:
        return

    # Read and write as UTF-8 explicitly instead of the platform default.
    with open(org_file, "r", encoding="utf-8") as f:
        lines = f.readlines()

    # str() because YAML may parse a tag such as ``2023`` as a number.
    output = "#+filetags: :" + ":".join(str(tag) for tag in tags) + ":"
    for i, line in enumerate(lines):
        if line.startswith(":END:"):
            if not line.endswith("\n"):
                # Terminate an unterminated last line so the keyword
                # starts on its own line.
                lines[i] = line + "\n"
            lines.insert(i + 1, f"{output}\n")
            break

    with open(org_file, "w", encoding="utf-8") as f:
        f.writelines(lines)
def parse_commandline_arguments():
    """Define the command-line interface and parse ``sys.argv``.

    Returns the argparse namespace with ``input_folder``, ``output_folder``
    and the boolean ``math`` flag.
    """
    description = "Convert your Obsidian vault to\nOrg-roam compatible org-files."
    parser = argparse.ArgumentParser(description=description)

    # The two required positional arguments, in order.
    positional_arguments = (
        ("input_folder", "Your Obsidian Vault (Remember to Backup)"),
        ("output_folder", "The Folder which The Org Files will Output To"),
    )
    for argument_name, help_text in positional_arguments:
        parser.add_argument(argument_name, help=help_text)

    parser.add_argument(
        "--math",
        action="store_true",
        help="Adds #+STARTUP: latexpreview to headers of all files.",
    )
    # Maybe sometime add a --math-all, such that we take all, otherwise only
    # those with math equations?
    parsed = parser.parse_args()
    return parsed
def is_markdown_file(filename):
    """Tell whether the name carries a ``.md`` or ``.markdown`` extension.

    The comparison ignores the letter case of the extension.
    """
    suffix = os.path.splitext(filename)[1].lower()
    return suffix == ".md" or suffix == ".markdown"
def remove_properties(file_path):
    """Remove every ``:PROPERTIES:`` ... ``:END:`` span from an org file.

    Everything from a ``:PROPERTIES:`` marker up to and including the next
    ``:END:`` marker that follows it is deleted. A ``:PROPERTIES:`` marker
    with no ``:END:`` after it is left in place.

    Args:
        file_path: Path of the org file to modify in place.
    """
    start_marker = ":PROPERTIES:"
    end_marker = ":END:"

    # Read and write as UTF-8 explicitly instead of the platform default.
    with open(file_path, "r", encoding="utf-8") as f:
        file_text = f.read()

    start = file_text.find(start_marker)
    while start != -1:
        # Look for the end marker only after the start marker. Pairing the
        # start with an ``:END:`` located before it would duplicate text
        # on every pass and never terminate.
        end = file_text.find(end_marker, start)
        if end == -1:
            break
        file_text = file_text[:start] + file_text[end + len(end_marker):]
        start = file_text.find(start_marker)

    with open(file_path, "w", encoding="utf-8") as f:
        f.write(file_text)
def add_string_to_file_start(file_path, new_string):
    """Add a string to the start of a file.

    The existing content is stripped of leading and trailing whitespace
    before ``new_string`` is put in front of it, and the result replaces
    the file content.

    Args:
        file_path: Path of the file to modify in place.
        new_string: Text to place at the very beginning of the file.
    """
    # Read and write as UTF-8 explicitly instead of the platform default,
    # so non-ASCII text is handled the same way on every platform.
    with open(file_path, "r", encoding="utf-8") as f:
        file_text = f.read().strip()

    with open(file_path, "w", encoding="utf-8") as f:
        f.write(new_string + file_text)
def convert_wikilinks_to_org_links(file_path):
    """Convert wikilinks in an org file to org ``id:`` links.

    ``[[Target]]`` becomes ``[[id:target][Target]]`` and ``[[Target|Text]]``
    becomes ``[[id:target][Text]]``; the id is the target lower-cased with
    spaces replaced by hyphens. Bracket links whose target starts with a
    URI scheme (``https:``, ``file:``, ``id:`` ...) or with a path prefix
    (``/``, ``./``, ``../``) are left untouched.

    Args:
        file_path: Path of the org file to modify in place.
    """
    # Read and write as UTF-8 explicitly instead of the platform default.
    with open(file_path, "r", encoding="utf-8") as f:
        file_text = f.read()

    # Regular expression pattern for wikilinks with and without aliases
    pattern = r"\[\[([^\]|]+)(?:\|([^\]]+))?\]\]"
    # A description-less org link such as [[https://example.com]] or
    # [[file:image.png]] has the same shape as a wikilink; rewriting it to
    # an id: link would break it. NOTE(review): assumes note names never
    # start with "<scheme>:" or a path prefix - confirm for your vault.
    existing_org_target = re.compile(r"[A-Za-z][A-Za-z0-9+.-]*:|\.{0,2}/")

    def link_replacer(match):
        target = match.group(1)
        if existing_org_target.match(target):
            return match.group(0)
        link_id = target.replace(" ", "-").lower()
        link_text = match.group(2) if match.group(2) else target
        return f"[[id:{link_id}][{link_text}]]"

    # Replace wikilinks with org links using a custom replacement function
    file_text = re.sub(pattern, link_replacer, file_text)

    # Write the updated file text back to the file
    with open(file_path, "w", encoding="utf-8") as f:
        f.write(file_text)
def process_file(filename, args):
    """Convert one vault entry; directories are traversed recursively.

    A Markdown file is converted to org with pandoc, stripped of the
    property drawers pandoc produced, given a ``#+title:`` line and an
    org-roam property drawer, tagged, optionally marked for LaTeX preview
    and finally has its wikilinks rewritten. Other files are ignored.
    All org files are written directly into ``args.output_folder``.

    Args:
        filename: Path of the entry relative to ``args.input_folder``.
        args: Parsed command-line arguments (``input_folder``,
            ``output_folder`` and ``math`` are read).
    """
    filepath = os.path.join(args.input_folder, filename)
    if os.path.isfile(filepath):
        # Convert!
        if is_markdown_file(filename):
            file_yaml = get_yaml_data(filepath)
            # Join with a separator (plain concatenation only worked when
            # the output folder ended in one) and use the base name so that
            # notes from subfolders also land in the output folder.
            new_path = os.path.join(
                args.output_folder,
                get_org_roam_file_name(file_yaml, os.path.basename(filename)),
            )
            convert_to_org(filepath, new_path)
            remove_properties(new_path)
            # The title is prepended first and the header second, so the
            # header ends up above the title.
            add_string_to_file_start(
                new_path, ("#+title: " + file_yaml["title"] + "\n")
            )
            add_string_to_file_start(new_path, construct_header(file_yaml))
            add_tags(new_path, file_yaml)
            if args.math:
                add_math(new_path)
            convert_wikilinks_to_org_links(new_path)
    elif os.path.isdir(filepath):
        # Recursively traverse subdirectories. The entry must stay relative
        # to the input folder; passing the bare name would make the
        # recursive call look for it at the top level of the vault.
        for subfilename in os.listdir(filepath):
            process_file(os.path.join(filename, subfilename), args)
def process_file_wrapper(args):
    """Adapt ``process_file`` to single-argument callers such as ``Pool.map``.

    ``args`` is the sequence of positional arguments for ``process_file``;
    it is unpacked and the call's result is handed back.
    """
    outcome = process_file(*args)
    return outcome
if __name__ == "__main__":
    # Entry point: convert every top-level entry of the vault in parallel.
    args = parse_commandline_arguments()
    filenames = os.listdir(args.input_folder)
    # One (filename, args) work item per entry of the input folder
    work_items = [(entry, args) for entry in filenames]
    # Using multiprocessing Pool, one worker process per CPU
    worker_count = mp.cpu_count()
    with mp.Pool(processes=worker_count) as pool:
        pool.map(process_file_wrapper, work_items)