-
Notifications
You must be signed in to change notification settings - Fork 5
/
Copy pathwrap.awk
152 lines (133 loc) · 4.11 KB
/
wrap.awk
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
#! /usr/bin/awk -f
#
# Like the Un*x `fmt` command, but for markdown.
#
# https://github.com/wernsey/d.awk
#
# It does its best to preserve markdown headings,
# lists, pre-formatted code blocks and block quotes.
#
# (c) 2016 Werner Stoop
# Copying and distribution of this file, with or without modification,
# are permitted in any medium without royalty provided the copyright
# notice and this notice are preserved. This file is offered as-is,
# without any warranty.
# Wrap column: keep a Width supplied by the caller (for example with
# `awk -v Width=72 -f wrap.awk`), otherwise default to 80.
BEGIN {
    if(!Width) {
        Width = 80;
    }
}
# Preserve headings: a line that begins (after optional whitespace) with
# three or more `=` / `-` characters, e.g. the underline of a heading.
# Text still waiting in Buf is flushed as its own line first, then the
# line itself is copied verbatim and the buffer is cleared.
/^[[:space:]]*[=\-][=\-][=\-]+/ {
    if(Buf)
        Out = Out Buf "\n";
    Out = Out $0 "\n";
    Buf = "";
    next;
}
# Preformatted text is sent to the output verbatim.
# Ditto for links/abbreviations.
#
# Two kinds of line match:
#  * an indented line while `last` is blank (preformatted text); `last`
#    is only updated by the blank-line and catch-all rules, so the rest
#    of an indented block keeps matching here;
#  * a link/abbreviation definition such as `[id]: ...` or `*[abbr]: ...`,
#    wherever it appears.
(/^( |\t)+/ && last ~ /^[[:space:]]*$/) || /^[[:space:]]*\*?\[.*\]:/ {
    # A definition may directly follow paragraph text. Flush that text
    # first, otherwise this line would be written ahead of it and the
    # two would swap places in the output.
    if(Buf) {
        Out = Out Buf "\n";
        Buf = "";
    }
    Out = Out $0 "\n";
    next;
}
# GitHub-style ``` code blocks:
# A fence line ends any paragraph in progress, is copied verbatim and
# toggles the Code flag.
/^```/ {
    # Only flush Buf when it holds text. Emitting an empty Buf followed
    # by "\n" would insert a blank line before every fence, including
    # the closing one, where it ends up inside the code block.
    if(Buf)
        Out = Out Buf "\n";
    Out = Out $0 "\n";
    Buf = "";
    Code = !Code;
    next;
}
# While Code is set, every line is passed through untouched.
Code {
    Out = Out $0 "\n";
    next;
}
# First line of a new paragraph (a non-blank line whose predecessor,
# `last`, was blank): remember its leading whitespace in the global
# Indent, which fmt() uses as the prefix of the lines it builds.
# There is no `next`, so the line continues on to the rules below.
$0 !~ /^[[:space:]]*$/ && match(last,/^[[:space:]]*$/) {
    # How much is the current line indented?
    # Measure $0 here: `str` exists only as a parameter of fmt(), so at
    # rule level it is an unset global and always produced Indent = "".
    if(match($0, /^[[:space:]]+/))
        Indent = substr($0, 1, RLENGTH);
    else
        Indent = "";
}
# Blank lines cause blank lines in the output.
# A blank (empty or whitespace-only) line ends the current paragraph
# and any list, and is recorded in `last` so the next line is seen as
# the start of a new paragraph.
/^[[:space:]]*$/ {
    # Finish the paragraph that was being built, if any.
    if(Buf)
        Out = Out Buf "\n";
    # You need to preserve $0 for blank lines in preformatted blocks.
    # The line is always terminated with its own newline: appending
    # "\n" before $0 instead would leave a whitespace-only line dangling
    # at the end of Out, where it would become a prefix of whatever is
    # written next.
    Out = Out $0 "\n";
    Buf = "";
    last = $0;
    InList = 0;
    next;
}
# Handle every other input line: anything that reaches this point is
# ordinary paragraph text. Hand it to fmt() for re-flowing and record
# it as the previous line for the rules that test `last`.
{
    fmt($0);
    last = $0;
}
# Write output when done.
# Out holds every finished line (each normally newline-terminated) and
# Buf the paragraph line still under construction.
END {
    Out = Out Buf;
    # `print` appends a newline of its own. Use it only when the text
    # does not already end in one, so that input ending in a blank,
    # verbatim or fence line does not gain an extra trailing blank line
    # and empty input produces no output at all.
    if(Out ~ /\n$/)
        printf "%s", Out;
    else if(length(Out))
        print Out;
}
# Re-flow one line of paragraph text.
#
# Parameters:
#   str                 - the raw input line.
#   loc, word, n, indent - locals (awk declares locals as extra
#                          parameters); callers pass only str.
#
# Globals read:    Width  - lines are broken before reaching this length.
# Globals updated: Buf    - the output line currently being assembled.
#                  Out    - finished output lines, newline-terminated.
#                  Indent - prefix for the lines being built; set here
#                           when str starts a list item or block quote.
#                  InList - set to 1 when str starts a list item.
#
# Returns nothing; the result is left in Buf and Out.
function fmt(str, loc,word,n,indent) {
    # Get the current line's indentation level.
    # (RLENGTH is -1 when there is no leading whitespace, which makes
    # the first substr() yield "" and the second leave str unchanged.)
    match(str, /^[[:space:]]+/);
    indent = substr(str, 1, RLENGTH);
    # Trim leading whitespace
    str = substr(str, RLENGTH+1);
    gsub(/\r/, "", str); # Windows :(

    # Lines starting with list item characters
    # force a line break in the output
    if(match(str,/^([*+\-]|[[:digit:]]+\.)/)) {
        if(Buf) Out = Out Buf "\n";
        Buf = "";
        # Preserve the indentation in the global Indent
        # if it is a list that is going to be split.
        Indent = indent;
        InList = 1; # remember we're in a list on subsequent calls.
    } else if(match(str,/^>[[:space:]]+/)) {
        # Block quote: the `> ` marker becomes part of the prefix and
        # is removed from the text to be wrapped.
        Indent = indent substr(str, 1, RLENGTH);
        str = substr(str,RLENGTH+1);
    }

    # Current indentation level = global Indent
    indent = Indent;

    # This implements the simple algorithm from the wikipedia
    # https://en.wikipedia.org/wiki/Line_wrap_and_word_wrap
    # There is a better way. See #Minimum_raggedness on that wiki page.
    # The C code example on https://www.rosettacode.org/wiki/Word_wrap#C
    # may actually be easy to port to Awk (the Awk version on that page
    # implements the greedy algorithm, like I do here).
    loc = match(str, /[[:space:]]+/);
    while(loc) {
        # Capture the word and the resume position now; the match()
        # below overwrites RSTART and RLENGTH.
        word = substr(str, 1, RSTART-1);
        n = RSTART+RLENGTH;

        # Handle forced line breaks: the first whitespace run in str is
        # also the trailing break marker, so str is the last word plus
        # that marker. It is written out verbatim and the line ends.
        if(match(str,/( |[[:space:]]+\\)$/) == loc) {
            if(!length(Buf)) {
                # Nothing pending: start the line with the indent, not
                # with a stray separator space.
                Out = Out indent str "\n";
            } else if(length(Buf) + length(str) + 1 >= Width) {
                if(InList) indent = Indent " ";
                Out = Out Buf "\n" indent str "\n";
            } else {
                Out = Out Buf " " str "\n";
            }
            # No indent is appended after the newline: the next line
            # adds its own when it starts a fresh Buf, and a second
            # copy would double it (e.g. turn `> ` into `> > `).
            Buf = "";
            return;
        }

        # If the buffer + the word exceeds the allowed width
        # then insert a line break. Otherwise, just append the
        # word to the buffer.
        # Also, preserve the indentation.
        if(length(Buf) + length(word) + 1 >= Width) {
            # Flush only a non-empty buffer; otherwise an over-long
            # first word would be preceded by an empty output line.
            if(length(Buf)) Out = Out Buf "\n";
            if(InList) indent = Indent " ";
            Buf = indent word;
        } else if(length(Buf))
            Buf = Buf " " word;
        else
            Buf = indent word;

        str = substr(str, n);
        loc = match(str, /[[:space:]]+/);
    }

    # Append the remainder of str to Buf
    if(length(str)) {
        if(length(Buf) + length(str) + 1 >= Width) {
            if(length(Buf)) Out = Out Buf "\n";
            if(InList) indent = Indent " ";
            Buf = indent str;
        } else if(length(Buf))
            Buf = Buf " " str;
        else
            Buf = indent str;
    }
}