ggdgsdbsdbbb / highlight / highlight.v
289 lines · 277 sloc · 6.25 KB · cbcf1476fe0844712412e7fa72a04a7c5e03b64c
Raw
1// Copyright (c) 2019-2021 Alexander Medvednikov. All rights reserved.
2// Use of this source code is governed by a GPL license that can be found in the LICENSE file.
3module highlight
4
// tab is the text emitted in place of each tab character found in the
// source being highlighted.
// NOTE(review): the value is a single space - confirm this is the intended
// indent width.
const tab = ' '
6
// highlight_text converts source text into an HTML table with one numbered
// row per line, wrapping keywords in <b>, string literals in <u> and
// comments in <i>.
//
// When `commit` is false, a `.md` path is returned as rendered markdown and a
// `.txt` path is returned unchanged, both with zero line counts. When
// `commit` is true, every row started after a newline whose first byte is
// `+` or `-` gets `class="a"` or `class="d"` on its cell.
//
// Returns the HTML, the number of lines, and the number of lines counted as
// source code (a newline counts when it is outside a block comment and is not
// immediately followed by another newline).
pub fn highlight_text(st string, file_path string, commit bool) (string, int, int) {
	if !commit {
		file_extension := extract_extension_from_file_path(file_path)

		if file_extension == 'md' {
			return convert_markdown_to_html(st), 0, 0
		} else if file_extension == 'txt' {
			return st, 0, 0
		}
	}

	lang := extension_to_lang(file_path) or { Lang{} }
	// The appended byte is a sentinel: the main loop stops one byte early, so
	// `runes[pos + 1]` is always in range.
	text := '${st} '
	runes := text.bytes()
	mut res := []u8{cap: text.len}
	mut lines := 0
	mut sloc := 0
	mut ss := u8(` `) // quote byte that opened the current string literal
	lc := lang.line_comments
	mut mlc := ''
	mut mlc_end := ''
	if lang.mline_comments.len >= 2 {
		mlc = lang.mline_comments[0]
		mlc_end = lang.mline_comments[1]
	}
	res << '<table class="hl_table">'.bytes()
	res << `\n`
	if !is_single_line(st) {
		res << '<tr><td><a id="1" class="no_select" href="#1">1</a></td><td>'.bytes()
		lines++
	}
	mut in_comment := false
	mut in_line_comment := false
	mut in_string := false
	for pos := 0; pos < runes.len - 1; pos++ {
		c := runes[pos]
		if c == `\n` {
			lines++
			row_head := '</td></tr>\n<tr><td><a id="${lines}" class="no_select" href="#${lines}">${lines}</a></td>'
			if commit {
				// In a diff the first byte of the next line marks additions/deletions.
				first_of_next := runes[pos + 1]
				diff_class := if first_of_next == `+` {
					'class="a"'
				} else if first_of_next == `-` {
					'class="d"'
				} else {
					''
				}
				res << '${row_head}<td ${diff_class}>'.bytes()
			} else {
				res << '${row_head}<td>'.bytes()
			}
			// A line comment ends with its line; its closing tag is emitted
			// after the new cell has been opened (kept as in the original output).
			if in_line_comment {
				in_line_comment = false
				res << '</i>'.bytes()
			}
			// A block comment continues, so italics are re-opened in the new cell.
			if in_comment {
				res << '<i>'.bytes()
			}
			if !in_comment && runes[pos + 1] != `\n` {
				sloc++
			}
			continue
		}
		if c == `\t` {
			res << tab.bytes()
			continue
		}
		if in_comment {
			// Consume the whole terminator, whatever its length, so that no byte
			// after it is swallowed and every byte goes through `write`.
			if mlc_end != '' && c == mlc_end[0] && pos + mlc_end.len <= runes.len
				&& is_line_comment(runes, pos, mlc_end) {
				for i in 0 .. mlc_end.len {
					res << write(runes[pos + i])
				}
				pos += mlc_end.len - 1
				res << '</i>'.bytes()
				in_comment = false
			} else {
				res << write(c)
			}
			continue
		}
		if in_line_comment {
			res << write(c)
			continue
		}
		if in_string {
			res << write(c)
			if c == ss {
				// Backslash escapes are only honoured in double-quoted strings.
				// The quote is escaped when preceded by an odd number of
				// backslashes, so "\\" correctly ends the string.
				mut escaped := false
				if ss == `"` {
					for back := pos - 1; back >= 0 && runes[back] == `\\`; back-- {
						escaped = !escaped
					}
				}
				if !escaped {
					in_string = false
					res << '</u>'.bytes()
				}
			}
			continue
		}
		if is_letter(c, lang) {
			word_start := pos
			for pos < runes.len && is_letter(runes[pos], lang) {
				pos++
			}
			w := runes[word_start..pos].bytestr()
			pos-- // the loop header advances past the last letter of the word
			if w in lang.keywords {
				res << '<b>${w}</b>'.bytes()
			} else {
				res << w.bytes()
			}
			continue
		}
		if is_string_token(c, lang) {
			in_string = true
			ss = c
			res << '<u>'.bytes()
		} else if mlc != '' && c == mlc[0] && pos + mlc.len <= runes.len
			&& is_line_comment(runes, pos, mlc) {
			in_comment = true
			res << '<i>'.bytes()
			// Emit the whole opener now so its tail cannot be mistaken for the
			// start of the terminator (e.g. `/*/`).
			for i in 0 .. mlc.len {
				res << write(runes[pos + i])
			}
			pos += mlc.len - 1
			continue
		} else if lc != '' && c == lc[0] && pos + lc.len <= runes.len
			&& is_line_comment(runes, pos, lc) {
			in_line_comment = true
			res << '<i>'.bytes()
		}
		res << write(c)
	}
	res << '</tr>'.bytes()
	res << '</table>'.bytes()
	return res.bytestr(), lines, sloc
}
135
// highlight_line returns syntax-highlighted markup for a single line of
// source code: keywords are wrapped in <b>, string literals in <u> and
// comments in <i>. Every non-word byte is emitted through `write`; when no
// language is known for `file_path` the content is returned through
// `escape_html` instead.
//
// It keeps no state between calls, so it suits diff rendering where each line
// is coloured independently. A block comment opened on the line ends at its
// terminator if that appears on the same line, otherwise it runs to the end
// of the line. A line that lies inside a block comment opened on an earlier
// line is not recognised as a comment.
pub fn highlight_line(content string, file_path string) string {
	if content.len == 0 {
		return ''
	}
	lang := extension_to_lang(file_path) or { return escape_html(content) }
	lc := lang.line_comments
	mut mlc := ''
	mut mlc_end := ''
	if lang.mline_comments.len >= 2 {
		mlc = lang.mline_comments[0]
		mlc_end = lang.mline_comments[1]
	}
	runes := content.bytes()
	mut res := []u8{cap: runes.len + 16}
	mut in_string := false
	mut ss := u8(` `) // quote byte that opened the current string literal
	mut in_line_comment := false
	mut in_block_comment := false
	for pos := 0; pos < runes.len; pos++ {
		c := runes[pos]
		if in_line_comment {
			res << write(c)
			continue
		}
		if in_block_comment {
			// Close the comment when its terminator starts here, so code that
			// follows it on the same line is highlighted normally again.
			if mlc_end != '' && c == mlc_end[0] && pos + mlc_end.len <= runes.len
				&& is_line_comment(runes, pos, mlc_end) {
				for i in 0 .. mlc_end.len {
					res << write(runes[pos + i])
				}
				pos += mlc_end.len - 1
				res << '</i>'.bytes()
				in_block_comment = false
			} else {
				res << write(c)
			}
			continue
		}
		if in_string {
			res << write(c)
			if c == ss {
				// Backslash escapes are only honoured in double-quoted strings.
				// The quote is escaped when preceded by an odd number of
				// backslashes, so "\\" correctly ends the string.
				mut escaped := false
				if ss == `"` {
					for back := pos - 1; back >= 0 && runes[back] == `\\`; back-- {
						escaped = !escaped
					}
				}
				if !escaped {
					in_string = false
					res << '</u>'.bytes()
				}
			}
			continue
		}
		if is_letter(c, lang) {
			word_start := pos
			for pos < runes.len && is_letter(runes[pos], lang) {
				pos++
			}
			w := runes[word_start..pos].bytestr()
			pos-- // the loop header advances past the last letter of the word
			if w in lang.keywords {
				res << '<b>'.bytes()
				res << w.bytes()
				res << '</b>'.bytes()
			} else {
				res << w.bytes()
			}
			continue
		}
		if is_string_token(c, lang) {
			in_string = true
			ss = c
			res << '<u>'.bytes()
			res << write(c)
			continue
		}
		if mlc != '' && c == mlc[0] && pos + mlc.len <= runes.len
			&& is_line_comment(runes, pos, mlc) {
			in_block_comment = true
			res << '<i>'.bytes()
			// Emit the whole opener now so its tail cannot be mistaken for the
			// start of the terminator (e.g. `/*/`).
			for i in 0 .. mlc.len {
				res << write(runes[pos + i])
			}
			pos += mlc.len - 1
			continue
		}
		if lc != '' && c == lc[0] && pos + lc.len <= runes.len && is_line_comment(runes, pos, lc) {
			in_line_comment = true
			res << '<i>'.bytes()
			res << write(c)
			continue
		}
		res << write(c)
	}
	// Close whatever is still open so the returned fragment is balanced.
	if in_line_comment || in_block_comment {
		res << '</i>'.bytes()
	}
	if in_string {
		res << '</u>'.bytes()
	}
	return res.bytestr()
}
218
// escape_html returns `s` with the HTML-significant bytes `<`, `>` and `&`
// replaced by the character references `&lt;`, `&gt;` and `&amp;`. All other
// bytes are copied unchanged.
fn escape_html(s string) string {
	mut res := []u8{cap: s.len}
	for c in s {
		match c {
			`<` { res << '&lt;'.bytes() }
			`>` { res << '&gt;'.bytes() }
			`&` { res << '&amp;'.bytes() }
			else { res << c }
		}
	}
	return res.bytestr()
}
235
// write returns the bytes to emit for one source byte inside HTML output:
// `<`, `>` and `&` become `&lt;`, `&gt;` and `&amp;`; any other byte is
// returned as a one-element array.
// `&` is escaped as well so that source text such as `&lt;` is displayed
// literally instead of being interpreted as a character reference.
fn write(c u8) []u8 {
	return match c {
		`<` { '&lt;'.bytes() }
		`>` { '&gt;'.bytes() }
		`&` { '&amp;'.bytes() }
		else { [c] }
	}
}
247
// is_letter reports whether `c` can be part of a word (identifier or
// keyword): an ASCII letter or `_`, plus `#` when the language name,
// compared case-insensitively, is `cpp`, `c`, `d` or `swift`.
fn is_letter(c u8, lang Lang) bool {
	if c.is_letter() || c == `_` {
		return true
	}
	// The language name only matters for `#`, so the lowercased copy is
	// built for that byte alone instead of for every byte scanned.
	if c != `#` {
		return false
	}
	return lang.name.to_lower() in ['cpp', 'c', 'd', 'swift']
}
255
// is_string_token reports whether `c` equals the first byte of any entry in
// `lang.string_start`, i.e. whether it opens a string literal.
fn is_string_token(c u8, lang Lang) bool {
	for opener in lang.string_start {
		// An empty entry has no first byte; skip it rather than index out of range.
		if opener.len > 0 && opener[0] == c {
			return true
		}
	}
	return false
}
264
// is_line_comment reports whether the bytes of `lc` occur in `s` starting at
// index `pos`. Despite its name it is a generic prefix-at-offset check and is
// also used for block comment openers and terminators.
// Returns false when `pos` is negative or when `lc` does not fit into `s` at
// `pos`, instead of indexing past the end of `s`.
fn is_line_comment(s []u8, pos int, lc string) bool {
	if pos < 0 || pos + lc.len > s.len {
		return false
	}
	for i, b in lc {
		if s[pos + i] != b {
			return false
		}
	}
	return true
}
273
// is_single_line reports whether `s` contains at most one newline byte, so a
// single line with a trailing newline still counts as a single line.
fn is_single_line(s string) bool {
	mut newlines_allowed := 1
	for ch in s {
		if ch != `\n` {
			continue
		}
		if newlines_allowed == 0 {
			// this is the second newline
			return false
		}
		newlines_allowed--
	}
	return true
}
286
// extract_extension_from_file_path returns the lowercased text that follows
// the last `.` in `path`. A path without any `.` yields the whole path,
// lowercased.
fn extract_extension_from_file_path(path string) string {
	segments := path.split('.')
	extension := segments.last()
	return extension.to_lower()
}
290