| 1 | // Copyright (c) 2019-2021 Alexander Medvednikov. All rights reserved. |
| 2 | // Use of this source code is governed by a GPL license that can be found in the LICENSE file. |
| 3 | module highlight |
| 4 | |
// tab is the text that highlight_text emits in place of every tab byte
// it encounters in the input.
const tab = ' '
| 6 | |
// highlight_text renders `st` as an HTML table (`<table class="hl_table">`)
// with one row per input line. Each row starts with a numbered anchor cell;
// keywords of the detected language are wrapped in <b>, string literals in
// <u>, and line/block comments in <i>.
//
// When `commit` is false, a `md` file is returned as converted markdown and a
// `txt` file is returned unchanged, both with zero counts. When `commit` is
// true, a row whose first byte is `+` or `-` gets `class="a"` or `class="d"`
// on its content cell.
//
// Returns the HTML code, the number of lines, and the number of lines with
// source code.
pub fn highlight_text(st string, file_path string, commit bool) (string, int, int) {
	// Outside of commit views markdown and plain text bypass the highlighter.
	if !commit {
		file_extension := extract_extension_from_file_path(file_path)

		if file_extension == 'md' {
			return convert_markdown_to_html(st), 0, 0
		} else if file_extension == 'txt' {
			return st, 0, 0
		}
	}

	// If no language is found for the path, fall back to a default-initialised Lang.
	lang := extension_to_lang(file_path) or { Lang{} }
	// A trailing space is appended and the main loop stops one byte short of
	// the end, so the `runes[pos + 1]` look-aheads and the unchecked word scan
	// below always have a byte to read.
	text := '${st} '
	mut res := []u8{cap: text.len}
	mut lines := 0
	mut sloc := 0
	// ss holds the quote byte that opened the string literal being scanned.
	mut ss := u8(` `)
	lc := lang.line_comments
	mut mlc := ''
	mut mlc_end := ''
	// Only the first two entries are used: block-comment start and end markers.
	if lang.mline_comments.len >= 2 {
		mlc = lang.mline_comments[0]
		mlc_end = lang.mline_comments[1]
	}
	res << '<table class="hl_table">'.bytes()
	res << `\n`
	// is_single_line is true for input with at most one newline, so the first
	// row is only opened here when the input has two or more newlines.
	// NOTE(review): for shorter input the closing tags emitted below have no
	// matching opening row — confirm callers rely on this output.
	if !is_single_line(st) {
		res << '<tr><td><a id="1" class="no_select" href="#1">1</a></td><td>'.bytes()
		lines++
	}
	mut in_comment := false
	mut in_line_comment := false
	mut in_string := false
	// Despite the name, this is the raw byte array of the text, not runes.
	mut runes := text.bytes()
	for pos := 0; pos < runes.len - 1; pos++ {
		mut c := runes[pos]
		if c == `\n` {
			// Close the current row and open the next one with its line anchor.
			lines++
			if commit {
				// Diff view: mark the new row by the first byte of the next line.
				mut class := ''
				if runes[pos + 1] == `+` {
					class = 'class="a"'
				} else if runes[pos + 1] == `-` {
					class = 'class="d"'
				}
				res << '</td></tr>\n<tr><td><a id="${lines}" class="no_select" href="#${lines}">${lines}</a></td><td ${class}>'.bytes()
			} else {
				res << '</td></tr>\n<tr><td><a id="${lines}" class="no_select" href="#${lines}">${lines}</a></td><td>'.bytes()
			}
			// A line comment ends at the newline; its closing tag is written
			// after the new row has already been opened.
			if in_line_comment {
				in_line_comment = false
				res << '</i>'.bytes()
			}
			// A block comment continues, so re-open <i> inside the new cell.
			if in_comment {
				res << '<i>'.bytes()
			}
			// Count the upcoming line as source unless it is inside a block
			// comment or empty. in_line_comment was already cleared above, so
			// that term is always true at this point.
			if !in_comment && !in_line_comment && runes[pos + 1] != `\n` {
				sloc++
			}
			continue
		}
		if c == `\t` {
			res << tab.bytes()
			continue
		}
		if in_comment {
			res << write(c)
			if c == mlc_end[0] && is_line_comment(runes, pos, mlc_end) {
				in_comment = false
				// Consumes exactly one more byte of the terminator and appends
				// it without going through write().
				// NOTE(review): this assumes a two-byte end marker such as `*/`
				// — confirm for languages with longer terminators.
				res << runes[pos + 1]
				pos++
				res << '</i>'.bytes()
			}
			continue
		}
		if in_line_comment {
			res << write(c)
			continue
		}
		if in_string {
			res << write(c)
			// In double-quoted strings a byte preceded by a backslash never
			// closes the string.
			// NOTE(review): an escaped backslash right before the closing
			// quote (`"\\"`) is therefore treated as still open — confirm.
			if runes[pos - 1] == `\\` && ss == `"` {
				continue
			}
			if c == ss {
				in_string = false
				res << '</u>'.bytes()
			}
			continue
		}
		if is_letter(c, lang) {
			// Scan to the end of the word; there is no bounds check here, the
			// trailing space appended to `text` stops the scan.
			word_start := pos
			for is_letter(c, lang) {
				pos++
				c = runes[pos]
			}
			delta := pos - word_start
			mut data := []u8{}
			for i in 0 .. delta {
				data << runes[word_start + i]
			}
			w := data.bytestr()
			// Step back one byte: the loop's own pos++ moves to the byte after the word.
			pos--
			if w in lang.keywords {
				res << '<b>${w}</b>'.bytes()
			} else {
				res << w.bytes()
			}
			continue
		}
		// Open a string or comment span; the opening byte itself is written
		// below, after the opening tag.
		if is_string_token(c, lang) {
			in_string = true
			ss = c
			res << '<u>'.bytes()
		} else if mlc != '' && c == mlc.bytes()[0] && is_line_comment(runes, pos, mlc) {
			in_comment = true
			res << '<i>'.bytes()
		} else if lc != '' && c == lc.bytes()[0] && is_line_comment(runes, pos, lc) {
			in_line_comment = true
			res << '<i>'.bytes()
		}
		res << write(c)
	}
	// NOTE(review): the last row is closed with </tr> only; no </td> and no
	// closing tag for a still-open <i>/<u> is emitted here — confirm intended.
	res << '</tr>'.bytes()
	res << '</table>'.bytes()
	return res.bytestr(), lines, sloc
}
| 135 | |
// highlight_line returns syntax-highlighted markup for one line of source.
// Keywords are wrapped in <b>, string literals in <u>, and everything from a
// line-comment or block-comment start marker to the end of the line in <i>.
// All other bytes are emitted through `write`; when no language is found for
// `file_path` the whole line is returned through `escape_html` instead.
// No state is kept between calls, so multi-line strings and block comments
// are not tracked; this suits diff rendering where every line is colored
// on its own.
pub fn highlight_line(content string, file_path string) string {
	if content == '' {
		return ''
	}
	lang := extension_to_lang(file_path) or { return escape_html(content) }
	// Block-comment start first, then the line-comment marker; either one
	// turns the remainder of the line into a comment.
	block_marker := if lang.mline_comments.len >= 2 { lang.mline_comments[0] } else { '' }
	comment_markers := [block_marker, lang.line_comments]
	src := content.bytes()
	mut out := []u8{cap: src.len + 16}
	mut quote := u8(` `)
	mut inside_string := false
	mut inside_comment := false
	for pos := 0; pos < src.len; pos++ {
		ch := src[pos]
		if inside_comment {
			out << write(ch)
			continue
		}
		if inside_string {
			out << write(ch)
			// Inside double quotes a byte preceded by a backslash never closes the string.
			escaped := pos > 0 && src[pos - 1] == `\\` && quote == `"`
			if !escaped && ch == quote {
				inside_string = false
				out << '</u>'.bytes()
			}
			continue
		}
		if is_letter(ch, lang) {
			first := pos
			for pos < src.len && is_letter(src[pos], lang) {
				pos++
			}
			word := src[first..pos].bytestr()
			// Step back so the loop increment lands on the byte after the word.
			pos--
			if word in lang.keywords {
				out << '<b>${word}</b>'.bytes()
			} else {
				out << word.bytes()
			}
			continue
		}
		if is_string_token(ch, lang) {
			inside_string = true
			quote = ch
			out << '<u>'.bytes()
		} else {
			for marker in comment_markers {
				if marker != '' && ch == marker[0] && pos + marker.len <= src.len
					&& is_line_comment(src, pos, marker) {
					inside_comment = true
					out << '<i>'.bytes()
					break
				}
			}
		}
		// The opening byte of a string/comment, or any other byte, is written here.
		out << write(ch)
	}
	// Close whatever span is still open at the end of the line.
	if inside_comment {
		out << '</i>'.bytes()
	}
	if inside_string {
		out << '</u>'.bytes()
	}
	return out.bytestr()
}
| 218 | |
// escape_html returns a copy of `s` that is safe to place in HTML text
// content: `<`, `>` and `&` are replaced by the character references
// `&lt;`, `&gt;` and `&amp;`; every other byte is copied unchanged.
fn escape_html(s string) string {
	mut res := []u8{cap: s.len}
	for c in s {
		match c {
			`<` { res << '&lt;'.bytes() }
			`>` { res << '&gt;'.bytes() }
			`&` { res << '&amp;'.bytes() }
			else { res << c }
		}
	}
	return res.bytestr()
}
| 235 | |
// write returns the bytes to append to the HTML output for the single input
// byte `c`: `<`, `>` and `&` become `&lt;`, `&gt;` and `&amp;` so that source
// text cannot be interpreted as markup or as a character reference; any
// other byte is returned as a one-element array.
fn write(c u8) []u8 {
	return match c {
		`<` { '&lt;'.bytes() }
		`>` { '&gt;'.bytes() }
		`&` { '&amp;'.bytes() }
		else { [c] }
	}
}
| 247 | |
// is_letter reports whether `c` can be part of a word (identifier or
// keyword): ASCII letters and `_` always can, and `#` can when the language
// name, compared case-insensitively, is `cpp`, `c`, `d` or `swift`.
fn is_letter(c u8, lang Lang) bool {
	if c == `_` || c.is_letter() {
		return true
	}
	if c != `#` {
		return false
	}
	// The name is only lowercased for `#`, so the common path taken for every
	// scanned byte does not allocate a string.
	return lang.name.to_lower() in ['cpp', 'c', 'd', 'swift']
}
| 255 | |
// is_string_token reports whether `c` equals the first byte of any of the
// language's string-start tokens.
fn is_string_token(c u8, lang Lang) bool {
	return lang.string_start.any(it[0] == c)
}
| 264 | |
// is_line_comment reports whether the bytes of `s` starting at index `pos`
// match the marker `lc` byte for byte. Despite the name it is a generic
// prefix test and is used for block-comment markers as well.
// It returns false when the marker would extend past the end of `s` (or
// `pos` is negative) instead of indexing out of range.
fn is_line_comment(s []u8, pos int, lc string) bool {
	if pos < 0 || pos + lc.len > s.len {
		return false
	}
	for i, b in lc {
		if s[pos + i] != b {
			return false
		}
	}
	return true
}
| 273 | |
// is_single_line reports whether `s` contains at most one newline byte;
// it returns false as soon as a second newline is seen.
fn is_single_line(s string) bool {
	mut newlines := 0
	for ch in s {
		if ch != `\n` {
			continue
		}
		newlines++
		if newlines >= 2 {
			return false
		}
	}
	return true
}
| 286 | |
// extract_extension_from_file_path returns the lowercased text that follows
// the last `.` in `path`. A path without any `.` is returned whole, lowercased.
fn extract_extension_from_file_path(path string) string {
	tail := path.all_after_last('.')
	return tail.to_lower()
}
| 290 | |