]> cat aescling's git repositories - mastodon.git/blob - app/lib/formatter.rb
Optimize some regex matching (#15528)
[mastodon.git] / app / lib / formatter.rb
1 # frozen_string_literal: true
2
3 require 'singleton'
4 require_relative './sanitize_config'
5
6 class Formatter
7 include Singleton
8 include RoutingHelper
9
10 include ActionView::Helpers::TextHelper
11
# Renders a status as an HTML-safe string.
#
# For a reblog, the reblogged status (status.proper) is rendered instead.
# Non-local statuses are passed through +reformat+ only. Local statuses are
# optionally prefixed with "RT @acct", have their URLs, hashtags and
# mentions linked, and are wrapped by +simple_format+ with newlines removed.
#
# @param status the status to render
# @param options [Hash] recognised keys are :inline_poll_options,
#   :custom_emojify and :autoplay
# @return [String] HTML-safe markup, or '' when the content is blank
def format(status, **options)
  prepend_reblog = false

  if status.reblog?
    prepend_reblog = status.reblog.account.acct
    status = status.proper
  end

  raw_content = status.text

  if options[:inline_poll_options] && status.preloadable_poll
    poll_lines = status.preloadable_poll.options.map { |title| "[ ] #{title}" }
    raw_content += "\n\n#{poll_lines.join("\n")}"
  end

  return '' if raw_content.blank?

  if status.local?
    linkable_accounts = status.active_mentions.map(&:account)
    linkable_accounts << status.account

    html = prepend_reblog ? "RT @#{prepend_reblog} #{raw_content}" : raw_content
    html = encode_and_link_urls(html, linkable_accounts)
    html = encode_custom_emojis(html, status.emojis, options[:autoplay]) if options[:custom_emojify]
    html = simple_format(html, {}, sanitize: false).delete("\n")
  else
    html = reformat(raw_content)
    html = encode_custom_emojis(html, status.emojis, options[:autoplay]) if options[:custom_emojify]
  end

  html.html_safe # rubocop:disable Rails/OutputSafety
end
46
# Sanitizes +html+ using the Sanitize::Config::MASTODON_STRICT configuration.
#
# @param html [String] markup to sanitize
# @return [String] the sanitized markup, or '' when sanitizing raises
#   ArgumentError
def reformat(html)
  begin
    sanitize(html, Sanitize::Config::MASTODON_STRICT)
  rescue ArgumentError
    ''
  end
end
52
# Returns the text of a status without markup.
#
# A local status returns its text unchanged. For any other status a newline
# is inserted after every run of <br />, <br> or </p> tags and the result is
# passed to +strip_tags+.
#
# @param status the status whose text is wanted
# @return [String]
def plaintext(status)
  if status.local?
    status.text
  else
    # "\0" in the replacement stands for the whole matched run of tags.
    with_line_breaks = status.text.gsub(/(<br \/>|<br>|<\/p>)+/, "\\0\n")
    strip_tags(with_line_breaks)
  end
end
59
# Renders an account note as HTML-safe markup.
#
# Notes of local accounts go through +linkify+, all others through
# +reformat+. Custom emojis are substituted only when
# options[:custom_emojify] is set.
#
# @param account the account whose note is rendered
# @param options [Hash] recognised keys are :custom_emojify and :autoplay
# @return [String] HTML-safe markup
def simplified_format(account, **options)
  html = if account.local?
           linkify(account.note)
         else
           reformat(account.note)
         end

  html = encode_custom_emojis(html, account.emojis, options[:autoplay]) if options[:custom_emojify]
  html.html_safe # rubocop:disable Rails/OutputSafety
end
65
# Delegates to Sanitize.fragment.
#
# @param html [String] markup to sanitize
# @param config the Sanitize configuration to apply
# @return whatever Sanitize.fragment returns for the given arguments
def sanitize(html, config)
  Sanitize.fragment(html, config)
end
69
# Renders the spoiler text of a status: the text is entity-encoded and then
# custom emojis are substituted (regardless of options[:custom_emojify]).
#
# @param status the status whose spoiler_text is rendered
# @param options [Hash] only :autoplay is read
# @return [String] HTML-safe markup
def format_spoiler(status, **options)
  escaped = encode(status.spoiler_text)
  encode_custom_emojis(escaped, status.emojis, options[:autoplay]).html_safe # rubocop:disable Rails/OutputSafety
end
75
# Renders the title of a poll option: the title is entity-encoded and then
# custom emojis of the status are substituted (regardless of
# options[:custom_emojify]).
#
# @param status the status supplying the custom emojis
# @param option the poll option whose title is rendered
# @param options [Hash] only :autoplay is read
# @return [String] HTML-safe markup
def format_poll_option(status, option, **options)
  escaped = encode(option.title)
  encode_custom_emojis(escaped, status.emojis, options[:autoplay]).html_safe # rubocop:disable Rails/OutputSafety
end
81
# Renders the display name of an account, falling back to the username when
# the display name is not present.
#
# @param account the account to render
# @param options [Hash] recognised keys are :custom_emojify and :autoplay
# @return [String] HTML-safe markup
def format_display_name(account, **options)
  shown_name = account.display_name.presence || account.username

  html = encode(shown_name)
  html = encode_custom_emojis(html, account.emojis, options[:autoplay]) if options[:custom_emojify]
  html.html_safe # rubocop:disable Rails/OutputSafety
end
87
# Renders one profile field value.
#
# For local accounts the value is entity-encoded and its URLs are linked
# with the :me option set; for other accounts it goes through +reformat+.
#
# @param account the account owning the field
# @param str [String] the raw field value
# @param options [Hash] recognised keys are :custom_emojify and :autoplay
# @return [String] HTML-safe markup
def format_field(account, str, **options)
  html = if account.local?
           encode_and_link_urls(str, me: true)
         else
           reformat(str)
         end

  html = encode_custom_emojis(html, account.emojis, options[:autoplay]) if options[:custom_emojify]
  html.html_safe # rubocop:disable Rails/OutputSafety
end
93
# Entity-encodes +text+, links its URLs, hashtags and mentions, wraps the
# result with +simple_format+ and removes the newlines.
#
# @param text [String] raw text
# @return [String] HTML-safe markup
def linkify(text)
  linked = encode_and_link_urls(text)
  paragraphs = simple_format(linked, {}, sanitize: false)

  paragraphs.delete("\n").html_safe # rubocop:disable Rails/OutputSafety
end
101
102 private
103
# Returns the memoized HTMLEntities coder, creating it on first use.
def html_entities
  return @html_entities if @html_entities

  @html_entities = HTMLEntities.new
end
107
# Encodes +html+ with the shared HTMLEntities coder.
#
# @param html [String] text to encode
# @return the result of HTMLEntities#encode
def encode(html)
  coder = html_entities
  coder.encode(html)
end
111
# Entity-encodes +html+ and replaces every extracted URL, hashtag and
# mention with the markup produced by the matching link_to_* helper.
#
# @param html [String] raw text
# @param accounts [Array, Hash, nil] accounts that mentions may link to; a
#   Hash given here is treated as +options+ instead
# @param options [Hash] passed on to +link_to_url+
# @return [String] the rewritten text, as returned by +rewrite+
def encode_and_link_urls(html, accounts = nil, options = {})
  entities = utf8_friendly_extractor(html, extract_url_without_protocol: false)

  # Callers may pass the options hash in the +accounts+ position.
  accounts, options = nil, accounts if accounts.is_a?(Hash)

  rewrite(html.dup, entities) do |entity|
    next link_to_url(entity, options) if entity[:url]
    next link_to_hashtag(entity) if entity[:hashtag]

    link_to_mention(entity, accounts) if entity[:screen_name]
  end
end
130
# Returns by how much an HTML tag changes the element nesting depth.
#
# Void elements such as <br> or <img ...> never have a closing tag, so they
# leave the depth unchanged even when written without a trailing slash.
#
# @param tag [String] a complete tag, from '<' up to and including '>'
# @return [Integer] -1 for a closing tag, 0 for a self-closing or void
#   element, 1 for any other opening tag
def count_tag_nesting(tag)
  void_element = %r{\A<(?:area|base|br|col|embed|hr|img|input|link|meta|param|source|track|wbr)(?=[\s/>])}i

  if tag[1] == '/'
    -1
  elsif tag[-2] == '/' || tag.match?(void_element)
    0
  else
    1
  end
end
137
# rubocop:disable Metrics/BlockNesting

# Replaces :shortcode: occurrences in +html+ with <img> tags for the given
# custom emojis.
#
# The markup is scanned one character at a time. A ':' inside a tag never
# starts a shortcode, and nothing is replaced while inside a
# <span class="invisible"> element.
#
# @param html [String] markup to scan; the argument itself is not mutated
# @param emojis [Enumerable] objects responding to +shortcode+ and +image+
# @param animate [Boolean] when truthy the original image URL is used as
#   +src+; otherwise the static URL is used and both URLs are exposed
#   through data attributes
# @return [String] the markup with known shortcodes replaced
def encode_custom_emojis(html, emojis, animate = false)
  return html if emojis.empty?

  emoji_map = emojis.each_with_object({}) do |emoji, map|
    map[emoji.shortcode] = [full_asset_url(emoji.image.url), full_asset_url(emoji.image.url(:static))]
  end

  i = -1
  tag_open_index = nil
  inside_shortname = false
  shortname_start_index = -1
  invisible_depth = 0

  while i + 1 < html.size
    i += 1

    if invisible_depth.zero? && inside_shortname && html[i] == ':'
      shortcode = html[shortname_start_index + 1..i - 1]
      emoji = emoji_map[shortcode]

      if emoji
        original_url, static_url = emoji
        replacement = custom_emoji_image_html(shortcode, original_url, static_url, animate)
        html = html[0...shortname_start_index] + replacement + html[i + 1..-1]
        # Resume scanning near the end of the inserted tag.
        i += replacement.size - (shortcode.size + 2) - 1
      else
        # Unknown shortcode: revisit this ':' so it can open the next one.
        i -= 1
      end

      inside_shortname = false
    elsif tag_open_index && html[i] == '>'
      tag = html[tag_open_index..i]
      tag_open_index = nil
      if invisible_depth.positive?
        invisible_depth += count_tag_nesting(tag)
      elsif tag == '<span class="invisible">'
        invisible_depth = 1
      end
    elsif html[i] == '<'
      tag_open_index = i
      inside_shortname = false
    elsif !tag_open_index && html[i] == ':'
      inside_shortname = true
      shortname_start_index = i
    end
  end

  html
end

# Builds the <img> markup for one custom emoji. Every interpolated value,
# including the data-original and data-static URLs, is entity-encoded.
def custom_emoji_image_html(shortcode, original_url, static_url, animate)
  label = ":#{encode(shortcode)}:"

  if animate
    "<img draggable=\"false\" class=\"emojione\" alt=\"#{label}\" title=\"#{label}\" src=\"#{encode(original_url)}\" />"
  else
    "<img draggable=\"false\" class=\"emojione custom-emoji\" alt=\"#{label}\" title=\"#{label}\" src=\"#{encode(static_url)}\" data-original=\"#{encode(original_url)}\" data-static=\"#{encode(static_url)}\" />"
  end
end
# rubocop:enable Metrics/BlockNesting
194
# Rebuilds +text+ with each entity replaced by the value the block returns
# for it; the text between entities is entity-encoded.
#
# Entities are processed in order of their start index. An entity may expose
# its [start, end] pair through an +indices+ method or an :indices key.
#
# @param text the source text (converted with +to_s+)
# @param entities [Array] entities to substitute
# @yieldparam entity one of +entities+
# @yieldreturn the replacement for that entity
# @return [String]
def rewrite(text, entities)
  text = text.to_s
  span_of = ->(entity) { entity.respond_to?(:indices) ? entity.indices : entity[:indices] }

  pieces = []
  cursor = 0

  entities.sort_by { |entity| span_of.call(entity).first }.each do |entity|
    span = span_of.call(entity)
    pieces << encode(text[cursor...span.first])
    pieces << yield(entity)
    cursor = span.last
  end

  # Whatever follows the last entity.
  pieces << encode(text[cursor..-1])

  pieces.flatten.join
end
217
# Characters matching this pattern are never percent-escaped before URL
# extraction.
UNICODE_ESCAPE_BLACKLIST_RE = /\p{Z}|\p{P}/

# Extracts URLs, standard entities and extra URIs from +text+.
#
# URLs are extracted from a copy of the text in which every character with a
# code point above 0xFF (unless it matches UNICODE_ESCAPE_BLACKLIST_RE) is
# replaced by its CGI-escaped form. The indices found in that copy are then
# mapped back to positions in the original text.
#
# @param text [String] the text to scan
# @param options [Hash] passed through to the Extractor calls
# @return the combined entities after Extractor.remove_overlapping_entities
def utf8_friendly_extractor(text, options = {})
  # escaped_offsets[k] is where original character k starts in the escaped copy.
  escaped_offsets = [0]

  escaped = text.each_char.map do |char|
    needs_escape = char.ord > 0xFF && !UNICODE_ESCAPE_BLACKLIST_RE.match?(char)
    piece = needs_escape ? CGI.escape(char) : char

    escaped_offsets << escaped_offsets.last + piece.length
    piece
  end.join

  # Note: I couldn't obtain list_slug with @user/list-name format
  # for mention so this requires additional check
  special = Extractor.extract_urls_with_indices(escaped, options).map do |extract|
    start = escaped_offsets.find_index(extract[:indices].first)
    stop = escaped_offsets.find_index(extract[:indices].last)

    extract.merge(indices: [start, stop], url: text[start..stop - 1])
  end

  standard = Extractor.extract_entities_with_indices(text, options)
  extra = Extractor.extract_extra_uris_with_indices(text, options)

  Extractor.remove_overlapping_entities(special + standard + extra)
end
256
# Builds the anchor markup for a URL entity.
#
# The link opens in a new tab with rel "nofollow noopener noreferrer",
# prefixed with "me" when options[:me] is set. If the URL cannot be parsed
# (Addressable::URI::InvalidURIError or IDN::Idna::IdnaError) the
# entity-encoded URL text is returned instead of a link.
#
# @param entity [Hash] entity carrying the URL under :url
# @param options [Hash] only :me is read
# @return [String]
def link_to_url(entity, options = {})
  url = Addressable::URI.parse(entity[:url])

  rel = 'nofollow noopener noreferrer'
  rel = "me #{rel}" if options[:me]
  html_attrs = { target: '_blank', rel: rel }

  Twitter::Autolink.send(:link_to_text, entity, link_html(entity[:url]), url, html_attrs)
rescue Addressable::URI::InvalidURIError, IDN::Idna::IdnaError
  encode(entity[:url])
end
267
# Builds the markup for a mention entity.
#
# Without +linkable_accounts+ the account is looked up by
# +link_to_account+. Otherwise only an account from the list whose acct
# matches is linked; a mention with no match is rendered as encoded text.
#
# @param entity [Hash] entity carrying the acct under :screen_name
# @param linkable_accounts [Array, nil] accounts that may be linked
# @return [String]
def link_to_mention(entity, linkable_accounts)
  acct = entity[:screen_name]

  if linkable_accounts
    account = linkable_accounts.find { |candidate| TagManager.instance.same_acct?(candidate.acct, acct) }

    if account
      mention_html(account)
    else
      "@#{encode(acct)}"
    end
  else
    link_to_account(acct)
  end
end
276
# Builds the markup for a mention of +acct+ by looking the account up in
# EntityCache. The domain part is dropped when TagManager reports it as
# local. Falls back to the encoded "@acct" text when nothing is found.
#
# @param acct [String] "username" or "username@domain"
# @return [String]
def link_to_account(acct)
  username, domain = acct.split('@')
  domain = nil if TagManager.instance.local_domain?(domain)

  account = EntityCache.instance.mention(username, domain)
  return "@#{encode(acct)}" unless account

  mention_html(account)
end
285
# Builds the markup for a hashtag entity.
#
# @param entity [Hash] entity carrying the tag under :hashtag
# @return [String] the result of +hashtag_html+
def link_to_hashtag(entity)
  tag = entity[:hashtag]
  hashtag_html(tag)
end
289
# Builds the inner markup of a URL link in three spans: an invisible scheme
# prefix, up to 30 visible characters, and an invisible remainder. The
# middle span gets the "ellipsis" class when more than 30 characters follow
# the prefix.
#
# @param url [String] the URL to display
# @return [String]
def link_html(url)
  normalized = Addressable::URI.parse(url).to_s

  prefix = normalized[/\A(https?:\/\/(www\.)?|xmpp:)/].to_s
  after_prefix = normalized[prefix.length..-1]

  visible = normalized[prefix.length, 30]
  hidden_tail = normalized[prefix.length + 30..-1]
  middle_class = after_prefix.length > 30 ? 'ellipsis' : ''

  "<span class=\"invisible\">#{encode(prefix)}</span><span class=\"#{middle_class}\">#{encode(visible)}</span><span class=\"invisible\">#{encode(hidden_tail)}</span>"
end
299
# Builds the anchor markup for a hashtag, linking to +tag_url+.
#
# @param tag [String] the hashtag without the leading '#'
# @return [String]
def hashtag_html(tag)
  href = encode(tag_url(tag))
  label = encode(tag)

  "<a href=\"#{href}\" class=\"mention hashtag\" rel=\"tag\">#<span>#{label}</span></a>"
end
303
# Builds the h-card markup for a mention, linking to the URL that
# ActivityPub::TagManager returns for the account.
#
# @param account the mentioned account
# @return [String]
def mention_html(account)
  profile_url = encode(ActivityPub::TagManager.instance.url_for(account))
  username = encode(account.username)

  "<span class=\"h-card\"><a href=\"#{profile_url}\" class=\"u-url mention\">@<span>#{username}</span></a></span>"
end
307 end
This page took 0.248816 seconds and 4 git commands to generate.