]> cat aescling's git repositories - mastodon.git/blob - app/lib/formatter.rb
Fix URL linkifier grabbing full-width spaces and quotations (#9997)
[mastodon.git] / app / lib / formatter.rb
1 # frozen_string_literal: true
2
3 require 'singleton'
4 require_relative './sanitize_config'
5
6 class Formatter
7 include Singleton
8 include RoutingHelper
9
10 include ActionView::Helpers::TextHelper
11
# Renders the text of a status as HTML marked html_safe.
#
# A reblog is swapped for `status.proper` and the reblogged account's acct
# is remembered so that local text can be prefixed with "RT @acct".
# Non-local statuses are passed through `reformat`; local statuses are
# encoded and linkified as plain text and wrapped with `simple_format`.
#
# status  - object responding to reblog?, proper, text, local?, emojis,
#           active_mentions and account.
# options - :custom_emojify turns on custom emoji replacement,
#           :autoplay is forwarded as the emoji `animate` flag.
#
# Returns '' when the status text is blank.
def format(status, **options)
  reblogged_acct = false

  if status.reblog?
    reblogged_acct = status.reblog.account.acct
    status = status.proper
  end

  raw_content = status.text
  return '' if raw_content.blank?

  # Emoji replacement is opt-in; evaluated lazily so `status.emojis` is only
  # read when it is actually needed.
  emojify = lambda do |markup|
    next markup unless options[:custom_emojify]

    encode_custom_emojis(markup, status.emojis, options[:autoplay])
  end

  return emojify.call(reformat(raw_content)).html_safe unless status.local? # rubocop:disable Rails/OutputSafety

  linkable_accounts = status.active_mentions.map(&:account) << status.account

  source = reblogged_acct ? "RT @#{reblogged_acct} #{raw_content}" : raw_content
  html = emojify.call(encode_and_link_urls(source, linkable_accounts))

  # simple_format adds the <p>/<br /> markup; the raw newlines it leaves
  # behind are then removed.
  simple_format(html, {}, sanitize: false).delete("\n").html_safe # rubocop:disable Rails/OutputSafety
end
42
# Sanitizes an HTML fragment using the Sanitize::Config::MASTODON_STRICT
# configuration and returns the result.
def reformat(html)
  strict_config = Sanitize::Config::MASTODON_STRICT
  sanitize(html, strict_config)
end
46
# Returns a plain-text rendering of a status.
#
# Local statuses are returned as-is (`status.text`). For non-local statuses
# a newline is inserted after every run of line-break / paragraph-end tags
# before all tags are stripped, so that visual line breaks survive.
#
# Any spelling of the break tag is recognised (`<br>`, `<br/>`, `<br />`,
# upper-case variants), not just the two literal forms, so breaks are not
# lost regardless of how the HTML was serialized.
def plaintext(status)
  return status.text if status.local?

  with_newlines = status.text.gsub(%r{(?:<br\s*/?>|</p>)+}i) { |breaks| "#{breaks}\n" }
  strip_tags(with_newlines)
end
53
# Renders an account note as HTML marked html_safe.
#
# Notes of local accounts are linkified as plain text; notes of other
# accounts are passed through `reformat`. Custom emojis are replaced only
# when options[:custom_emojify] is set (options[:autoplay] is forwarded as
# the emoji `animate` flag).
def simplified_format(account, **options)
  html = if account.local?
           linkify(account.note)
         else
           reformat(account.note)
         end

  if options[:custom_emojify]
    html = encode_custom_emojis(html, account.emojis, options[:autoplay])
  end

  html.html_safe # rubocop:disable Rails/OutputSafety
end
59
# Runs `html` through Sanitize.fragment with the given Sanitize config and
# returns whatever Sanitize produces.
def sanitize(html, config)
  Sanitize.fragment(html, config)
end
63
# Renders a status' spoiler text as HTML marked html_safe: the text is
# HTML-encoded and custom emojis are replaced.
#
# Note that emoji replacement here does not consult
# options[:custom_emojify]; only options[:autoplay] is read (forwarded as
# the emoji `animate` flag).
def format_spoiler(status, **options)
  escaped = encode(status.spoiler_text)
  encode_custom_emojis(escaped, status.emojis, options[:autoplay]).html_safe # rubocop:disable Rails/OutputSafety
end
69
# Renders an account's display name as HTML marked html_safe.
#
# Falls back to the username when the display name is not present. The
# name is HTML-encoded; custom emojis are replaced only when
# options[:custom_emojify] is set.
def format_display_name(account, **options)
  shown_name = account.display_name.presence || account.username
  html = encode(shown_name)

  if options[:custom_emojify]
    html = encode_custom_emojis(html, account.emojis, options[:autoplay])
  end

  html.html_safe # rubocop:disable Rails/OutputSafety
end
75
# Renders one profile field value as HTML marked html_safe.
#
# Values of non-local accounts are passed through `reformat`. Values of
# local accounts are encoded and linkified with the `me: true` link option,
# then optionally emojified (options[:custom_emojify] / options[:autoplay]).
def format_field(account, str, **options)
  if account.local?
    html = encode_and_link_urls(str, me: true)
    html = encode_custom_emojis(html, account.emojis, options[:autoplay]) if options[:custom_emojify]
    html.html_safe # rubocop:disable Rails/OutputSafety
  else
    reformat(str).html_safe # rubocop:disable Rails/OutputSafety
  end
end
82
# Turns plain text into HTML marked html_safe: entities are encoded and
# linked, the result is wrapped by simple_format (its own sanitizing is
# switched off) and the remaining raw newlines are removed.
def linkify(text)
  linked = encode_and_link_urls(text)
  paragraphs = simple_format(linked, {}, sanitize: false)

  paragraphs.delete("\n").html_safe # rubocop:disable Rails/OutputSafety
end
90
91 private
92
# Memoized HTMLEntities coder; created on first use and reused afterwards.
def html_entities
  return @html_entities if @html_entities

  @html_entities = HTMLEntities.new
end
96
# HTML-encodes the given text with the memoized HTMLEntities coder.
def encode(html)
  coder = html_entities
  coder.encode(html)
end
100
# HTML-encodes `html` and replaces the URLs, hashtags and mentions found in
# it with links.
#
# html     - the plain text to process (a copy is handed to `rewrite`).
# accounts - optional list of accounts mentions may resolve to. For
#            convenience a Hash may be passed in this position instead, in
#            which case it is taken as `options` and accounts becomes nil.
# options  - link options forwarded to `link_to_url`.
#
# Entities that carry none of :url, :hashtag or :screen_name make the block
# return nil.
def encode_and_link_urls(html, accounts = nil, options = {})
  entities = utf8_friendly_extractor(html, extract_url_without_protocol: false)

  # Support the `encode_and_link_urls(text, me: true)` calling form.
  options, accounts = accounts, nil if accounts.is_a?(Hash)

  rewrite(html.dup, entities) do |entity|
    next link_to_url(entity, options) if entity[:url]
    next link_to_hashtag(entity) if entity[:hashtag]

    link_to_mention(entity, accounts) if entity[:screen_name]
  end
end
119
# Returns how a single tag string (from '<' to '>') changes nesting depth:
# -1 when the second character is '/' (closing tag), 0 when the character
# before the final one is '/' (self-closing tag), +1 otherwise.
def count_tag_nesting(tag)
  return -1 if tag[1] == '/'
  return 0 if tag[-2] == '/'

  1
end
126
# Replaces `:shortcode:` sequences in `html` with <img> tags for the given
# custom emojis and returns the resulting markup.
#
# html    - markup to scan; returned untouched when `emojis` is empty.
# emojis  - collection of objects responding to `shortcode` and `image`.
# animate - when truthy the default image URL is used, otherwise the
#           :static style URL.
#
# The scan is a single pass over the characters:
# * text inside a tag (between '<' and '>') never starts a shortcode;
# * a '<' aborts a shortcode that is in progress;
# * nothing is replaced while inside a `<span class="invisible">` element
#   (nesting is tracked with count_tag_nesting).
def encode_custom_emojis(html, emojis, animate = false)
  return html if emojis.empty?

  image_urls = emojis.each_with_object({}) do |custom_emoji, urls|
    image_url = animate ? custom_emoji.image.url : custom_emoji.image.url(:static)
    urls[custom_emoji.shortcode] = full_asset_url(image_url)
  end

  pos = -1
  tag_start = nil
  in_shortcode = false
  shortcode_start = -1
  invisible_depth = 0

  while pos + 1 < html.size
    pos += 1
    char = html[pos]

    if invisible_depth.zero? && in_shortcode && char == ':'
      # NOTE(review): slices below deliberately stay in single-argument range
      # form, exactly as before; html may be an html_safe buffer here
      # (simplified_format passes linkify output) — confirm before changing.
      shortcode = html[(shortcode_start + 1)..(pos - 1)]
      emoji_url = image_urls[shortcode]

      if emoji_url
        label = encode(shortcode)
        replacement = "<img draggable=\"false\" class=\"emojione\" alt=\":#{label}:\" title=\":#{label}:\" src=\"#{encode(emoji_url)}\" />"
        leading = shortcode_start.positive? ? html[0..(shortcode_start - 1)] : ''
        html = leading + replacement + html[(pos + 1)..-1]
        # Continue scanning near the end of the inserted tag.
        pos += replacement.size - (shortcode.size + 2) - 1
      else
        # Unknown shortcode: step back so this ':' is looked at again on the
        # next iteration and can open a new shortcode.
        pos -= 1
      end

      in_shortcode = false
    elsif tag_start && char == '>'
      tag = html[tag_start..pos]
      tag_start = nil

      if invisible_depth.positive?
        invisible_depth += count_tag_nesting(tag)
      elsif tag == '<span class="invisible">'
        invisible_depth = 1
      end
    elsif char == '<'
      tag_start = pos
      in_shortcode = false
    elsif !tag_start && char == ':'
      in_shortcode = true
      shortcode_start = pos
    end
  end

  html
end
178
# Rebuilds `text` with every entity replaced by the value the given block
# returns for it, HTML-encoding all text between entities.
#
# text     - the source text; entity indices are character offsets into
#            `text.to_s.to_char_a`.
# entities - entities exposing their [start, end) offsets either through an
#            `indices` method or an `:indices` key; they are processed in
#            order of start offset.
#
# Yields each entity and expects replacement markup back. When the block
# returns nil (an entity kind it does not handle), the entity's original
# text is kept, HTML-encoded, instead of being silently removed from the
# output.
#
# Returns the rewritten String.
def rewrite(text, entities)
  chars = text.to_s.to_char_a
  indices_of = ->(entity) { entity.respond_to?(:indices) ? entity.indices : entity[:indices] }

  result = []
  cursor = 0

  entities.sort_by { |entity| indices_of.call(entity).first }.each do |entity|
    span = indices_of.call(entity)

    result << encode(chars[cursor...span.first].join)

    replacement = yield(entity)
    # Never lose user text just because the caller has no markup for it.
    replacement = encode(chars[span.first...span.last].join) if replacement.nil?
    result << replacement

    cursor = span.last
  end

  result << encode(chars[cursor..-1].join)

  result.flatten.join
end
201
# Characters that are never percent-escaped before URL extraction: Unicode
# separators (\p{Z}) and punctuation (\p{P}).
UNICODE_ESCAPE_BLACKLIST_RE = /\p{Z}|\p{P}/

# Extracts entities (with character indices into `text`) in a way that lets
# URLs contain non-Latin-1 characters.
#
# URLs are extracted from a copy of the text in which every character above
# U+00FF -- except separators and punctuation -- is percent-escaped; the
# resulting indices are mapped back to offsets in the original text and the
# entity value is re-read from the original text. All other entities come
# from running the extractor on the original text. Overlapping entities are
# removed from the combined list.
#
# text    - the String to scan.
# options - options Hash passed through to the Extractor calls.
def utf8_friendly_extractor(text, options = {})
  # escaped_offsets[i] is the offset in the escaped text at which original
  # character i starts (with one extra trailing entry for the end).
  escaped_offsets = [0]

  escaped_pieces = text.chars.map do |char|
    needs_escape = char.ord > 0xFF && !UNICODE_ESCAPE_BLACKLIST_RE.match(char)
    piece = needs_escape ? CGI.escape(char) : char

    escaped_offsets << escaped_offsets.last + piece.length
    piece
  end
  escaped = escaped_pieces.join

  prefixed_kinds = [:hashtag, :screen_name, :cashtag]
  entity_kinds = [:url, :hashtag, :screen_name, :cashtag]

  # Note: I couldn't obtain list_slug with @user/list-name format
  # for mention so this requires additional check
  special = Extractor.extract_urls_with_indices(escaped, options).map do |extract|
    # exactly one of :url, :hashtag, :screen_name, :cashtag keys is present
    key = (extract.keys & entity_kinds).first

    first_index = escaped_offsets.find_index(extract[:indices].first)
    last_index = escaped_offsets.find_index(extract[:indices].last)

    # account for #, @ or $
    value_start = prefixed_kinds.include?(key) ? first_index + 1 : first_index
    remapped = { :indices => [first_index, last_index], key => text[value_start..(last_index - 1)] }

    extract.merge(remapped)
  end

  standard = Extractor.extract_entities_with_indices(text, options)

  Extractor.remove_overlapping_entities(special + standard)
end
248
# Builds the anchor markup for a URL entity.
#
# entity  - entity Hash with a :url value.
# options - when options[:me] is set, "me" is prepended to the link's rel
#           attribute.
#
# If the URL cannot be parsed (Addressable::URI::InvalidURIError or
# IDN::Idna::IdnaError) the HTML-encoded URL text is returned instead of a
# link.
def link_to_url(entity, options = {})
  target = entity[:url]
  parsed = Addressable::URI.parse(target)

  rel = 'nofollow noopener'
  rel = "me #{rel}" if options[:me]
  html_attrs = { target: '_blank', rel: rel }

  Twitter::Autolink.send(:link_to_text, entity, link_html(target), parsed, html_attrs)
rescue Addressable::URI::InvalidURIError, IDN::Idna::IdnaError
  encode(entity[:url])
end
259
# Builds the markup for a mention entity.
#
# Without a list of linkable accounts the mention is resolved through
# link_to_account. With a list, the mention is linked only when one of the
# listed accounts has the same acct (per TagManager#same_acct?); otherwise
# the encoded "@acct" text is returned.
def link_to_mention(entity, linkable_accounts)
  acct = entity[:screen_name]

  if linkable_accounts
    matched = linkable_accounts.find { |candidate| TagManager.instance.same_acct?(candidate.acct, acct) }
    matched ? mention_html(matched) : "@#{encode(acct)}"
  else
    link_to_account(acct)
  end
end
268
# Resolves "user" / "user@domain" through EntityCache and returns the
# mention markup for the account found, or the encoded "@acct" text when
# there is none. A domain that TagManager reports as local is looked up as
# nil.
def link_to_account(acct)
  name, host = acct.split('@')
  host = nil if TagManager.instance.local_domain?(host)

  resolved = EntityCache.instance.mention(name, host)
  return mention_html(resolved) if resolved

  "@#{encode(acct)}"
end
277
# Builds the link markup for a hashtag entity from its :hashtag value.
def link_to_hashtag(entity)
  tag_name = entity[:hashtag]
  hashtag_html(tag_name)
end
281
# Builds the inner markup of a URL link as three spans: an invisible
# scheme/"www." prefix, up to 30 visible characters, and an invisible
# remainder. The visible span gets the "ellipsis" class only when more than
# 30 characters follow the prefix.
def link_html(url)
  normalized = Addressable::URI.parse(url).to_s
  prefix = normalized[/\Ahttps?:\/\/(www\.)?/].to_s

  remainder = normalized[prefix.length..-1]
  visible = remainder[0, 30]
  hidden_tail = remainder[30..-1] # nil when fewer than 30 characters remain
  visible_class = remainder.length > 30 ? 'ellipsis' : ''

  "<span class=\"invisible\">#{encode(prefix)}</span><span class=\"#{visible_class}\">#{encode(visible)}</span><span class=\"invisible\">#{encode(hidden_tail)}</span>"
end
291
# Builds the anchor markup for a hashtag. The link target is tag_url for the
# downcased tag; the label keeps the tag as written. Both are HTML-encoded.
def hashtag_html(tag)
  href = encode(tag_url(tag.downcase))
  label = encode(tag)

  "<a href=\"#{href}\" class=\"mention hashtag\" rel=\"tag\">#<span>#{label}</span></a>"
end
295
# Builds the h-card markup for a mention of `account`: an anchor pointing at
# TagManager's URL for the account, labelled with the account's username.
# Both values are HTML-encoded.
def mention_html(account)
  href = encode(TagManager.instance.url_for(account))
  label = encode(account.username)

  "<span class=\"h-card\"><a href=\"#{href}\" class=\"u-url mention\">@<span>#{label}</span></a></span>"
end
299 end
This page took 0.276174 seconds and 5 git commands to generate.