]> cat aescling's git repositories - mastodon.git/blob - lib/mastodon/domains_cli.rb
Address vulnerability from GHSA-3fjr-858r-92rw
[mastodon.git] / lib / mastodon / domains_cli.rb
1 # frozen_string_literal: true
2
3 require 'concurrent'
4 require_relative '../../config/boot'
5 require_relative '../../config/environment'
6 require_relative 'cli_helper'
7
8 module Mastodon
9 class DomainsCLI < Thor
10 include CLIHelper
11
class << self
  # Tells the CLI framework to exit with a failure status when a command
  # fails.
  #
  # @return [Boolean] always true
  def exit_on_failure?
    true
  end
end
15
# Command-line options and help text for the `purge` command that follows.
option :concurrency, type: :numeric, default: 5, aliases: %i[c]
option :verbose, type: :boolean, aliases: %i[v]
option :dry_run, type: :boolean
option :limited_federation_mode, type: :boolean
option :by_uri, type: :boolean
desc 'purge [DOMAIN...]', 'Remove accounts from a DOMAIN without a trace'
long_desc <<-LONG_DESC
  Remove all accounts from a given DOMAIN without leaving behind any
  records. Unlike a suspension, if the DOMAIN still exists in the wild,
  it means the accounts could return if they are resolved again.

  When the --limited-federation-mode option is given, instead of purging accounts
  from a single domain, all accounts from domains that have not been explicitly allowed
  are removed from the database.

  When the --by-uri option is given, DOMAIN is used to match the domain part of actor
  URIs rather than the domain part of the webfinger handle. For instance, an account
  that has the handle `foo@bar.com` but whose profile is at the URL
  `https://mastodon-bar.com/users/foo`, would be purged by either
  `tootctl domains purge bar.com` or `tootctl domains purge --by-uri mastodon-bar.com`.
LONG_DESC
# Delete remote accounts, then the domain blocks and custom emojis recorded
# for the given DOMAINs, and print how many accounts and emojis were handled.
#
# Which accounts are selected:
# * --limited-federation-mode: every remote account whose domain is not
#   listed in DomainAllow (the DOMAIN arguments are not used for accounts).
# * --by-uri: remote accounts whose `uri` starts with `https://DOMAIN/`.
# * otherwise: remote accounts whose `domain` is one of the DOMAINs.
#
# With --dry-run no record is deleted and the instance list is not
# refreshed; both summary lines are suffixed with " (DRY RUN)".
#
# NOTE(review): domain blocks and custom emojis are always looked up by the
# DOMAIN arguments, so with --limited-federation-mode and no DOMAINs those
# two lookups run against an empty list. Confirm that this is intended.
#
# @param domains [Array<String>] domains given on the command line
# @return [void] exits with status 1 when no domain is given and
#   --limited-federation-mode is off
def purge(*domains)
  dry_run = options[:dry_run] ? ' (DRY RUN)' : ''

  scope =
    if options[:limited_federation_mode]
      Account.remote.where.not(domain: DomainAllow.pluck(:domain))
    elsif domains.empty?
      say('No domain(s) given', :red)
      exit(1)
    elsif options[:by_uri]
      domains.map do |domain|
        # Escape LIKE wildcards (`%`, `_`) and backslashes in the user-supplied
        # domain so that e.g. `foo_bar.example` cannot match `fooXbar.example`
        # and wipe accounts from an unrelated host. Only the trailing `/%` is
        # meant to act as a wildcard.
        prefix = Account.sanitize_sql_like("https://#{domain}")
        Account.remote.where(Account.arel_table[:uri].matches("#{prefix}/%", false, true))
      end.reduce(:or)
    else
      Account.remote.where(domain: domains)
    end

  processed, = parallelize_with_progress(scope) do |account|
    DeleteAccountService.new.call(account, reserve_username: false, skip_side_effects: true) unless options[:dry_run]
  end

  DomainBlock.where(domain: domains).destroy_all unless options[:dry_run]

  say("Removed #{processed} accounts#{dry_run}", :green)

  custom_emojis = CustomEmoji.where(domain: domains)
  # Count before destroying so the figure is available in both modes.
  custom_emojis_count = custom_emojis.count
  custom_emojis.destroy_all unless options[:dry_run]

  Instance.refresh unless options[:dry_run]

  # Carry the dry-run marker here too, so a dry run never claims that emojis
  # were actually removed.
  say("Removed #{custom_emojis_count} custom emojis#{dry_run}", :green)
end
71
# Command-line options and help text for the `crawl` command that follows.
option :concurrency, type: :numeric, default: 50, aliases: %i[c]
option :format, type: :string, default: 'summary', aliases: %i[f]
option :exclude_suspended, type: :boolean, default: false, aliases: %i[x]
desc 'crawl [START]', 'Crawl all known peers, optionally beginning at START'
long_desc <<-LONG_DESC
  Crawl the fediverse by using the Mastodon REST API endpoints that expose
  all known peers, and collect statistics from those peers, as long as those
  peers support those API endpoints. When no START is given, the command uses
  this server's own database of known peers to seed the crawl.

  The --concurrency (-c) option controls the number of threads performing HTTP
  requests at the same time. More threads means the crawl may complete faster.

  The --format (-f) option controls how the data is displayed at the end. By
  default (`summary`), a summary of the statistics is returned. The other options
  are `domains`, which returns a newline-delimited list of all discovered peers,
  and `json`, which dumps all the aggregated data raw.

  The --exclude-suspended (-x) option means that domains that are suspended
  instance-wide do not appear in the output and are not included in summaries.
  This also excludes subdomains of any of those domains.
LONG_DESC
# Crawl peers starting from START (or from every domain in Instance when no
# START is given), collecting each peer's instance info and activity, then
# print the result in the format selected by --format.
#
# Each visited domain is recorded in `stats` (nil until its instance info has
# been fetched successfully). Peers returned by a domain are queued on the
# same thread pool, so the crawl fans out as it goes.
#
# The output step lives in the method-level `ensure`, so whatever has been
# collected is still printed if the crawl is cut short by an exception.
#
# @param start [String, nil] domain to seed the crawl with
# @return [void]
def crawl(start = nil)
  stats     = Concurrent::Hash.new
  processed = Concurrent::AtomicFixnum.new(0)
  failed    = Concurrent::AtomicFixnum.new(0)
  start_at  = Time.now.to_f
  seed      = start ? [start] : Instance.pluck(:domain)

  # Matches a blocked domain exactly, or any subdomain of one. The domains are
  # escaped (Regexp.union does this for strings), the alternation is grouped so
  # both anchors apply to every alternative, and an empty block list yields a
  # pattern that never matches instead of one that matches everything.
  # NOTE(review): severity 1 is presumably "suspend", going by the
  # --exclude-suspended help text; confirm against the DomainBlock model.
  suspended_domains = DomainBlock.where(severity: 1).pluck(:domain)
  blocked_domains   = /(?:\A|\.)(?:#{Regexp.union(suspended_domains).source})\z/

  progress = create_progress_bar

  pool = Concurrent::ThreadPoolExecutor.new(min_threads: 0, max_threads: options[:concurrency], idletime: 10, auto_terminate: true, max_queue: 0)

  work_unit = ->(domain) do
    next if stats.key?(domain)
    next if options[:exclude_suspended] && domain.match?(blocked_domains)

    # Mark the domain as seen before any request is made.
    stats[domain] = nil

    begin
      Request.new(:get, "https://#{domain}/api/v1/instance").perform do |res|
        next unless res.code == 200
        stats[domain] = Oj.load(res.to_s)
      end

      Request.new(:get, "https://#{domain}/api/v1/instance/peers").perform do |res|
        next unless res.code == 200

        Oj.load(res.to_s).reject { |peer| stats.key?(peer) }.each do |peer|
          pool.post(peer, &work_unit)
        end
      end

      Request.new(:get, "https://#{domain}/api/v1/instance/activity").perform do |res|
        next unless res.code == 200
        # If the instance request above did not succeed, stats[domain] is
        # still nil and this assignment raises; the rescue below then counts
        # the domain as failed.
        stats[domain]['activity'] = Oj.load(res.to_s)
      end
    rescue StandardError
      failed.increment
    ensure
      processed.increment
      progress.increment unless progress.finished?
    end
  end

  seed.each do |domain|
    pool.post(domain, &work_unit)
  end

  # Wait 20 seconds up front, then poll every 20 seconds until the pool's
  # queue is empty.
  sleep 20
  sleep 20 until pool.queue_length.zero?

  pool.shutdown
  pool.wait_for_termination(20)
ensure
  # Either may still be nil if setup raised before it was assigned; a nil
  # dereference here would mask the original error.
  progress&.finish
  pool&.shutdown

  case options[:format]
  when 'summary'
    stats_to_summary(stats, processed, failed, start_at)
  when 'domains'
    stats_to_domains(stats)
  when 'json'
    stats_to_json(stats)
  end
end
159
160 private
161
# Print a five-line summary of the crawl.
#
# Entries whose value is nil are removed from `stats` in place first.
# User counts come from each entry's `stats.user_count`; logins and
# registrations come from the second element of each entry's `activity`
# array (labelled "last week" in the output), and only when that array has
# more than two elements. Entries that do not have the expected shape
# contribute nothing to the totals but still count as servers.
#
# @param stats [Hash] crawl results keyed by domain; mutated by compact!
# @param processed [#value] counter of visited domains
# @param failed [#value] counter of failed domains
# @param start_at [Float] crawl start time, in seconds as from Time#to_f
# @return [void]
def stats_to_summary(stats, processed, failed, start_at)
  stats.compact!

  entries = stats.values

  total_users = entries.sum do |entry|
    entry.is_a?(Hash) && entry['stats'].is_a?(Hash) ? entry['stats']['user_count'].to_i : 0
  end

  # One row per entry that carries usable activity data.
  last_week_rows = entries.map do |entry|
    next unless entry.is_a?(Hash)

    activity = entry['activity']
    next unless activity.is_a?(Array) && activity.size > 2

    activity[1] if activity[1].is_a?(Hash)
  end.compact

  total_active = last_week_rows.sum { |row| row['logins'].to_i }
  total_joined = last_week_rows.sum { |row| row['registrations'].to_i }
  elapsed      = (Time.now.to_f - start_at).round

  say("Visited #{processed.value} domains, #{failed.value} failed (#{elapsed}s elapsed)", :green)
  say("Total servers: #{stats.size}", :green)
  say("Total registered: #{total_users}", :green)
  say("Total active last week: #{total_active}", :green)
  say("Total joined last week: #{total_joined}", :green)
end
176
# Print every key of `stats`, one per line.
#
# @param stats [Hash] crawl results keyed by domain
# @return [void]
def stats_to_domains(stats)
  listing = stats.keys.join("\n")
  say(listing)
end
180
# Remove nil-valued entries from `stats` in place, then print the remaining
# hash serialized with Oj.
#
# @param stats [Hash] crawl results keyed by domain; mutated
# @return [void]
def stats_to_json(stats)
  stats.reject! { |_domain, data| data.nil? }
  payload = Oj.dump(stats)
  say(payload)
end
185 end
186 end
This page took 0.215331 seconds and 5 git commands to generate.