]> cat aescling's git repositories - mastodon.git/blob - lib/mastodon/domains_cli.rb
Optimize some regex matching (#15528)
[mastodon.git] / lib / mastodon / domains_cli.rb
1 # frozen_string_literal: true
2
3 require 'concurrent'
4 require_relative '../../config/boot'
5 require_relative '../../config/environment'
6 require_relative 'cli_helper'
7
module Mastodon
  # Thor command group for domain-level maintenance tasks: purging all
  # accounts of remote domains and crawling known peers for statistics.
  class DomainsCLI < Thor
    include CLIHelper

    # Thor hook: returning true makes a failed command exit with a
    # non-zero status instead of silently succeeding.
    def self.exit_on_failure?
      true
    end

    option :concurrency, type: :numeric, default: 5, aliases: [:c]
    option :verbose, type: :boolean, aliases: [:v]
    option :dry_run, type: :boolean
    option :limited_federation_mode, type: :boolean
    desc 'purge [DOMAIN...]', 'Remove accounts from a DOMAIN without a trace'
    long_desc <<-LONG_DESC
      Remove all accounts from a given DOMAIN without leaving behind any
      records. Unlike a suspension, if the DOMAIN still exists in the wild,
      it means the accounts could return if they are resolved again.

      When the --limited-federation-mode option is given, instead of purging accounts
      from a single domain, all accounts from domains that have not been explicitly allowed
      are removed from the database.
    LONG_DESC
    # Deletes every remote account belonging to the given domains (or, in
    # limited federation mode, to every domain that is not allow-listed),
    # then removes the matching domain blocks and custom emojis.
    #
    # With --dry-run nothing is deleted; the counts are still reported and
    # every message carries a "(DRY RUN)" suffix.
    #
    # @param domains [Array<String>] domains to purge; may be empty only when
    #   --limited-federation-mode is given, otherwise the command exits with 1.
    def purge(*domains)
      dry_run = options[:dry_run] ? ' (DRY RUN)' : ''

      scope = begin
        if options[:limited_federation_mode]
          Account.remote.where.not(domain: DomainAllow.pluck(:domain))
        elsif !domains.empty?
          Account.remote.where(domain: domains)
        else
          say('No domain(s) given', :red)
          exit(1)
        end
      end

      processed, = parallelize_with_progress(scope) do |account|
        DeleteAccountService.new.call(account, reserve_username: false, skip_side_effects: true) unless options[:dry_run]
      end

      # NOTE(review): the clean-up below is keyed on the explicit `domains`
      # list only, so in limited federation mode without explicit domains it
      # matches nothing. Confirm whether that is intended.
      DomainBlock.where(domain: domains).destroy_all unless options[:dry_run]

      say("Removed #{processed} accounts#{dry_run}", :green)

      custom_emojis       = CustomEmoji.where(domain: domains)
      custom_emojis_count = custom_emojis.count
      custom_emojis.destroy_all unless options[:dry_run]

      Instance.refresh unless options[:dry_run]

      # Carry the dry-run suffix here as well, so a dry run never claims that
      # emojis were actually removed.
      say("Removed #{custom_emojis_count} custom emojis#{dry_run}", :green)
    end

    option :concurrency, type: :numeric, default: 50, aliases: [:c]
    option :format, type: :string, default: 'summary', aliases: [:f]
    option :exclude_suspended, type: :boolean, default: false, aliases: [:x]
    desc 'crawl [START]', 'Crawl all known peers, optionally beginning at START'
    long_desc <<-LONG_DESC
      Crawl the fediverse by using the Mastodon REST API endpoints that expose
      all known peers, and collect statistics from those peers, as long as those
      peers support those API endpoints. When no START is given, the command uses
      this server's own database of known peers to seed the crawl.

      The --concurrency (-c) option controls the number of threads performing HTTP
      requests at the same time. More threads means the crawl may complete faster.

      The --format (-f) option controls how the data is displayed at the end. By
      default (`summary`), a summary of the statistics is returned. The other options
      are `domains`, which returns a newline-delimited list of all discovered peers,
      and `json`, which dumps all the aggregated data raw.

      The --exclude-suspended (-x) option means that domains that are suspended
      instance-wide do not appear in the output and are not included in summaries.
      This also excludes subdomains of any of those domains.
    LONG_DESC
    # Crawls peers breadth-first through a thread pool, collecting each
    # domain's /api/v1/instance and /api/v1/instance/activity payloads and
    # enqueueing every peer listed by /api/v1/instance/peers.
    #
    # The result is printed from the `ensure` clause, so whatever was
    # collected is still reported when the crawl is interrupted.
    #
    # @param start [String, nil] domain to seed the crawl with; when nil the
    #   domains of all known instances are used as the seed.
    def crawl(start = nil)
      stats     = Concurrent::Hash.new
      processed = Concurrent::AtomicFixnum.new(0)
      failed    = Concurrent::AtomicFixnum.new(0)
      start_at  = Time.now.to_f
      seed      = start ? [start] : Instance.pluck(:domain)
      # Only hit the database for the block list when it is actually needed.
      # nil means "nothing to exclude".
      blocked_domains = options[:exclude_suspended] ? suspended_domains_pattern : nil
      progress        = create_progress_bar

      pool = Concurrent::ThreadPoolExecutor.new(min_threads: 0, max_threads: options[:concurrency], idletime: 10, auto_terminate: true, max_queue: 0)

      work_unit = ->(domain) do
        next if stats.key?(domain)
        next if blocked_domains && domain.match?(blocked_domains)

        # Mark the domain as seen before any request is made, so that other
        # workers do not schedule it again. The nil placeholder is dropped by
        # `compact!` when the results are rendered.
        stats[domain] = nil

        begin
          Request.new(:get, "https://#{domain}/api/v1/instance").perform do |res|
            next unless res.code == 200
            stats[domain] = Oj.load(res.to_s)
          end

          Request.new(:get, "https://#{domain}/api/v1/instance/peers").perform do |res|
            next unless res.code == 200

            Oj.load(res.to_s).reject { |peer| stats.key?(peer) }.each do |peer|
              pool.post(peer, &work_unit)
            end
          end

          Request.new(:get, "https://#{domain}/api/v1/instance/activity").perform do |res|
            next unless res.code == 200
            # If the instance request above stored nothing, this assignment
            # raises and the domain is counted as failed by the rescue below.
            stats[domain]['activity'] = Oj.load(res.to_s)
          end
        rescue StandardError
          failed.increment
        ensure
          processed.increment
          progress.increment unless progress.finished?
        end
      end

      seed.each do |domain|
        pool.post(domain, &work_unit)
      end

      # Give the workers a head start, then poll until the queue has drained.
      sleep 20
      sleep 20 until pool.queue_length.zero?

      pool.shutdown
      pool.wait_for_termination(20)
    ensure
      # Safe navigation: if an error was raised before these locals were
      # assigned they are nil, and calling them would mask the real error.
      progress&.finish
      pool&.shutdown

      case options[:format]
      when 'summary'
        stats_to_summary(stats, processed, failed, start_at)
      when 'domains'
        stats_to_domains(stats)
      when 'json'
        stats_to_json(stats)
      end
    end

    private

    # Builds the pattern used by --exclude-suspended.
    #
    # Each blocked domain is escaped and the alternation is grouped, so the
    # anchors apply to every alternative: a host matches only when it equals
    # a blocked domain or is a subdomain of one.
    #
    # @return [Regexp, nil] nil when there is no matching domain block, so
    #   that an empty alternation cannot match every host.
    def suspended_domains_pattern
      # NOTE(review): severity 1 is presumably the "suspend" level of
      # DomainBlock; confirm against the model's enum.
      suspended = DomainBlock.where(severity: 1).pluck(:domain)
      return if suspended.empty?

      alternatives = suspended.map { |blocked| Regexp.escape(blocked) }.join('|')
      /(?:\A|\.)(?:#{alternatives})\z/
    end

    # Prints aggregate figures for the crawl.
    #
    # @param stats [Hash] domain => parsed instance payload (or nil);
    #   nil entries are removed in place.
    # @param processed [#value] counter of visited domains
    # @param failed [#value] counter of domains whose crawl raised
    # @param start_at [Float] crawl start time, as produced by `Time.now.to_f`
    def stats_to_summary(stats, processed, failed, start_at)
      stats.compact!

      total_domains = stats.size
      total_users   = stats.values.sum do |info|
        info.is_a?(Hash) && info['stats'].is_a?(Hash) ? info['stats']['user_count'].to_i : 0
      end
      total_active  = weekly_activity_total(stats, 'logins')
      total_joined  = weekly_activity_total(stats, 'registrations')

      say("Visited #{processed.value} domains, #{failed.value} failed (#{(Time.now.to_f - start_at).round}s elapsed)", :green)
      say("Total servers: #{total_domains}", :green)
      say("Total registered: #{total_users}", :green)
      say("Total active last week: #{total_active}", :green)
      say("Total joined last week: #{total_joined}", :green)
    end

    # Sums one field of the second activity entry across all domains.
    # Domains whose activity payload is not an array of more than two
    # entries, or whose second entry is not a hash, contribute 0.
    #
    # @param stats [Hash] domain => parsed instance payload
    # @param field [String] key to read from the activity entry
    # @return [Integer]
    def weekly_activity_total(stats, field)
      stats.values.sum do |info|
        activity = info.is_a?(Hash) ? info['activity'] : nil
        next 0 unless activity.is_a?(Array) && activity.size > 2 && activity[1].is_a?(Hash)

        # Index 1 is reported as "last week" by the summary; presumably
        # index 0 is the still-running current week.
        activity[1][field].to_i
      end
    end

    # Prints every visited domain, one per line.
    def stats_to_domains(stats)
      say(stats.keys.join("\n"))
    end

    # Prints the collected payloads as JSON, after dropping in place the
    # domains for which nothing was stored.
    def stats_to_json(stats)
      stats.compact!
      say(Oj.dump(stats))
    end
  end
end
This page took 0.130541 seconds and 5 git commands to generate.