lib/mastodon/migration_helpers.rb

   1 # frozen_string_literal: true
   2
   3 # This file is copied almost entirely from GitLab, which has done a large
   4 # amount of work to ensure that migrations can happen with minimal downtime.
   5 # Many thanks to those engineers.
   6
   7 # Changes have been made to remove dependencies on other GitLab files and to
   8 # shorten temporary column names.
   9
  10 # Documentation on using these functions (and why one might do so):
  11 # https://gitlab.com/gitlab-org/gitlab-ce/blob/master/doc/development/what_requires_downtime.md
  12
  13 # The file itself:
  14 # https://gitlab.com/gitlab-org/gitlab-ce/blob/master/lib/gitlab/database/migration_helpers.rb
  15
  16 # It is licensed as follows:
  17
  18 # Copyright (c) 2011-2017 GitLab B.V.
  19
  20 # Permission is hereby granted, free of charge, to any person obtaining a copy
  21 # of this software and associated documentation files (the "Software"), to deal
  22 # in the Software without restriction, including without limitation the rights
  23 # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  24 # copies of the Software, and to permit persons to whom the Software is
  25 # furnished to do so, subject to the following conditions:
  26
  27 # The above copyright notice and this permission notice shall be included in
  28 # all copies or substantial portions of the Software.
  29
  30 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  31 # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  32 # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  33 # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  34 # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  35 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  36 # THE SOFTWARE.
  37
  38 # This is bad form, but there are enough differences that it's impractical to do
  39 # otherwise:
  40 # rubocop:disable all
  41
  42 module Mastodon
  43   module MigrationHelpers
  44     class CorruptionError < StandardError
  45       attr_reader :index_name
  46
  47       def initialize(index_name)
  48         @index_name = index_name
  49
  50         super "The index `#{index_name}` seems to be corrupted, it contains duplicate rows. " \
  51           'For information on how to fix this, see our documentation: ' \
  52           'https://docs.joinmastodon.org/admin/troubleshooting/index-corruption/'
  53       end
  54
  55       def cause
  56         nil
  57       end
  58
  59       def backtrace
  60         []
  61       end
  62     end
  63
  64     # Model that can be used for querying permissions of a SQL user.
  65     class Grant < ActiveRecord::Base
  66       self.table_name = 'information_schema.role_table_grants'
  67
  68       def self.scope_to_current_user
  69         where('grantee = user')
  70       end
  71
  72       # Returns true if the current user can create and execute triggers on the
  73       # given table.
  74       def self.create_and_execute_trigger?(table)
  75         priv = where(privilege_type: 'TRIGGER', table_name: table)
  76
  77         priv.scope_to_current_user.any?
  78       end
  79     end
  80
  81     BACKGROUND_MIGRATION_BATCH_SIZE = 1000 # Number of rows to process per job
  82     BACKGROUND_MIGRATION_JOB_BUFFER_SIZE = 1000 # Number of jobs to bulk queue at a time
  83
  84     # Gets an estimated number of rows for a table
  85     def estimate_rows_in_table(table_name)
  86       exec_query('SELECT reltuples FROM pg_class WHERE relname = ' +
  87         "'#{table_name}'").to_a.first['reltuples']
  88     end
  89
  90     # Adds `created_at` and `updated_at` columns with timezone information.
  91     #
  92     # This method is an improved version of Rails' built-in method `add_timestamps`.
  93     #
  94     # Available options are:
  95     # default - The default value for the column.
  96     # null - When set to `true` the column will allow NULL values.
  97     #        The default is to not allow NULL values.
  98     def add_timestamps_with_timezone(table_name, **options)
  99       options[:null] = false if options[:null].nil?
 100
 101       [:created_at, :updated_at].each do |column_name|
 102         if options[:default] && transaction_open?
 103           raise '`add_timestamps_with_timezone` with default value cannot be run inside a transaction. ' \
 104             'You can disable transactions by calling `disable_ddl_transaction!` ' \
 105             'in the body of your migration class'
 106         end
 107
 108         # If default value is presented, use `add_column_with_default` method instead.
 109         if options[:default]
 110           add_column_with_default(
 111             table_name,
 112             column_name,
 113             :datetime_with_timezone,
 114             default: options[:default],
 115             allow_null: options[:null]
 116           )
 117         else
 118           add_column(table_name, column_name, :datetime_with_timezone, **options)
 119         end
 120       end
 121     end
 122
 123     # Creates a new index, concurrently when supported
 124     #
 125     # On PostgreSQL this method creates an index concurrently, on MySQL this
 126     # creates a regular index.
 127     #
 128     # Example:
 129     #
 130     #     add_concurrent_index :users, :some_column
 131     #
 132     # See Rails' `add_index` for more info on the available arguments.
 133     def add_concurrent_index(table_name, column_name, **options)
 134       if transaction_open?
 135         raise 'add_concurrent_index can not be run inside a transaction, ' \
 136           'you can disable transactions by calling disable_ddl_transaction! ' \
 137           'in the body of your migration class'
 138       end
 139
 140       options = options.merge({ algorithm: :concurrently })
 141       disable_statement_timeout
 142
 143       add_index(table_name, column_name, **options)
 144     end
 145
    # Removes an existing index, concurrently when supported
 147     #
 148     # On PostgreSQL this method removes an index concurrently.
 149     #
 150     # Example:
 151     #
 152     #     remove_concurrent_index :users, :some_column
 153     #
 154     # See Rails' `remove_index` for more info on the available arguments.
 155     def remove_concurrent_index(table_name, column_name, **options)
 156       if transaction_open?
 157         raise 'remove_concurrent_index can not be run inside a transaction, ' \
 158           'you can disable transactions by calling disable_ddl_transaction! ' \
 159           'in the body of your migration class'
 160       end
 161
 162       if supports_drop_index_concurrently?
 163         options = options.merge({ algorithm: :concurrently })
 164         disable_statement_timeout
 165       end
 166
 167       remove_index(table_name, **options.merge({ column: column_name }))
 168     end
 169
 170     # Removes an existing index, concurrently when supported
 171     #
 172     # On PostgreSQL this method removes an index concurrently.
 173     #
 174     # Example:
 175     #
    #     remove_concurrent_index_by_name :users, "index_X_by_Y"
 177     #
 178     # See Rails' `remove_index` for more info on the available arguments.
 179     def remove_concurrent_index_by_name(table_name, index_name, **options)
 180       if transaction_open?
 181         raise 'remove_concurrent_index_by_name can not be run inside a transaction, ' \
 182           'you can disable transactions by calling disable_ddl_transaction! ' \
 183           'in the body of your migration class'
 184       end
 185
 186       if supports_drop_index_concurrently?
 187         options = options.merge({ algorithm: :concurrently })
 188         disable_statement_timeout
 189       end
 190
 191       remove_index(table_name, **options.merge({ name: index_name }))
 192     end
 193
 194     # Only available on Postgresql >= 9.2
 195     def supports_drop_index_concurrently?
 196       version = select_one("SELECT current_setting('server_version_num') AS v")['v'].to_i
 197
 198       version >= 90200
 199     end
 200
 201     # Adds a foreign key with only minimal locking on the tables involved.
 202     #
 203     # This method only requires minimal locking when using PostgreSQL. When
 204     # using MySQL this method will use Rails' default `add_foreign_key`.
 205     #
 206     # source - The source table containing the foreign key.
 207     # target - The target table the key points to.
 208     # column - The name of the column to create the foreign key on.
 209     # on_delete - The action to perform when associated data is removed,
 210     #             defaults to "CASCADE".
 211     def add_concurrent_foreign_key(source, target, column:, on_delete: :cascade, target_col: 'id')
 212       # Transactions would result in ALTER TABLE locks being held for the
 213       # duration of the transaction, defeating the purpose of this method.
 214       if transaction_open?
 215         raise 'add_concurrent_foreign_key can not be run inside a transaction'
 216       end
 217
 218       # While MySQL does allow disabling of foreign keys it has no equivalent
 219       # of PostgreSQL's "VALIDATE CONSTRAINT". As a result we'll just fall
 220       # back to the normal foreign key procedure.
 221       on_delete = 'SET NULL' if on_delete == :nullify
 222
 223       disable_statement_timeout
 224
 225       key_name = concurrent_foreign_key_name(source, column, target_col)
 226
 227       # Using NOT VALID allows us to create a key without immediately
 228       # validating it. This means we keep the ALTER TABLE lock only for a
 229       # short period of time. The key _is_ enforced for any newly created
 230       # data.
 231       execute <<-EOF.strip_heredoc
 232       ALTER TABLE #{source}
 233       ADD CONSTRAINT #{key_name}
 234       FOREIGN KEY (#{column})
 235       REFERENCES #{target} (#{target_col})
 236       #{on_delete ? "ON DELETE #{on_delete.upcase}" : ''}
 237       NOT VALID;
 238       EOF
 239
 240       # Validate the existing constraint. This can potentially take a very
 241       # long time to complete, but fortunately does not lock the source table
 242       # while running.
 243       execute("ALTER TABLE #{source} VALIDATE CONSTRAINT #{key_name};")
 244     end
 245
 246     # Returns the name for a concurrent foreign key.
 247     #
 248     # PostgreSQL constraint names have a limit of 63 bytes. The logic used
 249     # here is based on Rails' foreign_key_name() method, which unfortunately
 250     # is private so we can't rely on it directly.
 251     def concurrent_foreign_key_name(table, column, target_col)
 252       "fk_#{Digest::SHA256.hexdigest("#{table}_#{column}_#{target_col}_fk").first(10)}"
 253     end
 254
 255     # Long-running migrations may take more than the timeout allowed by
 256     # the database. Disable the session's statement timeout to ensure
 257     # migrations don't get killed prematurely. (PostgreSQL only)
 258     def disable_statement_timeout
 259       execute('SET statement_timeout TO 0')
 260     end
 261
 262     # Updates the value of a column in batches.
 263     #
    # This method updates the table in batches of 5% of the (estimated) total
    # row count, capped at 1000 rows per batch.
 265     # This method will continue updating rows until no rows remain.
 266     #
 267     # When given a block this method will yield two values to the block:
 268     #
 269     # 1. An instance of `Arel::Table` for the table that is being updated.
 270     # 2. The query to run as an Arel object.
 271     #
 272     # By supplying a block one can add extra conditions to the queries being
 273     # executed. Note that the same block is used for _all_ queries.
 274     #
 275     # Example:
 276     #
 277     #     update_column_in_batches(:projects, :foo, 10) do |table, query|
 278     #       query.where(table[:some_column].eq('hello'))
 279     #     end
 280     #
 281     # This would result in this method updating only rows where
 282     # `projects.some_column` equals "hello".
 283     #
 284     # table - The name of the table.
 285     # column - The name of the column to update.
 286     # value - The value for the column.
 287     #
 288     # Rubocop's Metrics/AbcSize metric is disabled for this method as Rubocop
 289     # determines this method to be too complex while there's no way to make it
 290     # less "complex" without introducing extra methods (which actually will
 291     # make things _more_ complex).
 292     #
 293     # rubocop: disable Metrics/AbcSize
    def update_column_in_batches(table_name, column, value)
      # Sets `column` to `value` for rows of `table_name`, walking the table in
      # ascending `id` order one batch at a time. When a block is given, every
      # query built here (count, first row, batch boundary, update) is passed
      # through it together with the Arel table, and the block's return value
      # is used as the query.
      if transaction_open?
        raise 'update_column_in_batches can not be run inside a transaction, ' \
          'you can disable transactions by calling disable_ddl_transaction! ' \
          'in the body of your migration class'
      end

      table = Arel::Table.new(table_name)

      # Start from the estimated row count; only when that estimate is below 1
      # fall back to an exact COUNT(*) (filtered by the block, if any).
      total = estimate_rows_in_table(table_name).to_i
      if total < 1
        count_arel = table.project(Arel.star.count.as('count'))
        count_arel = yield table, count_arel if block_given?

        total = exec_query(count_arel.to_sql).to_ary.first['count'].to_i

        return if total == 0
      end

      # Update in batches of 5% until we run out of any rows to update.
      batch_size = ((total / 100.0) * 5.0).ceil
      max_size = 1000

      # The upper limit is 1000 to ensure we don't lock too many rows. For
      # example, for "merge_requests" even 1% of the table is around 35 000
      # rows for GitLab.com.
      batch_size = max_size if batch_size > max_size

      # Find the smallest `id` to start from. The `id` values are read with
      # `to_i`, so this method relies on the table having an integer-like `id`.
      start_arel = table.project(table[:id]).order(table[:id].asc).take(1)
      start_arel = yield table, start_arel if block_given?
      first_row = exec_query(start_arel.to_sql).to_ary.first
      # In case there are no rows but we didn't catch it in the estimated size:
      return unless first_row
      start_id = first_row['id'].to_i

      say "Migrating #{table_name}.#{column} (~#{total.to_i} rows)"

      started_time = Time.zone.now
      last_time = Time.zone.now
      migrated = 0
      loop do
        stop_row = nil

        suppress_messages do
          # Look up the `id` that sits `batch_size` rows after `start_id`; it
          # becomes the exclusive upper bound of this batch. No such row means
          # this is the final batch.
          stop_arel = table.project(table[:id])
            .where(table[:id].gteq(start_id))
            .order(table[:id].asc)
            .take(1)
            .skip(batch_size)

          stop_arel = yield table, stop_arel if block_given?
          stop_row = exec_query(stop_arel.to_sql).to_ary.first

          # The lower bound is taken from `start_id` here, before `start_id`
          # is advanced to the next batch below.
          update_arel = Arel::UpdateManager.new
            .table(table)
            .set([[table[column], value]])
            .where(table[:id].gteq(start_id))

          if stop_row
            stop_id = stop_row['id'].to_i
            start_id = stop_id
            update_arel = update_arel.where(table[:id].lt(stop_id))
          end

          update_arel = yield table, update_arel if block_given?

          execute(update_arel.to_sql)
        end

        # `migrated` grows by the nominal batch size, not by the number of rows
        # the UPDATE touched, and `total` may be an estimate, so the progress
        # figures below are approximate.
        migrated += batch_size
        # Report progress at most about once per second.
        if Time.zone.now - last_time > 1
          status = "Migrated #{migrated} rows"

          percentage = 100.0 * migrated / total
          status += " (~#{sprintf('%.2f', percentage)}%, "

          # Linear extrapolation from the time elapsed so far.
          remaining_time = (100.0 - percentage) * (Time.zone.now - started_time) / percentage

          status += "#{(remaining_time / 60).to_i}:"
          status += sprintf('%02d', remaining_time.to_i % 60)
          status += ' remaining, '

          # Tell users not to interrupt if we're almost done.
          if remaining_time > 10
            status += 'safe to interrupt'
          else
            status += 'DO NOT interrupt'
          end

          status += ')'

          say status, true
          last_time = Time.zone.now
        end

        # There are no more rows left to update.
        break unless stop_row
      end
    end
 393
 394     # Adds a column with a default value without locking an entire table.
 395     #
 396     # This method runs the following steps:
 397     #
 398     # 1. Add the column with a default value of NULL.
 399     # 2. Change the default value of the column to the specified value.
 400     # 3. Update all existing rows in batches.
 401     # 4. Set a `NOT NULL` constraint on the column if desired (the default).
 402     #
 403     # These steps ensure a column can be added to a large and commonly used
 404     # table without locking the entire table for the duration of the table
 405     # modification.
 406     #
 407     # table - The name of the table to update.
 408     # column - The name of the column to add.
 409     # type - The column type (e.g. `:integer`).
 410     # default - The default value for the column.
 411     # limit - Sets a column limit. For example, for :integer, the default is
 412     #         4-bytes. Set `limit: 8` to allow 8-byte integers.
 413     # allow_null - When set to `true` the column will allow NULL values, the
 414     #              default is to not allow NULL values.
 415     #
 416     # This method can also take a block which is passed directly to the
 417     # `update_column_in_batches` method.
 418     def add_column_with_default(table, column, type, default:, limit: nil, allow_null: false, &block)
 419       if transaction_open?
 420         raise 'add_column_with_default can not be run inside a transaction, ' \
 421           'you can disable transactions by calling disable_ddl_transaction! ' \
 422           'in the body of your migration class'
 423       end
 424
 425       disable_statement_timeout
 426
 427       transaction do
 428         if limit
 429           add_column(table, column, type, default: nil, limit: limit)
 430         else
 431           add_column(table, column, type, default: nil)
 432         end
 433
 434         # Changing the default before the update ensures any newly inserted
 435         # rows already use the proper default value.
 436         change_column_default(table, column, default)
 437       end
 438
 439       begin
 440         update_column_in_batches(table, column, default, &block)
 441
 442         change_column_null(table, column, false) unless allow_null
 443       # We want to rescue _all_ exceptions here, even those that don't inherit
 444       # from StandardError.
 445       rescue Exception => error # rubocop: disable all
 446         remove_column(table, column)
 447
 448         raise error
 449       end
 450     end
 451
 452     # Renames a column without requiring downtime.
 453     #
 454     # Concurrent renames work by using database triggers to ensure both the
 455     # old and new column are in sync. However, this method will _not_ remove
 456     # the triggers or the old column automatically; this needs to be done
 457     # manually in a post-deployment migration. This can be done using the
 458     # method `cleanup_concurrent_column_rename`.
 459     #
 460     # table - The name of the database table containing the column.
 461     # old - The old column name.
 462     # new - The new column name.
 463     # type - The type of the new column. If no type is given the old column's
 464     #        type is used.
 465     def rename_column_concurrently(table, old, new, type: nil)
 466       if transaction_open?
 467         raise 'rename_column_concurrently can not be run inside a transaction'
 468       end
 469
 470       check_trigger_permissions!(table)
 471       trigger_name = rename_trigger_name(table, old, new)
 472
 473       # If we were in the middle of update_column_in_batches, we should remove
 474       # the old column and start over, as we have no idea where we were.
 475       if column_for(table, new)
 476         remove_rename_triggers_for_postgresql(table, trigger_name)
 477
 478         remove_column(table, new)
 479       end
 480
 481       old_col = column_for(table, old)
 482       new_type = type || old_col.type
 483
 484       col_opts = {
 485         precision: old_col.precision,
 486         scale: old_col.scale,
 487       }
 488
 489       # We may be trying to reset the limit on an integer column type, so let
 490       # Rails handle that.
 491       unless [:bigint, :integer].include?(new_type)
 492         col_opts[:limit] = old_col.limit
 493       end
 494
 495       add_column(table, new, new_type, **col_opts)
 496
 497       # We set the default value _after_ adding the column so we don't end up
 498       # updating any existing data with the default value. This isn't
 499       # necessary since we copy over old values further down.
 500       change_column_default(table, new, old_col.default) if old_col.default
 501
 502       quoted_table = quote_table_name(table)
 503       quoted_old = quote_column_name(old)
 504       quoted_new = quote_column_name(new)
 505
 506       install_rename_triggers_for_postgresql(trigger_name, quoted_table,
 507                                              quoted_old, quoted_new)
 508
 509       update_column_in_batches(table, new, Arel::Table.new(table)[old])
 510
 511       change_column_null(table, new, false) unless old_col.null
 512
 513       copy_indexes(table, old, new)
 514       copy_foreign_keys(table, old, new)
 515     end
 516
 517     # Changes the type of a column concurrently.
 518     #
 519     # table - The table containing the column.
 520     # column - The name of the column to change.
 521     # new_type - The new column type.
 522     def change_column_type_concurrently(table, column, new_type)
 523       temp_column = rename_column_name(column)
 524
 525       rename_column_concurrently(table, column, temp_column, type: new_type)
 526
 527       # Primary keys don't necessarily have an associated index.
 528       if ActiveRecord::Base.get_primary_key(table) == column.to_s
 529         old_pk_index_name = "index_#{table}_on_#{column}"
 530         new_pk_index_name = "index_#{table}_on_#{column}_cm"
 531
 532         unless indexes_for(table, column).find{|i| i.name == old_pk_index_name}
 533           add_concurrent_index(table, [temp_column],
 534             unique: true,
 535             name: new_pk_index_name
 536           )
 537         end
 538       end
 539     end
 540
 541     # Performs cleanup of a concurrent type change.
 542     #
 543     # table - The table containing the column.
 544     # column - The name of the column to change.
    # (No type argument is taken here: the temporary column was already created
    # with the new type by `change_column_type_concurrently`.)
    def cleanup_concurrent_column_type_change(table, column)
      # Finishes a type change started by `change_column_type_concurrently`:
      # swaps the temporary `<column>_cm` column in place of `column` and moves
      # indexes, foreign keys, sequence ownership and the primary key over.
      temp_column = rename_column_name(column)

      # Wait for the indices to be built
      # (polls once per second until every index on `column` has a `_cm`
      # counterpart on the temporary column; there is no timeout).
      indexes_for(table, column).each do |index|
        expected_name = index.name + '_cm'

        puts "Waiting for index #{expected_name}"
        sleep 1 until indexes_for(table, temp_column).find {|i| i.name == expected_name }
      end

      # Capture these before the old column is dropped further down.
      was_primary = (ActiveRecord::Base.get_primary_key(table) == column.to_s)
      old_default_fn = column_for(table, column).default_function

      old_fks = []
      if was_primary
        # Get any foreign keys pointing at this column we need to recreate, and
        # remove the old ones.
        # Based on code from:
        # http://errorbank.blogspot.com/2011/03/list-all-foreign-keys-references-for.html
        #
        # NOTE(review): the query selects every foreign key whose referenced
        # table is `table` and reads only the first constrained column
        # (`conkey[1]`); it does not check which column is referenced —
        # confirm this is fine for composite or non-primary-key references.
        old_fks_res = execute <<-EOF.strip_heredoc
          select m.relname as src_table,
            (select a.attname
              from pg_attribute a
              where a.attrelid = m.oid
                and a.attnum = o.conkey[1]
                and a.attisdropped = false) as src_col,
            o.conname as name,
            o.confdeltype as on_delete
          from pg_constraint o
          left join pg_class f on f.oid = o.confrelid
          left join pg_class c on c.oid = o.conrelid
          left join pg_class m on m.oid = o.conrelid
          where o.contype = 'f'
            and o.conrelid in (
              select oid from pg_class c where c.relkind = 'r')
            and f.relname = '#{table}';
          EOF
        old_fks = old_fks_res.to_a
        old_fks.each do |old_fk|
          # Create the replacement key (pointing at the temporary column)
          # before dropping the original one.
          add_concurrent_foreign_key(
            old_fk['src_table'],
            table,
            column: old_fk['src_col'],
            target_col: temp_column,
            on_delete: extract_foreign_key_action(old_fk['on_delete'])
          )

          remove_foreign_key(old_fk['src_table'], name: old_fk['name'])
        end
      end

      # If there was a sequence owned by the old column, make it owned by the
      # new column, as it will otherwise be deleted when we get rid of the
      # old column.
      if (seq_match = /^nextval\('([^']*)'(::text|::regclass)?\)/.match(old_default_fn))
        seq_name = seq_match[1]
        execute("ALTER SEQUENCE #{seq_name} OWNED BY #{table}.#{temp_column}")
      end

      transaction do
        # This has to be performed in a transaction as otherwise we might have
        # inconsistent data.

        # Drops the sync triggers and the old column, then gives the temporary
        # column the original name.
        cleanup_concurrent_column_rename(table, column, temp_column)
        rename_column(table, temp_column, column)

        # If there was an old default function, we didn't copy it. Do that now
        # in the transaction, so we don't miss anything.
        change_column_default(table, column, -> { old_default_fn }) if old_default_fn
      end

      # Rename any indices back to what they should be.
      indexes_for(table, column).each do |index|
        next unless index.name.end_with?('_cm')

        real_index_name = index.name.sub(/_cm$/, '')
        rename_index(table, index.name, real_index_name)
      end

      # Rename any foreign keys back to names based on the real column.
      # The names are recomputed with target column 'id', the default that
      # `add_concurrent_foreign_key` uses when no `target_col` is passed.
      foreign_keys_for(table, column).each do |fk|
        old_fk_name = concurrent_foreign_key_name(fk.from_table, temp_column, 'id')
        new_fk_name = concurrent_foreign_key_name(fk.from_table, column, 'id')
        execute("ALTER TABLE #{fk.from_table} RENAME CONSTRAINT " +
          "#{old_fk_name} TO #{new_fk_name}")
      end

      # Rename any foreign keys from other tables to names based on the real
      # column.
      # These are the keys recreated above with `target_col: temp_column`.
      old_fks.each do |old_fk|
        old_fk_name = concurrent_foreign_key_name(old_fk['src_table'],
          old_fk['src_col'], temp_column)
        new_fk_name = concurrent_foreign_key_name(old_fk['src_table'],
          old_fk['src_col'], column)
        execute("ALTER TABLE #{old_fk['src_table']} RENAME CONSTRAINT " +
          "#{old_fk_name} TO #{new_fk_name}")
      end

      # If the old column was a primary key, mark the new one as a primary key.
      # This relies on an index named `index_<table>_on_<column>` existing at
      # this point (after the `_cm` suffixes were stripped above).
      if was_primary
        execute("ALTER TABLE #{table} ADD PRIMARY KEY USING INDEX " +
          "index_#{table}_on_#{column}")
      end
    end
 651
    # Cleans up a concurrent column rename.
 653     #
 654     # This method takes care of removing previously installed triggers as well
 655     # as removing the old column.
 656     #
 657     # table - The name of the database table.
 658     # old - The name of the old column.
 659     # new - The name of the new column.
 660     def cleanup_concurrent_column_rename(table, old, new)
 661       trigger_name = rename_trigger_name(table, old, new)
 662
 663       check_trigger_permissions!(table)
 664
 665       remove_rename_triggers_for_postgresql(table, trigger_name)
 666
 667       remove_column(table, old)
 668     end
 669
    # Installs the trigger (and its function) necessary to perform a
    # concurrent column rename on PostgreSQL.
 671     def install_rename_triggers_for_postgresql(trigger, table, old, new)
 672       execute <<-EOF.strip_heredoc
 673       CREATE OR REPLACE FUNCTION #{trigger}()
 674       RETURNS trigger AS
 675       $BODY$
 676       BEGIN
 677         NEW.#{new} := NEW.#{old};
 678         RETURN NEW;
 679       END;
 680       $BODY$
 681       LANGUAGE 'plpgsql'
 682       VOLATILE
 683       EOF
 684
 685       execute <<-EOF.strip_heredoc
 686       CREATE TRIGGER #{trigger}
 687       BEFORE INSERT OR UPDATE
 688       ON #{table}
 689       FOR EACH ROW
 690       EXECUTE PROCEDURE #{trigger}()
 691       EOF
 692     end
 693
 694     # Installs the triggers necessary to perform a concurrent column rename on
 695     # MySQL.
 696     def install_rename_triggers_for_mysql(trigger, table, old, new)
 697       execute <<-EOF.strip_heredoc
 698       CREATE TRIGGER #{trigger}_insert
 699       BEFORE INSERT
 700       ON #{table}
 701       FOR EACH ROW
 702       SET NEW.#{new} = NEW.#{old}
 703       EOF
 704
 705       execute <<-EOF.strip_heredoc
 706       CREATE TRIGGER #{trigger}_update
 707       BEFORE UPDATE
 708       ON #{table}
 709       FOR EACH ROW
 710       SET NEW.#{new} = NEW.#{old}
 711       EOF
 712     end
 713
 714     # Removes the triggers used for renaming a PostgreSQL column concurrently.
 715     def remove_rename_triggers_for_postgresql(table, trigger)
 716       execute("DROP TRIGGER IF EXISTS #{trigger} ON #{table}")
 717       execute("DROP FUNCTION IF EXISTS #{trigger}()")
 718     end
 719
 720     # Removes the triggers used for renaming a MySQL column concurrently.
 721     def remove_rename_triggers_for_mysql(trigger)
 722       execute("DROP TRIGGER IF EXISTS #{trigger}_insert")
 723       execute("DROP TRIGGER IF EXISTS #{trigger}_update")
 724     end
 725
 726     # Returns the (base) name to use for triggers when renaming columns.
 727     def rename_trigger_name(table, old, new)
 728       'trigger_' + Digest::SHA256.hexdigest("#{table}_#{old}_#{new}").first(12)
 729     end
 730
 731     # Returns the name to use for temporary rename columns.
 732     def rename_column_name(base)
 733       base.to_s + '_cm'
 734     end
 735
 736     # Returns an Array containing the indexes for the given column
 737     def indexes_for(table, column)
 738       column = column.to_s
 739
 740       indexes(table).select { |index| index.columns.include?(column) }
 741     end
 742
 743     # Returns an Array containing the foreign keys for the given column.
 744     def foreign_keys_for(table, column)
 745       column = column.to_s
 746
 747       foreign_keys(table).select { |fk| fk.column == column }
 748     end
 749
 750     # Copies all indexes for the old column to a new column.
 751     #
 752     # table - The table containing the columns and indexes.
 753     # old - The old column.
 754     # new - The new column.
 755     def copy_indexes(table, old, new)
 756       old = old.to_s
 757       new = new.to_s
 758
 759       indexes_for(table, old).each do |index|
 760         new_columns = index.columns.map do |column|
 761           column == old ? new : column
 762         end
 763
 764         # This is necessary as we can't properly rename indexes such as
 765         # "ci_taggings_idx".
 766         name = index.name + '_cm'
 767
 768         # If the order contained the old column, map it to the new one.
 769         order = index.orders
 770         if order.key?(old)
 771           order[new] = order.delete(old)
 772         end
 773
 774         options = {
 775           unique: index.unique,
 776           name: name,
 777           length: index.lengths,
 778           order: order
 779         }
 780
 781         # These options are not supported by MySQL, so we only add them if
 782         # they were previously set.
 783         options[:using] = index.using if index.using
 784         options[:where] = index.where if index.where
 785
 786         add_concurrent_index(table, new_columns, **options)
 787       end
 788     end
 789
 790     # Copies all foreign keys for the old column to the new column.
 791     #
 792     # table - The table containing the columns and indexes.
 793     # old - The old column.
 794     # new - The new column.
 795     def copy_foreign_keys(table, old, new)
 796       foreign_keys_for(table, old).each do |fk|
 797         add_concurrent_foreign_key(fk.from_table,
 798                                    fk.to_table,
 799                                    column: new,
 800                                    on_delete: fk.on_delete)
 801       end
 802     end
 803
 804     # Returns the column for the given table and column name.
 805     def column_for(table, name)
 806       name = name.to_s
 807
 808       columns(table).find { |column| column.name == name }
 809     end
 810
 811     # Update the configuration of an index by creating a new one and then
 812     # removing the old one
 813     def update_index(table_name, index_name, columns, **index_options)
 814       if index_name_exists?(table_name, "#{index_name}_new") && index_name_exists?(table_name, index_name)
 815         remove_index table_name, name: "#{index_name}_new"
 816       elsif index_name_exists?(table_name, "#{index_name}_new")
 817         # Very unlikely case where the script has been interrupted during/after removal but before renaming
 818         rename_index table_name, "#{index_name}_new", index_name
 819       end
 820
 821       begin
 822         add_index table_name, columns, **index_options.merge(name: "#{index_name}_new", algorithm: :concurrently)
 823       rescue ActiveRecord::RecordNotUnique
 824         remove_index table_name, name: "#{index_name}_new"
 825         raise CorruptionError.new(index_name)
 826       end
 827
 828       remove_index table_name, name: index_name if index_name_exists?(table_name, index_name)
 829       rename_index table_name, "#{index_name}_new", index_name
 830     end
 831
 832     # This will replace the first occurrence of a string in a column with
 833     # the replacement
 834     # On postgresql we can use `regexp_replace` for that.
 835     # On mysql we find the location of the pattern, and overwrite it
 836     # with the replacement
 837     def replace_sql(column, pattern, replacement)
 838       quoted_pattern = Arel::Nodes::Quoted.new(pattern.to_s)
 839       quoted_replacement = Arel::Nodes::Quoted.new(replacement.to_s)
 840
 841       replace = Arel::Nodes::NamedFunction
 842         .new("regexp_replace", [column, quoted_pattern, quoted_replacement])
 843       Arel::Nodes::SqlLiteral.new(replace.to_sql)
 844     end
 845
 846     def remove_foreign_key_without_error(*args)
 847       remove_foreign_key(*args)
 848     rescue ArgumentError
 849     end
 850
 851     def sidekiq_queue_migrate(queue_from, to:)
 852       while sidekiq_queue_length(queue_from) > 0
 853         Sidekiq.redis do |conn|
 854           conn.rpoplpush "queue:#{queue_from}", "queue:#{to}"
 855         end
 856       end
 857     end
 858
 859     def sidekiq_queue_length(queue_name)
 860       Sidekiq.redis do |conn|
 861         conn.llen("queue:#{queue_name}")
 862       end
 863     end
 864
 865     def check_trigger_permissions!(table)
 866       unless Grant.create_and_execute_trigger?(table)
 867         dbname = ActiveRecord::Base.configurations[Rails.env]['database']
 868         user = ActiveRecord::Base.configurations[Rails.env]['username'] || ENV['USER']
 869
 870         raise <<-EOF
 871 Your database user is not allowed to create, drop, or execute triggers on the
 872 table #{table}.
 873
 874 If you are using PostgreSQL you can solve this by logging in to the Mastodon
 875 database (#{dbname}) using a super user and running:
 876
 877     ALTER USER #{user} WITH SUPERUSER
 878
 879 The query will grant the user super user permissions, ensuring you don't run
 880 into similar problems in the future (e.g. when new tables are created).
 881         EOF
 882       end
 883     end
 884
 885     # Bulk queues background migration jobs for an entire table, batched by ID range.
 886     # "Bulk" meaning many jobs will be pushed at a time for efficiency.
 887     # If you need a delay interval per job, then use `queue_background_migration_jobs_by_range_at_intervals`.
 888     #
 889     # model_class - The table being iterated over
 890     # job_class_name - The background migration job class as a string
 891     # batch_size - The maximum number of rows per job
 892     #
 893     # Example:
 894     #
 895     #     class Route < ActiveRecord::Base
 896     #       include EachBatch
 897     #       self.table_name = 'routes'
 898     #     end
 899     #
 900     #     bulk_queue_background_migration_jobs_by_range(Route, 'ProcessRoutes')
 901     #
 902     # Where the model_class includes EachBatch, and the background migration exists:
 903     #
 904     #     class Gitlab::BackgroundMigration::ProcessRoutes
 905     #       def perform(start_id, end_id)
 906     #         # do something
 907     #       end
 908     #     end
 909     def bulk_queue_background_migration_jobs_by_range(model_class, job_class_name, batch_size: BACKGROUND_MIGRATION_BATCH_SIZE)
 910       raise "#{model_class} does not have an ID to use for batch ranges" unless model_class.column_names.include?('id')
 911
 912       jobs = []
 913
 914       model_class.each_batch(of: batch_size) do |relation|
 915         start_id, end_id = relation.pluck('MIN(id), MAX(id)').first
 916
 917         if jobs.length >= BACKGROUND_MIGRATION_JOB_BUFFER_SIZE
 918           # Note: This code path generally only helps with many millions of rows
 919           # We push multiple jobs at a time to reduce the time spent in
 920           # Sidekiq/Redis operations. We're using this buffer based approach so we
 921           # don't need to run additional queries for every range.
 922           BackgroundMigrationWorker.perform_bulk(jobs)
 923           jobs.clear
 924         end
 925
 926         jobs << [job_class_name, [start_id, end_id]]
 927       end
 928
 929       BackgroundMigrationWorker.perform_bulk(jobs) unless jobs.empty?
 930     end
 931
 932     # Queues background migration jobs for an entire table, batched by ID range.
 933     # Each job is scheduled with a `delay_interval` in between.
 934     # If you use a small interval, then some jobs may run at the same time.
 935     #
 936     # model_class - The table being iterated over
 937     # job_class_name - The background migration job class as a string
 938     # delay_interval - The duration between each job's scheduled time (must respond to `to_f`)
 939     # batch_size - The maximum number of rows per job
 940     #
 941     # Example:
 942     #
 943     #     class Route < ActiveRecord::Base
 944     #       include EachBatch
 945     #       self.table_name = 'routes'
 946     #     end
 947     #
 948     #     queue_background_migration_jobs_by_range_at_intervals(Route, 'ProcessRoutes', 1.minute)
 949     #
 950     # Where the model_class includes EachBatch, and the background migration exists:
 951     #
 952     #     class Gitlab::BackgroundMigration::ProcessRoutes
 953     #       def perform(start_id, end_id)
 954     #         # do something
 955     #       end
 956     #     end
 957     def queue_background_migration_jobs_by_range_at_intervals(model_class, job_class_name, delay_interval, batch_size: BACKGROUND_MIGRATION_BATCH_SIZE)
 958       raise "#{model_class} does not have an ID to use for batch ranges" unless model_class.column_names.include?('id')
 959
 960       model_class.each_batch(of: batch_size) do |relation, index|
 961         start_id, end_id = relation.pluck('MIN(id), MAX(id)').first
 962
 963         # `BackgroundMigrationWorker.bulk_perform_in` schedules all jobs for
 964         # the same time, which is not helpful in most cases where we wish to
 965         # spread the work over time.
 966         BackgroundMigrationWorker.perform_in(delay_interval * index, job_class_name, [start_id, end_id])
 967       end
 968     end
 969
 970     private
 971
 972     # https://github.com/rails/rails/blob/v5.2.0/activerecord/lib/active_record/connection_adapters/postgresql/schema_statements.rb#L678-L684
 973     def extract_foreign_key_action(specifier)
 974       case specifier
 975       when 'c'; :cascade
 976       when 'n'; :nullify
 977       when 'r'; :restrict
 978       end
 979     end
 980   end
 981 end
 982
 983 # rubocop:enable all