lib/mastodon/migration_helpers.rb

   1 # frozen_string_literal: true
   2
   3 # This file is copied almost entirely from GitLab, which has done a large
   4 # amount of work to ensure that migrations can happen with minimal downtime.
   5 # Many thanks to those engineers.
   6
   7 # Changes have been made to remove dependencies on other GitLab files and to
   8 # shorten temporary column names.
   9
  10 # Documentation on using these functions (and why one might do so):
  11 # https://gitlab.com/gitlab-org/gitlab-ce/blob/master/doc/development/what_requires_downtime.md
  12
  13 # The file itself:
  14 # https://gitlab.com/gitlab-org/gitlab-ce/blob/master/lib/gitlab/database/migration_helpers.rb
  15
  16 # It is licensed as follows:
  17
  18 # Copyright (c) 2011-2017 GitLab B.V.
  19
  20 # Permission is hereby granted, free of charge, to any person obtaining a copy
  21 # of this software and associated documentation files (the "Software"), to deal
  22 # in the Software without restriction, including without limitation the rights
  23 # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  24 # copies of the Software, and to permit persons to whom the Software is
  25 # furnished to do so, subject to the following conditions:
  26
  27 # The above copyright notice and this permission notice shall be included in
  28 # all copies or substantial portions of the Software.
  29
  30 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  31 # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  32 # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  33 # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  34 # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  35 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  36 # THE SOFTWARE.
  37
  38 # This is bad form, but there are enough differences that it's impractical to do
  39 # otherwise:
  40 # rubocop:disable all
  41
  42 module Mastodon
  43   module MigrationHelpers
  44     # Stub for Database.postgresql? from GitLab
  45     def self.postgresql?
  46       ActiveRecord::Base.configurations[Rails.env]['adapter'].casecmp('postgresql').zero?
  47     end
  48
  49     # Stub for Database.mysql? from GitLab
  50     def self.mysql?
  51       ActiveRecord::Base.configurations[Rails.env]['adapter'].casecmp('mysql2').zero?
  52     end
  53
  54     # Model that can be used for querying permissions of a SQL user.
  55     class Grant < ActiveRecord::Base
  56       self.table_name =
  57         if Mastodon::MigrationHelpers.postgresql?
  58           'information_schema.role_table_grants'
  59         else
  60           'mysql.user'
  61         end
  62
  63       def self.scope_to_current_user
  64         if Mastodon::MigrationHelpers.postgresql?
  65           where('grantee = user')
  66         else
  67           where("CONCAT(User, '@', Host) = current_user()")
  68         end
  69       end
  70
  71       # Returns true if the current user can create and execute triggers on the
  72       # given table.
  73       def self.create_and_execute_trigger?(table)
  74         priv =
  75           if Mastodon::MigrationHelpers.postgresql?
  76             where(privilege_type: 'TRIGGER', table_name: table)
  77           else
  78             where(Trigger_priv: 'Y')
  79           end
  80
  81         priv.scope_to_current_user.any?
  82       end
  83     end
  84
    # Number of rows to process per job
    BACKGROUND_MIGRATION_BATCH_SIZE = 1000
    # Number of jobs to bulk queue at a time
    BACKGROUND_MIGRATION_JOB_BUFFER_SIZE = 1000
  87
  88     # Gets an estimated number of rows for a table
  89     def estimate_rows_in_table(table_name)
  90       exec_query('SELECT reltuples FROM pg_class WHERE relname = ' +
  91         "'#{table_name}'").to_a.first['reltuples']
  92     end
  93
  94     # Adds `created_at` and `updated_at` columns with timezone information.
  95     #
  96     # This method is an improved version of Rails' built-in method `add_timestamps`.
  97     #
  98     # Available options are:
  99     # default - The default value for the column.
 100     # null - When set to `true` the column will allow NULL values.
 101     #        The default is to not allow NULL values.
 102     def add_timestamps_with_timezone(table_name, **options)
 103       options[:null] = false if options[:null].nil?
 104
 105       [:created_at, :updated_at].each do |column_name|
 106         if options[:default] && transaction_open?
 107           raise '`add_timestamps_with_timezone` with default value cannot be run inside a transaction. ' \
 108             'You can disable transactions by calling `disable_ddl_transaction!` ' \
 109             'in the body of your migration class'
 110         end
 111
 112         # If default value is presented, use `add_column_with_default` method instead.
 113         if options[:default]
 114           add_column_with_default(
 115             table_name,
 116             column_name,
 117             :datetime_with_timezone,
 118             default: options[:default],
 119             allow_null: options[:null]
 120           )
 121         else
 122           add_column(table_name, column_name, :datetime_with_timezone, options)
 123         end
 124       end
 125     end
 126
 127     # Creates a new index, concurrently when supported
 128     #
 129     # On PostgreSQL this method creates an index concurrently, on MySQL this
 130     # creates a regular index.
 131     #
 132     # Example:
 133     #
 134     #     add_concurrent_index :users, :some_column
 135     #
 136     # See Rails' `add_index` for more info on the available arguments.
 137     def add_concurrent_index(table_name, column_name, **options)
 138       if transaction_open?
 139         raise 'add_concurrent_index can not be run inside a transaction, ' \
 140           'you can disable transactions by calling disable_ddl_transaction! ' \
 141           'in the body of your migration class'
 142       end
 143
 144       if MigrationHelpers.postgresql?
 145         options = options.merge({ algorithm: :concurrently })
 146         disable_statement_timeout
 147       end
 148
 149       add_index(table_name, column_name, options)
 150     end
 151
 152     # Removes an existed index, concurrently when supported
 153     #
 154     # On PostgreSQL this method removes an index concurrently.
 155     #
 156     # Example:
 157     #
 158     #     remove_concurrent_index :users, :some_column
 159     #
 160     # See Rails' `remove_index` for more info on the available arguments.
 161     def remove_concurrent_index(table_name, column_name, **options)
 162       if transaction_open?
 163         raise 'remove_concurrent_index can not be run inside a transaction, ' \
 164           'you can disable transactions by calling disable_ddl_transaction! ' \
 165           'in the body of your migration class'
 166       end
 167
 168       if supports_drop_index_concurrently?
 169         options = options.merge({ algorithm: :concurrently })
 170         disable_statement_timeout
 171       end
 172
 173       remove_index(table_name, options.merge({ column: column_name }))
 174     end
 175
 176     # Removes an existing index, concurrently when supported
 177     #
 178     # On PostgreSQL this method removes an index concurrently.
 179     #
 180     # Example:
 181     #
 182     #     remove_concurrent_index :users, "index_X_by_Y"
 183     #
 184     # See Rails' `remove_index` for more info on the available arguments.
 185     def remove_concurrent_index_by_name(table_name, index_name, **options)
 186       if transaction_open?
 187         raise 'remove_concurrent_index_by_name can not be run inside a transaction, ' \
 188           'you can disable transactions by calling disable_ddl_transaction! ' \
 189           'in the body of your migration class'
 190       end
 191
 192       if supports_drop_index_concurrently?
 193         options = options.merge({ algorithm: :concurrently })
 194         disable_statement_timeout
 195       end
 196
 197       remove_index(table_name, options.merge({ name: index_name }))
 198     end
 199
 200     # Only available on Postgresql >= 9.2
 201     def supports_drop_index_concurrently?
 202       return false unless MigrationHelpers.postgresql?
 203
 204       version = select_one("SELECT current_setting('server_version_num') AS v")['v'].to_i
 205
 206       version >= 90200
 207     end
 208
 209     # Adds a foreign key with only minimal locking on the tables involved.
 210     #
 211     # This method only requires minimal locking when using PostgreSQL. When
 212     # using MySQL this method will use Rails' default `add_foreign_key`.
 213     #
 214     # source - The source table containing the foreign key.
 215     # target - The target table the key points to.
 216     # column - The name of the column to create the foreign key on.
 217     # on_delete - The action to perform when associated data is removed,
 218     #             defaults to "CASCADE".
 219     def add_concurrent_foreign_key(source, target, column:, on_delete: :cascade, target_col: 'id')
 220       # Transactions would result in ALTER TABLE locks being held for the
 221       # duration of the transaction, defeating the purpose of this method.
 222       if transaction_open?
 223         raise 'add_concurrent_foreign_key can not be run inside a transaction'
 224       end
 225
 226       # While MySQL does allow disabling of foreign keys it has no equivalent
 227       # of PostgreSQL's "VALIDATE CONSTRAINT". As a result we'll just fall
 228       # back to the normal foreign key procedure.
 229       if MigrationHelpers.mysql?
 230         return add_foreign_key(source, target,
 231                                column: column,
 232                                on_delete: on_delete)
 233       else
 234         on_delete = 'SET NULL' if on_delete == :nullify
 235       end
 236
 237       disable_statement_timeout
 238
 239       key_name = concurrent_foreign_key_name(source, column, target_col)
 240
 241       # Using NOT VALID allows us to create a key without immediately
 242       # validating it. This means we keep the ALTER TABLE lock only for a
 243       # short period of time. The key _is_ enforced for any newly created
 244       # data.
 245       execute <<-EOF.strip_heredoc
 246       ALTER TABLE #{source}
 247       ADD CONSTRAINT #{key_name}
 248       FOREIGN KEY (#{column})
 249       REFERENCES #{target} (#{target_col})
 250       #{on_delete ? "ON DELETE #{on_delete.upcase}" : ''}
 251       NOT VALID;
 252       EOF
 253
 254       # Validate the existing constraint. This can potentially take a very
 255       # long time to complete, but fortunately does not lock the source table
 256       # while running.
 257       execute("ALTER TABLE #{source} VALIDATE CONSTRAINT #{key_name};")
 258     end
 259
 260     # Returns the name for a concurrent foreign key.
 261     #
 262     # PostgreSQL constraint names have a limit of 63 bytes. The logic used
 263     # here is based on Rails' foreign_key_name() method, which unfortunately
 264     # is private so we can't rely on it directly.
 265     def concurrent_foreign_key_name(table, column, target_col)
 266       "fk_#{Digest::SHA256.hexdigest("#{table}_#{column}_#{target_col}_fk").first(10)}"
 267     end
 268
 269     # Long-running migrations may take more than the timeout allowed by
 270     # the database. Disable the session's statement timeout to ensure
 271     # migrations don't get killed prematurely. (PostgreSQL only)
 272     def disable_statement_timeout
 273       execute('SET statement_timeout TO 0') if MigrationHelpers.postgresql?
 274     end
 275
 276     # Updates the value of a column in batches.
 277     #
 278     # This method updates the table in batches of 5% of the total row count.
 279     # This method will continue updating rows until no rows remain.
 280     #
 281     # When given a block this method will yield two values to the block:
 282     #
 283     # 1. An instance of `Arel::Table` for the table that is being updated.
 284     # 2. The query to run as an Arel object.
 285     #
 286     # By supplying a block one can add extra conditions to the queries being
 287     # executed. Note that the same block is used for _all_ queries.
 288     #
 289     # Example:
 290     #
 291     #     update_column_in_batches(:projects, :foo, 10) do |table, query|
 292     #       query.where(table[:some_column].eq('hello'))
 293     #     end
 294     #
 295     # This would result in this method updating only rows where
 296     # `projects.some_column` equals "hello".
 297     #
 298     # table - The name of the table.
 299     # column - The name of the column to update.
 300     # value - The value for the column.
 301     #
 302     # Rubocop's Metrics/AbcSize metric is disabled for this method as Rubocop
 303     # determines this method to be too complex while there's no way to make it
 304     # less "complex" without introducing extra methods (which actually will
 305     # make things _more_ complex).
 306     #
 307     # rubocop: disable Metrics/AbcSize
 308     def update_column_in_batches(table_name, column, value)
 309       if transaction_open?
 310         raise 'update_column_in_batches can not be run inside a transaction, ' \
 311           'you can disable transactions by calling disable_ddl_transaction! ' \
 312           'in the body of your migration class'
 313       end
 314
 315       table = Arel::Table.new(table_name)
 316
 317       total = estimate_rows_in_table(table_name).to_i
 318       if total == 0
 319         count_arel = table.project(Arel.star.count.as('count'))
 320         count_arel = yield table, count_arel if block_given?
 321
 322         total = exec_query(count_arel.to_sql).to_hash.first['count'].to_i
 323
 324         return if total == 0
 325       end
 326
 327       # Update in batches of 5% until we run out of any rows to update.
 328       batch_size = ((total / 100.0) * 5.0).ceil
 329       max_size = 1000
 330
 331       # The upper limit is 1000 to ensure we don't lock too many rows. For
 332       # example, for "merge_requests" even 1% of the table is around 35 000
 333       # rows for GitLab.com.
 334       batch_size = max_size if batch_size > max_size
 335
 336       start_arel = table.project(table[:id]).order(table[:id].asc).take(1)
 337       start_arel = yield table, start_arel if block_given?
 338       first_row = exec_query(start_arel.to_sql).to_hash.first
 339       # In case there are no rows but we didn't catch it in the estimated size:
 340       return unless first_row
 341       start_id = first_row['id'].to_i
 342
 343       say "Migrating #{table_name}.#{column} (~#{total.to_i} rows)"
 344
 345       started_time = Time.now
 346       last_time = Time.now
 347       migrated = 0
 348       loop do
 349         stop_row = nil
 350
 351         suppress_messages do
 352           stop_arel = table.project(table[:id])
 353             .where(table[:id].gteq(start_id))
 354             .order(table[:id].asc)
 355             .take(1)
 356             .skip(batch_size)
 357
 358           stop_arel = yield table, stop_arel if block_given?
 359           stop_row = exec_query(stop_arel.to_sql).to_hash.first
 360
 361           update_arel = Arel::UpdateManager.new
 362             .table(table)
 363             .set([[table[column], value]])
 364             .where(table[:id].gteq(start_id))
 365
 366           if stop_row
 367             stop_id = stop_row['id'].to_i
 368             start_id = stop_id
 369             update_arel = update_arel.where(table[:id].lt(stop_id))
 370           end
 371
 372           update_arel = yield table, update_arel if block_given?
 373
 374           execute(update_arel.to_sql)
 375         end
 376
 377         migrated += batch_size
 378         if Time.now - last_time > 1
 379           status = "Migrated #{migrated} rows"
 380
 381           percentage = 100.0 * migrated / total
 382           status += " (~#{sprintf('%.2f', percentage)}%, "
 383
 384           remaining_time = (100.0 - percentage) * (Time.now - started_time) / percentage
 385
 386           status += "#{(remaining_time / 60).to_i}:"
 387           status += sprintf('%02d', remaining_time.to_i % 60)
 388           status += ' remaining, '
 389
 390           # Tell users not to interrupt if we're almost done.
 391           if remaining_time > 10
 392             status += 'safe to interrupt'
 393           else
 394             status += 'DO NOT interrupt'
 395           end
 396
 397           status += ')'
 398
 399           say status, true
 400           last_time = Time.now
 401         end
 402
 403         # There are no more rows left to update.
 404         break unless stop_row
 405       end
 406     end
 407
 408     # Adds a column with a default value without locking an entire table.
 409     #
 410     # This method runs the following steps:
 411     #
 412     # 1. Add the column with a default value of NULL.
 413     # 2. Change the default value of the column to the specified value.
 414     # 3. Update all existing rows in batches.
 415     # 4. Set a `NOT NULL` constraint on the column if desired (the default).
 416     #
 417     # These steps ensure a column can be added to a large and commonly used
 418     # table without locking the entire table for the duration of the table
 419     # modification.
 420     #
 421     # table - The name of the table to update.
 422     # column - The name of the column to add.
 423     # type - The column type (e.g. `:integer`).
 424     # default - The default value for the column.
 425     # limit - Sets a column limit. For example, for :integer, the default is
 426     #         4-bytes. Set `limit: 8` to allow 8-byte integers.
 427     # allow_null - When set to `true` the column will allow NULL values, the
 428     #              default is to not allow NULL values.
 429     #
 430     # This method can also take a block which is passed directly to the
 431     # `update_column_in_batches` method.
 432     def add_column_with_default(table, column, type, default:, limit: nil, allow_null: false, &block)
 433       if transaction_open?
 434         raise 'add_column_with_default can not be run inside a transaction, ' \
 435           'you can disable transactions by calling disable_ddl_transaction! ' \
 436           'in the body of your migration class'
 437       end
 438
 439       disable_statement_timeout
 440
 441       transaction do
 442         if limit
 443           add_column(table, column, type, default: nil, limit: limit)
 444         else
 445           add_column(table, column, type, default: nil)
 446         end
 447
 448         # Changing the default before the update ensures any newly inserted
 449         # rows already use the proper default value.
 450         change_column_default(table, column, default)
 451       end
 452
 453       begin
 454         update_column_in_batches(table, column, default, &block)
 455
 456         change_column_null(table, column, false) unless allow_null
 457       # We want to rescue _all_ exceptions here, even those that don't inherit
 458       # from StandardError.
 459       rescue Exception => error # rubocop: disable all
 460         remove_column(table, column)
 461
 462         raise error
 463       end
 464     end
 465
 466     # Renames a column without requiring downtime.
 467     #
 468     # Concurrent renames work by using database triggers to ensure both the
 469     # old and new column are in sync. However, this method will _not_ remove
 470     # the triggers or the old column automatically; this needs to be done
 471     # manually in a post-deployment migration. This can be done using the
 472     # method `cleanup_concurrent_column_rename`.
 473     #
 474     # table - The name of the database table containing the column.
 475     # old - The old column name.
 476     # new - The new column name.
 477     # type - The type of the new column. If no type is given the old column's
 478     #        type is used.
 479     def rename_column_concurrently(table, old, new, type: nil)
 480       if transaction_open?
 481         raise 'rename_column_concurrently can not be run inside a transaction'
 482       end
 483
 484       check_trigger_permissions!(table)
 485       trigger_name = rename_trigger_name(table, old, new)
 486
 487       # If we were in the middle of update_column_in_batches, we should remove
 488       # the old column and start over, as we have no idea where we were.
 489       if column_for(table, new)
 490         if MigrationHelpers.postgresql?
 491           remove_rename_triggers_for_postgresql(table, trigger_name)
 492         else
 493           remove_rename_triggers_for_mysql(trigger_name)
 494         end
 495
 496         remove_column(table, new)
 497       end
 498
 499       old_col = column_for(table, old)
 500       new_type = type || old_col.type
 501
 502       col_opts = {
 503         precision: old_col.precision,
 504         scale: old_col.scale,
 505       }
 506
 507       # We may be trying to reset the limit on an integer column type, so let
 508       # Rails handle that.
 509       unless [:bigint, :integer].include?(new_type)
 510         col_opts[:limit] = old_col.limit
 511       end
 512
 513       add_column(table, new, new_type, col_opts)
 514
 515       # We set the default value _after_ adding the column so we don't end up
 516       # updating any existing data with the default value. This isn't
 517       # necessary since we copy over old values further down.
 518       change_column_default(table, new, old_col.default) if old_col.default
 519
 520       quoted_table = quote_table_name(table)
 521       quoted_old = quote_column_name(old)
 522       quoted_new = quote_column_name(new)
 523
 524       if MigrationHelpers.postgresql?
 525         install_rename_triggers_for_postgresql(trigger_name, quoted_table,
 526                                                quoted_old, quoted_new)
 527       else
 528         install_rename_triggers_for_mysql(trigger_name, quoted_table,
 529                                           quoted_old, quoted_new)
 530       end
 531
 532       update_column_in_batches(table, new, Arel::Table.new(table)[old])
 533
 534       change_column_null(table, new, false) unless old_col.null
 535
 536       copy_indexes(table, old, new)
 537       copy_foreign_keys(table, old, new)
 538     end
 539
 540     # Changes the type of a column concurrently.
 541     #
 542     # table - The table containing the column.
 543     # column - The name of the column to change.
 544     # new_type - The new column type.
 545     def change_column_type_concurrently(table, column, new_type)
 546       temp_column = rename_column_name(column)
 547
 548       rename_column_concurrently(table, column, temp_column, type: new_type)
 549
 550       # Primary keys don't necessarily have an associated index.
 551       if ActiveRecord::Base.get_primary_key(table) == column.to_s
 552         old_pk_index_name = "index_#{table}_on_#{column}"
 553         new_pk_index_name = "index_#{table}_on_#{column}_cm"
 554
 555         unless indexes_for(table, column).find{|i| i.name == old_pk_index_name}
 556           add_concurrent_index(table, [temp_column], {
 557             unique: true,
 558             name: new_pk_index_name
 559           })
 560         end
 561       end
 562     end
 563
    # Performs cleanup of a concurrent type change.
    #
    # This finishes the work started by `change_column_type_concurrently`: the
    # temporary column (named by `rename_column_name`) replaces the original
    # column, and indexes, foreign keys, the owning sequence and the primary
    # key are moved over to it.
    #
    # The catalogue queries and ALTER statements issued below are PostgreSQL
    # specific.
    #
    # table - The table containing the column.
    # column - The name of the column to change.
    def cleanup_concurrent_column_type_change(table, column)
      temp_column = rename_column_name(column)

      # Wait for the indices to be built: poll once per second until every
      # index of the original column has a counterpart with the "_cm" suffix
      # on the temporary column.
      indexes_for(table, column).each do |index|
        expected_name = index.name + '_cm'

        puts "Waiting for index #{expected_name}"
        sleep 1 until indexes_for(table, temp_column).find {|i| i.name == expected_name }
      end

      # Capture what we need from the old column before it is dropped.
      was_primary = (ActiveRecord::Base.get_primary_key(table) == column.to_s)
      old_default_fn = column_for(table, column).default_function

      old_fks = []
      if was_primary
        # Get any foreign keys pointing at this column we need to recreate, and
        # remove the old ones.
        # Based on code from:
        # http://errorbank.blogspot.com/2011/03/list-all-foreign-keys-references-for.html
        old_fks_res = execute <<-EOF.strip_heredoc
          select m.relname as src_table,
            (select a.attname
              from pg_attribute a
              where a.attrelid = m.oid
                and a.attnum = o.conkey[1]
                and a.attisdropped = false) as src_col,
            o.conname as name,
            o.confdeltype as on_delete
          from pg_constraint o
          left join pg_class f on f.oid = o.confrelid
          left join pg_class c on c.oid = o.conrelid
          left join pg_class m on m.oid = o.conrelid
          where o.contype = 'f'
            and o.conrelid in (
              select oid from pg_class c where c.relkind = 'r')
            and f.relname = '#{table}';
          EOF
        old_fks = old_fks_res.to_a
        old_fks.each do |old_fk|
          # Create the replacement key against the temporary column first,
          # then drop the key that referenced the old column.
          # NOTE(review): extract_foreign_key_action is defined outside this
          # section; presumably it maps pg_constraint.confdeltype codes to
          # on_delete actions - confirm.
          add_concurrent_foreign_key(
            old_fk['src_table'],
            table,
            column: old_fk['src_col'],
            target_col: temp_column,
            on_delete: extract_foreign_key_action(old_fk['on_delete'])
          )

          remove_foreign_key(old_fk['src_table'], name: old_fk['name'])
        end
      end

      # If there was a sequence owned by the old column, make it owned by the
      # new column, as it will otherwise be deleted when we get rid of the
      # old column.
      if (seq_match = /^nextval\('([^']*)'(::text|::regclass)?\)/.match(old_default_fn))
        seq_name = seq_match[1]
        execute("ALTER SEQUENCE #{seq_name} OWNED BY #{table}.#{temp_column}")
      end

      transaction do
        # This has to be performed in a transaction as otherwise we might have
        # inconsistent data.

        # Drops the sync triggers and the old column, then gives the
        # temporary column the original name.
        cleanup_concurrent_column_rename(table, column, temp_column)
        rename_column(table, temp_column, column)

        # If there was an old default function, we didn't copy it. Do that now
        # in the transaction, so we don't miss anything.
        change_column_default(table, column, -> { old_default_fn }) if old_default_fn
      end

      # Rename any indices back to what they should be.
      indexes_for(table, column).each do |index|
        next unless index.name.end_with?('_cm')

        real_index_name = index.name.sub(/_cm$/, '')
        rename_index(table, index.name, real_index_name)
      end

      # Rename any foreign keys back to names based on the real column.
      # The names are recomputed with a target column of 'id'.
      foreign_keys_for(table, column).each do |fk|
        old_fk_name = concurrent_foreign_key_name(fk.from_table, temp_column, 'id')
        new_fk_name = concurrent_foreign_key_name(fk.from_table, column, 'id')
        execute("ALTER TABLE #{fk.from_table} RENAME CONSTRAINT " +
          "#{old_fk_name} TO #{new_fk_name}")
      end

      # Rename any foreign keys from other tables to names based on the real
      # column.
      old_fks.each do |old_fk|
        old_fk_name = concurrent_foreign_key_name(old_fk['src_table'],
          old_fk['src_col'], temp_column)
        new_fk_name = concurrent_foreign_key_name(old_fk['src_table'],
          old_fk['src_col'], column)
        execute("ALTER TABLE #{old_fk['src_table']} RENAME CONSTRAINT " +
          "#{old_fk_name} TO #{new_fk_name}")
      end

      # If the old column was a primary key, mark the new one as a primary key.
      # This uses the index named "index_<table>_on_<column>", i.e. the name
      # the "_cm" index was renamed back to above.
      if was_primary
        execute("ALTER TABLE #{table} ADD PRIMARY KEY USING INDEX " +
          "index_#{table}_on_#{column}")
      end
    end
 674
 675     # Cleans up a concurrent column name.
 676     #
 677     # This method takes care of removing previously installed triggers as well
 678     # as removing the old column.
 679     #
 680     # table - The name of the database table.
 681     # old - The name of the old column.
 682     # new - The name of the new column.
 683     def cleanup_concurrent_column_rename(table, old, new)
 684       trigger_name = rename_trigger_name(table, old, new)
 685
 686       check_trigger_permissions!(table)
 687
 688       if MigrationHelpers.postgresql?
 689         remove_rename_triggers_for_postgresql(table, trigger_name)
 690       else
 691         remove_rename_triggers_for_mysql(trigger_name)
 692       end
 693
 694       remove_column(table, old)
 695     end
 696
 697     # Performs a concurrent column rename when using PostgreSQL.
 698     def install_rename_triggers_for_postgresql(trigger, table, old, new)
 699       execute <<-EOF.strip_heredoc
 700       CREATE OR REPLACE FUNCTION #{trigger}()
 701       RETURNS trigger AS
 702       $BODY$
 703       BEGIN
 704         NEW.#{new} := NEW.#{old};
 705         RETURN NEW;
 706       END;
 707       $BODY$
 708       LANGUAGE 'plpgsql'
 709       VOLATILE
 710       EOF
 711
 712       execute <<-EOF.strip_heredoc
 713       CREATE TRIGGER #{trigger}
 714       BEFORE INSERT OR UPDATE
 715       ON #{table}
 716       FOR EACH ROW
 717       EXECUTE PROCEDURE #{trigger}()
 718       EOF
 719     end
 720
 721     # Installs the triggers necessary to perform a concurrent column rename on
 722     # MySQL.
 723     def install_rename_triggers_for_mysql(trigger, table, old, new)
 724       execute <<-EOF.strip_heredoc
 725       CREATE TRIGGER #{trigger}_insert
 726       BEFORE INSERT
 727       ON #{table}
 728       FOR EACH ROW
 729       SET NEW.#{new} = NEW.#{old}
 730       EOF
 731
 732       execute <<-EOF.strip_heredoc
 733       CREATE TRIGGER #{trigger}_update
 734       BEFORE UPDATE
 735       ON #{table}
 736       FOR EACH ROW
 737       SET NEW.#{new} = NEW.#{old}
 738       EOF
 739     end
 740
 741     # Removes the triggers used for renaming a PostgreSQL column concurrently.
 742     def remove_rename_triggers_for_postgresql(table, trigger)
 743       execute("DROP TRIGGER IF EXISTS #{trigger} ON #{table}")
 744       execute("DROP FUNCTION IF EXISTS #{trigger}()")
 745     end
 746
 747     # Removes the triggers used for renaming a MySQL column concurrently.
 748     def remove_rename_triggers_for_mysql(trigger)
 749       execute("DROP TRIGGER IF EXISTS #{trigger}_insert")
 750       execute("DROP TRIGGER IF EXISTS #{trigger}_update")
 751     end
 752
 753     # Returns the (base) name to use for triggers when renaming columns.
 754     def rename_trigger_name(table, old, new)
 755       'trigger_' + Digest::SHA256.hexdigest("#{table}_#{old}_#{new}").first(12)
 756     end
 757
 758     # Returns the name to use for temporary rename columns.
 759     def rename_column_name(base)
 760       base.to_s + '_cm'
 761     end
 762
 763     # Returns an Array containing the indexes for the given column
 764     def indexes_for(table, column)
 765       column = column.to_s
 766
 767       indexes(table).select { |index| index.columns.include?(column) }
 768     end
 769
 770     # Returns an Array containing the foreign keys for the given column.
 771     def foreign_keys_for(table, column)
 772       column = column.to_s
 773
 774       foreign_keys(table).select { |fk| fk.column == column }
 775     end
 776
 777     # Copies all indexes for the old column to a new column.
 778     #
 779     # table - The table containing the columns and indexes.
 780     # old - The old column.
 781     # new - The new column.
 782     def copy_indexes(table, old, new)
 783       old = old.to_s
 784       new = new.to_s
 785
 786       indexes_for(table, old).each do |index|
 787         new_columns = index.columns.map do |column|
 788           column == old ? new : column
 789         end
 790
 791         # This is necessary as we can't properly rename indexes such as
 792         # "ci_taggings_idx".
 793         name = index.name + '_cm'
 794
 795         # If the order contained the old column, map it to the new one.
 796         order = index.orders
 797         if order.key?(old)
 798           order[new] = order.delete(old)
 799         end
 800
 801         options = {
 802           unique: index.unique,
 803           name: name,
 804           length: index.lengths,
 805           order: order
 806         }
 807
 808         # These options are not supported by MySQL, so we only add them if
 809         # they were previously set.
 810         options[:using] = index.using if index.using
 811         options[:where] = index.where if index.where
 812
 813         add_concurrent_index(table, new_columns, options)
 814       end
 815     end
 816
 817     # Copies all foreign keys for the old column to the new column.
 818     #
 819     # table - The table containing the columns and indexes.
 820     # old - The old column.
 821     # new - The new column.
 822     def copy_foreign_keys(table, old, new)
 823       foreign_keys_for(table, old).each do |fk|
 824         add_concurrent_foreign_key(fk.from_table,
 825                                    fk.to_table,
 826                                    column: new,
 827                                    on_delete: fk.on_delete)
 828       end
 829     end
 830
 831     # Returns the column for the given table and column name.
 832     def column_for(table, name)
 833       name = name.to_s
 834
 835       columns(table).find { |column| column.name == name }
 836     end
 837
 838     # This will replace the first occurance of a string in a column with
 839     # the replacement
 840     # On postgresql we can use `regexp_replace` for that.
 841     # On mysql we find the location of the pattern, and overwrite it
 842     # with the replacement
 843     def replace_sql(column, pattern, replacement)
 844       quoted_pattern = Arel::Nodes::Quoted.new(pattern.to_s)
 845       quoted_replacement = Arel::Nodes::Quoted.new(replacement.to_s)
 846
 847       if MigrationHelpers.mysql?
 848         locate = Arel::Nodes::NamedFunction
 849           .new('locate', [quoted_pattern, column])
 850         insert_in_place = Arel::Nodes::NamedFunction
 851           .new('insert', [column, locate, pattern.size, quoted_replacement])
 852
 853         Arel::Nodes::SqlLiteral.new(insert_in_place.to_sql)
 854       else
 855         replace = Arel::Nodes::NamedFunction
 856           .new("regexp_replace", [column, quoted_pattern, quoted_replacement])
 857         Arel::Nodes::SqlLiteral.new(replace.to_sql)
 858       end
 859     end
 860
 861     def remove_foreign_key_without_error(*args)
 862       remove_foreign_key(*args)
 863     rescue ArgumentError
 864     end
 865
 866     def sidekiq_queue_migrate(queue_from, to:)
 867       while sidekiq_queue_length(queue_from) > 0
 868         Sidekiq.redis do |conn|
 869           conn.rpoplpush "queue:#{queue_from}", "queue:#{to}"
 870         end
 871       end
 872     end
 873
 874     def sidekiq_queue_length(queue_name)
 875       Sidekiq.redis do |conn|
 876         conn.llen("queue:#{queue_name}")
 877       end
 878     end
 879
 880     def check_trigger_permissions!(table)
 881       unless Grant.create_and_execute_trigger?(table)
 882         dbname = ActiveRecord::Base.configurations[Rails.env]['database']
 883         user = ActiveRecord::Base.configurations[Rails.env]['username'] || ENV['USER']
 884
 885         raise <<-EOF
 886 Your database user is not allowed to create, drop, or execute triggers on the
 887 table #{table}.
 888
 889 If you are using PostgreSQL you can solve this by logging in to the GitLab
 890 database (#{dbname}) using a super user and running:
 891
 892     ALTER #{user} WITH SUPERUSER
 893
 894 For MySQL you instead need to run:
 895
 896     GRANT ALL PRIVILEGES ON *.* TO #{user}@'%'
 897
 898 Both queries will grant the user super user permissions, ensuring you don't run
 899 into similar problems in the future (e.g. when new tables are created).
 900         EOF
 901       end
 902     end
 903
 904     # Bulk queues background migration jobs for an entire table, batched by ID range.
 905     # "Bulk" meaning many jobs will be pushed at a time for efficiency.
 906     # If you need a delay interval per job, then use `queue_background_migration_jobs_by_range_at_intervals`.
 907     #
 908     # model_class - The table being iterated over
 909     # job_class_name - The background migration job class as a string
 910     # batch_size - The maximum number of rows per job
 911     #
 912     # Example:
 913     #
 914     #     class Route < ActiveRecord::Base
 915     #       include EachBatch
 916     #       self.table_name = 'routes'
 917     #     end
 918     #
 919     #     bulk_queue_background_migration_jobs_by_range(Route, 'ProcessRoutes')
 920     #
 921     # Where the model_class includes EachBatch, and the background migration exists:
 922     #
 923     #     class Gitlab::BackgroundMigration::ProcessRoutes
 924     #       def perform(start_id, end_id)
 925     #         # do something
 926     #       end
 927     #     end
 928     def bulk_queue_background_migration_jobs_by_range(model_class, job_class_name, batch_size: BACKGROUND_MIGRATION_BATCH_SIZE)
 929       raise "#{model_class} does not have an ID to use for batch ranges" unless model_class.column_names.include?('id')
 930
 931       jobs = []
 932
 933       model_class.each_batch(of: batch_size) do |relation|
 934         start_id, end_id = relation.pluck('MIN(id), MAX(id)').first
 935
 936         if jobs.length >= BACKGROUND_MIGRATION_JOB_BUFFER_SIZE
 937           # Note: This code path generally only helps with many millions of rows
 938           # We push multiple jobs at a time to reduce the time spent in
 939           # Sidekiq/Redis operations. We're using this buffer based approach so we
 940           # don't need to run additional queries for every range.
 941           BackgroundMigrationWorker.perform_bulk(jobs)
 942           jobs.clear
 943         end
 944
 945         jobs << [job_class_name, [start_id, end_id]]
 946       end
 947
 948       BackgroundMigrationWorker.perform_bulk(jobs) unless jobs.empty?
 949     end
 950
 951     # Queues background migration jobs for an entire table, batched by ID range.
 952     # Each job is scheduled with a `delay_interval` in between.
 953     # If you use a small interval, then some jobs may run at the same time.
 954     #
 955     # model_class - The table being iterated over
 956     # job_class_name - The background migration job class as a string
 957     # delay_interval - The duration between each job's scheduled time (must respond to `to_f`)
 958     # batch_size - The maximum number of rows per job
 959     #
 960     # Example:
 961     #
 962     #     class Route < ActiveRecord::Base
 963     #       include EachBatch
 964     #       self.table_name = 'routes'
 965     #     end
 966     #
 967     #     queue_background_migration_jobs_by_range_at_intervals(Route, 'ProcessRoutes', 1.minute)
 968     #
 969     # Where the model_class includes EachBatch, and the background migration exists:
 970     #
 971     #     class Gitlab::BackgroundMigration::ProcessRoutes
 972     #       def perform(start_id, end_id)
 973     #         # do something
 974     #       end
 975     #     end
 976     def queue_background_migration_jobs_by_range_at_intervals(model_class, job_class_name, delay_interval, batch_size: BACKGROUND_MIGRATION_BATCH_SIZE)
 977       raise "#{model_class} does not have an ID to use for batch ranges" unless model_class.column_names.include?('id')
 978
 979       model_class.each_batch(of: batch_size) do |relation, index|
 980         start_id, end_id = relation.pluck('MIN(id), MAX(id)').first
 981
 982         # `BackgroundMigrationWorker.bulk_perform_in` schedules all jobs for
 983         # the same time, which is not helpful in most cases where we wish to
 984         # spread the work over time.
 985         BackgroundMigrationWorker.perform_in(delay_interval * index, job_class_name, [start_id, end_id])
 986       end
 987     end
 988
 989     private
 990
 991     # https://github.com/rails/rails/blob/v5.2.0/activerecord/lib/active_record/connection_adapters/postgresql/schema_statements.rb#L678-L684
 992     def extract_foreign_key_action(specifier)
 993       case specifier
 994       when 'c'; :cascade
 995       when 'n'; :nullify
 996       when 'r'; :restrict
 997       end
 998     end
 999   end
1000 end
1001
1002 # rubocop:enable all