Add missing indexes to duplicated columns (#570)
Previously, when an indexed column was duplicated, its indexes were lost
during the duplication process. This PR adds the missing indexes to the
duplicated columns.
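
For background, pgroll applies this kind of `alter_column` by adding a temporary duplicate of the column (named `_pgroll_new_<column>`) and keeping it in sync with triggers until the migration is completed. A simplified sketch of the duplication step (assumed for illustration, not the exact SQL pgroll runs):

```
-- Simplified sketch of column duplication (assumed, not pgroll's exact SQL):
-- a temporary copy of the column is added and backfilled via triggers.
ALTER TABLE "fruits" ADD COLUMN "_pgroll_new_name" varchar(255);
-- Before this fix, secondary indexes on "name" were not recreated on
-- "_pgroll_new_name", so they were lost once the migration completed and the
-- temporary column replaced the original.
```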

The example I added removes the NOT NULL constraint from a column that has a
hash index with a custom fillfactor.
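
The index in question could have been created with DDL along these lines (an assumed reconstruction; the actual index is set up by the earlier example migrations):

```
-- Assumed DDL for the example's hash index with a custom fillfactor:
CREATE INDEX idx_fruits_name ON fruits USING hash (name) WITH (fillfactor = 70);
```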

The schema of the table before starting the migration:

```
postgres=# \d fruits
                                          Table "public.fruits"
 Column |          Type          | Collation | Nullable |                    Default
--------+------------------------+-----------+----------+------------------------------------------------
 id     | integer                |           | not null | nextval('_pgroll_new_fruits_id_seq'::regclass)
 name   | character varying(255) |           | not null |
 size   | fruit_size             |           | not null | 'small'::fruit_size
Indexes:
    "_pgroll_new_fruits_pkey" PRIMARY KEY, btree (id) REPLICA IDENTITY
    "_pgroll_new_fruits_name_key" UNIQUE CONSTRAINT, btree (name)
    "idx_fruits_id_gt_10" btree (id) WHERE id > 10
    "idx_fruits_name" hash (name) WITH (fillfactor='70')
    "idx_fruits_unique_name" UNIQUE, btree (name)
```

Without the fix in my PR, starting the example migration produces an
incorrect result. Notice that the duplicated column `_pgroll_new_name` does
not have a hash index:

```
postgres=# \d fruits
                                               Table "public.fruits"
      Column      |          Type          | Collation | Nullable |                    Default
------------------+------------------------+-----------+----------+------------------------------------------------
 id               | integer                |           | not null | nextval('_pgroll_new_fruits_id_seq'::regclass)
 name             | character varying(255) |           | not null |
 size             | fruit_size             |           | not null | 'small'::fruit_size
 _pgroll_new_name | character varying(255) |           |          |
Indexes:
    "_pgroll_new_fruits_pkey" PRIMARY KEY, btree (id) REPLICA IDENTITY
    "_pgroll_dup__pgroll_new_fruits_name_key" UNIQUE, btree (_pgroll_new_name)
    "_pgroll_new_fruits_name_key" UNIQUE CONSTRAINT, btree (name)
    "idx_fruits_id_gt_10" btree (id) WHERE id > 10
    "idx_fruits_name" hash (name) WITH (fillfactor='70')
    "idx_fruits_unique_name" UNIQUE, btree (name)
Triggers:
    _pgroll_trigger_fruits__pgroll_new_name BEFORE INSERT OR UPDATE ON fruits FOR EACH ROW EXECUTE FUNCTION _pgroll_trigger_fruits__pgroll_new_name()
    _pgroll_trigger_fruits_name BEFORE INSERT OR UPDATE ON fruits FOR EACH ROW EXECUTE FUNCTION _pgroll_trigger_fruits_name()
```

After completing the migration, the hash index on `name` disappears:

```
postgres=# \d fruits
                                          Table "public.fruits"
 Column |          Type          | Collation | Nullable |                    Default
--------+------------------------+-----------+----------+------------------------------------------------
 id     | integer                |           | not null | nextval('_pgroll_new_fruits_id_seq'::regclass)
 size   | fruit_size             |           | not null | 'small'::fruit_size
 name   | character varying(255) |           |          |
Indexes:
    "_pgroll_new_fruits_pkey" PRIMARY KEY, btree (id) REPLICA IDENTITY
    "_pgroll_new_fruits_name_key" UNIQUE CONSTRAINT, btree (name)
    "idx_fruits_id_gt_10" btree (id) WHERE id > 10
```

With my change, starting the migration creates the missing hash index on
the duplicated column. See the `_pgroll_dup_idx_fruits_name` index on
`_pgroll_new_name`:

```
postgres=# \d fruits
                                               Table "public.fruits"
      Column      |          Type          | Collation | Nullable |                    Default
------------------+------------------------+-----------+----------+------------------------------------------------
 id               | integer                |           | not null | nextval('_pgroll_new_fruits_id_seq'::regclass)
 name             | character varying(255) |           | not null |
 size             | fruit_size             |           | not null | 'small'::fruit_size
 _pgroll_new_name | character varying(255) |           |          |
Indexes:
    "_pgroll_new_fruits_pkey" PRIMARY KEY, btree (id) REPLICA IDENTITY
    "_pgroll_dup__pgroll_new_fruits_name_key" UNIQUE, btree (_pgroll_new_name)
    "_pgroll_dup_idx_fruits_name" hash (_pgroll_new_name) WITH (fillfactor='70')
    "_pgroll_dup_idx_fruits_unique_name" UNIQUE, btree (_pgroll_new_name)
    "_pgroll_new_fruits_name_key" UNIQUE CONSTRAINT, btree (name)
    "idx_fruits_id_gt_10" btree (id) WHERE id > 10
    "idx_fruits_name" hash (name) WITH (fillfactor='70')
    "idx_fruits_unique_name" UNIQUE, btree (name)
Triggers:
    _pgroll_trigger_fruits__pgroll_new_name BEFORE INSERT OR UPDATE ON fruits FOR EACH ROW EXECUTE FUNCTION _pgroll_trigger_fruits__pgroll_new_name()
    _pgroll_trigger_fruits_name BEFORE INSERT OR UPDATE ON fruits FOR EACH ROW EXECUTE FUNCTION _pgroll_trigger_fruits_name()
```
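
Based on the new `duplicateIndexes` statement builder (see the `duplicate.go` diff below), the statements issued for this example when the migration starts should look roughly as follows (reconstructed by hand; exact quoting and ordering may differ):

```
-- Approximate statements generated by duplicateIndexes for this example:
CREATE INDEX CONCURRENTLY "_pgroll_dup_idx_fruits_name"
  ON "fruits" USING hash ("_pgroll_new_name") WITH (fillfactor='70');
CREATE UNIQUE INDEX CONCURRENTLY "_pgroll_dup_idx_fruits_unique_name"
  ON "fruits" USING btree ("_pgroll_new_name");
```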

Completing the migration preserves the index on `name` and removes the
NOT NULL constraint:

```
postgres=# \d fruits
                                          Table "public.fruits"
 Column |          Type          | Collation | Nullable |                    Default
--------+------------------------+-----------+----------+------------------------------------------------
 id     | integer                |           | not null | nextval('_pgroll_new_fruits_id_seq'::regclass)
 size   | fruit_size             |           | not null | 'small'::fruit_size
 name   | character varying(255) |           |          |
Indexes:
    "_pgroll_new_fruits_pkey" PRIMARY KEY, btree (id) REPLICA IDENTITY
    "_pgroll_new_fruits_name_key" UNIQUE CONSTRAINT, btree (name)
    "idx_fruits_id_gt_10" btree (id) WHERE id > 10
    "idx_fruits_name" hash (name) WITH (fillfactor='70')
    "idx_fruits_unique_name" UNIQUE CONSTRAINT, btree (name)
```
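
When the migration is completed, the duplicated index is renamed back to the original index name by stripping the duplication prefix (handled by the `rename.go` change below); the effect is roughly:

```
-- Approximate effect of completion on the duplicated index (the actual
-- statement is built from cRenameIndexSQL in rename.go):
ALTER INDEX "_pgroll_dup_idx_fruits_name" RENAME TO "idx_fruits_name";
```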

Related to #227

---------

Co-authored-by: Andrew Farries <[email protected]>
kvch and andrew-farries authored Jan 8, 2025
1 parent 952fb23 commit 67f4a08
Showing 5 changed files with 137 additions and 16 deletions.
1 change: 1 addition & 0 deletions examples/.ledger
@@ -46,3 +46,4 @@
46_alter_column_drop_default.json
47_add_table_foreign_key_constraint.json
48_drop_tickets_check.json
49_unset_not_null_on_indexed_column.json
14 changes: 14 additions & 0 deletions examples/49_unset_not_null_on_indexed_column.json
@@ -0,0 +1,14 @@
{
  "name": "49_unset_not_null_on_indexed_column",
  "operations": [
    {
      "alter_column": {
        "table": "fruits",
        "column": "name",
        "nullable": true,
        "up": "SELECT CASE WHEN name IS NULL THEN 'unknown fruit' ELSE name END",
        "down": "name"
      }
    }
  ]
}
46 changes: 46 additions & 0 deletions pkg/migrations/duplicate.go
@@ -147,6 +147,13 @@ func (d *Duplicator) Duplicate(ctx context.Context) error {
        }
    }

    // Generate SQL to duplicate any indexes on the columns.
    for _, sql := range d.stmtBuilder.duplicateIndexes(d.withoutConstraint, colNames...) {
        if _, err := d.conn.ExecContext(ctx, sql); err != nil {
            return err
        }
    }

    return nil
}

@@ -204,6 +211,45 @@ func (d *duplicatorStmtBuilder) duplicateForeignKeyConstraints(withoutConstraint
    return stmts
}

func (d *duplicatorStmtBuilder) duplicateIndexes(withoutConstraint []string, colNames ...string) []string {
    stmts := make([]string, 0, len(d.table.Indexes))
    for _, idx := range d.table.Indexes {
        if slices.Contains(withoutConstraint, idx.Name) {
            continue
        }
        if _, ok := d.table.UniqueConstraints[idx.Name]; ok && idx.Unique {
            // unique constraints are duplicated as unique indexes
            continue
        }

        if duplicatedMember, columns := d.allConstraintColumns(idx.Columns, colNames...); duplicatedMember {
            stmtFmt := "CREATE INDEX CONCURRENTLY %s ON %s"
            if idx.Unique {
                stmtFmt = "CREATE UNIQUE INDEX CONCURRENTLY %s ON %s"
            }
            stmt := fmt.Sprintf(stmtFmt, pq.QuoteIdentifier(DuplicationName(idx.Name)), pq.QuoteIdentifier(d.table.Name))
            if idx.Method != "" {
                stmt += fmt.Sprintf(" USING %s", string(idx.Method))
            }

            stmt += fmt.Sprintf(" (%s)", strings.Join(quoteColumnNames(columns), ", "))

            if storageParamStart := strings.Index(idx.Definition, " WITH ("); storageParamStart != -1 {
                end := strings.Index(idx.Definition[storageParamStart:], ")")
                stmt += idx.Definition[storageParamStart : storageParamStart+end+1]
            }

            if idx.Predicate != nil {
                pred := strings.Replace(*idx.Predicate, strings.Join(idx.Columns, ", "), strings.Join(quoteColumnNames(columns), ", "), 1)
                stmt += fmt.Sprintf(" WHERE %s", pred)
            }

            stmts = append(stmts, stmt)
        }
    }
    return stmts
}

// duplicatedConstraintColumns returns a new slice of constraint columns with
// the columns that are duplicated replaced with temporary names.
func (d *duplicatorStmtBuilder) duplicatedConstraintColumns(constraintColumns []string, duplicatedColumns ...string) []string {
60 changes: 60 additions & 0 deletions pkg/migrations/duplicate_test.go
@@ -36,6 +36,23 @@ var table = &schema.Table{
        "fk_city":      {Name: "fk_city", Columns: []string{"city"}, ReferencedTable: "cities", ReferencedColumns: []string{"id"}, OnDelete: "NO ACTION"},
        "fk_name_nick": {Name: "fk_name_nick", Columns: []string{"name", "nick"}, ReferencedTable: "users", ReferencedColumns: []string{"name", "nick"}, OnDelete: "CASCADE"},
    },
    Indexes: map[string]schema.Index{
        "idx_no_kate": {
            Name:       "idx_no_kate",
            Columns:    []string{"name"},
            Definition: `CREATE INDEX "idx_name" ON "test_table" USING hash ("name") WITH (fillfactor = 70) WHERE "name" != 'Kate'`,
            Predicate:  ptr("name != 'Kate'"),
            Method:     "hash",
        },
        "idx_email": {
            Name:    "idx_email",
            Columns: []string{"email"},
        },
        "idx_name_city": {
            Name:    "idx_name_city",
            Columns: []string{"name", "city"},
        },
    },
}

func TestDuplicateStmtBuilderCheckConstraints(t *testing.T) {
@@ -174,3 +191,46 @@ func TestDuplicateStmtBuilderForeignKeyConstraints(t *testing.T) {
        })
    }
}

func TestDuplicateStmtBuilderIndexes(t *testing.T) {
    d := &duplicatorStmtBuilder{table}
    for name, testCases := range map[string]struct {
        columns       []string
        expectedStmts []string
    }{
        "single column duplicated": {
            columns:       []string{"nick"},
            expectedStmts: []string{},
        },
        "single-column index with single column duplicated": {
            columns:       []string{"email"},
            expectedStmts: []string{`CREATE INDEX CONCURRENTLY "_pgroll_dup_idx_email" ON "test_table" ("_pgroll_new_email")`},
        },
        "single-column index with multiple column duplicated": {
            columns:       []string{"email", "description"},
            expectedStmts: []string{`CREATE INDEX CONCURRENTLY "_pgroll_dup_idx_email" ON "test_table" ("_pgroll_new_email")`},
        },
        "multi-column index with single column duplicated": {
            columns:       []string{"name"},
            expectedStmts: []string{`CREATE INDEX CONCURRENTLY "_pgroll_dup_idx_name_city" ON "test_table" ("_pgroll_new_name", "city")`, `CREATE INDEX CONCURRENTLY "_pgroll_dup_idx_no_kate" ON "test_table" USING hash ("_pgroll_new_name") WITH (fillfactor = 70) WHERE "_pgroll_new_name" != 'Kate'`},
        },
        "multi-column index with multiple unrelated column duplicated": {
            columns:       []string{"name", "description"},
            expectedStmts: []string{`CREATE INDEX CONCURRENTLY "_pgroll_dup_idx_name_city" ON "test_table" ("_pgroll_new_name", "city")`, `CREATE INDEX CONCURRENTLY "_pgroll_dup_idx_no_kate" ON "test_table" USING hash ("_pgroll_new_name") WITH (fillfactor = 70) WHERE "_pgroll_new_name" != 'Kate'`},
        },
        "multi-column index with multiple columns": {
            columns:       []string{"name", "city"},
            expectedStmts: []string{`CREATE INDEX CONCURRENTLY "_pgroll_dup_idx_name_city" ON "test_table" ("_pgroll_new_name", "_pgroll_new_city")`, `CREATE INDEX CONCURRENTLY "_pgroll_dup_idx_no_kate" ON "test_table" USING hash ("_pgroll_new_name") WITH (fillfactor = 70) WHERE "_pgroll_new_name" != 'Kate'`},
        },
    } {
        t.Run(name, func(t *testing.T) {
            stmts := d.duplicateIndexes(nil, testCases.columns...)
            assert.Equal(t, len(testCases.expectedStmts), len(stmts))
            for _, stmt := range stmts {
                assert.True(t, slices.Contains(testCases.expectedStmts, stmt))
            }
        })
    }
}

func ptr[T any](x T) *T { return &x }
32 changes: 16 additions & 16 deletions pkg/migrations/rename.go
@@ -120,42 +120,42 @@ func RenameDuplicatedColumn(ctx context.Context, conn db.DB, table *schema.Table
         }
     }
 
-    // Rename any `UNIQUE` indexes on the duplicated column and use them to
+    // Rename any indexes on the duplicated column and use unique indexes to
     // create `UNIQUE` constraints.
-    for _, ui := range table.Indexes {
-        if !IsDuplicatedName(ui.Name) {
-            continue
-        }
-        if !ui.Unique {
+    for _, idx := range table.Indexes {
+        if !IsDuplicatedName(idx.Name) {
             continue
         }
 
-        if slices.Contains(ui.Columns, TemporaryName(column.Name)) {
-            // Rename the unique index to its original name
+        if slices.Contains(idx.Columns, TemporaryName(column.Name)) {
+            // Rename the index to its original name
             renameIndexSQL := fmt.Sprintf(cRenameIndexSQL,
-                pq.QuoteIdentifier(ui.Name),
-                pq.QuoteIdentifier(StripDuplicationPrefix(ui.Name)),
+                pq.QuoteIdentifier(idx.Name),
+                pq.QuoteIdentifier(StripDuplicationPrefix(idx.Name)),
             )
 
             _, err = conn.ExecContext(ctx, renameIndexSQL)
             if err != nil {
-                return fmt.Errorf("failed to rename unique index %q: %w", ui.Name, err)
+                return fmt.Errorf("failed to rename index %q: %w", idx.Name, err)
             }
+        }
 
+        if _, ok := table.UniqueConstraints[StripDuplicationPrefix(idx.Name)]; idx.Unique && ok {
            // Create a unique constraint using the unique index
             createUniqueConstraintSQL := fmt.Sprintf(cCreateUniqueConstraintSQL,
                 pq.QuoteIdentifier(table.Name),
-                pq.QuoteIdentifier(StripDuplicationPrefix(ui.Name)),
-                pq.QuoteIdentifier(StripDuplicationPrefix(ui.Name)),
+                pq.QuoteIdentifier(StripDuplicationPrefix(idx.Name)),
+                pq.QuoteIdentifier(StripDuplicationPrefix(idx.Name)),
             )
 
             _, err = conn.ExecContext(ctx, createUniqueConstraintSQL)
             if err != nil {
-                return fmt.Errorf("failed to create unique constraint from index %q: %w", ui.Name, err)
+                return fmt.Errorf("failed to create unique constraint from index %q: %w", idx.Name, err)
             }
-            // Index no longer exists, remove it from the table
-            delete(table.Indexes, ui.Name)
         }
+
+        // Index no longer exists, remove it from the table
+        delete(table.Indexes, idx.Name)
     }
 
     return nil