Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

db-dump: set sequence values when importing a database dump #10204

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 11 additions & 2 deletions crates/crates_io_database_dump/src/configuration.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
use serde::Deserialize;
use serde::{Deserialize, Serialize};
use std::collections::{BTreeMap, VecDeque};

/// An enum indicating whether a column is included in the database dumps.
Expand All @@ -15,7 +15,9 @@ pub enum ColumnVisibility {
/// and should list all tables the current tables refers to with foreign key
/// constraints on public columns. The `filter` field is a valid SQL expression
/// used in a `WHERE` clause to filter the rows of the table. The `columns`
/// field maps column names to their respective visibilities.
/// field maps column names to their respective visibilities. The `sequence`
/// field, if present, defines the sequence used by the table when generating
/// IDs, along with the ID column.
#[derive(Clone, Debug, Default, Deserialize)]
pub struct TableConfig {
#[serde(default)]
Expand All @@ -24,6 +26,13 @@ pub struct TableConfig {
pub columns: BTreeMap<String, ColumnVisibility>,
#[serde(default)]
pub column_defaults: BTreeMap<String, String>,
pub sequence: Option<SequenceConfig>,
}

/// Identifies the database sequence that generates IDs for a table.
///
/// Loaded from the optional `[<table>.sequence]` section of the dump
/// configuration and serialized into the import script template, which uses
/// it to advance the sequence past the largest existing value in `column`.
#[derive(Clone, Debug, Default, Deserialize, Serialize)]
pub struct SequenceConfig {
/// Name of the ID column whose values come from the sequence (e.g. `id`).
pub column: String,
/// Name of the sequence itself (e.g. `categories_id_seq`).
pub name: String,
}

/// Maps table names to the respective configurations. Used to load `dump_db.toml`.
Expand Down
21 changes: 21 additions & 0 deletions crates/crates_io_database_dump/src/dump-db.toml
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,9 @@ description = "public"
crates_cnt = "public"
created_at = "public"
path = "public"
[categories.sequence]
column = "id"
name = "categories_id_seq"
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

instead of manually declaring them here, would it be possible to derive them from the database schema in some way?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is doable as follows, which is modified from https://stackoverflow.com/a/55414721:

-- Lists every sequence together with the table and column whose default
-- expression depends on it (one row per table/column/sequence triple).
select tbl.relname as table_name, 
       col.attname as column_name,
       s.relname as sequence_name
from pg_class s
  join pg_namespace sn on sn.oid = s.relnamespace 
  -- Dependency rows whose referenced object is the sequence ...
  join pg_depend d on d.refobjid = s.oid and d.refclassid='pg_class'::regclass 
  -- ... and whose dependent object is a column default (pg_attrdef entry).
  join pg_attrdef ad on ad.oid = d.objid and d.classid = 'pg_attrdef'::regclass
  -- Resolve the column default back to its column and owning table.
  join pg_attribute col on col.attrelid = ad.adrelid and col.attnum = ad.adnum
  join pg_class tbl on tbl.oid = ad.adrelid 
  join pg_namespace ts on ts.oid = tbl.relnamespace 
-- relkind 'S' restricts the pg_class rows aliased as `s` to sequences.
where s.relkind = 'S'
-- Uncomment to restrict the result to a single sequence:
--  and s.relname = 'your_sequence_name_here'
  -- Keep automatic ('a') and normal ('n') dependencies only.
  and d.deptype in ('a', 'n');

From the result, you should see something similar to the following:

      table_name       | column_name |        sequence_name         
-----------------------+-------------+------------------------------
 api_tokens            | id          | api_tokens_id_seq
 background_jobs       | id          | background_jobs_id_seq
 categories            | id          | categories_id_seq
 crates                | id          | packages_id_seq
 deleted_crates        | id          | deleted_crates_id_seq
 dependencies          | id          | dependencies_id_seq
 emails                | id          | emails_id_seq
 keywords              | id          | keywords_id_seq
 teams                 | id          | teams_id_seq
 users                 | id          | users_id_seq
 version_owner_actions | id          | version_owner_actions_id_seq
 versions              | id          | versions_id_seq
(12 rows)


[crate_downloads.columns]
crate_id = "public"
Expand Down Expand Up @@ -87,6 +90,9 @@ textsearchable_index_col = "private" # This Postgres specific and can be derived
repository = "public"
max_upload_size = "public"
max_features = "public"
[crates.sequence]
column = "id"
name = "packages_id_seq"

[crates_categories]
dependencies = ["categories", "crates"]
Expand Down Expand Up @@ -130,6 +136,9 @@ features = "public"
target = "public"
kind = "public"
explicit_name = "public"
[dependencies.sequence]
column = "id"
name = "dependencies_id_seq"

[__diesel_schema_migrations.columns]
version = "private"
Expand All @@ -152,6 +161,9 @@ id = "public"
keyword = "public"
crates_cnt = "public"
created_at = "public"
[keywords.sequence]
column = "id"
name = "keywords_id_seq"

[metadata.columns]
total_downloads = "public"
Expand Down Expand Up @@ -186,6 +198,9 @@ github_id = "public"
name = "public"
avatar = "public"
org_id = "public"
[teams.sequence]
column = "id"
name = "teams_id_seq"

[users]
filter = """
Expand All @@ -207,6 +222,9 @@ is_admin = "private"
publish_notifications = "private"
[users.column_defaults]
gh_access_token = "''"
[users.sequence]
column = "id"
name = "users_id_seq"

[version_downloads]
dependencies = ["versions"]
Expand Down Expand Up @@ -253,6 +271,9 @@ documentation = "public"
repository = "public"
categories = "public"
keywords = "public"
[versions.sequence]
column = "id"
name = "versions_id_seq"

[versions_published_by.columns]
version_id = "private"
Expand Down
13 changes: 13 additions & 0 deletions crates/crates_io_database_dump/src/dump-import.sql.j2
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,19 @@ BEGIN;
{% for cd in table.column_defaults %}
ALTER TABLE "{{table.name}}" ALTER COLUMN "{{cd.column}}" DROP DEFAULT;
{%- endfor %}
{%- endfor %}

-- Set sequence values.
{% for table in tables -%}
{% if table.sequence %}
-- Restart the sequence just past the largest imported ID. Passing `false` as
-- the third argument (is_called) makes the next `nextval` return exactly the
-- value set here, so an empty table starts at 1 instead of skipping to 2.
SELECT setval(
    '{{table.sequence.name}}',
    COALESCE(
        (SELECT MAX("{{table.sequence.column}}")::BIGINT + 1 FROM "{{table.name}}"),
        1
    ),
    false
);
{% endif %}
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this needs an update of the corresponding test snapshot :)

{%- endfor %}

-- Reenable triggers on each table.
Expand Down
4 changes: 3 additions & 1 deletion crates/crates_io_database_dump/src/gen_scripts.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
use crate::configuration::{ColumnVisibility, TableConfig, VisibilityConfig};
use crate::configuration::{ColumnVisibility, SequenceConfig, TableConfig, VisibilityConfig};
use anyhow::Context;
use serde::Serialize;
use std::{fs::File, path::Path};
Expand All @@ -18,6 +18,7 @@ struct HandlebarsTableContext<'a> {
filter: Option<String>,
columns: String,
column_defaults: Vec<ColumnDefault<'a>>,
sequence: Option<&'a SequenceConfig>,
}

#[derive(Debug, Serialize)]
Expand Down Expand Up @@ -52,6 +53,7 @@ impl TableConfig {
filter,
columns,
column_defaults,
sequence: self.sequence.as_ref(),
})
}
}
Expand Down
Loading