Skip to content

Commit 5420515

Browse files
committed
Auto merge of #2073 - jtgeibel:prod/read-only-replica, r=sgrif
Use the read-only replica for some queries in production Now that we have a read-only replica of the database, we can offload some read-only queries to it. This commit series updates the `Config` and `App` structs to support a primary database and and optional read-only replica. If the `READ_ONLY_REPLICA_URL` environment variable is set, a database pool of read-only connections will be initialized. If the variable is not set, converted endpoints will fall back to the primary database pool (and are given read/write connections). Tests do not configure a read-only replica and each test still operates within a single transaction. Potential over-use of the read-only pool (in an endpoint that needs write access) can be detected in staging, by configuring `READ_ONLY_REPLICA_URL` to the same value as `DATABASE_URL`. Current set of Endpoints: * `/crates/:crate_id/downloads` and `/crates/:crate_id/:version/downloads` * Search endpoint * Summary endpoint for front page * `/crates/:crate_id` * `/crates/:crate_id/reverse_dependencies` * `/crates/:crate_id/versions` r? @sgrif
2 parents 9cc23fa + c503489 commit 5420515

File tree

13 files changed

+109
-47
lines changed

13 files changed

+109
-47
lines changed

src/app.rs

Lines changed: 34 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -13,8 +13,11 @@ use scheduled_thread_pool::ScheduledThreadPool;
1313
// The db, oauth, and git2 types don't implement debug.
1414
#[allow(missing_debug_implementations)]
1515
pub struct App {
16-
/// The database connection pool
17-
pub diesel_database: db::DieselPool,
16+
/// The primary database connection pool
17+
pub primary_database: db::DieselPool,
18+
19+
/// The read-only replica database connection pool
20+
pub read_only_replica_database: Option<db::DieselPool>,
1821

1922
/// The GitHub OAuth2 configuration
2023
pub github: BasicClient,
@@ -78,29 +81,53 @@ impl App {
7881
_ => 1,
7982
};
8083

84+
// Used as the connection and statement timeout value for the database pool(s)
8185
let db_connection_timeout = match (dotenv::var("DB_TIMEOUT"), config.env) {
8286
(Ok(num), _) => num.parse().expect("couldn't parse DB_TIMEOUT"),
8387
(_, Env::Production) => 10,
8488
(_, Env::Test) => 1,
8589
_ => 30,
8690
};
91+
92+
// Determine if the primary pool is also read-only
8793
let read_only_mode = dotenv::var("READ_ONLY_MODE").is_ok();
88-
let connection_config = db::ConnectionConfig {
94+
let primary_db_connection_config = db::ConnectionConfig {
8995
statement_timeout: db_connection_timeout,
9096
read_only: read_only_mode,
9197
};
9298

9399
let thread_pool = Arc::new(ScheduledThreadPool::new(db_helper_threads));
94100

95-
let diesel_db_config = r2d2::Pool::builder()
101+
let primary_db_config = r2d2::Pool::builder()
96102
.max_size(db_pool_size)
97103
.min_idle(db_min_idle)
98104
.connection_timeout(Duration::from_secs(db_connection_timeout))
99-
.connection_customizer(Box::new(connection_config))
100-
.thread_pool(thread_pool);
105+
.connection_customizer(Box::new(primary_db_connection_config))
106+
.thread_pool(thread_pool.clone());
107+
108+
let primary_database = db::diesel_pool(&config.db_url, config.env, primary_db_config);
109+
110+
let read_only_replica_database = if let Some(url) = &config.replica_db_url {
111+
let replica_db_connection_config = db::ConnectionConfig {
112+
statement_timeout: db_connection_timeout,
113+
read_only: true,
114+
};
115+
116+
let replica_db_config = r2d2::Pool::builder()
117+
.max_size(db_pool_size)
118+
.min_idle(db_min_idle)
119+
.connection_timeout(Duration::from_secs(db_connection_timeout))
120+
.connection_customizer(Box::new(replica_db_connection_config))
121+
.thread_pool(thread_pool);
122+
123+
Some(db::diesel_pool(&url, config.env, replica_db_config))
124+
} else {
125+
None
126+
};
101127

102128
App {
103-
diesel_database: db::diesel_pool(&config.db_url, config.env, diesel_db_config),
129+
primary_database,
130+
read_only_replica_database,
104131
github,
105132
session_key: config.session_key.clone(),
106133
git_repo_checkout: config.git_repo_checkout.clone(),

src/config.rs

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ pub struct Config {
1010
pub gh_client_id: String,
1111
pub gh_client_secret: String,
1212
pub db_url: String,
13+
pub replica_db_url: Option<String>,
1314
pub env: Env,
1415
pub max_upload_size: u64,
1516
pub max_unpack_size: u64,
@@ -33,14 +34,15 @@ impl Default for Config {
3334
/// - `MIRROR`: Is this instance of cargo_registry a mirror of crates.io.
3435
/// - `HEROKU`: Is this instance of cargo_registry currently running on Heroku.
3536
/// - `S3_BUCKET`: The S3 bucket used to store crate files. If not present during development,
36-
/// cargo_registry will fall back to a local uploader.
37+
/// cargo_registry will fall back to a local uploader.
3738
/// - `S3_REGION`: The region in which the bucket was created. Optional if US standard.
3839
/// - `S3_ACCESS_KEY`: The access key to interact with S3. Optional if running a mirror.
3940
/// - `S3_SECRET_KEY`: The secret key to interact with S3. Optional if running a mirror.
4041
/// - `SESSION_KEY`: The key used to sign and encrypt session cookies.
4142
/// - `GH_CLIENT_ID`: The client ID of the associated GitHub application.
4243
/// - `GH_CLIENT_SECRET`: The client secret of the associated GitHub application.
4344
/// - `DATABASE_URL`: The URL of the postgres database to use.
45+
/// - `READ_ONLY_REPLICA_URL`: The URL of an optional postgres read-only replica database.
4446
/// - `BLOCKED_TRAFFIC`: A list of headers and environment variables to use for blocking
4547
///. traffic. See the `block_traffic` module for more documentation.
4648
fn default() -> Config {
@@ -129,6 +131,7 @@ impl Default for Config {
129131
gh_client_id: env("GH_CLIENT_ID"),
130132
gh_client_secret: env("GH_CLIENT_SECRET"),
131133
db_url: env("DATABASE_URL"),
134+
replica_db_url: dotenv::var("READ_ONLY_REPLICA_URL").ok(),
132135
env: cargo_env,
133136
max_upload_size: 10 * 1024 * 1024, // 10 MB default file upload size limit
134137
max_unpack_size: 512 * 1024 * 1024, // 512 MB max when decompressed

src/controllers/krate/downloads.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ pub fn downloads(req: &mut dyn Request) -> AppResult<Response> {
1919
use diesel::sql_types::BigInt;
2020

2121
let crate_name = &req.params()["crate_id"];
22-
let conn = req.db_conn()?;
22+
let conn = req.db_read_only()?;
2323
let krate = Crate::by_name(crate_name).first::<Crate>(&*conn)?;
2424

2525
let mut versions = krate.all_versions().load::<Version>(&*conn)?;

src/controllers/krate/metadata.rs

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ use crate::models::krate::ALL_COLUMNS;
2121
pub fn summary(req: &mut dyn Request) -> AppResult<Response> {
2222
use crate::schema::crates::dsl::*;
2323

24-
let conn = req.db_conn()?;
24+
let conn = req.db_read_only()?;
2525
let num_crates = crates.count().get_result(&*conn)?;
2626
let num_downloads = metadata::table
2727
.select(metadata::total_downloads)
@@ -103,7 +103,7 @@ pub fn summary(req: &mut dyn Request) -> AppResult<Response> {
103103
/// Handles the `GET /crates/:crate_id` route.
104104
pub fn show(req: &mut dyn Request) -> AppResult<Response> {
105105
let name = &req.params()["crate_id"];
106-
let conn = req.db_conn()?;
106+
let conn = req.db_read_only()?;
107107
let krate = Crate::by_name(name).first::<Crate>(&*conn)?;
108108

109109
let mut versions_and_publishers = krate
@@ -199,7 +199,7 @@ pub fn readme(req: &mut dyn Request) -> AppResult<Response> {
199199
// this information already, but ember is definitely requesting it
200200
pub fn versions(req: &mut dyn Request) -> AppResult<Response> {
201201
let crate_name = &req.params()["crate_id"];
202-
let conn = req.db_conn()?;
202+
let conn = req.db_read_only()?;
203203
let krate = Crate::by_name(crate_name).first::<Crate>(&*conn)?;
204204
let mut versions_and_publishers: Vec<(Version, Option<User>)> = krate
205205
.all_versions()
@@ -230,7 +230,7 @@ pub fn reverse_dependencies(req: &mut dyn Request) -> AppResult<Response> {
230230
use diesel::dsl::any;
231231

232232
let name = &req.params()["crate_id"];
233-
let conn = req.db_conn()?;
233+
let conn = req.db_read_only()?;
234234
let krate = Crate::by_name(name).first::<Crate>(&*conn)?;
235235
let (rev_deps, total) = krate.reverse_dependencies(&*conn, &req.query())?;
236236
let rev_deps: Vec<_> = rev_deps

src/controllers/krate/publish.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@ pub fn publish(req: &mut dyn Request) -> AppResult<Response> {
4141

4242
let new_crate = parse_new_headers(req)?;
4343

44-
let conn = app.diesel_database.get()?;
44+
let conn = app.primary_database.get()?;
4545
let ids = req.authenticate(&conn)?;
4646
let user = ids.find_user(&conn)?;
4747

src/controllers/krate/search.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ use crate::models::krate::{canon_crate_name, ALL_COLUMNS};
3636
pub fn search(req: &mut dyn Request) -> AppResult<Response> {
3737
use diesel::sql_types::{Bool, Text};
3838

39-
let conn = req.db_conn()?;
39+
let conn = req.db_read_only()?;
4040
let params = req.query();
4141
let sort = params.get("sort").map(|s| &**s);
4242
let include_yanked = params

src/controllers/version.rs

Lines changed: 16 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -6,28 +6,27 @@ pub mod yank;
66
use super::prelude::*;
77

88
use crate::db::DieselPooledConn;
9-
use crate::models::{Crate, CrateVersions, Version};
10-
use crate::schema::versions;
9+
use crate::models::{Crate, Version};
1110

1211
fn version_and_crate(req: &dyn Request) -> AppResult<(DieselPooledConn<'_>, Version, Crate)> {
13-
let crate_name = &req.params()["crate_id"];
14-
let semver = &req.params()["version"];
15-
if semver::Version::parse(semver).is_err() {
16-
return Err(cargo_err(&format_args!("invalid semver: {}", semver)));
17-
};
12+
let crate_name = extract_crate_name(req);
13+
let semver = extract_semver(req)?;
1814

1915
let conn = req.db_conn()?;
2016
let krate = Crate::by_name(crate_name).first::<Crate>(&*conn)?;
21-
let version = krate
22-
.all_versions()
23-
.filter(versions::num.eq(semver))
24-
.first(&*conn)
25-
.map_err(|_| {
26-
cargo_err(&format_args!(
27-
"crate `{}` does not have a version `{}`",
28-
crate_name, semver
29-
))
30-
})?;
17+
let version = krate.find_version(&conn, semver)?;
3118

3219
Ok((conn, version, krate))
3320
}
21+
22+
fn extract_crate_name(req: &dyn Request) -> &str {
23+
&req.params()["crate_id"]
24+
}
25+
26+
fn extract_semver(req: &dyn Request) -> AppResult<&str> {
27+
let semver = &req.params()["version"];
28+
if semver::Version::parse(semver).is_err() {
29+
return Err(cargo_err(&format_args!("invalid semver: {}", semver)));
30+
};
31+
Ok(semver)
32+
}

src/controllers/version/downloads.rs

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ use crate::models::{Crate, VersionDownload};
1010
use crate::schema::*;
1111
use crate::views::EncodableVersionDownload;
1212

13-
use super::version_and_crate;
13+
use super::{extract_crate_name, extract_semver};
1414

1515
/// Handles the `GET /crates/:crate_id/:version/download` route.
1616
/// This returns a URL to the location where the crate is stored.
@@ -69,7 +69,14 @@ fn increment_download_counts(
6969

7070
/// Handles the `GET /crates/:crate_id/:version/downloads` route.
7171
pub fn downloads(req: &mut dyn Request) -> AppResult<Response> {
72-
let (conn, version, _) = version_and_crate(req)?;
72+
let crate_name = extract_crate_name(req);
73+
let semver = extract_semver(req)?;
74+
75+
let conn = req.db_read_only()?;
76+
let version = Crate::by_name(crate_name)
77+
.first::<Crate>(&*conn)?
78+
.find_version(&conn, semver)?;
79+
7380
let cutoff_end_date = req
7481
.query()
7582
.get("before_date")

src/db.rs

Lines changed: 14 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -84,16 +84,25 @@ pub fn diesel_pool(
8484
}
8585

8686
pub trait RequestTransaction {
87-
/// Return the lazily initialized postgres connection for this request.
88-
///
89-
/// The connection will live for the lifetime of the request.
90-
// FIXME: This description does not match the implementation below.
87+
/// Obtain a read/write database connection from the primary pool
9188
fn db_conn(&self) -> Result<DieselPooledConn<'_>, r2d2::PoolError>;
89+
90+
/// Obtain a readonly database connection from the replica pool
91+
///
92+
/// If there is no replica pool, the primary pool is used instead.
93+
fn db_read_only(&self) -> Result<DieselPooledConn<'_>, r2d2::PoolError>;
9294
}
9395

9496
impl<T: Request + ?Sized> RequestTransaction for T {
9597
fn db_conn(&self) -> Result<DieselPooledConn<'_>, r2d2::PoolError> {
96-
self.app().diesel_database.get()
98+
self.app().primary_database.get().map_err(Into::into)
99+
}
100+
101+
fn db_read_only(&self) -> Result<DieselPooledConn<'_>, r2d2::PoolError> {
102+
match &self.app().read_only_replica_database {
103+
Some(pool) => pool.get().map_err(Into::into),
104+
None => self.app().primary_database.get().map_err(Into::into),
105+
}
97106
}
98107
}
99108

src/middleware/log_connection_pool_status.rs

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -37,8 +37,12 @@ impl Middleware for LogConnectionPoolStatus {
3737
if last_log_time.elapsed() >= Duration::from_secs(60) {
3838
*last_log_time = Instant::now();
3939
println!(
40-
"connection_pool_status=\"{:?}\" in_flight_requests={}",
41-
self.app.diesel_database.state(),
40+
"primary_pool_status=\"{:?}\" read_only_pool_status=\"{:?}\" in_flight_requests={}",
41+
self.app.primary_database.state(),
42+
self.app
43+
.read_only_replica_database
44+
.as_ref()
45+
.map(|pool| pool.state()),
4246
in_flight_requests
4347
);
4448
}

src/models/krate.rs

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -243,6 +243,18 @@ impl Crate {
243243
crates::table.select(ALL_COLUMNS)
244244
}
245245

246+
pub fn find_version(&self, conn: &PgConnection, version: &str) -> AppResult<Version> {
247+
self.all_versions()
248+
.filter(versions::num.eq(version))
249+
.first(conn)
250+
.map_err(|_| {
251+
cargo_err(&format_args!(
252+
"crate `{}` does not have a version `{}`",
253+
self.name, version
254+
))
255+
})
256+
}
257+
246258
pub fn valid_name(name: &str) -> bool {
247259
let under_max_length = name.chars().take(MAX_NAME_LENGTH + 1).count() <= MAX_NAME_LENGTH;
248260
Crate::valid_ident(name) && under_max_length

src/tests/all.rs

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -133,6 +133,7 @@ fn simple_config() -> Config {
133133
gh_client_id: dotenv::var("GH_CLIENT_ID").unwrap_or_default(),
134134
gh_client_secret: dotenv::var("GH_CLIENT_SECRET").unwrap_or_default(),
135135
db_url: env("TEST_DATABASE_URL"),
136+
replica_db_url: None,
136137
env: Env::Test,
137138
max_upload_size: 3000,
138139
max_unpack_size: 2000,
@@ -159,7 +160,7 @@ fn build_app(
159160
};
160161

161162
let app = App::new(&config, client);
162-
t!(t!(app.diesel_database.get()).begin_test_transaction());
163+
t!(t!(app.primary_database.get()).begin_test_transaction());
163164
let app = Arc::new(app);
164165
let handler = cargo_registry::build_handler(Arc::clone(&app));
165166
(app, handler)
@@ -278,8 +279,8 @@ fn multiple_live_references_to_the_same_connection_can_be_checked_out() {
278279
use std::ptr;
279280

280281
let (app, _) = app();
281-
let conn1 = app.diesel_database.get().unwrap();
282-
let conn2 = app.diesel_database.get().unwrap();
282+
let conn1 = app.primary_database.get().unwrap();
283+
let conn2 = app.primary_database.get().unwrap();
283284
let conn1_ref: &PgConnection = &conn1;
284285
let conn2_ref: &PgConnection = &conn2;
285286

src/tests/util.rs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -79,7 +79,7 @@ impl Drop for TestAppInner {
7979

8080
// Manually verify that all jobs have completed successfully
8181
// This will catch any tests that enqueued a job but forgot to initialize the runner
82-
let conn = self.app.diesel_database.get().unwrap();
82+
let conn = self.app.primary_database.get().unwrap();
8383
let job_count: i64 = background_jobs.count().get_result(&*conn).unwrap();
8484
assert_eq!(
8585
0, job_count,
@@ -122,7 +122,7 @@ impl TestApp {
122122
/// connection before making any API calls. Once the closure returns, the connection is
123123
/// dropped, ensuring it is returned to the pool and available for any future API calls.
124124
pub fn db<T, F: FnOnce(&PgConnection) -> T>(&self, f: F) -> T {
125-
let conn = self.0.app.diesel_database.get().unwrap();
125+
let conn = self.0.app.primary_database.get().unwrap();
126126
f(&conn)
127127
}
128128

@@ -220,7 +220,7 @@ impl TestAppBuilder {
220220
let (app, middle) = crate::build_app(self.config, self.proxy);
221221

222222
let runner = if self.build_job_runner {
223-
let connection_pool = app.diesel_database.clone();
223+
let connection_pool = app.primary_database.clone();
224224
let repository_config = RepositoryConfig {
225225
index_location: Url::from_file_path(&git::bare()).unwrap(),
226226
credentials: Credentials::Missing,

0 commit comments

Comments
 (0)