Skip to content

Commit

Permalink
tikv-ctl: allow get_all_regions from remote, drop-unapplied-raftlog (t…
Browse files Browse the repository at this point in the history
…ikv#10887)

* debug: allow get_all_regions from remote tikv-ctl

Signed-off-by: Andy Lok <[email protected]>

* tikv-ctl: add remove-regions and drop-unapplied-raftlog

Signed-off-by: Andy Lok <[email protected]>

* add --all-regions flag to tikv-ctl raft region

Signed-off-by: Andy Lok <[email protected]>

* add promote learner for unsafe recover

Signed-off-by: Connor <[email protected]>

* fix arg

Signed-off-by: Connor <[email protected]>

* update

Signed-off-by: Andy Lok <[email protected]>

* add --json option for tikv-ctl raft region

Signed-off-by: iosmanthus <[email protected]>

* fix rustfmt

Signed-off-by: iosmanthus <[email protected]>

* better json

Signed-off-by: Andy Lok <[email protected]>

* fix test

Signed-off-by: Andy Lok <[email protected]>

* fix clippy

Signed-off-by: Andy Lok <[email protected]>

* address comment

Signed-off-by: Andy Lok <[email protected]>

* address comment

Signed-off-by: Andy Lok <[email protected]>

* address comment

Signed-off-by: Andy Lok <[email protected]>

* improve format

Signed-off-by: Andy Lok <[email protected]>

* update kvproto

Signed-off-by: Andy Lok <[email protected]>

* fix clippy

Signed-off-by: Andy Lok <[email protected]>

* update kvproto

Signed-off-by: Andy Lok <[email protected]>

* address comment

Signed-off-by: Andy Lok <[email protected]>

* fix panic

Signed-off-by: Andy Lok <[email protected]>

Co-authored-by: Connor <[email protected]>
Co-authored-by: iosmanthus <[email protected]>
Co-authored-by: Ti Chi Robot <[email protected]>
  • Loading branch information
4 people authored Sep 7, 2021
1 parent a284376 commit fd1385f
Show file tree
Hide file tree
Showing 8 changed files with 451 additions and 81 deletions.
2 changes: 1 addition & 1 deletion Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

216 changes: 171 additions & 45 deletions cmd/tikv-ctl/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ use raft_log_engine::RaftLogEngine;
use raftstore::store::INIT_EPOCH_CONF_VER;
use regex::Regex;
use security::{SecurityConfig, SecurityManager};
use serde_json::json;
use server::setup::initial_logger;
use std::borrow::ToOwned;
use std::cmp::Ordering;
Expand Down Expand Up @@ -180,7 +181,7 @@ trait DebugExecutor {
}

fn dump_all_region_size(&self, cfs: Vec<&str>) {
let regions = self.get_all_meta_regions();
let regions = self.get_all_regions_in_store();
let regions_number = regions.len();
let mut total_size = 0;
for region in regions {
Expand All @@ -190,30 +191,71 @@ trait DebugExecutor {
println!("total region size: {}", convert_gbmb(total_size as u64));
}

fn dump_region_info(&self, region: u64, skip_tombstone: bool) {
let r = self.get_region_info(region);
if skip_tombstone {
let region_state = r.region_local_state.as_ref();
if region_state.map_or(false, |s| s.get_state() == PeerState::Tombstone) {
return;
fn dump_region_info(&self, region_ids: Option<Vec<u64>>, skip_tombstone: bool) {
let region_ids = region_ids.unwrap_or_else(|| self.get_all_regions_in_store());
let mut region_objects = serde_json::map::Map::new();
for region_id in region_ids {
let r = self.get_region_info(region_id);
if skip_tombstone {
let region_state = r.region_local_state.as_ref();
if region_state.map_or(false, |s| s.get_state() == PeerState::Tombstone) {
return;
}
}
let region_object = json!({
"region_id": region_id,
"region_local_state": r.region_local_state.map(|s| {
let r = s.get_region();
let region_epoch = r.get_region_epoch();
let peers = r.get_peers();
json!({
"region": json!({
"id": r.get_id(),
"start_key": hex::encode_upper(r.get_start_key()),
"end_key": hex::encode_upper(r.get_end_key()),
"region_epoch": json!({
"conf_ver": region_epoch.get_conf_ver(),
"version": region_epoch.get_version()
}),
"peers": peers.iter().map(|p| json!({
"id": p.get_id(),
"store_id": p.get_store_id(),
"role": format!("{:?}", p.get_role()),
})).collect::<Vec<_>>(),
}),
})
}),
"raft_local_state": r.raft_local_state.map(|s| {
let hard_state = s.get_hard_state();
json!({
"hard_state": json!({
"term": hard_state.get_term(),
"vote": hard_state.get_vote(),
"commit": hard_state.get_commit(),
}),
"last_index": s.get_last_index(),
})
}),
"raft_apply_state": r.raft_apply_state.map(|s| {
let truncated_state = s.get_truncated_state();
json!({
"applied_index": s.get_applied_index(),
"commit_index": s.get_commit_index(),
"commit_term": s.get_commit_term(),
"truncated_state": json!({
"index": truncated_state.get_index(),
"term": truncated_state.get_term(),
})
})
})
});
region_objects.insert(region_id.to_string(), region_object);
}
let region_state_key = keys::region_state_key(region);
let raft_state_key = keys::raft_state_key(region);
let apply_state_key = keys::apply_state_key(region);
println!("region id: {}", region);
println!("region state key: {}", escape(&region_state_key));
println!("region state: {:?}", r.region_local_state);
println!("raft state key: {}", escape(&raft_state_key));
println!("raft state: {:?}", r.raft_local_state);
println!("apply state key: {}", escape(&apply_state_key));
println!("apply state: {:?}", r.raft_apply_state);
}

fn dump_all_region_info(&self, skip_tombstone: bool) {
for region in self.get_all_meta_regions() {
self.dump_region_info(region, skip_tombstone);
}
println!(
"{}",
serde_json::to_string_pretty(&json!({ "region_infos": region_objects })).unwrap()
);
}

fn dump_raft_log(&self, region: u64, index: u64) {
Expand Down Expand Up @@ -532,7 +574,14 @@ trait DebugExecutor {
}

/// Recover the cluster when given `store_ids` are failed.
fn remove_fail_stores(&self, store_ids: Vec<u64>, region_ids: Option<Vec<u64>>);
fn remove_fail_stores(
&self,
store_ids: Vec<u64>,
region_ids: Option<Vec<u64>>,
promote_learner: bool,
);

/// Drop the unapplied raft logs of the given regions.
/// NOTE(review): `region_ids == None` presumably means "all regions" (the CLI
/// passes `None` when `--all-regions` is used) — confirm against the implementation.
fn drop_unapplied_raftlog(&self, region_ids: Option<Vec<u64>>);

/// Recreate the region with metadata from pd, but alloc new id for it.
fn recreate_region(&self, sec_mgr: Arc<SecurityManager>, pd_cfg: &PdConfig, region_id: u64);
Expand Down Expand Up @@ -572,7 +621,7 @@ trait DebugExecutor {
self.recover_all(threads, read_only);
}

fn get_all_meta_regions(&self) -> Vec<u64>;
fn get_all_regions_in_store(&self) -> Vec<u64>;

fn get_value_by_key(&self, cf: &str, key: Vec<u8>) -> Vec<u8>;

Expand Down Expand Up @@ -623,8 +672,10 @@ impl DebugExecutor for DebugClient {
process::exit(-1);
}

fn get_all_meta_regions(&self) -> Vec<u64> {
unimplemented!();
/// Return the ids of all regions in the store by issuing a default
/// `GetAllRegionsInStoreRequest` through `DebugClient`.
///
/// A failed call is handed to `perror_and_exit`.
fn get_all_regions_in_store(&self) -> Vec<u64> {
    let request = GetAllRegionsInStoreRequest::default();
    let response = DebugClient::get_all_regions_in_store(self, &request);
    response
        .unwrap_or_else(|e| perror_and_exit("DebugClient::get_all_regions_in_store", e))
        .take_regions()
}

fn get_value_by_key(&self, cf: &str, key: Vec<u8>) -> Vec<u8> {
Expand Down Expand Up @@ -760,12 +811,16 @@ impl DebugExecutor for DebugClient {
unimplemented!("only available for local mode");
}

fn remove_fail_stores(&self, _: Vec<u64>, _: Option<Vec<u64>>) {
self.check_local_mode();
fn remove_fail_stores(&self, _: Vec<u64>, _: Option<Vec<u64>>, _: bool) {
unimplemented!("only available for local mode");
}

/// Not supported through the remote client: always panics via
/// `unimplemented!`, since this operation is only available in local mode.
fn drop_unapplied_raftlog(&self, _: Option<Vec<u64>>) {
unimplemented!("only available for local mode");
}

fn recreate_region(&self, _: Arc<SecurityManager>, _: &PdConfig, _: u64) {
self.check_local_mode();
unimplemented!("only available for local mode");
}

fn check_region_consistency(&self, region_id: u64) {
Expand Down Expand Up @@ -820,9 +875,9 @@ impl DebugExecutor for DebugClient {
impl<ER: RaftEngine> DebugExecutor for Debugger<ER> {
fn check_local_mode(&self) {}

fn get_all_meta_regions(&self) -> Vec<u64> {
self.get_all_meta_regions()
.unwrap_or_else(|e| perror_and_exit("Debugger::get_all_meta_regions", e))
/// Return the ids of all regions in the store, handing any error to
/// `perror_and_exit`.
fn get_all_regions_in_store(&self) -> Vec<u64> {
    // The inner call yields a fallible result, so it presumably resolves to
    // `Debugger`'s inherent method rather than this trait method — TODO confirm.
    match self.get_all_regions_in_store() {
        Ok(region_ids) => region_ids,
        Err(e) => perror_and_exit("Debugger::get_all_regions_in_store", e),
    }
}

fn get_value_by_key(&self, cf: &str, key: Vec<u8>) -> Vec<u8> {
Expand Down Expand Up @@ -938,9 +993,21 @@ impl<ER: RaftEngine> DebugExecutor for Debugger<ER> {
println!("all regions are healthy")
}

fn remove_fail_stores(&self, store_ids: Vec<u64>, region_ids: Option<Vec<u64>>) {
fn remove_fail_stores(
&self,
store_ids: Vec<u64>,
region_ids: Option<Vec<u64>>,
promote_learner: bool,
) {
println!("removing stores {:?} from configurations...", store_ids);
self.remove_failed_stores(store_ids, region_ids)
self.remove_failed_stores(store_ids, region_ids, promote_learner)
.unwrap_or_else(|e| perror_and_exit("Debugger::remove_fail_stores", e));
println!("success");
}

/// Drop the unapplied raft logs of `region_ids` and report progress on stdout.
///
/// Any error is handed to `perror_and_exit`, labelled with this operation's
/// own name so a failure is not misreported as coming from `remove_fail_stores`.
fn drop_unapplied_raftlog(&self, region_ids: Option<Vec<u64>>) {
    println!("removing unapplied raftlog on region {:?} ...", region_ids);
    self.drop_unapplied_raftlog(region_ids)
        .unwrap_or_else(|e| perror_and_exit("Debugger::drop_unapplied_raftlog", e));
    println!("success");
}
Expand Down Expand Up @@ -1202,10 +1269,25 @@ fn main() {
SubCommand::with_name("region")
.about("print region info")
.arg(
Arg::with_name("region")
.short("r")
Arg::with_name("regions")
.aliases(&["region"])
.required_unless("all-regions")
.conflicts_with("all-regions")
.takes_value(true)
.help("Set the region id, if not specified, print all regions"),
.short("r")
.multiple(true)
.use_delimiter(true)
.require_delimiter(true)
.value_delimiter(",")
.help("Print info for these regions"),
)
.arg(
Arg::with_name("all-regions")
.required_unless("regions")
.conflicts_with("regions")
.long("all-regions")
.takes_value(false)
.help("Print info for all regions"),
)
.arg(
Arg::with_name("skip-tombstone")
Expand Down Expand Up @@ -1564,7 +1646,7 @@ fn main() {
.about("Unsafely recover when the store can not start normally, this recover may lose data")
.subcommand(
SubCommand::with_name("remove-fail-stores")
.about("Unsafely recover the cluster when the majority replicas are failed")
.about("Remove the failed machines from the peer list for the regions")
.arg(
Arg::with_name("stores")
.required(true)
Expand All @@ -1588,6 +1670,37 @@ fn main() {
.value_delimiter(",")
.help("Only for these regions"),
)
.arg(
Arg::with_name("promote-learner")
.long("promote-learner")
.takes_value(false)
.required(false)
.help("Promote learner to voter"),
)
.arg(
Arg::with_name("all-regions")
.required_unless("regions")
.conflicts_with("regions")
.long("all-regions")
.takes_value(false)
.help("Do the command for all regions"),
)
)
.subcommand(
SubCommand::with_name("drop-unapplied-raftlog")
.about("Remove unapplied raftlogs on the regions")
.arg(
Arg::with_name("regions")
.required_unless("all-regions")
.conflicts_with("all-regions")
.takes_value(true)
.short("r")
.multiple(true)
.use_delimiter(true)
.require_delimiter(true)
.value_delimiter(",")
.help("Only for these regions"),
)
.arg(
Arg::with_name("all-regions")
.required_unless("regions")
Expand Down Expand Up @@ -1963,7 +2076,7 @@ fn main() {
println!("{}", escape(&from_hex(hex).unwrap()));
return;
} else if let Some(escaped) = matches.value_of("escaped-to-hex") {
println!("{}", log_wrappers::hex_encode_upper(unescape(escaped)));
println!("{}", hex::encode_upper(unescape(escaped)));
return;
} else if let Some(encoded) = matches.value_of("decode") {
match Key::from_encoded(unescape(encoded)).into_raw() {
Expand Down Expand Up @@ -2104,11 +2217,13 @@ fn main() {
debug_executor.dump_raft_log(id, index);
} else if let Some(matches) = matches.subcommand_matches("region") {
let skip_tombstone = matches.is_present("skip-tombstone");
if let Some(id) = matches.value_of("region") {
debug_executor.dump_region_info(id.parse().unwrap(), skip_tombstone);
} else {
debug_executor.dump_all_region_info(skip_tombstone);
}
let regions = matches.values_of("regions").map(|values| {
values
.map(str::parse)
.collect::<Result<Vec<_>, _>>()
.expect("parse regions fail")
});
debug_executor.dump_region_info(regions, skip_tombstone);
} else {
let _ = app.print_help();
}
Expand Down Expand Up @@ -2246,7 +2361,18 @@ fn main() {
.collect::<Result<Vec<_>, _>>()
.expect("parse regions fail")
});
debug_executor.remove_fail_stores(store_ids, region_ids);
debug_executor.remove_fail_stores(
store_ids,
region_ids,
matches.is_present("promote-learner"),
);
} else if let Some(matches) = matches.subcommand_matches("drop-unapplied-raftlog") {
let region_ids = matches.values_of("regions").map(|ids| {
ids.map(str::parse)
.collect::<Result<Vec<_>, _>>()
.expect("parse regions fail")
});
debug_executor.drop_unapplied_raftlog(region_ids);
} else {
println!("{}", matches.usage());
}
Expand Down
7 changes: 7 additions & 0 deletions components/engine_traits/src/misc.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,10 +11,17 @@ use crate::range::Range;

/// The ways in which data inside a key range can be deleted.
#[derive(Clone, Debug)]
pub enum DeleteStrategy {
/// Delete the SST files that fit fully within the range. SST files that only partially
/// overlap the range are not touched.
DeleteFiles,
/// Delete the data stored in Titan.
DeleteBlobs,
/// Scan for keys and then delete them. Useful when we know there are not too many keys in the range.
DeleteByKey,
/// Delete by range. Note that this is experimental and you should check whether it is enabled
/// in the config before using it.
DeleteByRange,
/// Delete by ingesting an SST file with deletions. Useful when there are too many ranges.
DeleteByWriter { sst_path: String },
}

Expand Down
1 change: 1 addition & 0 deletions components/raftstore/src/store/worker/region.rs
Original file line number Diff line number Diff line change
Expand Up @@ -585,6 +585,7 @@ where

fn delete_all_in_range(&self, ranges: &[Range]) -> Result<()> {
for cf in self.engine.cf_names() {
// CF_LOCK usually contains fewer keys than other CFs, so we delete them by key.
let strategy = if cf == CF_LOCK {
DeleteStrategy::DeleteByKey
} else if self.use_delete_range {
Expand Down
5 changes: 2 additions & 3 deletions components/server/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,8 @@ sse = ["tikv/sse"]
mem-profiling = ["tikv/mem-profiling"]
failpoints = ["tikv/failpoints"]
bcc-iosnoop = ["tikv/bcc-iosnoop"]
cloud-aws = [ "encryption_export/cloud-aws" ]
cloud-gcp = [ "encryption_export/cloud-gcp" ]
cloud-aws = ["encryption_export/cloud-aws"]
cloud-gcp = ["encryption_export/cloud-gcp"]
protobuf-codec = [
"protobuf/bytes",
"backup/protobuf-codec",
Expand Down Expand Up @@ -66,7 +66,6 @@ test-engines-rocksdb = [
test-engines-panic = [
"tikv/test-engines-panic",
]

nortcheck = ["engine_rocks/nortcheck"]

[dependencies]
Expand Down
4 changes: 2 additions & 2 deletions scripts/check-redact-log
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,12 @@ function error_msg() {
}

if [[ "$(uname)" == "Darwin" ]] ; then
if grep -r -n --color=always --include '*.rs' --exclude hex.rs --exclude-dir target 'encode_upper' . | grep -v log_wrappers ; then
if grep -r -n --color=always --include '*.rs' --exclude hex.rs --exclude-dir tikv-ctl --exclude-dir target 'encode_upper' . | grep -v log_wrappers ; then
error_msg
exit 1
fi
else
if grep -r -n -P '(?<!hex_)encode_upper' -C 1 --color=always --include \*.rs --exclude hex.rs --exclude-dir target . ; then
if grep -r -n -P '(?<!hex_)encode_upper' -C 1 --color=always --include \*.rs --exclude hex.rs --exclude-dir tikv-ctl --exclude-dir target . ; then
error_msg
exit 1
fi
Expand Down
Loading

0 comments on commit fd1385f

Please sign in to comment.