From f8e8a93145fa2e4b513b86723114f19bd32d1988 Mon Sep 17 00:00:00 2001 From: Ti Chi Robot Date: Fri, 27 Sep 2024 14:57:33 +0800 Subject: [PATCH 1/2] br: pre-check TiKV disk space before download (#17238) (#17569) close tikv/tikv#17224 Add a disk usage check when executing the `download` and `apply` RPCs from BR. When the disk usage status is not `Normal`, the request is rejected. Signed-off-by: ti-chi-bot Signed-off-by: hillium Co-authored-by: ris <79858083+RidRisR@users.noreply.github.com> Co-authored-by: hillium --- components/error_code/src/sst_importer.rs | 8 +- components/sst_importer/src/errors.rs | 10 ++ src/import/sst_service.rs | 16 ++- tests/failpoints/cases/test_import_service.rs | 42 +++++++- tests/integrations/import/test_apply_log.rs | 101 ++++++++++++++++++ 5 files changed, 172 insertions(+), 5 deletions(-) create mode 100644 tests/integrations/import/test_apply_log.rs diff --git a/components/error_code/src/sst_importer.rs b/components/error_code/src/sst_importer.rs index 001f4f146f6..b092796d467 100644 --- a/components/error_code/src/sst_importer.rs +++ b/components/error_code/src/sst_importer.rs @@ -22,5 +22,11 @@ define_error_codes!( TTL_LEN_NOT_EQUALS_TO_PAIRS => ("TtlLenNotEqualsToPairs", "", ""), INCOMPATIBLE_API_VERSION => ("IncompatibleApiVersion", "", ""), INVALID_KEY_MODE => ("InvalidKeyMode", "", ""), - RESOURCE_NOT_ENOUTH => ("ResourceNotEnough", "", "") + RESOURCE_NOT_ENOUTH => ("ResourceNotEnough", "", ""), + SUSPENDED => ("Suspended", + "this request has been suspended.", + "Probably there are some export tools don't support exporting data inserted by `ingest`(say, snapshot backup). 
Check the user manual and stop them."), + REQUEST_TOO_NEW => ("RequestTooNew", "", ""), + REQUEST_TOO_OLD => ("RequestTooOld", "", ""), + DISK_SPACE_NOT_ENOUGH => ("DiskSpaceNotEnough", "", "") ); diff --git a/components/sst_importer/src/errors.rs b/components/sst_importer/src/errors.rs index 7ff940fff12..28eb26ef218 100644 --- a/components/sst_importer/src/errors.rs +++ b/components/sst_importer/src/errors.rs @@ -125,6 +125,12 @@ pub enum Error { #[error("resource is not enough {0}")] ResourceNotEnough(String), + + #[error("imports are suspended for {time_to_lease_expire:?}")] + Suspended { time_to_lease_expire: Duration }, + + #[error("TiKV disk space is not enough.")] + DiskSpaceNotEnough, } impl Error { @@ -197,6 +203,10 @@ impl ErrorCodeExt for Error { Error::IncompatibleApiVersion => error_code::sst_importer::INCOMPATIBLE_API_VERSION, Error::InvalidKeyMode { .. } => error_code::sst_importer::INVALID_KEY_MODE, Error::ResourceNotEnough(_) => error_code::sst_importer::RESOURCE_NOT_ENOUTH, + Error::Suspended { .. 
} => error_code::sst_importer::SUSPENDED, + Error::RequestTooNew(_) => error_code::sst_importer::REQUEST_TOO_NEW, + Error::RequestTooOld(_) => error_code::sst_importer::REQUEST_TOO_OLD, + Error::DiskSpaceNotEnough => error_code::sst_importer::DISK_SPACE_NOT_ENOUGH, } } } diff --git a/src/import/sst_service.rs b/src/import/sst_service.rs index 0c81873c130..ce98bd42259 100644 --- a/src/import/sst_service.rs +++ b/src/import/sst_service.rs @@ -35,8 +35,11 @@ use tikv_kv::{ }; use tikv_util::{ config::ReadableSize, - future::create_stream_with_buffer, - sys::thread::ThreadBuildWrapper, + future::{create_stream_with_buffer, paired_future_callback}, + sys::{ + disk::{get_disk_status, DiskUsage}, + thread::ThreadBuildWrapper, + }, time::{Instant, Limiter}, HandyRwLock, }; @@ -883,6 +886,10 @@ impl ImportSst for ImportSstService { .observe(start.saturating_elapsed().as_secs_f64()); let mut resp = ApplyResponse::default(); + if get_disk_status(0) != DiskUsage::Normal { + resp.set_error(Error::DiskSpaceNotEnough.into()); + return crate::send_rpc_response!(Ok(resp), sink, label, start); + } match Self::apply_imp(req, importer, applier, limiter, max_raft_size).await { Ok(Some(r)) => resp.set_range(r), @@ -924,6 +931,11 @@ impl ImportSst for ImportSstService { sst_importer::metrics::IMPORTER_DOWNLOAD_DURATION .with_label_values(&["queue"]) .observe(start.saturating_elapsed().as_secs_f64()); + if get_disk_status(0) != DiskUsage::Normal { + let mut resp = DownloadResponse::default(); + resp.set_error(Error::DiskSpaceNotEnough.into()); + return crate::send_rpc_response!(Ok(resp), sink, label, timer); + } // FIXME: download() should be an async fn, to allow BR to cancel // a download task. 
diff --git a/tests/failpoints/cases/test_import_service.rs b/tests/failpoints/cases/test_import_service.rs index a2487456108..58ac666bdac 100644 --- a/tests/failpoints/cases/test_import_service.rs +++ b/tests/failpoints/cases/test_import_service.rs @@ -8,12 +8,13 @@ use std::{ use file_system::calc_crc32; use futures::{executor::block_on, stream, SinkExt}; use grpcio::{Result, WriteFlags}; -use kvproto::import_sstpb::*; +use grpcio::{ChannelBuilder, Environment, Result, WriteFlags}; +use kvproto::{disk_usage::DiskUsage, import_sstpb::*, tikvpb_grpc::TikvClient}; use tempfile::{Builder, TempDir}; use test_raftstore::Simulator; use test_sst_importer::*; use tikv::config::TikvConfig; -use tikv_util::{config::ReadableSize, HandyRwLock}; +use tikv_util::{config::ReadableSize, sys::disk, HandyRwLock}; #[allow(dead_code)] #[path = "../../integrations/import/util.rs"] @@ -90,6 +91,43 @@ fn upload_sst(import: &ImportSstClient, meta: &SstMeta, data: &[u8]) -> Result Date: Thu, 5 Dec 2024 17:12:19 +0800 Subject: [PATCH 2/2] fmt Signed-off-by: Calvin Neo --- tests/failpoints/cases/test_import_service.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/failpoints/cases/test_import_service.rs b/tests/failpoints/cases/test_import_service.rs index 58ac666bdac..4bebb5143d3 100644 --- a/tests/failpoints/cases/test_import_service.rs +++ b/tests/failpoints/cases/test_import_service.rs @@ -7,7 +7,6 @@ use std::{ use file_system::calc_crc32; use futures::{executor::block_on, stream, SinkExt}; -use grpcio::{Result, WriteFlags}; use grpcio::{ChannelBuilder, Environment, Result, WriteFlags}; use kvproto::{disk_usage::DiskUsage, import_sstpb::*, tikvpb_grpc::TikvClient}; use tempfile::{Builder, TempDir};