From bf17de11a9eb9863e63a323b5649ff8e7b09be7a Mon Sep 17 00:00:00 2001 From: Kould Date: Fri, 4 Jul 2025 19:08:17 +0800 Subject: [PATCH 01/25] feat: impl Task for private --- Cargo.lock | 8 + src/common/exception/src/exception_code.rs | 14 + src/meta/app/src/principal/mod.rs | 13 + src/meta/app/src/principal/task.rs | 135 ++++++ src/meta/app/src/principal/task_ident.rs | 45 ++ src/meta/app/src/principal/task_run_ident.rs | 45 ++ src/meta/proto-conv/src/lib.rs | 1 + .../src/task_from_to_protobuf_impl.rs | 198 +++++++++ src/meta/protos/proto/task.proto | 84 ++++ src/query/config/src/config.rs | 10 + src/query/config/src/inner.rs | 5 + src/query/management/Cargo.toml | 5 + src/query/management/src/lib.rs | 1 + src/query/management/src/task/errors.rs | 122 ++++++ src/query/management/src/task/mod.rs | 22 + src/query/management/src/task/task_mgr.rs | 398 ++++++++++++++++++ src/query/service/Cargo.toml | 2 + src/query/service/src/global_services.rs | 8 +- .../interpreters/interpreter_task_alter.rs | 140 +----- .../interpreters/interpreter_task_create.rs | 92 +--- .../interpreters/interpreter_task_describe.rs | 42 +- .../src/interpreters/interpreter_task_drop.rs | 35 +- .../interpreters/interpreter_task_execute.rs | 26 +- .../interpreters/interpreter_tasks_show.rs | 47 +-- src/query/service/src/interpreters/mod.rs | 1 + .../service/src/interpreters/task/cloud.rs | 358 ++++++++++++++++ .../service/src/interpreters/task/mod.rs | 124 ++++++ .../service/src/interpreters/task/private.rs | 171 ++++++++ src/query/service/src/lib.rs | 1 + src/query/service/src/task_service.rs | 322 ++++++++++++++ src/query/storages/system/src/tasks_table.rs | 40 +- src/query/users/Cargo.toml | 1 + src/query/users/src/lib.rs | 1 + src/query/users/src/user_api.rs | 27 ++ src/query/users/src/user_task.rs | 78 ++++ 35 files changed, 2255 insertions(+), 367 deletions(-) create mode 100644 src/meta/app/src/principal/task.rs create mode 100644 src/meta/app/src/principal/task_ident.rs create mode 100644 
src/meta/app/src/principal/task_run_ident.rs create mode 100644 src/meta/proto-conv/src/task_from_to_protobuf_impl.rs create mode 100644 src/meta/protos/proto/task.proto create mode 100644 src/query/management/src/task/errors.rs create mode 100644 src/query/management/src/task/mod.rs create mode 100644 src/query/management/src/task/task_mgr.rs create mode 100644 src/query/service/src/interpreters/task/cloud.rs create mode 100644 src/query/service/src/interpreters/task/mod.rs create mode 100644 src/query/service/src/interpreters/task/private.rs create mode 100644 src/query/service/src/task_service.rs create mode 100644 src/query/users/src/user_task.rs diff --git a/Cargo.lock b/Cargo.lock index d95a92b13bef2..37e6bcb3a3e82 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3581,6 +3581,10 @@ version = "0.1.0" dependencies = [ "async-backtrace", "async-trait", + "chrono", + "chrono-tz 0.8.6", + "cron", + "databend-common-ast", "databend-common-base", "databend-common-exception", "databend-common-expression", @@ -3602,6 +3606,7 @@ dependencies = [ "serde", "serde_json", "thiserror 1.0.69", + "tokio", ] [[package]] @@ -4785,6 +4790,7 @@ dependencies = [ "databend-common-meta-store", "databend-common-meta-types", "enumflags2", + "futures", "itertools 0.13.0", "jwt-simple", "log", @@ -5231,6 +5237,7 @@ dependencies = [ "chrono", "chrono-tz 0.8.6", "concurrent-queue", + "cron", "ctor", "dashmap 6.1.0", "databend-common-ast", @@ -5365,6 +5372,7 @@ dependencies = [ "tempfile", "tokio", "tokio-stream", + "tokio-util", "toml 0.8.22", "tonic", "tower 0.5.2", diff --git a/src/common/exception/src/exception_code.rs b/src/common/exception/src/exception_code.rs index 3ad491c1a56cd..c01f97c6031ad 100644 --- a/src/common/exception/src/exception_code.rs +++ b/src/common/exception/src/exception_code.rs @@ -391,6 +391,20 @@ build_exceptions! { UDFDataError(2607), } +// Task Errors [2611-2614] +build_exceptions! 
{ + /// Unknown Task + UnknownTask(2611), + /// Task already exists + TaskAlreadyExists(2612), + /// Task timezone invalid + TaskTimezoneInvalid(2613), + /// Task cron invalid + TaskCronInvalid(2614), + /// Task schedule and after conflict + TaskScheduleAndAfterConflict(2615), +} + // Search and External Service Errors [1901-1903, 1910] build_exceptions! { /// Tantivy error diff --git a/src/meta/app/src/principal/mod.rs b/src/meta/app/src/principal/mod.rs index 3aca90eab0d67..43ad14e83a8d8 100644 --- a/src/meta/app/src/principal/mod.rs +++ b/src/meta/app/src/principal/mod.rs @@ -48,6 +48,9 @@ pub mod procedure_id_to_name; pub mod procedure_identity; pub mod procedure_name_ident; pub mod stage_file_ident; +pub mod task; +pub mod task_ident; +pub mod task_run_ident; pub mod tenant_ownership_object_ident; pub mod tenant_user_ident; pub mod user_defined_file_format_ident; @@ -86,6 +89,16 @@ pub use role_info::RoleInfo; pub use role_info::RoleInfoSerdeError; pub use stage_file_ident::StageFileIdent; pub use stage_file_path::StageFilePath; +pub use task::AfterTaskState; +pub use task::ScheduleOptions; +pub use task::ScheduleType; +pub use task::State; +pub use task::Status; +pub use task::Task; +pub use task::TaskRun; +pub use task::WarehouseOptions; +pub use task_ident::TaskIdent; +pub use task_ident::TaskIdentRaw; pub use tenant_ownership_object_ident::TenantOwnershipObjectIdent; pub use tenant_user_ident::TenantUserIdent; pub use udf_ident::UdfIdent; diff --git a/src/meta/app/src/principal/task.rs b/src/meta/app/src/principal/task.rs new file mode 100644 index 0000000000000..26a49f010a9f7 --- /dev/null +++ b/src/meta/app/src/principal/task.rs @@ -0,0 +1,135 @@ +// Copyright 2021 Datafuse Labs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use std::collections::BTreeMap; +use std::collections::HashSet; +use std::sync::Arc; + +use chrono::DateTime; +use chrono::Utc; + +pub const EMPTY_TASK_ID: u64 = 0; + +#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)] +pub enum ScheduleType { + IntervalType = 0, + CronType = 1, +} + +#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)] +pub enum Status { + Suspended = 0, + Started = 1, +} + +#[derive(Debug, Clone, Copy, PartialEq)] +pub enum State { + Scheduled = 0, + Executing = 1, + Succeeded = 2, + Failed = 3, + Cancelled = 4, +} + +#[derive(Debug, Clone, PartialEq)] +pub struct ScheduleOptions { + pub interval: Option, + pub cron: Option, + pub time_zone: Option, + pub schedule_type: ScheduleType, + pub milliseconds_interval: Option, +} + +#[derive(Debug, Clone, PartialEq)] +pub struct WarehouseOptions { + pub warehouse: Option, + pub using_warehouse_size: Option, +} + +#[derive(Debug, Clone, PartialEq)] +pub struct Task { + pub task_id: u64, + pub task_name: String, + pub query_text: String, + pub when_condition: Option, + pub after: Vec, + pub comment: Option, + // expired useless + pub owner: String, + pub owner_user: String, + pub schedule_options: Option, + pub warehouse_options: Option, + pub next_scheduled_at: Option>, + pub suspend_task_after_num_failures: Option, + // TODO + pub error_integration: Option, + pub status: Status, + pub created_at: DateTime, + pub updated_at: DateTime, + pub last_suspended_at: Option>, + // TODO + pub session_params: BTreeMap, +} + +#[derive(Debug, Clone, PartialEq)] 
+pub struct TaskRun { + pub task: Task, + pub run_id: u64, + pub attempt_number: i32, + pub state: State, + pub scheduled_at: DateTime, + pub completed_at: Option>, + pub error_code: i64, + pub error_message: Option, + // expired useless + pub root_task_id: u64, +} + +impl TaskRun { + pub fn key(&self) -> String { + format!("{}@{}", self.task.task_name, self.run_id) + } +} + +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub struct AfterTaskInfo { + pub afters: Arc>, +} + +pub struct AfterTaskState { + waiting: HashSet, +} + +impl From<&Task> for AfterTaskInfo { + fn from(value: &Task) -> Self { + AfterTaskInfo { + afters: Arc::new(value.after.clone()), + } + } +} + +impl AfterTaskState { + pub fn completed_task(&mut self, task_name: &str) -> bool { + self.waiting.remove(task_name); + self.waiting.is_empty() + } +} + +impl From<&AfterTaskInfo> for AfterTaskState { + fn from(value: &AfterTaskInfo) -> Self { + Self { + waiting: HashSet::from_iter(value.afters.to_vec()), + } + } +} diff --git a/src/meta/app/src/principal/task_ident.rs b/src/meta/app/src/principal/task_ident.rs new file mode 100644 index 0000000000000..45722cfb489b6 --- /dev/null +++ b/src/meta/app/src/principal/task_ident.rs @@ -0,0 +1,45 @@ +// Copyright 2021 Datafuse Labs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +use crate::tenant_key::ident::TIdent; + +pub type TaskIdent = TIdent; + +pub type TaskIdentRaw = TIdent; + +pub use kvapi_impl::Resource; + +mod kvapi_impl { + use databend_common_meta_kvapi::kvapi; + + use crate::principal::task::Task; + use crate::principal::task_ident::TaskIdent; + use crate::tenant_key::resource::TenantResource; + + pub struct Resource; + impl TenantResource for Resource { + const PREFIX: &'static str = "__fd_tasks"; + const TYPE: &'static str = "TaskIdent"; + const HAS_TENANT: bool = true; + type ValueType = Task; + } + + impl kvapi::Value for Task { + type KeyType = TaskIdent; + + fn dependency_keys(&self, _key: &Self::KeyType) -> impl IntoIterator { + [] + } + } +} diff --git a/src/meta/app/src/principal/task_run_ident.rs b/src/meta/app/src/principal/task_run_ident.rs new file mode 100644 index 0000000000000..34faa0e3acdfc --- /dev/null +++ b/src/meta/app/src/principal/task_run_ident.rs @@ -0,0 +1,45 @@ +// Copyright 2021 Datafuse Labs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +use crate::tenant_key::ident::TIdent; + +pub type TaskRunIdent = TIdent; + +pub type TaskRunIdentRaw = TIdent; + +pub use kvapi_impl::Resource; + +mod kvapi_impl { + use databend_common_meta_kvapi::kvapi; + + use crate::principal::task_run_ident::TaskRunIdent; + use crate::principal::TaskRun; + use crate::tenant_key::resource::TenantResource; + + pub struct Resource; + impl TenantResource for Resource { + const PREFIX: &'static str = "__fd_task_runs"; + const TYPE: &'static str = "TaskRunIdent"; + const HAS_TENANT: bool = true; + type ValueType = TaskRun; + } + + impl kvapi::Value for TaskRun { + type KeyType = TaskRunIdent; + + fn dependency_keys(&self, _key: &Self::KeyType) -> impl IntoIterator { + [] + } + } +} diff --git a/src/meta/proto-conv/src/lib.rs b/src/meta/proto-conv/src/lib.rs index 34721ea6d4f82..b3225879b59dd 100644 --- a/src/meta/proto-conv/src/lib.rs +++ b/src/meta/proto-conv/src/lib.rs @@ -81,6 +81,7 @@ mod schema_from_to_protobuf_impl; mod sequence_from_to_protobuf_impl; mod stage_from_to_protobuf_impl; mod table_from_to_protobuf_impl; +mod task_from_to_protobuf_impl; mod tenant_quota_from_to_protobuf_impl; mod tident_from_to_protobuf_impl; mod token_from_to_protobuf_impl; diff --git a/src/meta/proto-conv/src/task_from_to_protobuf_impl.rs b/src/meta/proto-conv/src/task_from_to_protobuf_impl.rs new file mode 100644 index 0000000000000..614f757b09536 --- /dev/null +++ b/src/meta/proto-conv/src/task_from_to_protobuf_impl.rs @@ -0,0 +1,198 @@ +// Copyright 2021 Datafuse Labs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +use chrono::DateTime; +use chrono::Utc; +use databend_common_meta_app::principal as mt; +use databend_common_meta_app::principal::task::Status; +use databend_common_protos::pb; + +use crate::reader_check_msg; +use crate::FromToProto; +use crate::Incompatible; +use crate::MIN_READER_VER; +use crate::VER; + +impl FromToProto for mt::Task { + type PB = pb::Task; + fn get_pb_ver(p: &Self::PB) -> u64 { + p.ver + } + fn from_pb(p: pb::Task) -> Result { + reader_check_msg(p.ver, p.min_reader_ver)?; + + let status = match p.status { + 0 => Status::Suspended, + 1 => Status::Started, + s => { + return Err(Incompatible::new(format!("Status can not be {s}"))); + } + }; + let schedule = match p.schedule_options { + None => None, + Some(ref s) => { + if !p.after.is_empty() { + None + } else { + let schedule_type = match s.schedule_type { + 0 => mt::ScheduleType::IntervalType, + 1 => mt::ScheduleType::CronType, + s => { + return Err(Incompatible::new(format!("ScheduleType can not be {s}"))); + } + }; + + Some(mt::ScheduleOptions { + interval: s.interval, + cron: s.cron.clone(), + time_zone: s.time_zone.clone(), + schedule_type, + milliseconds_interval: s.milliseconds_interval, + }) + } + } + }; + let warehouse = match p.warehouse_options { + None => None, + Some(ref w) => Some(mt::WarehouseOptions { + warehouse: w.warehouse.clone(), + using_warehouse_size: w.using_warehouse_size.clone(), + }), + }; + Ok(Self { + task_id: p.task_id, + task_name: p.task_name, + query_text: p.query_text, + when_condition: p.when_condition.clone(), + after: p.after, + comment: p.comment, + owner: p.owner, + owner_user: p.owner_user, + schedule_options: schedule, + warehouse_options: warehouse, + next_scheduled_at: match p.next_scheduled_at { + Some(c) => Some(DateTime::::from_pb(c)?), + None => None, + }, + suspend_task_after_num_failures: p.suspend_task_after_num_failures.map(|v| v as u64), 
+ error_integration: p.error_integration.clone(), + status, + created_at: DateTime::::from_pb(p.created_at)?, + updated_at: DateTime::::from_pb(p.updated_at)?, + last_suspended_at: match p.last_suspended_at { + Some(c) => Some(DateTime::::from_pb(c)?), + None => None, + }, + session_params: p.session_parameters, + }) + } + + fn to_pb(&self) -> Result { + Ok(pb::Task { + ver: VER, + min_reader_ver: MIN_READER_VER, + task_id: self.task_id, + task_name: self.task_name.clone(), + query_text: self.query_text.clone(), + comment: self.comment.clone(), + owner: self.owner.clone(), + schedule_options: match &self.schedule_options { + None => None, + Some(s) => Some(pb::ScheduleOptions { + interval: s.interval, + cron: s.cron.clone(), + time_zone: s.time_zone.clone(), + schedule_type: s.schedule_type as i32, + milliseconds_interval: s.milliseconds_interval, + }), + }, + warehouse_options: match &self.warehouse_options { + None => None, + Some(w) => Some(pb::WarehouseOptions { + warehouse: w.warehouse.clone(), + using_warehouse_size: w.using_warehouse_size.clone(), + }), + }, + next_scheduled_at: match &self.next_scheduled_at { + None => None, + Some(d) => Some(d.to_pb()?), + }, + suspend_task_after_num_failures: self.suspend_task_after_num_failures.map(|v| v as i32), + status: self.status as i32, + created_at: self.created_at.to_pb()?, + updated_at: self.updated_at.to_pb()?, + last_suspended_at: match &self.last_suspended_at { + None => None, + Some(d) => Some(d.to_pb()?), + }, + after: self.after.clone(), + when_condition: self.when_condition.clone(), + session_parameters: self.session_params.clone(), + error_integration: self.error_integration.clone(), + owner_user: self.owner_user.clone(), + }) + } +} + +impl FromToProto for mt::TaskRun { + type PB = pb::TaskRun; + fn get_pb_ver(p: &Self::PB) -> u64 { + p.ver + } + + fn from_pb(p: Self::PB) -> Result + where Self: Sized { + Ok(mt::TaskRun { + task: mt::Task::from_pb( + p.task + .ok_or_else(|| Incompatible::new("State can 
not be empty"))?, + )?, + run_id: p.run_id, + attempt_number: p.attempt_number, + state: match p.state { + 0 => mt::State::Scheduled, + 1 => mt::State::Executing, + 2 => mt::State::Succeeded, + 3 => mt::State::Failed, + 4 => mt::State::Cancelled, + n => return Err(Incompatible::new(format!("State can not be {n}"))), + }, + scheduled_at: DateTime::::from_pb(p.scheduled_at)?, + completed_at: p.completed_at.map(DateTime::::from_pb).transpose()?, + error_code: p.error_code, + error_message: p.error_message, + root_task_id: p.root_task_id, + }) + } + + fn to_pb(&self) -> Result { + Ok(pb::TaskRun { + ver: VER, + min_reader_ver: MIN_READER_VER, + run_id: self.run_id, + attempt_number: self.attempt_number, + state: self.state as i32, + scheduled_at: self.scheduled_at.to_pb()?, + completed_at: self + .completed_at + .as_ref() + .map(DateTime::::to_pb) + .transpose()?, + error_code: self.error_code, + error_message: self.error_message.clone(), + root_task_id: self.root_task_id, + task: Some(self.task.to_pb()?), + }) + } +} diff --git a/src/meta/protos/proto/task.proto b/src/meta/protos/proto/task.proto new file mode 100644 index 0000000000000..2947efdc53b11 --- /dev/null +++ b/src/meta/protos/proto/task.proto @@ -0,0 +1,84 @@ +// Copyright 2022 Datafuse Labs. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +syntax = "proto3"; + +package databend_proto; + +message WarehouseOptions { + optional string warehouse = 1; // warehouse or using_warehouse_size + optional string using_warehouse_size = 2; +} + +message ScheduleOptions { + enum ScheduleType { + interval_type = 0; + cron_type = 1; + } + optional int32 interval = 1; // secs, INTERVAL= '5 second' means execute sql every 5 secs + optional string cron = 2; // CRON = '0 2 * * *' means Every night at 2 AM. UTC time zone. + optional string time_zone = 3; // "UTC..." + ScheduleType schedule_type = 4; + optional uint64 milliseconds_interval = 5; // milliseconds level interval +} + +message TaskRun { + uint64 ver = 100; + uint64 min_reader_ver = 101; + + enum State { + Scheduled = 0; + Executing = 1; + Succeeded = 2; + Failed = 3; + Cancelled = 4; + } + uint64 run_id = 1; + int32 attempt_number = 2; + State state = 3; + string scheduled_at = 4; + optional string completed_at = 5; + int64 error_code = 6; + optional string error_message = 7; + uint64 root_task_id = 8; + Task task = 9; +} + +message Task { + uint64 ver = 100; + uint64 min_reader_ver = 101; + + enum Status { + Suspended = 0; + Started = 1; + } + uint64 task_id = 1; + string task_name = 2; + string query_text = 4; + optional string comment = 5; + string owner = 6; + ScheduleOptions schedule_options = 7; + WarehouseOptions warehouse_options = 8; + optional string next_scheduled_at = 9; + optional int32 suspend_task_after_num_failures = 10; //SUSPEND_TASK_AFTER_NUM_FAILURES + Status status = 12; + string created_at = 14; // RFC 3339 format time + string updated_at = 15; + optional string last_suspended_at = 16; + repeated string after = 17; + optional string when_condition = 18; + map session_parameters = 19; + optional string error_integration = 20; + string owner_user = 21; +} diff --git a/src/query/config/src/config.rs b/src/query/config/src/config.rs index 7a4329633b86d..cab5f61b368fe 100644 --- a/src/query/config/src/config.rs +++ 
b/src/query/config/src/config.rs @@ -1937,6 +1937,12 @@ pub struct QueryConfig { #[clap(long, value_name = "VALUE", default_value = "50")] pub max_cached_queries_profiles: usize, + #[clap(long, value_name = "VALUE", default_value = "false")] + pub enable_private_task: bool, + + #[clap(long, value_name = "VALUE", default_value = "1024")] + pub tasks_channel_len: usize, + /// A list of network that not to be checked by network policy. #[clap(long, value_name = "VALUE")] pub network_policy_whitelist: Vec, @@ -2043,6 +2049,8 @@ impl TryInto for QueryConfig { cloud_control_grpc_server_address: self.cloud_control_grpc_server_address, cloud_control_grpc_timeout: self.cloud_control_grpc_timeout, max_cached_queries_profiles: self.max_cached_queries_profiles, + enable_private_task: self.enable_private_task, + tasks_channel_len: self.tasks_channel_len, network_policy_whitelist: self.network_policy_whitelist, settings: self .settings @@ -2156,6 +2164,8 @@ impl From for QueryConfig { cloud_control_grpc_server_address: inner.cloud_control_grpc_server_address, cloud_control_grpc_timeout: inner.cloud_control_grpc_timeout, max_cached_queries_profiles: inner.max_cached_queries_profiles, + enable_private_task: inner.enable_private_task, + tasks_channel_len: inner.tasks_channel_len, network_policy_whitelist: inner.network_policy_whitelist, settings: HashMap::new(), resources_management: None, diff --git a/src/query/config/src/inner.rs b/src/query/config/src/inner.rs index 2807e1aaee817..2578de5e54a33 100644 --- a/src/query/config/src/inner.rs +++ b/src/query/config/src/inner.rs @@ -252,6 +252,9 @@ pub struct QueryConfig { pub cloud_control_grpc_timeout: u64, pub max_cached_queries_profiles: usize, + pub enable_private_task: bool, + pub tasks_channel_len: usize, + pub network_policy_whitelist: Vec, pub settings: HashMap, @@ -342,6 +345,8 @@ impl Default for QueryConfig { cloud_control_grpc_timeout: 0, data_retention_time_in_days_max: 90, max_cached_queries_profiles: 50, + 
enable_private_task: false, + tasks_channel_len: 1024, network_policy_whitelist: Vec::new(), settings: HashMap::new(), resources_management: None, diff --git a/src/query/management/Cargo.toml b/src/query/management/Cargo.toml index b69d9c56d9f7c..2928dd6a369d9 100644 --- a/src/query/management/Cargo.toml +++ b/src/query/management/Cargo.toml @@ -9,6 +9,10 @@ edition = { workspace = true } [dependencies] async-backtrace = { workspace = true } async-trait = { workspace = true } +chrono = { workspace = true } +chrono-tz = { workspace = true } +cron = { workspace = true } +databend-common-ast = { workspace = true } databend-common-base = { workspace = true } databend-common-exception = { workspace = true } databend-common-functions = { workspace = true } @@ -29,6 +33,7 @@ prost = { workspace = true } serde = { workspace = true } serde_json = { workspace = true } thiserror = { workspace = true } +tokio = { workspace = true } [dev-dependencies] databend-common-expression = { workspace = true } diff --git a/src/query/management/src/lib.rs b/src/query/management/src/lib.rs index 8aaba387f15a0..bfabce3a9eaa1 100644 --- a/src/query/management/src/lib.rs +++ b/src/query/management/src/lib.rs @@ -32,6 +32,7 @@ mod warehouse; mod client_session; pub mod errors; mod procedure; +pub mod task; mod workload; pub use client_session::ClientSessionMgr; diff --git a/src/query/management/src/task/errors.rs b/src/query/management/src/task/errors.rs new file mode 100644 index 0000000000000..703728729ad1c --- /dev/null +++ b/src/query/management/src/task/errors.rs @@ -0,0 +1,122 @@ +// Copyright 2021 Datafuse Labs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use std::fmt::Display; + +use databend_common_exception::ErrorCode; +use databend_common_meta_types::MetaError; + +use crate::errors::TenantError; + +/// Task logic error, unrelated to the backend service providing Task management, or dependent component. +#[derive(Clone, Debug, thiserror::Error)] +pub enum TaskError { + // NOTE: do not expose tenant in a for-user error message. + #[error("Task not found: '{name}'; {context}")] + NotFound { + tenant: String, + name: String, + context: String, + }, + + // NOTE: do not expose tenant in a for-user error message. + #[error("Task already exists: '{name}'; {reason}")] + Exists { + tenant: String, + name: String, + reason: String, + }, + + // NOTE: do not expose tenant in a for-user error message. + #[error("Task timezone invalid: '{name}'; {reason}")] + InvalidTimezone { + tenant: String, + name: String, + reason: String, + }, + + // NOTE: do not expose tenant in a for-user error message. + #[error("Task cron invalid: '{name}'; {reason}")] + InvalidCron { + tenant: String, + name: String, + reason: String, + }, + + #[error("Task cannot have both `SCHEDULE` and `AFTER`: '{name}'")] + ScheduleAndAfterConflict { tenant: String, name: String }, +} + +impl From for ErrorCode { + fn from(value: TaskError) -> Self { + let s = value.to_string(); + match value { + TaskError::NotFound { .. } => ErrorCode::UnknownTask(s), + TaskError::Exists { .. } => ErrorCode::TaskAlreadyExists(s), + TaskError::InvalidTimezone { .. } => ErrorCode::TaskAlreadyExists(s), + TaskError::InvalidCron { .. 
} => ErrorCode::TaskCronInvalid(s), + TaskError::ScheduleAndAfterConflict { .. } => { + ErrorCode::TaskScheduleAndAfterConflict(s) + } + } + } +} + +/// The error occurred during accessing API providing Task management. +#[derive(Clone, Debug, thiserror::Error)] +pub enum TaskApiError { + #[error("TenantError: '{0}'")] + TenantError(#[from] TenantError), + + #[error("MetaService error: {meta_err}; {context}")] + MetaError { + meta_err: MetaError, + context: String, + }, +} + +impl From for TaskApiError { + fn from(meta_err: MetaError) -> Self { + TaskApiError::MetaError { + meta_err, + context: "".to_string(), + } + } +} + +impl From for ErrorCode { + fn from(value: TaskApiError) -> Self { + match value { + TaskApiError::TenantError(e) => ErrorCode::from(e), + TaskApiError::MetaError { meta_err, context } => { + ErrorCode::from(meta_err).add_message_back(context) + } + } + } +} + +impl TaskApiError { + pub fn append_context(self, context: impl Display) -> Self { + match self { + TaskApiError::TenantError(e) => TaskApiError::TenantError(e.append_context(context)), + TaskApiError::MetaError { + meta_err, + context: old, + } => TaskApiError::MetaError { + meta_err, + context: format!("{}; {}", old, context), + }, + } + } +} diff --git a/src/query/management/src/task/mod.rs b/src/query/management/src/task/mod.rs new file mode 100644 index 0000000000000..1d254702fc89a --- /dev/null +++ b/src/query/management/src/task/mod.rs @@ -0,0 +1,22 @@ +// Copyright 2021 Datafuse Labs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +mod errors; +mod task_mgr; + +pub use errors::TaskApiError; +pub use errors::TaskError; +pub use task_mgr::TaskChannel; +pub use task_mgr::TaskMessage; +pub use task_mgr::TaskMgr; diff --git a/src/query/management/src/task/task_mgr.rs b/src/query/management/src/task/task_mgr.rs new file mode 100644 index 0000000000000..ac8b3cf100376 --- /dev/null +++ b/src/query/management/src/task/task_mgr.rs @@ -0,0 +1,398 @@ +// Copyright 2021 Datafuse Labs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +use std::ops::Deref; +use std::str::FromStr; +use std::sync::Arc; + +use chrono::Utc; +use chrono_tz::Tz; +use cron::Schedule; +use databend_common_ast::ast::AlterTaskOptions; +use databend_common_ast::ast::ScheduleOptions; +use databend_common_base::base::tokio::sync::mpsc; +use databend_common_base::base::GlobalInstance; +use databend_common_exception::ErrorCode; +use databend_common_meta_api::kv_pb_api::KVPbApi; +use databend_common_meta_api::kv_pb_api::UpsertPB; +use databend_common_meta_app::principal::task; +use databend_common_meta_app::principal::task_run_ident::TaskRunIdent; +use databend_common_meta_app::principal::ScheduleType; +use databend_common_meta_app::principal::Status; +use databend_common_meta_app::principal::Task; +use databend_common_meta_app::principal::TaskIdent; +use databend_common_meta_app::principal::TaskRun; +use databend_common_meta_app::schema::CreateOption; +use databend_common_meta_app::tenant::Tenant; +use databend_common_meta_kvapi::kvapi; +use databend_common_meta_kvapi::kvapi::DirName; +use databend_common_meta_types::MatchSeq; +use databend_common_meta_types::MetaError; +use databend_common_meta_types::With; +use futures::StreamExt; +use futures::TryStreamExt; + +use crate::task::errors::TaskApiError; +use crate::task::errors::TaskError; + +pub enum TaskMessage { + ExecuteTask(Task), + ScheduleTask(Task), + DeleteTask(String), + AfterTask(Task), +} + +pub struct TaskChannel { + tx: mpsc::Sender, +} + +#[derive(Clone)] +pub struct TaskMgr { + kv_api: Arc>, + tenant: Tenant, +} + +impl TaskChannel { + pub fn instance() -> Arc { + GlobalInstance::get() + } +} + +impl Deref for TaskChannel { + type Target = mpsc::Sender; + + fn deref(&self) -> &Self::Target { + &self.tx + } +} + +impl TaskChannel { + pub fn init(len: usize) -> Result, ErrorCode> { + let (tx, rx) = mpsc::channel(len); + + GlobalInstance::set(Arc::new(TaskChannel { tx })); + Ok(rx) + } +} + +impl TaskMgr { + pub fn create(kv_api: Arc>, tenant: &Tenant) -> Self { + 
TaskMgr { + kv_api, + tenant: tenant.clone(), + } + } + + #[async_backtrace::framed] + #[fastrace::trace] + pub async fn create_task( + &self, + mut task: Task, + create_option: &CreateOption, + ) -> Result, TaskApiError> { + task.created_at = Utc::now(); + + self.create_task_inner(task, create_option).await + } + + async fn create_task_inner( + &self, + task: Task, + create_option: &CreateOption, + ) -> Result, TaskApiError> { + assert!(task.after.is_empty() || task.schedule_options.is_none()); + // check + if let Some(schedule_options) = &task.schedule_options { + match schedule_options.schedule_type { + ScheduleType::IntervalType => (), + ScheduleType::CronType => { + if let Err(e) = schedule_options.time_zone.as_ref().unwrap().parse::() { + return Ok(Err(TaskError::InvalidTimezone { + tenant: self.tenant.tenant_name().to_string(), + name: task.task_name.to_string(), + reason: e.to_string(), + })); + } + if let Err(e) = Schedule::from_str(schedule_options.cron.as_ref().unwrap()) { + return Ok(Err(TaskError::InvalidCron { + tenant: self.tenant.tenant_name().to_string(), + name: task.task_name.to_string(), + reason: e.to_string(), + })); + } + } + } + } + + let seq = MatchSeq::from(*create_option); + + let key = TaskIdent::new(&self.tenant, &task.task_name); + let req = UpsertPB::insert(key, task.clone()).with(seq); + let res = self.kv_api.upsert_pb(&req).await?; + + if let CreateOption::Create = create_option { + if res.prev.is_some() { + let err = TaskError::Exists { + tenant: self.tenant.tenant_name().to_string(), + name: task.task_name.to_string(), + reason: "".to_string(), + }; + return Ok(Err(err)); + } + } + if !task.after.is_empty() { + let _ = TaskChannel::instance().send(TaskMessage::AfterTask(task)); + } else if task.schedule_options.is_some() { + let _ = TaskChannel::instance().send(TaskMessage::ScheduleTask(task)); + } + + Ok(Ok(())) + } + + #[async_backtrace::framed] + #[fastrace::trace] + pub async fn execute_task( + &self, + task_name: &str, + ) -> 
Result, TaskApiError> { + let key = TaskIdent::new(&self.tenant, task_name); + let Some(task) = self.kv_api.get_pb(&key).await? else { + return Ok(Err(TaskError::NotFound { + tenant: self.tenant.tenant_name().to_string(), + name: task_name.to_string(), + context: "while execute task".to_string(), + })); + }; + let _ = TaskChannel::instance().send(TaskMessage::ExecuteTask(Task::clone(&task))); + + Ok(Ok(())) + } + + #[async_backtrace::framed] + #[fastrace::trace] + pub async fn alter_task( + &self, + task_name: &str, + alter_options: &AlterTaskOptions, + ) -> Result, TaskApiError> { + let key = TaskIdent::new(&self.tenant, task_name); + let Some(task) = self.kv_api.get_pb(&key).await? else { + return Ok(Err(TaskError::NotFound { + tenant: self.tenant.tenant_name().to_string(), + name: task_name.to_string(), + context: "while alter task".to_string(), + })); + }; + let mut task = Task::clone(&task); + task.updated_at = Utc::now(); + + match alter_options { + AlterTaskOptions::Resume => { + task.status = Status::Started; + } + AlterTaskOptions::Suspend => { + task.last_suspended_at = Some(Utc::now()); + task.status = Status::Suspended; + } + AlterTaskOptions::Set { + schedule, + comments, + warehouse, + suspend_task_after_num_failures, + error_integration, + session_parameters, + } => { + task.schedule_options = schedule.clone().map(Self::make_schedule_options); + task.comment = comments.clone(); + task.warehouse_options = Some(Self::make_warehouse_options(warehouse.clone())); + task.suspend_task_after_num_failures = suspend_task_after_num_failures.clone(); + task.error_integration = error_integration.clone(); + if let Some(session_parameters) = session_parameters { + task.session_params = session_parameters.clone(); + } + } + AlterTaskOptions::Unset { .. 
} => { + todo!() + } + AlterTaskOptions::ModifyAs(sql) => { + task.query_text = sql.to_string(); + } + AlterTaskOptions::ModifyWhen(sql) => { + task.when_condition = Some(sql.to_string()); + } + AlterTaskOptions::AddAfter(afters) => { + if task.schedule_options.is_some() { + return Ok(Err(TaskError::ScheduleAndAfterConflict { + tenant: self.tenant.tenant_name().to_string(), + name: task_name.to_string(), + })); + } + for after in afters { + if task.after.contains(&after) { + continue; + } + task.after.push(after.clone()); + } + } + AlterTaskOptions::RemoveAfter(afters) => { + if task.schedule_options.is_some() { + return Ok(Err(TaskError::ScheduleAndAfterConflict { + tenant: self.tenant.tenant_name().to_string(), + name: task_name.to_string(), + })); + } + task.after.retain(|task| !afters.contains(task)); + } + } + if let Err(e) = self + .create_task_inner(task, &CreateOption::CreateOrReplace) + .await? + { + return Ok(Err(TaskError::NotFound { + tenant: self.tenant.tenant_name().to_string(), + name: task_name.to_string(), + context: format!("while alter task: {}", e), + })); + } + + Ok(Ok(())) + } + + #[async_backtrace::framed] + #[fastrace::trace] + pub async fn describe_task( + &self, + task_name: &str, + ) -> Result, TaskError>, TaskApiError> { + let key = TaskIdent::new(&self.tenant, task_name); + let task = self.kv_api.get_pb(&key).await?; + + Ok(Ok(task.map(|task| Task::clone(&task)))) + } + + #[async_backtrace::framed] + #[fastrace::trace] + pub async fn drop_task(&self, task_name: &str) -> Result, MetaError> { + let key = TaskIdent::new(&self.tenant, task_name); + let req = UpsertPB::delete(key).with(MatchSeq::GE(1)); + let res = self.kv_api.upsert_pb(&req).await?; + + if res.is_changed() { + Ok(res.prev.as_ref().map(|prev| Task::clone(prev))) + } else { + Ok(None) + } + } + + #[async_backtrace::framed] + #[fastrace::trace] + pub async fn list_task(&self) -> Result, MetaError> { + let key = DirName::new(TaskIdent::new(&self.tenant, "")); + let strm = 
self.kv_api.list_pb_values(&key).await?; + + match strm.try_collect().await { + Ok(tasks) => Ok(tasks), + Err(_) => self.list_task_fallback().await, + } + } + + #[async_backtrace::framed] + #[fastrace::trace] + pub async fn list_task_fallback(&self) -> Result, MetaError> { + let key = TaskIdent::new(&self.tenant, "dummy"); + let dir = DirName::new(key); + let tasks = self + .kv_api + .list_pb_values(&dir) + .await? + .try_collect::>() + .await?; + + Ok(tasks) + } + + #[async_backtrace::framed] + #[fastrace::trace] + pub async fn update_task_run( + &self, + task_run: TaskRun, + ) -> Result, TaskApiError> { + let seq = MatchSeq::from(CreateOption::CreateOrReplace); + let key = TaskRunIdent::new(&self.tenant, task_run.key()); + let req = UpsertPB::insert(key, task_run.clone()).with(seq); + let _ = self.kv_api.upsert_pb(&req).await?; + + Ok(Ok(())) + } + + #[async_backtrace::framed] + #[fastrace::trace] + pub async fn lasted_task_run( + &self, + task_name: &str, + ) -> Result, TaskError>, TaskApiError> { + let key = DirName::new(TaskRunIdent::new(&self.tenant, task_name)); + let result = self + .kv_api + .list_pb_values(&key) + .await? 
+ .next() + .await + .transpose()?; + + Ok(Ok(result)) + } + + #[async_backtrace::framed] + #[fastrace::trace] + pub async fn show_task_runs_full( + &self, + task_name: &str, + ) -> Result, TaskApiError>, TaskApiError> { + let key = DirName::new(TaskRunIdent::new(&self.tenant, task_name)); + let stream = self.kv_api.list_pb_values(&key).await?; + + Ok(Ok(stream.try_collect().await?)) + } + + pub fn make_schedule_options(opt: ScheduleOptions) -> task::ScheduleOptions { + match opt { + ScheduleOptions::IntervalSecs(secs, ms) => { + task::ScheduleOptions { + interval: Some(secs as i32), + // none if ms is 0, else some ms + milliseconds_interval: if ms == 0 { None } else { Some(ms) }, + cron: None, + time_zone: None, + schedule_type: task::ScheduleType::IntervalType, + } + } + + ScheduleOptions::CronExpression(expr, timezone) => task::ScheduleOptions { + interval: None, + milliseconds_interval: None, + cron: Some(expr), + time_zone: timezone, + schedule_type: task::ScheduleType::CronType, + }, + } + } + + pub fn make_warehouse_options(opt: Option) -> task::WarehouseOptions { + task::WarehouseOptions { + warehouse: opt, + using_warehouse_size: None, + } + } +} diff --git a/src/query/service/Cargo.toml b/src/query/service/Cargo.toml index 8f264f45a2e09..09cf06e280256 100644 --- a/src/query/service/Cargo.toml +++ b/src/query/service/Cargo.toml @@ -44,6 +44,7 @@ bytes = { workspace = true } chrono = { workspace = true } chrono-tz = { workspace = true } concurrent-queue = { workspace = true } +cron = { workspace = true } ctor = { workspace = true } dashmap = { workspace = true } databend-common-ast = { workspace = true } @@ -169,6 +170,7 @@ sysinfo = { workspace = true } tempfile = { workspace = true } tokio = { workspace = true } tokio-stream = { workspace = true, features = ["net"] } +tokio-util = { workspace = true } toml = { workspace = true, features = ["parse"] } tonic = { workspace = true } typetag = { workspace = true } diff --git 
a/src/query/service/src/global_services.rs b/src/query/service/src/global_services.rs index 8024b542a7cac..752aa73574cbd 100644 --- a/src/query/service/src/global_services.rs +++ b/src/query/service/src/global_services.rs @@ -27,6 +27,7 @@ use databend_common_config::InnerConfig; use databend_common_exception::ErrorCode; use databend_common_exception::Result; use databend_common_exception::StackTrace; +use databend_common_management::task::TaskChannel; use databend_common_management::WorkloadGroupResourceManager; use databend_common_management::WorkloadMgr; use databend_common_meta_app::schema::CatalogType; @@ -56,6 +57,7 @@ use crate::servers::http::v1::ClientSessionManager; use crate::servers::http::v1::HttpQueryManager; use crate::sessions::QueriesQueueManager; use crate::sessions::SessionManager; +use crate::task_service::TaskService; pub struct GlobalServices; @@ -120,7 +122,9 @@ impl GlobalServices { SessionManager::init(config)?; LockManager::init()?; AuthMgr::init(config)?; - + + + let task_rx = TaskChannel::init(GlobalConfig::instance().query.tasks_channel_len)?; // Init user manager. // Builtin users and udfs are created here. 
{ @@ -141,7 +145,7 @@ impl GlobalServices { ) .await?; } - + TaskService::init(task_rx, config.query.tenant_id.clone())?; RoleCacheManager::init()?; DataOperator::init(&config.storage, config.spill.storage_params.clone()).await?; diff --git a/src/query/service/src/interpreters/interpreter_task_alter.rs b/src/query/service/src/interpreters/interpreter_task_alter.rs index 2f45da642685e..71eabd5a6261d 100644 --- a/src/query/service/src/interpreters/interpreter_task_alter.rs +++ b/src/query/service/src/interpreters/interpreter_task_alter.rs @@ -14,24 +14,11 @@ use std::sync::Arc; -use databend_common_ast::ast::AlterTaskOptions; -use databend_common_ast::ast::TaskSql; -use databend_common_catalog::table_context::TableContext; -use databend_common_cloud_control::client_config::make_request; -use databend_common_cloud_control::cloud_api::CloudControlApiProvider; -use databend_common_cloud_control::pb; -use databend_common_cloud_control::pb::alter_task_request::AlterTaskType; -use databend_common_cloud_control::pb::AlterTaskRequest; -use databend_common_cloud_control::pb::WarehouseOptions; -use databend_common_config::GlobalConfig; -use databend_common_exception::ErrorCode; use databend_common_exception::Result; -use databend_common_settings::DefaultSettings; -use databend_common_settings::SettingScope; use databend_common_sql::plans::AlterTaskPlan; -use crate::interpreters::common::get_task_client_config; -use crate::interpreters::common::make_schedule_options; +use crate::interpreters::task::TaskInterpreter; +use crate::interpreters::task::TaskInterpreterFactory; use crate::interpreters::Interpreter; use crate::pipelines::PipelineBuildResult; use crate::sessions::QueryContext; @@ -48,111 +35,7 @@ impl AlterTaskInterpreter { } } -impl AlterTaskInterpreter { - fn build_request(&self) -> AlterTaskRequest { - let plan = self.plan.clone(); - let owner = self - .ctx - .get_current_role() - .unwrap_or_default() - .identity() - .to_string(); - let mut req = AlterTaskRequest { - 
task_name: plan.task_name, - tenant_id: plan.tenant.tenant_name().to_string(), - owner, - alter_task_type: 0, - if_exist: plan.if_exists, - error_integration: None, - task_sql_type: 0, - query_text: None, - comment: None, - schedule_options: None, - warehouse_options: None, - suspend_task_after_num_failures: None, - when_condition: None, - add_after: vec![], - remove_after: vec![], - set_session_parameters: false, - session_parameters: Default::default(), - script_sql: None, - }; - match plan.alter_options { - AlterTaskOptions::Resume => { - req.alter_task_type = AlterTaskType::Resume as i32; - } - AlterTaskOptions::Suspend => { - req.alter_task_type = AlterTaskType::Suspend as i32; - } - AlterTaskOptions::Set { - schedule, - comments, - warehouse, - suspend_task_after_num_failures, - error_integration, - session_parameters, - } => { - req.alter_task_type = AlterTaskType::Set as i32; - req.schedule_options = schedule.map(make_schedule_options); - req.comment = comments; - req.warehouse_options = warehouse.map(|w| WarehouseOptions { - warehouse: Some(w), - using_warehouse_size: None, - }); - req.error_integration = error_integration; - req.suspend_task_after_num_failures = - suspend_task_after_num_failures.map(|i| i as i32); - if let Some(session_parameters) = session_parameters { - req.set_session_parameters = true; - req.session_parameters = session_parameters; - } - } - AlterTaskOptions::Unset { .. 
} => { - todo!() - } - AlterTaskOptions::ModifyAs(sql) => { - req.alter_task_type = AlterTaskType::ModifyAs as i32; - match sql { - TaskSql::SingleStatement(stmt) => { - req.task_sql_type = i32::from(pb::TaskSqlType::Sql); - req.query_text = Some(stmt); - } - TaskSql::ScriptBlock(ref sqls) => { - req.task_sql_type = i32::from(pb::TaskSqlType::Script); - req.query_text = Some(format!("{}", sql)); - req.script_sql = Some(pb::ScriptSql { sqls: sqls.clone() }) - } - } - } - AlterTaskOptions::AddAfter(tasks) => { - req.alter_task_type = AlterTaskType::AddAfter as i32; - req.add_after = tasks; - } - AlterTaskOptions::RemoveAfter(tasks) => { - req.alter_task_type = AlterTaskType::RemoveAfter as i32; - req.remove_after = tasks; - } - AlterTaskOptions::ModifyWhen(sql) => { - req.alter_task_type = AlterTaskType::ModifyWhen as i32; - req.when_condition = Some(sql.to_string()); - } - } - req - } - - fn validate_session_parameters(&self) -> Result<()> { - if let AlterTaskOptions::Set { - session_parameters: Some(session_parameters), - .. 
- } = &self.plan.alter_options - { - for (key, _) in session_parameters.iter() { - DefaultSettings::check_setting_scope(key, SettingScope::Session)?; - } - } - Ok(()) - } -} +impl AlterTaskInterpreter {} #[async_trait::async_trait] impl Interpreter for AlterTaskInterpreter { @@ -167,19 +50,10 @@ impl Interpreter for AlterTaskInterpreter { #[fastrace::trace] #[async_backtrace::framed] async fn execute2(&self) -> Result { - let config = GlobalConfig::instance(); - if config.query.cloud_control_grpc_server_address.is_none() { - return Err(ErrorCode::CloudControlNotEnabled( - "cannot alter task without cloud control enabled, please set cloud_control_grpc_server_address in config", - )); - } - self.validate_session_parameters()?; - let cloud_api = CloudControlApiProvider::instance(); - let task_client = cloud_api.get_task_client(); - let req = self.build_request(); - let config = get_task_client_config(self.ctx.clone(), cloud_api.get_timeout())?; - let req = make_request(req, config); - task_client.alter_task(req).await?; + TaskInterpreterFactory::build() + .alter_task(&self.ctx, &self.plan) + .await?; + Ok(PipelineBuildResult::create()) } } diff --git a/src/query/service/src/interpreters/interpreter_task_create.rs b/src/query/service/src/interpreters/interpreter_task_create.rs index f1a9a204b786b..88bd453bc0967 100644 --- a/src/query/service/src/interpreters/interpreter_task_create.rs +++ b/src/query/service/src/interpreters/interpreter_task_create.rs @@ -14,23 +14,11 @@ use std::sync::Arc; -use databend_common_ast::ast::TaskSql; -use databend_common_catalog::table_context::TableContext; -use databend_common_cloud_control::client_config::make_request; -use databend_common_cloud_control::cloud_api::CloudControlApiProvider; -use databend_common_cloud_control::pb; -use databend_common_cloud_control::pb::CreateTaskRequest; -use databend_common_cloud_control::pb::DropTaskRequest; -use databend_common_config::GlobalConfig; -use databend_common_exception::ErrorCode; use 
databend_common_exception::Result; -use databend_common_settings::DefaultSettings; -use databend_common_settings::SettingScope; use databend_common_sql::plans::CreateTaskPlan; -use crate::interpreters::common::get_task_client_config; -use crate::interpreters::common::make_schedule_options; -use crate::interpreters::common::make_warehouse_options; +use crate::interpreters::task::TaskInterpreter; +use crate::interpreters::task::TaskInterpreterFactory; use crate::interpreters::Interpreter; use crate::pipelines::PipelineBuildResult; use crate::sessions::QueryContext; @@ -47,55 +35,6 @@ impl CreateTaskInterpreter { } } -impl CreateTaskInterpreter { - fn build_request(&self) -> CreateTaskRequest { - let plan = self.plan.clone(); - let owner = self - .ctx - .get_current_role() - .unwrap_or_default() - .identity() - .to_string(); - let mut req = CreateTaskRequest { - task_name: plan.task_name, - tenant_id: plan.tenant.tenant_name().to_string(), - query_text: "".to_string(), - owner, - comment: plan.comment, - schedule_options: plan.schedule_opts.map(make_schedule_options), - warehouse_options: Some(make_warehouse_options(plan.warehouse)), - error_integration: plan.error_integration, - task_sql_type: 0, - suspend_task_after_num_failures: plan.suspend_task_after_num_failures.map(|x| x as i32), - if_not_exist: plan.create_option.if_not_exist(), - after: plan.after, - when_condition: plan.when_condition, - session_parameters: plan.session_parameters, - script_sql: None, - }; - match plan.sql { - TaskSql::SingleStatement(stmt) => { - req.task_sql_type = i32::from(pb::TaskSqlType::Sql); - req.query_text = stmt; - } - TaskSql::ScriptBlock(ref sqls) => { - req.task_sql_type = i32::from(pb::TaskSqlType::Script); - req.query_text = format!("{}", plan.sql); - req.script_sql = Some(pb::ScriptSql { sqls: sqls.clone() }) - } - } - req - } - - fn validate_session_parameters(&self) -> Result<()> { - let session_parameters = self.plan.session_parameters.clone(); - for (key, _) in 
session_parameters.iter() { - DefaultSettings::check_setting_scope(key, SettingScope::Session)?; - } - Ok(()) - } -} - #[async_trait::async_trait] impl Interpreter for CreateTaskInterpreter { fn name(&self) -> &str { @@ -109,31 +48,10 @@ impl Interpreter for CreateTaskInterpreter { #[fastrace::trace] #[async_backtrace::framed] async fn execute2(&self) -> Result { - let config = GlobalConfig::instance(); - if config.query.cloud_control_grpc_server_address.is_none() { - return Err(ErrorCode::CloudControlNotEnabled( - "cannot create task without cloud control enabled, please set cloud_control_grpc_server_address in config", - )); - } - self.validate_session_parameters()?; - let cloud_api = CloudControlApiProvider::instance(); - let task_client = cloud_api.get_task_client(); - let req = self.build_request(); - let config = get_task_client_config(self.ctx.clone(), cloud_api.get_timeout())?; - let req = make_request(req, config.clone()); - - // cloud don't support create or replace, let's remove the task in previous - if self.plan.create_option.is_overriding() { - let drop_req = DropTaskRequest { - task_name: self.plan.task_name.clone(), - tenant_id: self.plan.tenant.tenant_name().to_string(), - if_exist: true, - }; - let drop_req = make_request(drop_req, config); - task_client.drop_task(drop_req).await?; - } + TaskInterpreterFactory::build() + .create_task(&self.ctx, &self.plan) + .await?; - task_client.create_task(req).await?; Ok(PipelineBuildResult::create()) } } diff --git a/src/query/service/src/interpreters/interpreter_task_describe.rs b/src/query/service/src/interpreters/interpreter_task_describe.rs index 1625954f60d9e..e0814a0486495 100644 --- a/src/query/service/src/interpreters/interpreter_task_describe.rs +++ b/src/query/service/src/interpreters/interpreter_task_describe.rs @@ -14,16 +14,12 @@ use std::sync::Arc; -use databend_common_cloud_control::client_config::make_request; -use databend_common_cloud_control::cloud_api::CloudControlApiProvider; -use 
databend_common_cloud_control::pb::DescribeTaskRequest; -use databend_common_config::GlobalConfig; -use databend_common_exception::ErrorCode; use databend_common_exception::Result; use databend_common_sql::plans::DescribeTaskPlan; use databend_common_storages_system::parse_tasks_to_datablock; -use crate::interpreters::common::get_task_client_config; +use crate::interpreters::task::TaskInterpreter; +use crate::interpreters::task::TaskInterpreterFactory; use crate::interpreters::Interpreter; use crate::pipelines::PipelineBuildResult; use crate::sessions::QueryContext; @@ -40,17 +36,6 @@ impl DescribeTaskInterpreter { } } -impl DescribeTaskInterpreter { - fn build_request(&self) -> DescribeTaskRequest { - let plan = self.plan.clone(); - DescribeTaskRequest { - task_name: plan.task_name, - tenant_id: plan.tenant.tenant_name().to_string(), - if_exist: false, - } - } -} - #[async_trait::async_trait] impl Interpreter for DescribeTaskInterpreter { fn name(&self) -> &str { @@ -64,23 +49,12 @@ impl Interpreter for DescribeTaskInterpreter { #[fastrace::trace] #[async_backtrace::framed] async fn execute2(&self) -> Result { - let config = GlobalConfig::instance(); - if config.query.cloud_control_grpc_server_address.is_none() { - return Err(ErrorCode::CloudControlNotEnabled( - "cannot describe task without cloud control enabled, please set cloud_control_grpc_server_address in config", - )); - } - let cloud_api = CloudControlApiProvider::instance(); - let task_client = cloud_api.get_task_client(); - let req = self.build_request(); - let config = get_task_client_config(self.ctx.clone(), cloud_api.get_timeout())?; - let req = make_request(req, config); - let resp = task_client.describe_task(req).await?; - if resp.task.is_none() { + let Some(task) = TaskInterpreterFactory::build() + .describe_task(&self.ctx, &self.plan) + .await? 
+ else { return Ok(PipelineBuildResult::create()); - } - let tasks = vec![resp.task.unwrap()]; - let result = parse_tasks_to_datablock(tasks)?; - PipelineBuildResult::from_blocks(vec![result]) + }; + PipelineBuildResult::from_blocks(vec![parse_tasks_to_datablock(vec![task])?]) } } diff --git a/src/query/service/src/interpreters/interpreter_task_drop.rs b/src/query/service/src/interpreters/interpreter_task_drop.rs index c8749399aafb3..ca4f5554ae4aa 100644 --- a/src/query/service/src/interpreters/interpreter_task_drop.rs +++ b/src/query/service/src/interpreters/interpreter_task_drop.rs @@ -14,15 +14,11 @@ use std::sync::Arc; -use databend_common_cloud_control::client_config::make_request; -use databend_common_cloud_control::cloud_api::CloudControlApiProvider; -use databend_common_cloud_control::pb::DropTaskRequest; -use databend_common_config::GlobalConfig; -use databend_common_exception::ErrorCode; use databend_common_exception::Result; use databend_common_sql::plans::DropTaskPlan; -use crate::interpreters::common::get_task_client_config; +use crate::interpreters::task::TaskInterpreter; +use crate::interpreters::task::TaskInterpreterFactory; use crate::interpreters::Interpreter; use crate::pipelines::PipelineBuildResult; use crate::sessions::QueryContext; @@ -39,17 +35,6 @@ impl DropTaskInterpreter { } } -impl DropTaskInterpreter { - fn build_request(&self) -> DropTaskRequest { - let plan = self.plan.clone(); - DropTaskRequest { - task_name: plan.task_name, - tenant_id: plan.tenant.tenant_name().to_string(), - if_exist: plan.if_exists, - } - } -} - #[async_trait::async_trait] impl Interpreter for DropTaskInterpreter { fn name(&self) -> &str { @@ -63,18 +48,10 @@ impl Interpreter for DropTaskInterpreter { #[fastrace::trace] #[async_backtrace::framed] async fn execute2(&self) -> Result { - let config = GlobalConfig::instance(); - if config.query.cloud_control_grpc_server_address.is_none() { - return Err(ErrorCode::CloudControlNotEnabled( - "cannot drop task without 
cloud control enabled, please set cloud_control_grpc_server_address in config", - )); - } - let cloud_api = CloudControlApiProvider::instance(); - let task_client = cloud_api.get_task_client(); - let req = self.build_request(); - let config = get_task_client_config(self.ctx.clone(), cloud_api.get_timeout())?; - let req = make_request(req, config); - task_client.drop_task(req).await?; + TaskInterpreterFactory::build() + .drop_task(&self.ctx, &self.plan) + .await?; + Ok(PipelineBuildResult::create()) } } diff --git a/src/query/service/src/interpreters/interpreter_task_execute.rs b/src/query/service/src/interpreters/interpreter_task_execute.rs index bac2a587f3847..23ccc1a33bb41 100644 --- a/src/query/service/src/interpreters/interpreter_task_execute.rs +++ b/src/query/service/src/interpreters/interpreter_task_execute.rs @@ -14,15 +14,11 @@ use std::sync::Arc; -use databend_common_cloud_control::client_config::make_request; -use databend_common_cloud_control::cloud_api::CloudControlApiProvider; -use databend_common_cloud_control::pb::ExecuteTaskRequest; -use databend_common_config::GlobalConfig; -use databend_common_exception::ErrorCode; use databend_common_exception::Result; use databend_common_sql::plans::ExecuteTaskPlan; -use crate::interpreters::common::get_task_client_config; +use crate::interpreters::task::TaskInterpreter; +use crate::interpreters::task::TaskInterpreterFactory; use crate::interpreters::Interpreter; use crate::pipelines::PipelineBuildResult; use crate::sessions::QueryContext; @@ -52,22 +48,10 @@ impl Interpreter for ExecuteTaskInterpreter { #[fastrace::trace] #[async_backtrace::framed] async fn execute2(&self) -> Result { - let config = GlobalConfig::instance(); - if config.query.cloud_control_grpc_server_address.is_none() { - return Err(ErrorCode::CloudControlNotEnabled( - "cannot execute task without cloud control enabled, please set cloud_control_grpc_server_address in config", - )); - } - let cloud_api = CloudControlApiProvider::instance(); - 
let task_client = cloud_api.get_task_client(); - let req = ExecuteTaskRequest { - task_name: self.plan.task_name.clone(), - tenant_id: self.plan.tenant.tenant_name().to_string(), - }; - let config = get_task_client_config(self.ctx.clone(), cloud_api.get_timeout())?; - let req = make_request(req, config); + TaskInterpreterFactory::build() + .execute_task(&self.ctx, &self.plan) + .await?; - task_client.execute_task(req).await?; Ok(PipelineBuildResult::create()) } } diff --git a/src/query/service/src/interpreters/interpreter_tasks_show.rs b/src/query/service/src/interpreters/interpreter_tasks_show.rs index 3d9fdc299b37b..62e58ee3badc6 100644 --- a/src/query/service/src/interpreters/interpreter_tasks_show.rs +++ b/src/query/service/src/interpreters/interpreter_tasks_show.rs @@ -14,16 +14,12 @@ use std::sync::Arc; -use databend_common_cloud_control::client_config::make_request; -use databend_common_cloud_control::cloud_api::CloudControlApiProvider; -use databend_common_cloud_control::pb::ShowTasksRequest; -use databend_common_config::GlobalConfig; -use databend_common_exception::ErrorCode; use databend_common_exception::Result; use databend_common_sql::plans::ShowTasksPlan; use databend_common_storages_system::parse_tasks_to_datablock; -use crate::interpreters::common::get_task_client_config; +use crate::interpreters::task::TaskInterpreter; +use crate::interpreters::task::TaskInterpreterFactory; use crate::interpreters::Interpreter; use crate::pipelines::PipelineBuildResult; use crate::sessions::QueryContext; @@ -40,27 +36,7 @@ impl ShowTasksInterpreter { } } -impl ShowTasksInterpreter { - async fn build_request(&self) -> Result { - let plan = self.plan.clone(); - let available_roles = self - .ctx - .get_current_session() - .get_all_available_roles() - .await?; - let req = ShowTasksRequest { - tenant_id: plan.tenant.tenant_name().to_string(), - name_like: "".to_string(), - result_limit: 10000, // TODO: use plan.limit pushdown - owners: available_roles - .into_iter() - 
.map(|x| x.identity().to_string()) - .collect(), - task_ids: vec![], - }; - Ok(req) - } -} +impl ShowTasksInterpreter {} #[async_trait::async_trait] impl Interpreter for ShowTasksInterpreter { @@ -75,20 +51,9 @@ impl Interpreter for ShowTasksInterpreter { #[fastrace::trace] #[async_backtrace::framed] async fn execute2(&self) -> Result { - let config = GlobalConfig::instance(); - if config.query.cloud_control_grpc_server_address.is_none() { - return Err(ErrorCode::CloudControlNotEnabled( - "cannot drop task without cloud control enabled, please set cloud_control_grpc_server_address in config", - )); - } - let cloud_api = CloudControlApiProvider::instance(); - let task_client = cloud_api.get_task_client(); - let req = self.build_request().await?; - let config = get_task_client_config(self.ctx.clone(), cloud_api.get_timeout())?; - let req = make_request(req, config); - - let resp = task_client.show_tasks(req).await?; - let tasks = resp.tasks; + let tasks = TaskInterpreterFactory::build() + .show_tasks(&self.ctx, &self.plan) + .await?; let result = parse_tasks_to_datablock(tasks)?; PipelineBuildResult::from_blocks(vec![result]) diff --git a/src/query/service/src/interpreters/mod.rs b/src/query/service/src/interpreters/mod.rs index d221ac3d8c6b8..dfcfd4f2fe572 100644 --- a/src/query/service/src/interpreters/mod.rs +++ b/src/query/service/src/interpreters/mod.rs @@ -170,6 +170,7 @@ mod interpreter_view_create; mod interpreter_view_describe; mod interpreter_view_drop; mod interpreter_virtual_column_refresh; +mod task; mod util; pub use access::ManagementModeAccess; diff --git a/src/query/service/src/interpreters/task/cloud.rs b/src/query/service/src/interpreters/task/cloud.rs new file mode 100644 index 0000000000000..0e389638ee955 --- /dev/null +++ b/src/query/service/src/interpreters/task/cloud.rs @@ -0,0 +1,358 @@ +// Copyright 2021 Datafuse Labs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance 
with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use std::sync::Arc; + +use databend_common_ast::ast::AlterTaskOptions; +use databend_common_ast::ast::TaskSql; +use databend_common_catalog::table_context::TableContext; +use databend_common_cloud_control::client_config::make_request; +use databend_common_cloud_control::cloud_api::CloudControlApiProvider; +use databend_common_cloud_control::pb; +use databend_common_cloud_control::pb::alter_task_request::AlterTaskType; +use databend_common_cloud_control::pb::AlterTaskRequest; +use databend_common_cloud_control::pb::CreateTaskRequest; +use databend_common_cloud_control::pb::DescribeTaskRequest; +use databend_common_cloud_control::pb::DropTaskRequest; +use databend_common_cloud_control::pb::ExecuteTaskRequest; +use databend_common_cloud_control::pb::ShowTasksRequest; +use databend_common_cloud_control::pb::WarehouseOptions; +use databend_common_cloud_control::task_utils; +use databend_common_config::GlobalConfig; +use databend_common_exception::ErrorCode; +use databend_common_exception::Result; +use databend_common_settings::DefaultSettings; +use databend_common_settings::SettingScope; +use databend_common_sql::plans::AlterTaskPlan; +use databend_common_sql::plans::CreateTaskPlan; +use databend_common_sql::plans::DescribeTaskPlan; +use databend_common_sql::plans::DropTaskPlan; +use databend_common_sql::plans::ExecuteTaskPlan; +use databend_common_sql::plans::ShowTasksPlan; + +use crate::interpreters::common::get_task_client_config; +use crate::interpreters::common::make_schedule_options; +use 
crate::interpreters::common::make_warehouse_options; +use crate::interpreters::task::TaskInterpreter; +use crate::sessions::QueryContext; + +pub(crate) struct CloudTaskInterpreter; + +impl CloudTaskInterpreter { + fn validate_create_session_parameters(plan: &CreateTaskPlan) -> Result<()> { + let session_parameters = plan.session_parameters.clone(); + for (key, _) in session_parameters.iter() { + DefaultSettings::check_setting_scope(key, SettingScope::Session)?; + } + Ok(()) + } + + fn validate_alter_session_parameters(plan: &AlterTaskPlan) -> Result<()> { + if let AlterTaskOptions::Set { + session_parameters: Some(session_parameters), + .. + } = &plan.alter_options + { + for (key, _) in session_parameters.iter() { + DefaultSettings::check_setting_scope(key, SettingScope::Session)?; + } + } + Ok(()) + } + + fn build_create_request(ctx: &Arc, plan: &CreateTaskPlan) -> CreateTaskRequest { + let plan = plan.clone(); + let owner = ctx + .get_current_role() + .unwrap_or_default() + .identity() + .to_string(); + let mut req = CreateTaskRequest { + task_name: plan.task_name, + tenant_id: plan.tenant.tenant_name().to_string(), + query_text: "".to_string(), + owner, + comment: plan.comment, + schedule_options: plan.schedule_opts.map(make_schedule_options), + warehouse_options: Some(make_warehouse_options(plan.warehouse)), + error_integration: plan.error_integration, + task_sql_type: 0, + suspend_task_after_num_failures: plan.suspend_task_after_num_failures.map(|x| x as i32), + if_not_exist: plan.create_option.if_not_exist(), + after: plan.after, + when_condition: plan.when_condition, + session_parameters: plan.session_parameters, + script_sql: None, + }; + match plan.sql { + TaskSql::SingleStatement(stmt) => { + req.task_sql_type = i32::from(pb::TaskSqlType::Sql); + req.query_text = stmt; + } + TaskSql::ScriptBlock(ref sqls) => { + req.task_sql_type = i32::from(pb::TaskSqlType::Script); + req.query_text = format!("{}", plan.sql); + req.script_sql = Some(pb::ScriptSql { sqls: 
sqls.clone() }) + } + } + req + } + + fn build_alter_request(ctx: &Arc, plan: &AlterTaskPlan) -> AlterTaskRequest { + let plan = plan.clone(); + let owner = ctx + .get_current_role() + .unwrap_or_default() + .identity() + .to_string(); + let mut req = AlterTaskRequest { + task_name: plan.task_name, + tenant_id: plan.tenant.tenant_name().to_string(), + owner, + alter_task_type: 0, + if_exist: plan.if_exists, + error_integration: None, + task_sql_type: 0, + query_text: None, + comment: None, + schedule_options: None, + warehouse_options: None, + suspend_task_after_num_failures: None, + when_condition: None, + add_after: vec![], + remove_after: vec![], + set_session_parameters: false, + session_parameters: Default::default(), + script_sql: None, + }; + match plan.alter_options { + AlterTaskOptions::Resume => { + req.alter_task_type = AlterTaskType::Resume as i32; + } + AlterTaskOptions::Suspend => { + req.alter_task_type = AlterTaskType::Suspend as i32; + } + AlterTaskOptions::Set { + schedule, + comments, + warehouse, + suspend_task_after_num_failures, + error_integration, + session_parameters, + } => { + req.alter_task_type = AlterTaskType::Set as i32; + req.schedule_options = schedule.map(make_schedule_options); + req.comment = comments; + req.warehouse_options = warehouse.map(|w| WarehouseOptions { + warehouse: Some(w), + using_warehouse_size: None, + }); + req.error_integration = error_integration; + req.suspend_task_after_num_failures = + suspend_task_after_num_failures.map(|i| i as i32); + if let Some(session_parameters) = session_parameters { + req.set_session_parameters = true; + req.session_parameters = session_parameters; + } + } + AlterTaskOptions::Unset { .. 
} => { + todo!() + } + AlterTaskOptions::ModifyAs(sql) => { + req.alter_task_type = AlterTaskType::ModifyAs as i32; + match sql { + TaskSql::SingleStatement(stmt) => { + req.task_sql_type = i32::from(pb::TaskSqlType::Sql); + req.query_text = Some(stmt); + } + TaskSql::ScriptBlock(ref sqls) => { + req.task_sql_type = i32::from(pb::TaskSqlType::Script); + req.query_text = Some(format!("{}", sql)); + req.script_sql = Some(pb::ScriptSql { sqls: sqls.clone() }) + } + } + } + AlterTaskOptions::AddAfter(tasks) => { + req.alter_task_type = AlterTaskType::AddAfter as i32; + req.add_after = tasks; + } + AlterTaskOptions::RemoveAfter(tasks) => { + req.alter_task_type = AlterTaskType::RemoveAfter as i32; + req.remove_after = tasks; + } + AlterTaskOptions::ModifyWhen(sql) => { + req.alter_task_type = AlterTaskType::ModifyWhen as i32; + req.when_condition = Some(sql.to_string()); + } + } + req + } + + async fn build_show_tasks_request( + ctx: &Arc, + plan: &ShowTasksPlan, + ) -> Result { + let plan = plan.clone(); + let available_roles = ctx.get_current_session().get_all_available_roles().await?; + let req = ShowTasksRequest { + tenant_id: plan.tenant.tenant_name().to_string(), + name_like: "".to_string(), + result_limit: 10000, // TODO: use plan.limit pushdown + owners: available_roles + .into_iter() + .map(|x| x.identity().to_string()) + .collect(), + task_ids: vec![], + }; + Ok(req) + } +} + +impl TaskInterpreter for CloudTaskInterpreter { + async fn create_task(&self, ctx: &Arc, plan: &CreateTaskPlan) -> Result<()> { + let config = GlobalConfig::instance(); + if config.query.cloud_control_grpc_server_address.is_none() { + return Err(ErrorCode::CloudControlNotEnabled( + "cannot create task without cloud control enabled, please set cloud_control_grpc_server_address in config", + )); + } + Self::validate_create_session_parameters(plan)?; + let cloud_api = CloudControlApiProvider::instance(); + let task_client = cloud_api.get_task_client(); + let req = 
Self::build_create_request(ctx, plan); + let config = get_task_client_config(ctx.clone(), cloud_api.get_timeout())?; + let req = make_request(req, config.clone()); + + // cloud don't support create or replace, let's remove the task in previous + if plan.create_option.is_overriding() { + let drop_req = DropTaskRequest { + task_name: plan.task_name.clone(), + tenant_id: plan.tenant.tenant_name().to_string(), + if_exist: true, + }; + let drop_req = make_request(drop_req, config); + task_client.drop_task(drop_req).await?; + } + + task_client.create_task(req).await?; + Ok(()) + } + + async fn execute_task(&self, ctx: &Arc, plan: &ExecuteTaskPlan) -> Result<()> { + let config = GlobalConfig::instance(); + if config.query.cloud_control_grpc_server_address.is_none() { + return Err(ErrorCode::CloudControlNotEnabled( + "cannot execute task without cloud control enabled, please set cloud_control_grpc_server_address in config", + )); + } + let cloud_api = CloudControlApiProvider::instance(); + let task_client = cloud_api.get_task_client(); + let req = ExecuteTaskRequest { + task_name: plan.task_name.clone(), + tenant_id: plan.tenant.tenant_name().to_string(), + }; + let config = get_task_client_config(ctx.clone(), cloud_api.get_timeout())?; + let req = make_request(req, config); + + task_client.execute_task(req).await?; + + Ok(()) + } + + async fn alter_task(&self, ctx: &Arc, plan: &AlterTaskPlan) -> Result<()> { + let config = GlobalConfig::instance(); + if config.query.cloud_control_grpc_server_address.is_none() { + return Err(ErrorCode::CloudControlNotEnabled( + "cannot alter task without cloud control enabled, please set cloud_control_grpc_server_address in config", + )); + } + Self::validate_alter_session_parameters(plan)?; + let cloud_api = CloudControlApiProvider::instance(); + let task_client = cloud_api.get_task_client(); + let req = Self::build_alter_request(ctx, plan); + let config = get_task_client_config(ctx.clone(), cloud_api.get_timeout())?; + let req = 
make_request(req, config); + task_client.alter_task(req).await?; + + Ok(()) + } + + async fn describe_task( + &self, + ctx: &Arc, + plan: &DescribeTaskPlan, + ) -> Result> { + let config = GlobalConfig::instance(); + if config.query.cloud_control_grpc_server_address.is_none() { + return Err(ErrorCode::CloudControlNotEnabled( + "cannot describe task without cloud control enabled, please set cloud_control_grpc_server_address in config", + )); + } + let cloud_api = CloudControlApiProvider::instance(); + let task_client = cloud_api.get_task_client(); + let plan = plan.clone(); + let req = DescribeTaskRequest { + task_name: plan.task_name, + tenant_id: plan.tenant.tenant_name().to_string(), + if_exist: false, + }; + let config = get_task_client_config(ctx.clone(), cloud_api.get_timeout())?; + let req = make_request(req, config); + let resp = task_client.describe_task(req).await?; + + resp.task.map(task_utils::Task::try_from).transpose() + } + + async fn drop_task(&self, ctx: &Arc, plan: &DropTaskPlan) -> Result<()> { + let config = GlobalConfig::instance(); + if config.query.cloud_control_grpc_server_address.is_none() { + return Err(ErrorCode::CloudControlNotEnabled( + "cannot drop task without cloud control enabled, please set cloud_control_grpc_server_address in config", + )); + } + let cloud_api = CloudControlApiProvider::instance(); + let task_client = cloud_api.get_task_client(); + let req = DropTaskRequest { + task_name: plan.task_name.clone(), + tenant_id: plan.tenant.tenant_name().to_string(), + if_exist: plan.if_exists, + }; + let config = get_task_client_config(ctx.clone(), cloud_api.get_timeout())?; + let req = make_request(req, config); + task_client.drop_task(req).await?; + + Ok(()) + } + + async fn show_tasks( + &self, + ctx: &Arc, + plan: &ShowTasksPlan, + ) -> Result> { + let config = GlobalConfig::instance(); + if config.query.cloud_control_grpc_server_address.is_none() { + return Err(ErrorCode::CloudControlNotEnabled( + "cannot drop task without cloud 
control enabled, please set cloud_control_grpc_server_address in config", + )); + } + let cloud_api = CloudControlApiProvider::instance(); + let task_client = cloud_api.get_task_client(); + let req = Self::build_show_tasks_request(ctx, plan).await?; + let config = get_task_client_config(ctx.clone(), cloud_api.get_timeout())?; + let req = make_request(req, config); + + let resp = task_client.show_tasks(req).await?; + resp.tasks.into_iter().map(|t| t.try_into()).try_collect() + } +} diff --git a/src/query/service/src/interpreters/task/mod.rs b/src/query/service/src/interpreters/task/mod.rs new file mode 100644 index 0000000000000..bed1f89a53639 --- /dev/null +++ b/src/query/service/src/interpreters/task/mod.rs @@ -0,0 +1,124 @@ +// Copyright 2021 Datafuse Labs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +use std::sync::Arc; + +use databend_common_cloud_control::task_utils; +use databend_common_config::GlobalConfig; +use databend_common_exception::Result; +use databend_common_sql::plans::AlterTaskPlan; +use databend_common_sql::plans::CreateTaskPlan; +use databend_common_sql::plans::DescribeTaskPlan; +use databend_common_sql::plans::DropTaskPlan; +use databend_common_sql::plans::ExecuteTaskPlan; +use databend_common_sql::plans::ShowTasksPlan; + +use crate::interpreters::task::cloud::CloudTaskInterpreter; +use crate::interpreters::task::private::PrivateTaskInterpreter; +use crate::sessions::QueryContext; + +mod cloud; +mod private; + +pub(crate) struct TaskInterpreterFactory; + +impl TaskInterpreterFactory { + pub fn build() -> TaskInterpreterImpl { + // TODO: for test + if true { + // if GlobalConfig::instance().query.enable_private_task { + return TaskInterpreterImpl::Private(PrivateTaskInterpreter); + } + TaskInterpreterImpl::Cloud(CloudTaskInterpreter) + } +} + +pub(crate) enum TaskInterpreterImpl { + Cloud(CloudTaskInterpreter), + Private(PrivateTaskInterpreter), +} + +pub(crate) trait TaskInterpreter { + async fn create_task(&self, ctx: &Arc, plan: &CreateTaskPlan) -> Result<()>; + + async fn execute_task(&self, ctx: &Arc, plan: &ExecuteTaskPlan) -> Result<()>; + + async fn alter_task(&self, ctx: &Arc, plan: &AlterTaskPlan) -> Result<()>; + + async fn describe_task( + &self, + ctx: &Arc, + plan: &DescribeTaskPlan, + ) -> Result>; + + async fn drop_task(&self, ctx: &Arc, plan: &DropTaskPlan) -> Result<()>; + + async fn show_tasks( + &self, + ctx: &Arc, + plan: &ShowTasksPlan, + ) -> Result>; +} + +impl TaskInterpreter for TaskInterpreterImpl { + async fn create_task(&self, ctx: &Arc, plan: &CreateTaskPlan) -> Result<()> { + match self { + TaskInterpreterImpl::Cloud(interpreter) => interpreter.create_task(ctx, plan).await, + TaskInterpreterImpl::Private(interpreter) => interpreter.create_task(ctx, plan).await, + } + } + + async fn execute_task(&self, ctx: 
&Arc, plan: &ExecuteTaskPlan) -> Result<()> { + match self { + TaskInterpreterImpl::Cloud(interpreter) => interpreter.execute_task(ctx, plan).await, + TaskInterpreterImpl::Private(interpreter) => interpreter.execute_task(ctx, plan).await, + } + } + + async fn alter_task(&self, ctx: &Arc, plan: &AlterTaskPlan) -> Result<()> { + match self { + TaskInterpreterImpl::Cloud(interpreter) => interpreter.alter_task(ctx, plan).await, + TaskInterpreterImpl::Private(interpreter) => interpreter.alter_task(ctx, plan).await, + } + } + + async fn describe_task( + &self, + ctx: &Arc, + plan: &DescribeTaskPlan, + ) -> Result> { + match self { + TaskInterpreterImpl::Cloud(interpreter) => interpreter.describe_task(ctx, plan).await, + TaskInterpreterImpl::Private(interpreter) => interpreter.describe_task(ctx, plan).await, + } + } + + async fn drop_task(&self, ctx: &Arc, plan: &DropTaskPlan) -> Result<()> { + match self { + TaskInterpreterImpl::Cloud(interpreter) => interpreter.drop_task(ctx, plan).await, + TaskInterpreterImpl::Private(interpreter) => interpreter.drop_task(ctx, plan).await, + } + } + + async fn show_tasks( + &self, + ctx: &Arc, + plan: &ShowTasksPlan, + ) -> Result> { + match self { + TaskInterpreterImpl::Cloud(interpreter) => interpreter.show_tasks(ctx, plan).await, + TaskInterpreterImpl::Private(interpreter) => interpreter.show_tasks(ctx, plan).await, + } + } +} diff --git a/src/query/service/src/interpreters/task/private.rs b/src/query/service/src/interpreters/task/private.rs new file mode 100644 index 0000000000000..9012c1becf74e --- /dev/null +++ b/src/query/service/src/interpreters/task/private.rs @@ -0,0 +1,171 @@ +// Copyright 2021 Datafuse Labs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use std::sync::Arc; + +use chrono::Utc; +use databend_common_ast::ast::TaskSql; +use databend_common_catalog::table_context::TableContext; +use databend_common_cloud_control::pb; +use databend_common_cloud_control::task_utils; +use databend_common_exception::Result; +use databend_common_management::task::TaskMgr; +use databend_common_meta_app::principal::task::EMPTY_TASK_ID; +use databend_common_meta_app::principal::Status; +use databend_common_meta_app::principal::Task; +use databend_common_sql::plans::AlterTaskPlan; +use databend_common_sql::plans::CreateTaskPlan; +use databend_common_sql::plans::DescribeTaskPlan; +use databend_common_sql::plans::DropTaskPlan; +use databend_common_sql::plans::ExecuteTaskPlan; +use databend_common_sql::plans::ShowTasksPlan; +use databend_common_users::UserApiProvider; + +use crate::interpreters::task::TaskInterpreter; +use crate::sessions::QueryContext; + +pub(crate) struct PrivateTaskInterpreter; + +impl PrivateTaskInterpreter { + fn task_trans(task: Task) -> Result { + Ok(task_utils::Task { + task_id: task.task_id, + task_name: task.task_name, + query_text: task.query_text, + condition_text: task.when_condition.unwrap_or_default(), + after: task.after, + comment: task.comment, + owner: task.owner, + schedule_options: task + .schedule_options + .map(|schedule_options| { + let options = pb::ScheduleOptions { + interval: schedule_options.interval, + cron: schedule_options.cron, + time_zone: schedule_options.time_zone, + schedule_type: schedule_options.schedule_type as i32, + milliseconds_interval: 
schedule_options.milliseconds_interval, + }; + task_utils::format_schedule_options(&options) + }) + .transpose()?, + warehouse_options: task.warehouse_options.map(|warehouse_options| { + pb::WarehouseOptions { + warehouse: warehouse_options.warehouse, + using_warehouse_size: warehouse_options.using_warehouse_size, + } + }), + next_scheduled_at: task.next_scheduled_at, + suspend_task_after_num_failures: task.suspend_task_after_num_failures.map(|i| i as i32), + error_integration: task.error_integration, + status: match task.status { + Status::Suspended => task_utils::Status::Suspended, + Status::Started => task_utils::Status::Started, + }, + created_at: task.created_at, + updated_at: task.updated_at, + last_suspended_at: task.last_suspended_at, + session_params: task.session_params, + }) + } +} + +impl TaskInterpreter for PrivateTaskInterpreter { + async fn create_task(&self, ctx: &Arc, plan: &CreateTaskPlan) -> Result<()> { + let plan = plan.clone(); + + let owner = ctx + .get_current_role() + .unwrap_or_default() + .identity() + .to_string(); + + let task = databend_common_meta_app::principal::Task { + task_id: EMPTY_TASK_ID, + task_name: plan.task_name, + query_text: match plan.sql { + TaskSql::SingleStatement(s) => s, + TaskSql::ScriptBlock(_) => format!("{}", plan.sql), + }, + when_condition: plan.when_condition, + after: plan.after.clone(), + comment: plan.comment.clone(), + owner, + owner_user: ctx.get_current_user()?.identity().encode(), + schedule_options: plan.schedule_opts.map(TaskMgr::make_schedule_options), + warehouse_options: Some(TaskMgr::make_warehouse_options(plan.warehouse)), + next_scheduled_at: None, + suspend_task_after_num_failures: plan.suspend_task_after_num_failures, + error_integration: plan.error_integration.clone(), + status: Status::Suspended, + created_at: Utc::now(), + updated_at: Utc::now(), + last_suspended_at: None, + session_params: plan.session_parameters.clone(), + }; + UserApiProvider::instance() + .create_task(&plan.tenant, 
task, &plan.create_option) + .await?; + + Ok(()) + } + + async fn execute_task(&self, _ctx: &Arc, plan: &ExecuteTaskPlan) -> Result<()> { + UserApiProvider::instance() + .execute_task(&plan.tenant, &plan.task_name) + .await?; + + Ok(()) + } + + async fn alter_task(&self, _ctx: &Arc, plan: &AlterTaskPlan) -> Result<()> { + UserApiProvider::instance() + .alter_task(&plan.tenant, &plan.task_name, &plan.alter_options) + .await?; + + Ok(()) + } + + async fn describe_task( + &self, + _ctx: &Arc, + plan: &DescribeTaskPlan, + ) -> Result> { + let task = UserApiProvider::instance() + .describe_task(&plan.tenant, &plan.task_name) + .await?; + task.map(Self::task_trans).transpose() + } + + async fn drop_task(&self, _ctx: &Arc, plan: &DropTaskPlan) -> Result<()> { + UserApiProvider::instance() + .drop_task(&plan.tenant, &plan.task_name) + .await?; + + Ok(()) + } + + async fn show_tasks( + &self, + _ctx: &Arc, + plan: &ShowTasksPlan, + ) -> Result> { + let tasks = UserApiProvider::instance().show_tasks(&plan.tenant).await?; + + tasks + .into_iter() + .map(Self::task_trans) + .try_collect() + } +} diff --git a/src/query/service/src/lib.rs b/src/query/service/src/lib.rs index a6115a346ab93..e9b463e35b87a 100644 --- a/src/query/service/src/lib.rs +++ b/src/query/service/src/lib.rs @@ -61,6 +61,7 @@ pub mod table_functions; pub mod test_kits; mod global_services; +mod task_service; pub use databend_common_sql as sql; pub use databend_common_storages_factory as storages; diff --git a/src/query/service/src/task_service.rs b/src/query/service/src/task_service.rs new file mode 100644 index 0000000000000..7e4b51da2cef6 --- /dev/null +++ b/src/query/service/src/task_service.rs @@ -0,0 +1,322 @@ +// Copyright 2021 Datafuse Labs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use std::collections::HashMap; +use std::str::FromStr; +use std::sync::Arc; +use std::time::Duration; + +use chrono::Utc; +use chrono_tz::Tz; +use cron::Schedule; +use databend_common_ast::ast::AlterTaskOptions; +use databend_common_base::runtime::GlobalQueryRuntime; +use databend_common_base::runtime::TrySpawn; +use databend_common_catalog::session_type::SessionType; +use databend_common_exception::ErrorCode; +use databend_common_exception::Result; +use databend_common_expression::DataBlock; +use databend_common_management::task::TaskChannel; +use databend_common_management::task::TaskMessage; +use databend_common_meta_app::principal::task::AfterTaskInfo; +use databend_common_meta_app::principal::task::EMPTY_TASK_ID; +use databend_common_meta_app::principal::AfterTaskState; +use databend_common_meta_app::principal::ScheduleType; +use databend_common_meta_app::principal::State; +use databend_common_meta_app::principal::Status; +use databend_common_meta_app::principal::Task; +use databend_common_meta_app::principal::TaskRun; +use databend_common_meta_app::principal::UserIdentity; +use databend_common_meta_app::principal::UserInfo; +use databend_common_meta_app::schema::CreateOption; +use databend_common_meta_app::tenant::Tenant; +use databend_common_sql::Planner; +use databend_common_users::UserApiProvider; +use futures_util::lock::Mutex; +use futures_util::TryStreamExt; +use tokio::sync::mpsc::Receiver; +use tokio::time::sleep; +use tokio_util::sync::CancellationToken; + +use crate::interpreters::InterpreterFactory; +use 
crate::sessions::SessionManager; + +pub struct TaskService; + +impl TaskService { + pub fn init(mut task_rx: Receiver, tenant: Tenant) -> Result<()> { + GlobalQueryRuntime::instance() + .runtime() + .try_spawn(async move { + let mut scheduled_tasks: HashMap = HashMap::new(); + let task_mgr = UserApiProvider::instance().task_api(&tenant); + + // If `task_c` is defined as `AFTER task_a, task_b`, then: + // - task_after_infos["task_c"] = AfterTaskInfo { afters: ["task_a, task_b"] } + let mut task_after_infos = HashMap::::new(); + // - task_dep_infos["task_a"]["task_c"] = AfterTaskInfo { ... } + // - task_dep_infos["task_b"]["task_c"] = AfterTaskInfo { ... } + let mut task_dep_infos = HashMap::>::new(); + // after task state => [task1 name, task2 name], if task succeeded then remove task name on after task state + let task_deps= Arc::new(Mutex::new(HashMap::::new())); + + while let Some(task) = task_rx.recv().await { + match task { + TaskMessage::ScheduleTask(mut task) => { + debug_assert!(task.schedule_options.is_some()); + if let Some(schedule_options) = &task.schedule_options { + // clean old task if alter + if let Some(token) = scheduled_tasks.remove(&task.task_name) { + token.cancel(); + } + match task.status { + Status::Suspended => continue, + Status::Started => () + } + + let token = CancellationToken::new(); + let child_token = token.child_token(); + let task_name = task.task_name.to_string(); + let task_mgr = task_mgr.clone(); + + task_mgr.update_task_run(TaskRun { + task: task.clone(), + run_id: Self::make_run_id(), + attempt_number: task.suspend_task_after_num_failures.unwrap_or(0) as i32, + state: State::Scheduled, + scheduled_at: Utc::now(), + completed_at: None, + error_code: 0, + error_message: None, + root_task_id: EMPTY_TASK_ID, + }).await??; + + match schedule_options.schedule_type { + ScheduleType::IntervalType => { + let mut duration = Duration::from_secs(schedule_options.interval.unwrap() as u64); + if let Some(ms) = 
&schedule_options.milliseconds_interval { + duration += Duration::from_millis(*ms); + } + + GlobalQueryRuntime::instance() + .runtime() + .spawn(async move { + task.next_scheduled_at = Some(Utc::now() + duration); + loop { + task_mgr.create_task(task.clone(), &CreateOption::CreateOrReplace).await??; + tokio::select! { + _ = sleep(duration) => { + let _ = TaskChannel::instance().send(TaskMessage::ExecuteTask(task.clone())); + } + _ = child_token.cancelled() => { + break; + } + } + } + // TODO: log error + Result::Ok(()) + }); + } + ScheduleType::CronType => { + // SAFETY: check on CreateTask + let cron_expr = schedule_options.cron.as_ref().unwrap(); + let tz = schedule_options.time_zone.as_ref().unwrap().parse::().unwrap(); + let schedule = Schedule::from_str(cron_expr).unwrap(); + + GlobalQueryRuntime::instance() + .runtime() + .spawn(async move { + let mut upcoming = schedule.upcoming(tz); + + for next_time in upcoming { + let now = Utc::now(); + let duration = (next_time.with_timezone(&Utc) - now) + .to_std() + .unwrap_or(Duration::ZERO); + + task.next_scheduled_at = Some(Utc::now() + duration); + tokio::select! { + _ = sleep(duration) => { + let _ = TaskChannel::instance().send(TaskMessage::ExecuteTask(task.clone())); + } + _ = child_token.cancelled() => { + break; + } + } + } + }); + } + } + let _ = scheduled_tasks.insert(task_name, token); + } + } + TaskMessage::ExecuteTask(task) => { + let task_name = task.task_name.clone(); + + // TODO: Meta control query is executed serially through watch key + + let mut task_run = task_mgr.lasted_task_run(&task_name).await?? 
+ .unwrap_or_else(|| TaskRun { + task: task.clone(), + run_id: Self::make_run_id(), + attempt_number: task.suspend_task_after_num_failures.unwrap_or(0) as i32, + state: State::Executing, + scheduled_at: Utc::now(), + completed_at: None, + error_code: 0, + error_message: None, + root_task_id: EMPTY_TASK_ID, + }); + task_mgr.update_task_run(task_run.clone()).await??; + + let task_mgr = task_mgr.clone(); + let tenant = tenant.clone(); + let task_dep_infos = task_dep_infos.clone(); + let task_deps = task_deps.clone(); + let owner = Self::get_task_owner(&task, &tenant).await?; + GlobalQueryRuntime::instance() + .runtime() + .try_spawn(async move { + while task_run.attempt_number >= 0 { + let task_result = Self::spawn_task(task.clone(), owner.clone()).await; + + match task_result { + Ok(()) => { + task_run.state = State::Succeeded; + task_run.completed_at = Some(Utc::now()); + task_mgr.update_task_run(task_run.clone()).await??; + + if let Some(info) = task_dep_infos.get(&task_name) { + let mut guard = task_deps.lock().await; + + for (dep_name, dep_info) in info { + if let Some(after_state) = guard.get_mut(dep_info) { + if after_state.completed_task(&task_name) { + *after_state = AfterTaskState::from(dep_info); + let dep_task = task_mgr.describe_task(dep_name).await?? 
+ .ok_or_else(|| ErrorCode::UnknownTask(dep_name.clone()))?; + + let _ = TaskChannel::instance().send(TaskMessage::ExecuteTask(dep_task)); + } + } + } + } + break; + } + Err(err) => { + task_run.state = State::Failed; + task_run.completed_at = Some(Utc::now()); + task_run.attempt_number -= 1; + task_run.error_code = err.code() as i64; + task_run.error_message = Some(err.message()); + task_mgr.update_task_run(task_run.clone()).await??; + task_run.run_id = Self::make_run_id(); + } + } + task_mgr.alter_task(&task.task_name, &AlterTaskOptions::Suspend).await??; + } + + // TODO: log error + Result::Ok(()) + }, None)?; + } + TaskMessage::DeleteTask(task_name) => { + if let Some(deps) = task_dep_infos.get(&task_name) { + if !deps.is_empty() { + continue + // TODO: return delete failed error + } + } + if let Some(token) = scheduled_tasks.remove(&task_name) { + token.cancel(); + } + } + TaskMessage::AfterTask(task) => { + match task.status { + Status::Suspended => continue, + Status::Started => (), + } + // after info + if let Some(info) = task_after_infos.remove(&task.task_name) { + // dep info + for after in info.afters.iter() { + if let Some(dep_tasks) = task_dep_infos.get_mut(after) { + dep_tasks.remove(&task.task_name); + } + } + task_deps + .lock() + .await + .remove(&info); + } + if task.after.is_empty() { + continue; + } + let task_name = task.task_name.clone(); + let info = AfterTaskInfo::from(&task); + // after info + task_after_infos.insert(task_name.clone(), info.clone()); + // dep info + for after_task in task.after.iter() { + task_dep_infos + .entry(after_task.clone()) + .or_default() + .insert(task_name.clone(), info.clone()); + + task_deps + .lock() + .await + .insert(info.clone(), AfterTaskState::from(&info)); + } + } + } + } + + // TODO: log error + Result::Ok(()) + }, None)?; + Ok(()) + } + + async fn get_task_owner(task: &Task, tenant: &Tenant) -> Result { + UserApiProvider::instance() + .get_user( + tenant, + 
UserIdentity::parse(&task.owner_user).map_err(|e| { + ErrorCode::MetaServiceError(format!("Failed to parse UserIdentity: {}", e)) + })?, + ) + .await + } + + async fn spawn_task(task: Task, user: UserInfo) -> Result<()> { + let session = SessionManager::instance() + .create_session(SessionType::Local) + .await?; + session.set_authed_user(user, None).await?; + let context = Arc::new(session).create_query_context().await?; + + let mut planner = Planner::new(context.clone()); + let (plan, _) = planner.plan_sql(&task.query_text).await?; + let executor = InterpreterFactory::get(context.clone(), &plan).await?; + let stream = executor.execute(context).await?; + let _ = stream.try_collect::>().await?; + + Ok(()) + } + + fn make_run_id() -> u64 { + Utc::now().timestamp_millis() as u64 + } +} diff --git a/src/query/storages/system/src/tasks_table.rs b/src/query/storages/system/src/tasks_table.rs index a44a26e629b81..e9340254f5550 100644 --- a/src/query/storages/system/src/tasks_table.rs +++ b/src/query/storages/system/src/tasks_table.rs @@ -21,7 +21,7 @@ use databend_common_cloud_control::client_config::build_client_config; use databend_common_cloud_control::client_config::make_request; use databend_common_cloud_control::cloud_api::CloudControlApiProvider; use databend_common_cloud_control::pb::ShowTasksRequest; -use databend_common_cloud_control::pb::Task; +use databend_common_cloud_control::task_utils::Task; use databend_common_config::GlobalConfig; use databend_common_exception::ErrorCode; use databend_common_exception::Result; @@ -36,6 +36,7 @@ use databend_common_meta_app::schema::TableIdent; use databend_common_meta_app::schema::TableInfo; use databend_common_meta_app::schema::TableMeta; use databend_common_sql::plans::task_schema; +use itertools::Itertools; use crate::table::AsyncOneBlockSystemTable; use crate::table::AsyncSystemTable; @@ -59,25 +60,24 @@ pub fn parse_tasks_to_datablock(tasks: Vec) -> Result { let mut last_suspended_on: Vec> = 
Vec::with_capacity(tasks.len()); let mut session_params: Vec>> = Vec::with_capacity(tasks.len()); for task in tasks { - let tsk: databend_common_cloud_control::task_utils::Task = task.try_into()?; - created_on.push(tsk.created_at.timestamp_micros()); - name.push(tsk.task_name); - id.push(tsk.task_id); - owner.push(tsk.owner); - comment.push(tsk.comment); - warehouse.push(tsk.warehouse_options.and_then(|s| s.warehouse)); - schedule.push(tsk.schedule_options); - status.push(tsk.status.to_string()); - definition.push(tsk.query_text); - condition_text.push(tsk.condition_text); + created_on.push(task.created_at.timestamp_micros()); + name.push(task.task_name); + id.push(task.task_id); + owner.push(task.owner); + comment.push(task.comment); + warehouse.push(task.warehouse_options.and_then(|s| s.warehouse)); + schedule.push(task.schedule_options); + status.push(task.status.to_string()); + definition.push(task.query_text); + condition_text.push(task.condition_text); // join by comma - after.push(tsk.after.into_iter().collect::>().join(",")); - suspend_after_num_failures.push(tsk.suspend_task_after_num_failures.map(|v| v as u64)); - error_integration.push(tsk.error_integration); - next_schedule_time.push(tsk.next_scheduled_at.map(|t| t.timestamp_micros())); - last_committed_on.push(tsk.updated_at.timestamp_micros()); - last_suspended_on.push(tsk.last_suspended_at.map(|t| t.timestamp_micros())); - let serialized_params = serde_json::to_vec(&tsk.session_params).unwrap(); + after.push(task.after.into_iter().collect::>().join(",")); + suspend_after_num_failures.push(task.suspend_task_after_num_failures.map(|v| v as u64)); + error_integration.push(task.error_integration); + next_schedule_time.push(task.next_scheduled_at.map(|t| t.timestamp_micros())); + last_committed_on.push(task.updated_at.timestamp_micros()); + last_suspended_on.push(task.last_suspended_at.map(|t| t.timestamp_micros())); + let serialized_params = serde_json::to_vec(&task.session_params).unwrap(); 
session_params.push(Some(serialized_params)); } @@ -155,7 +155,7 @@ impl AsyncSystemTable for TasksTable { let resp = task_client.show_tasks(req).await?; let tasks = resp.tasks; - parse_tasks_to_datablock(tasks) + parse_tasks_to_datablock(tasks.into_iter().map(Task::try_from).try_collect()?) } } diff --git a/src/query/users/Cargo.toml b/src/query/users/Cargo.toml index ab8100292b179..23ece2e843a86 100644 --- a/src/query/users/Cargo.toml +++ b/src/query/users/Cargo.toml @@ -27,6 +27,7 @@ databend-common-meta-kvapi = { workspace = true } databend-common-meta-store = { workspace = true } databend-common-meta-types = { workspace = true } enumflags2 = { workspace = true } +futures = { workspace = true } itertools = { workspace = true } jwt-simple = { workspace = true } log = { workspace = true } diff --git a/src/query/users/src/lib.rs b/src/query/users/src/lib.rs index 19f9d595ce55e..181309778098d 100644 --- a/src/query/users/src/lib.rs +++ b/src/query/users/src/lib.rs @@ -33,6 +33,7 @@ pub mod connection; pub mod file_format; pub mod role_cache_mgr; pub mod role_util; +mod user_task; pub use jwt::*; pub use password_policy::*; diff --git a/src/query/users/src/user_api.rs b/src/query/users/src/user_api.rs index d3c9c4005acf8..3bd32249fb2ed 100644 --- a/src/query/users/src/user_api.rs +++ b/src/query/users/src/user_api.rs @@ -23,6 +23,9 @@ use databend_common_config::GlobalConfig; use databend_common_exception::ErrorCode; use databend_common_exception::Result; use databend_common_grpc::RpcClientConf; +use databend_common_management::task::TaskChannel; +use databend_common_management::task::TaskMessage; +use databend_common_management::task::TaskMgr; use databend_common_management::udf::UdfMgr; use databend_common_management::ClientSessionMgr; use databend_common_management::ConnectionMgr; @@ -38,17 +41,21 @@ use databend_common_management::StageApi; use databend_common_management::StageMgr; use databend_common_management::UserApi; use databend_common_management::UserMgr; 
+use databend_common_meta_api::kv_pb_api::KVPbApi; use databend_common_meta_app::principal::AuthInfo; use databend_common_meta_app::principal::RoleInfo; +use databend_common_meta_app::principal::TaskIdent; use databend_common_meta_app::principal::UserDefinedFunction; use databend_common_meta_app::tenant::Tenant; use databend_common_meta_app::tenant::TenantQuota; use databend_common_meta_cache::Cache; use databend_common_meta_kvapi::kvapi; +use databend_common_meta_kvapi::kvapi::DirName; use databend_common_meta_store::MetaStore; use databend_common_meta_store::MetaStoreProvider; use databend_common_meta_types::MatchSeq; use databend_common_meta_types::MetaError; +use futures::stream::StreamExt; use log::debug; use crate::builtin::BuiltIn; @@ -137,6 +144,22 @@ impl UserApiProvider { user_mgr.add_role(tenant, public, true).await?; } + { + let task_tx = TaskChannel::instance(); + let key = DirName::new(TaskIdent::new(tenant, "")); + let mut stream = user_mgr.client.list_pb_values(&key).await?; + + while let Some(task) = stream.next().await { + let task = task?; + + if task.schedule_options.is_some() { + let _ = task_tx.send(TaskMessage::ScheduleTask(task)); + } else if !task.after.is_empty() { + let _ = task_tx.send(TaskMessage::AfterTask(task)); + } + } + } + Ok(Arc::new(user_mgr)) } @@ -156,6 +179,10 @@ impl UserApiProvider { UdfMgr::create(self.client.clone(), tenant) } + pub fn task_api(&self, tenant: &Tenant) -> TaskMgr { + TaskMgr::create(self.client.clone(), tenant) + } + pub fn user_api(&self, tenant: &Tenant) -> Arc { let user_mgr = UserMgr::create(self.client.clone(), tenant); Arc::new(user_mgr) diff --git a/src/query/users/src/user_task.rs b/src/query/users/src/user_task.rs new file mode 100644 index 0000000000000..58fa175e0dc6b --- /dev/null +++ b/src/query/users/src/user_task.rs @@ -0,0 +1,78 @@ +// Copyright 2021 Datafuse Labs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with 
the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use databend_common_ast::ast::AlterTaskOptions; +use databend_common_exception::Result; +use databend_common_meta_app::principal::Task; +use databend_common_meta_app::schema::CreateOption; +use databend_common_meta_app::tenant::Tenant; + +use crate::UserApiProvider; + +impl UserApiProvider { + // Add a new Task. + #[async_backtrace::framed] + pub async fn create_task( + &self, + tenant: &Tenant, + task: Task, + create_option: &CreateOption, + ) -> Result<()> { + let task_api = self.task_api(tenant); + task_api.create_task(task, create_option).await??; + Ok(()) + } + + #[async_backtrace::framed] + pub async fn execute_task(&self, tenant: &Tenant, task_name: &str) -> Result<()> { + let task_api = self.task_api(tenant); + task_api.execute_task(task_name).await??; + Ok(()) + } + + #[async_backtrace::framed] + pub async fn alter_task( + &self, + tenant: &Tenant, + task_name: &str, + alter_options: &AlterTaskOptions, + ) -> Result<()> { + let task_api = self.task_api(tenant); + task_api.alter_task(task_name, alter_options).await??; + Ok(()) + } + + #[async_backtrace::framed] + pub async fn describe_task(&self, tenant: &Tenant, task_name: &str) -> Result> { + let task_api = self.task_api(tenant); + let task = task_api.describe_task(task_name).await??; + Ok(task) + } + + #[async_backtrace::framed] + pub async fn drop_task(&self, tenant: &Tenant, task_name: &str) -> Result<()> { + let task_api = self.task_api(tenant); + task_api.drop_task(task_name).await?; + + Ok(()) + } + + #[async_backtrace::framed] + pub async fn show_tasks(&self, 
tenant: &Tenant) -> Result> { + let task_api = self.task_api(tenant); + let tasks = task_api.list_task().await?; + + Ok(tasks) + } +} From 4ccb7b7a7c3375c70df1ba9fa87fc3d0d9f36dfb Mon Sep 17 00:00:00 2001 From: Kould Date: Tue, 8 Jul 2025 19:15:58 +0800 Subject: [PATCH 02/25] feat: imp when_condition for Task & store TaskRun on system table --- src/binaries/query/entry.rs | 2 + src/common/exception/src/exception_code.rs | 2 + src/query/management/src/task/task_mgr.rs | 70 +- src/query/service/src/global_services.rs | 7 +- .../service/src/interpreters/task/mod.rs | 3 +- .../service/src/interpreters/task/private.rs | 5 +- src/query/service/src/lib.rs | 2 +- src/query/service/src/task/meta.rs | 133 +++ src/query/service/src/task/mod.rs | 19 + src/query/service/src/task/session.rs | 49 + src/query/service/src/task/task_service.rs | 907 ++++++++++++++++++ src/query/service/src/task_service.rs | 322 ------- src/query/users/src/user_api.rs | 4 +- 13 files changed, 1140 insertions(+), 385 deletions(-) create mode 100644 src/query/service/src/task/meta.rs create mode 100644 src/query/service/src/task/mod.rs create mode 100644 src/query/service/src/task/session.rs create mode 100644 src/query/service/src/task/task_service.rs delete mode 100644 src/query/service/src/task_service.rs diff --git a/src/binaries/query/entry.rs b/src/binaries/query/entry.rs index 29cae01359ad0..fec1cfdde5559 100644 --- a/src/binaries/query/entry.rs +++ b/src/binaries/query/entry.rs @@ -44,6 +44,7 @@ use databend_query::servers::MySQLHandler; use databend_query::servers::MySQLTlsConfig; use databend_query::servers::Server; use databend_query::servers::ShutdownHandle; +use databend_query::task::TaskService; use databend_query::GlobalServices; use log::info; @@ -302,6 +303,7 @@ pub async fn start_services(conf: &InnerConfig) -> Result<(), MainError> { } println!(" system history tables: {}", conf.log.history); } + TaskService::instance().initialized(); println!(); println!( diff --git 
a/src/common/exception/src/exception_code.rs b/src/common/exception/src/exception_code.rs index c01f97c6031ad..28a9d88e4b921 100644 --- a/src/common/exception/src/exception_code.rs +++ b/src/common/exception/src/exception_code.rs @@ -403,6 +403,8 @@ build_exceptions! { TaskCronInvalid(2614), /// Task schedule and after conflict TaskScheduleAndAfterConflict(2615), + /// Task when condition not met + TaskWhenConditionNotMet(2616), } // Search and External Service Errors [1901-1903, 1910] diff --git a/src/query/management/src/task/task_mgr.rs b/src/query/management/src/task/task_mgr.rs index ac8b3cf100376..96fef417d031b 100644 --- a/src/query/management/src/task/task_mgr.rs +++ b/src/query/management/src/task/task_mgr.rs @@ -103,13 +103,21 @@ impl TaskMgr { ) -> Result, TaskApiError> { task.created_at = Utc::now(); - self.create_task_inner(task, create_option).await + self.create_task_inner(task, create_option, false).await + } + + #[async_backtrace::framed] + #[fastrace::trace] + pub async fn update_task(&self, task: Task) -> Result, TaskApiError> { + self.create_task_inner(task, &CreateOption::CreateOrReplace, true) + .await } async fn create_task_inner( &self, task: Task, create_option: &CreateOption, + without_schedule: bool, ) -> Result, TaskApiError> { assert!(task.after.is_empty() || task.schedule_options.is_none()); // check @@ -152,9 +160,13 @@ impl TaskMgr { } } if !task.after.is_empty() { - let _ = TaskChannel::instance().send(TaskMessage::AfterTask(task)); - } else if task.schedule_options.is_some() { - let _ = TaskChannel::instance().send(TaskMessage::ScheduleTask(task)); + let _ = TaskChannel::instance() + .send(TaskMessage::AfterTask(task)) + .await; + } else if task.schedule_options.is_some() && !without_schedule { + let _ = TaskChannel::instance() + .send(TaskMessage::ScheduleTask(task)) + .await; } Ok(Ok(())) @@ -174,7 +186,9 @@ impl TaskMgr { context: "while execute task".to_string(), })); }; - let _ = 
TaskChannel::instance().send(TaskMessage::ExecuteTask(Task::clone(&task))); + let _ = TaskChannel::instance() + .send(TaskMessage::ExecuteTask(Task::clone(&task))) + .await; Ok(Ok(())) } @@ -256,7 +270,7 @@ impl TaskMgr { } } if let Err(e) = self - .create_task_inner(task, &CreateOption::CreateOrReplace) + .create_task_inner(task, &CreateOption::CreateOrReplace, false) .await? { return Ok(Err(TaskError::NotFound { @@ -322,50 +336,6 @@ impl TaskMgr { Ok(tasks) } - #[async_backtrace::framed] - #[fastrace::trace] - pub async fn update_task_run( - &self, - task_run: TaskRun, - ) -> Result, TaskApiError> { - let seq = MatchSeq::from(CreateOption::CreateOrReplace); - let key = TaskRunIdent::new(&self.tenant, task_run.key()); - let req = UpsertPB::insert(key, task_run.clone()).with(seq); - let _ = self.kv_api.upsert_pb(&req).await?; - - Ok(Ok(())) - } - - #[async_backtrace::framed] - #[fastrace::trace] - pub async fn lasted_task_run( - &self, - task_name: &str, - ) -> Result, TaskError>, TaskApiError> { - let key = DirName::new(TaskRunIdent::new(&self.tenant, task_name)); - let result = self - .kv_api - .list_pb_values(&key) - .await? 
- .next() - .await - .transpose()?; - - Ok(Ok(result)) - } - - #[async_backtrace::framed] - #[fastrace::trace] - pub async fn show_task_runs_full( - &self, - task_name: &str, - ) -> Result, TaskApiError>, TaskApiError> { - let key = DirName::new(TaskRunIdent::new(&self.tenant, task_name)); - let stream = self.kv_api.list_pb_values(&key).await?; - - Ok(Ok(stream.try_collect().await?)) - } - pub fn make_schedule_options(opt: ScheduleOptions) -> task::ScheduleOptions { match opt { ScheduleOptions::IntervalSecs(secs, ms) => { diff --git a/src/query/service/src/global_services.rs b/src/query/service/src/global_services.rs index 752aa73574cbd..43d8393ffe46d 100644 --- a/src/query/service/src/global_services.rs +++ b/src/query/service/src/global_services.rs @@ -57,7 +57,7 @@ use crate::servers::http::v1::ClientSessionManager; use crate::servers::http::v1::HttpQueryManager; use crate::sessions::QueriesQueueManager; use crate::sessions::SessionManager; -use crate::task_service::TaskService; +use crate::task::task_service::TaskService; pub struct GlobalServices; @@ -122,8 +122,7 @@ impl GlobalServices { SessionManager::init(config)?; LockManager::init()?; AuthMgr::init(config)?; - - + let task_rx = TaskChannel::init(GlobalConfig::instance().query.tasks_channel_len)?; // Init user manager. // Builtin users and udfs are created here. 
@@ -145,7 +144,6 @@ impl GlobalServices { ) .await?; } - TaskService::init(task_rx, config.query.tenant_id.clone())?; RoleCacheManager::init()?; DataOperator::init(&config.storage, config.spill.storage_params.clone()).await?; @@ -179,6 +177,7 @@ impl GlobalServices { if config.log.history.on { GlobalHistoryLog::init(config).await?; } + TaskService::init(task_rx, config)?; GLOBAL_QUERIES_MANAGER.set_gc_handle(memory_gc_handle); diff --git a/src/query/service/src/interpreters/task/mod.rs b/src/query/service/src/interpreters/task/mod.rs index bed1f89a53639..9d18aefbf52f6 100644 --- a/src/query/service/src/interpreters/task/mod.rs +++ b/src/query/service/src/interpreters/task/mod.rs @@ -15,7 +15,6 @@ use std::sync::Arc; use databend_common_cloud_control::task_utils; -use databend_common_config::GlobalConfig; use databend_common_exception::Result; use databend_common_sql::plans::AlterTaskPlan; use databend_common_sql::plans::CreateTaskPlan; @@ -37,7 +36,7 @@ impl TaskInterpreterFactory { pub fn build() -> TaskInterpreterImpl { // TODO: for test if true { - // if GlobalConfig::instance().query.enable_private_task { + // if GlobalConfig::instance().query.enable_private_task { return TaskInterpreterImpl::Private(PrivateTaskInterpreter); } TaskInterpreterImpl::Cloud(CloudTaskInterpreter) diff --git a/src/query/service/src/interpreters/task/private.rs b/src/query/service/src/interpreters/task/private.rs index 9012c1becf74e..0322ddeb0ff86 100644 --- a/src/query/service/src/interpreters/task/private.rs +++ b/src/query/service/src/interpreters/task/private.rs @@ -163,9 +163,6 @@ impl TaskInterpreter for PrivateTaskInterpreter { ) -> Result> { let tasks = UserApiProvider::instance().show_tasks(&plan.tenant).await?; - tasks - .into_iter() - .map(Self::task_trans) - .try_collect() + tasks.into_iter().map(Self::task_trans).try_collect() } } diff --git a/src/query/service/src/lib.rs b/src/query/service/src/lib.rs index e9b463e35b87a..69a9ba3b420e5 100644 --- 
a/src/query/service/src/lib.rs +++ b/src/query/service/src/lib.rs @@ -61,7 +61,7 @@ pub mod table_functions; pub mod test_kits; mod global_services; -mod task_service; +pub mod task; pub use databend_common_sql as sql; pub use databend_common_storages_factory as storages; diff --git a/src/query/service/src/task/meta.rs b/src/query/service/src/task/meta.rs new file mode 100644 index 0000000000000..d4215d9538263 --- /dev/null +++ b/src/query/service/src/task/meta.rs @@ -0,0 +1,133 @@ +// Copyright 2021 Datafuse Labs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +use std::sync::Arc; +use std::time::Duration; + +use databend_common_base::runtime::block_on; +use databend_common_exception::Result; +use databend_common_meta_client::ClientHandle; +use databend_common_meta_kvapi::kvapi::KVApi; +use databend_common_meta_semaphore::acquirer::Permit; +use databend_common_meta_semaphore::Semaphore; +use databend_common_meta_types::MatchSeq; +use databend_common_meta_types::Operation; +use databend_common_meta_types::UpsertKV; + +/// refer to [crate::history_tables::meta::PermitGuard] +pub struct PermitGuard { + _permit: Permit, + meta_handle: Arc, + meta_key: String, +} + +impl PermitGuard { + pub fn new(permit: Permit, meta_handle: Arc, meta_key: String) -> Self { + Self { + _permit: permit, + meta_handle, + meta_key, + } + } +} + +impl Drop for PermitGuard { + fn drop(&mut self) { + let meta_handle = self.meta_handle.clone(); + let meta_key = self.meta_key.clone(); + + block_on(async move { + let _ = meta_handle.update_last_execution_timestamp(&meta_key).await; + }); + } +} + +/// refer to [crate::history_tables::meta::HistoryMetaHandle] +pub struct TaskMetaHandle { + meta_client: Arc, + node_id: String, +} + +impl TaskMetaHandle { + pub fn new(meta_client: Arc, node_id: String) -> Self { + Self { + meta_client, + node_id, + } + } + + pub async fn acquire(&self, meta_key: &str, interval: u64) -> Result> { + let acquired_guard = Semaphore::new_acquired( + self.meta_client.clone(), + meta_key, + 1, + self.node_id.clone(), + Duration::from_secs(3), + ) + .await + .map_err(|_e| "acquire semaphore failed from TaskService")?; + if interval == 0 { + return Ok(Some(acquired_guard)); + } + if match self + .meta_client + .get_kv(&format!("{}/last_timestamp", meta_key)) + .await? 
+ { + Some(v) => { + let last: u64 = serde_json::from_slice(&v.data)?; + chrono::Utc::now().timestamp_millis() as u64 + - Duration::from_secs(interval).as_millis() as u64 + > last + } + None => true, + } { + Ok(Some(acquired_guard)) + } else { + drop(acquired_guard); + Ok(None) + } + } + + pub async fn acquire_with_guard( + &self, + meta_key: &str, + interval: u64, + ) -> Result> { + if let Some(permit) = self.acquire(meta_key, interval).await? { + Ok(Some(PermitGuard::new( + permit, + Arc::new(TaskMetaHandle { + meta_client: self.meta_client.clone(), + node_id: self.node_id.clone(), + }), + meta_key.to_string(), + ))) + } else { + Ok(None) + } + } + + pub async fn update_last_execution_timestamp(&self, meta_key: &str) -> Result<()> { + self.meta_client + .upsert_kv(UpsertKV::new( + format!("{}/last_timestamp", meta_key), + MatchSeq::Any, + Operation::Update(serde_json::to_vec(&chrono::Utc::now().timestamp_millis())?), + None, + )) + .await?; + Ok(()) + } +} diff --git a/src/query/service/src/task/mod.rs b/src/query/service/src/task/mod.rs new file mode 100644 index 0000000000000..a6d9ab8d99131 --- /dev/null +++ b/src/query/service/src/task/mod.rs @@ -0,0 +1,19 @@ +// Copyright 2021 Datafuse Labs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +mod meta; +mod session; +pub mod task_service; + +pub use task_service::TaskService; diff --git a/src/query/service/src/task/session.rs b/src/query/service/src/task/session.rs new file mode 100644 index 0000000000000..1b364d619735c --- /dev/null +++ b/src/query/service/src/task/session.rs @@ -0,0 +1,49 @@ +// Copyright 2021 Datafuse Labs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use std::sync::Arc; + +use databend_common_catalog::session_type::SessionType; +use databend_common_exception::Result; +use databend_common_meta_app::principal::GrantObject; +use databend_common_meta_app::principal::UserInfo; +use databend_common_meta_app::principal::UserPrivilegeType; + +use crate::sessions::Session; +use crate::sessions::SessionManager; + +/// Create a user for task with necessary privileges +pub fn get_task_user(tenant_id: &str, cluster_id: &str) -> UserInfo { + let mut user = UserInfo::new_no_auth( + format!("{}-{}-task", tenant_id, cluster_id).as_str(), + "0.0.0.0", + ); + user.grants.grant_privileges( + &GrantObject::Global, + UserPrivilegeType::CreateDatabase.into(), + ); + user +} + +/// Create a dummy session for task +pub async fn create_session( + user: UserInfo, + restricted_role: Option, +) -> Result> { + let session_manager = SessionManager::instance(); + let dummy_session = session_manager.create_session(SessionType::Dummy).await?; + let session = session_manager.register_session(dummy_session)?; + session.set_authed_user(user, 
restricted_role).await?; + Ok(session) +} diff --git a/src/query/service/src/task/task_service.rs b/src/query/service/src/task/task_service.rs new file mode 100644 index 0000000000000..96f3c3ac889b0 --- /dev/null +++ b/src/query/service/src/task/task_service.rs @@ -0,0 +1,907 @@ +// Copyright 2021 Datafuse Labs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use std::collections::BTreeMap; +use std::collections::HashMap; +use std::str::FromStr; +use std::sync::atomic::AtomicBool; +use std::sync::atomic::Ordering; +use std::sync::Arc; +use std::time::Duration; + +use chrono::DateTime; +use chrono::Utc; +use chrono_tz::Tz; +use cron::Schedule; +use databend_common_ast::ast::AlterTaskOptions; +use databend_common_base::base::GlobalInstance; +use databend_common_base::runtime::Runtime; +use databend_common_base::runtime::TrySpawn; +use databend_common_catalog::cluster_info::Cluster; +use databend_common_config::InnerConfig; +use databend_common_exception::ErrorCode; +use databend_common_exception::Result; +use databend_common_expression::DataBlock; +use databend_common_management::task::TaskChannel; +use databend_common_management::task::TaskMessage; +use databend_common_meta_app::principal::task::AfterTaskInfo; +use databend_common_meta_app::principal::task::EMPTY_TASK_ID; +use databend_common_meta_app::principal::AfterTaskState; +use databend_common_meta_app::principal::ScheduleOptions; +use databend_common_meta_app::principal::ScheduleType; +use 
databend_common_meta_app::principal::State; +use databend_common_meta_app::principal::Status; +use databend_common_meta_app::principal::Task; +use databend_common_meta_app::principal::TaskRun; +use databend_common_meta_app::principal::UserIdentity; +use databend_common_meta_app::principal::UserInfo; +use databend_common_meta_app::principal::WarehouseOptions; +use databend_common_meta_app::tenant::Tenant; +use databend_common_meta_client::MetaGrpcClient; +use databend_common_sql::Planner; +use databend_common_users::UserApiProvider; +use databend_common_users::BUILTIN_ROLE_ACCOUNT_ADMIN; +use futures_util::lock::Mutex; +use futures_util::TryStreamExt; +use tokio::sync::mpsc::Receiver; +use tokio::time::sleep; +use tokio_util::sync::CancellationToken; + +use crate::interpreters::InterpreterFactory; +use crate::sessions::QueryContext; +use crate::task::meta::TaskMetaHandle; +use crate::task::session::create_session; +use crate::task::session::get_task_user; + +pub struct TaskService { + initialized: AtomicBool, + interval: u64, + tenant_id: String, + node_id: String, + cluster_id: String, + meta_handle: TaskMetaHandle, + _runtime: Arc, +} + +impl TaskService { + pub fn instance() -> Arc { + GlobalInstance::get() + } + + pub async fn prepare(&self) -> Result<()> { + let prepare_key = format!("{}/task_run_prepare/lock", self.tenant_id); + let _guard = self.meta_handle.acquire(&prepare_key, 0).await?; + let create_db = "CREATE DATABASE IF NOT EXISTS system_task"; + self.execute_sql(None, create_db).await?; + + let create_table = "CREATE TABLE IF NOT EXISTS system_task.task_run(\ + task_id UINT64,\ + task_name TEXT NOT NULL,\ + query_text TEXT NOT NULL,\ + when_condition TEXT, + after TEXT,\ + comment TEXT,\ + owner TEXT, + owner_user TEXT,\ + warehouse_name TEXT,\ + using_warehouse_size TEXT,\ + schedule_type INTEGER,\ + interval INTEGER,\ + interval_secs INTEGER,\ + interval_milliseconds UINT64,\ + cron TEXT,\ + time_zone TEXT DEFAULT 'UTC',\ + run_id UINT64,\ + 
attempt_number INTEGER,\ + state TEXT NOT NULL DEFAULT 'SCHEDULED',\ + error_code BIGINT,\ + error_message TEXT, + root_task_id UINT64,\ + scheduled_at TIMESTAMP DEFAULT NOW(),\ + completed_at TIMESTAMP,\ + next_scheduled_at TIMESTAMP DEFAULT NOW(),\ + error_integration TEXT,\ + status TEXT,\ + created_at TIMESTAMP,\ + updated_at TIMESTAMP,\ + session_params VARIANT,\ + last_suspended_at TIMESTAMP,\ + suspend_task_after_num_failures INTEGER\ + );"; + self.execute_sql(None, create_table).await?; + + Ok(()) + } + + pub fn initialized(&self) { + self.initialized.store(true, Ordering::SeqCst); + } + + pub fn init(mut task_rx: Receiver, cfg: &InnerConfig) -> Result<()> { + let tenant = cfg.query.tenant_id.clone(); + let meta_client = MetaGrpcClient::try_new(&cfg.meta.to_meta_grpc_client_conf()) + .map_err(|_e| ErrorCode::Internal("Create MetaClient failed for Task"))?; + let meta_handle = TaskMetaHandle::new(meta_client, cfg.query.node_id.clone()); + let runtime = Arc::new(Runtime::with_worker_threads( + 4, + Some("task-worker".to_owned()), + )?); + + let instance = TaskService { + initialized: AtomicBool::new(false), + interval: 2, + tenant_id: cfg.query.tenant_id.tenant_name().to_string(), + node_id: cfg.query.node_id.clone(), + cluster_id: cfg.query.cluster_id.clone(), + meta_handle, + _runtime: runtime.clone(), + }; + GlobalInstance::set(Arc::new(instance)); + + runtime.clone().try_spawn(async move { + let task_service = TaskService::instance(); + loop { + if !task_service.initialized.load(Ordering::SeqCst) { + tokio::time::sleep(Duration::from_secs(1)).await; + } else { + break; + } + } + task_service.prepare().await?; + + let mut scheduled_tasks: HashMap = HashMap::new(); + let task_mgr = UserApiProvider::instance().task_api(&tenant); + + // If `task_c` is defined as `AFTER task_a, task_b`, then: + // - task_after_infos["task_c"] = AfterTaskInfo { afters: ["task_a, task_b"] } + let mut task_after_infos = HashMap::::new(); + // - task_dep_infos["task_a"]["task_c"] 
= AfterTaskInfo { ... } + // - task_dep_infos["task_b"]["task_c"] = AfterTaskInfo { ... } + let mut task_dep_infos = HashMap::>::new(); + // after task state => [task1 name, task2 name], if task succeeded then remove task name on after task state + let task_deps = Arc::new(Mutex::new(HashMap::::new())); + + while let Some(task) = task_rx.recv().await { + match task { + TaskMessage::ScheduleTask(mut task) => { + debug_assert!(task.schedule_options.is_some()); + if let Some(schedule_options) = &task.schedule_options { + // clean old task if alter + if let Some(token) = scheduled_tasks.remove(&task.task_name) { + token.cancel(); + } + match task.status { + Status::Suspended => continue, + Status::Started => () + } + + let token = CancellationToken::new(); + let child_token = token.child_token(); + let task_name = task.task_name.to_string(); + let task_service = TaskService::instance(); + + task_service.update_or_create_task_run(&TaskRun { + task: task.clone(), + run_id: Self::make_run_id(), + attempt_number: task.suspend_task_after_num_failures.unwrap_or(0) as i32, + state: State::Scheduled, + scheduled_at: Utc::now(), + completed_at: None, + error_code: 0, + error_message: None, + root_task_id: EMPTY_TASK_ID, + }).await?; + + match schedule_options.schedule_type { + ScheduleType::IntervalType => { + let task_mgr = task_mgr.clone(); + let mut duration = Duration::from_secs(schedule_options.interval.unwrap() as u64); + if let Some(ms) = &schedule_options.milliseconds_interval { + duration += Duration::from_millis(*ms); + } + + runtime + .spawn(async move { + task.next_scheduled_at = Some(Utc::now() + duration); + task_mgr.update_task(task.clone()).await??; + loop { + tokio::select! 
{ + _ = sleep(duration) => { + let _ = TaskChannel::instance().send(TaskMessage::ExecuteTask(task.clone())).await; + } + _ = child_token.cancelled() => { + break; + } + } + } + // TODO: log error + Result::Ok(()) + }); + } + ScheduleType::CronType => { + // SAFETY: check on CreateTask + let cron_expr = schedule_options.cron.as_ref().unwrap(); + let tz = schedule_options.time_zone.as_ref().unwrap().parse::().unwrap(); + let schedule = Schedule::from_str(cron_expr).unwrap(); + + runtime + .spawn(async move { + let upcoming = schedule.upcoming(tz); + + for next_time in upcoming { + let now = Utc::now(); + let duration = (next_time.with_timezone(&Utc) - now) + .to_std() + .unwrap_or(Duration::ZERO); + + task.next_scheduled_at = Some(Utc::now() + duration); + tokio::select! { + _ = sleep(duration) => { + let _ = TaskChannel::instance().send(TaskMessage::ExecuteTask(task.clone())).await; + } + _ = child_token.cancelled() => { + break; + } + } + } + }); + } + } + let _ = scheduled_tasks.insert(task_name, token); + } + } + TaskMessage::ExecuteTask(task) => { + let task_name = task.task_name.clone(); + let task_service = TaskService::instance(); + + // TODO: Meta control query is executed serially through watch key + + let mut task_run = task_service.lasted_task_run(&task_name).await? 
+ .unwrap_or_else(|| TaskRun { + task: task.clone(), + run_id: Self::make_run_id(), + attempt_number: task.suspend_task_after_num_failures.unwrap_or(0) as i32, + state: State::Executing, + scheduled_at: Utc::now(), + completed_at: None, + error_code: 0, + error_message: None, + root_task_id: EMPTY_TASK_ID, + }); + task_service.update_or_create_task_run(&task_run).await?; + + let task_mgr = task_mgr.clone(); + let tenant = tenant.clone(); + let task_dep_infos = task_dep_infos.clone(); + let task_deps = task_deps.clone(); + let owner = Self::get_task_owner(&task, &tenant).await?; + runtime + .try_spawn(async move { + while task_run.attempt_number >= 0 { + let task_result = Self::spawn_task(task.clone(), owner.clone()).await; + + match task_result { + Ok(()) => { + task_run.state = State::Succeeded; + task_run.completed_at = Some(Utc::now()); + task_service.update_or_create_task_run(&task_run).await?; + + if let Some(info) = task_dep_infos.get(&task_name) { + let mut guard = task_deps.lock().await; + + for (dep_name, dep_info) in info { + if let Some(after_state) = guard.get_mut(dep_info) { + if after_state.completed_task(&task_name) { + *after_state = AfterTaskState::from(dep_info); + let dep_task = task_mgr.describe_task(dep_name).await?? 
+ .ok_or_else(|| ErrorCode::UnknownTask(dep_name.clone()))?; + + let _ = TaskChannel::instance().send(TaskMessage::ExecuteTask(dep_task)).await; + } + } + } + } + break; + } + Err(err) => { + task_run.state = State::Failed; + task_run.completed_at = Some(Utc::now()); + task_run.attempt_number -= 1; + task_run.error_code = err.code() as i64; + task_run.error_message = Some(err.message()); + task_service.update_or_create_task_run(&task_run).await?; + task_run.run_id = Self::make_run_id(); + } + } + task_mgr.alter_task(&task.task_name, &AlterTaskOptions::Suspend).await??; + } + + // TODO: log error + Result::Ok(()) + }, None)?; + } + TaskMessage::DeleteTask(task_name) => { + if let Some(deps) = task_dep_infos.get(&task_name) { + if !deps.is_empty() { + continue; + // TODO: return delete failed error + } + } + if let Some(token) = scheduled_tasks.remove(&task_name) { + token.cancel(); + } + } + TaskMessage::AfterTask(task) => { + match task.status { + Status::Suspended => continue, + Status::Started => (), + } + // after info + if let Some(info) = task_after_infos.remove(&task.task_name) { + // dep info + for after in info.afters.iter() { + if let Some(dep_tasks) = task_dep_infos.get_mut(after) { + dep_tasks.remove(&task.task_name); + } + } + task_deps + .lock() + .await + .remove(&info); + } + if task.after.is_empty() { + continue; + } + let task_name = task.task_name.clone(); + let info = AfterTaskInfo::from(&task); + // after info + task_after_infos.insert(task_name.clone(), info.clone()); + // dep info + for after_task in task.after.iter() { + task_dep_infos + .entry(after_task.clone()) + .or_default() + .insert(task_name.clone(), info.clone()); + + task_deps + .lock() + .await + .insert(info.clone(), AfterTaskState::from(&info)); + } + } + } + } + + // TODO: log error + Result::Ok(()) + }, None)?; + Ok(()) + } + + async fn get_task_owner(task: &Task, tenant: &Tenant) -> Result { + UserApiProvider::instance() + .get_user( + tenant, + 
UserIdentity::parse(&task.owner_user).map_err(|e| { + ErrorCode::MetaServiceError(format!("Failed to parse UserIdentity: {}", e)) + })?, + ) + .await + } + + async fn spawn_task(task: Task, user: UserInfo) -> Result<()> { + let task_service = TaskService::instance(); + + if let Some(when_condition) = &task.when_condition { + let result = task_service + .execute_sql(Some(user.clone()), &format!("SELECT {when_condition}")) + .await?; + let is_met = result + .first() + .and_then(|block| block.get_by_offset(0).index(0)) + .and_then(|scalar| { + scalar + .as_boolean() + .cloned() + .map(Ok) + .or_else(|| scalar.as_string().map(|str| str.trim().parse::())) + }) + .transpose() + .map_err(|err| { + ErrorCode::TaskWhenConditionNotMet(format!( + "when condition error for task: {}, {}", + task.task_name, err + )) + })? + .unwrap_or(false); + if !is_met { + return Err(ErrorCode::TaskWhenConditionNotMet(format!( + "when condition not met for task: {}", + task.task_name + ))); + } + } + task_service + .execute_sql(Some(user), &task.query_text) + .await?; + + Ok(()) + } + + pub async fn create_context(&self, other_user: Option) -> Result> { + let (user, role) = if let Some(other_user) = other_user { + (other_user, None) + } else { + ( + get_task_user(&self.tenant_id, &self.cluster_id), + Some(BUILTIN_ROLE_ACCOUNT_ADMIN.to_string()), + ) + }; + let session = create_session(user, role).await?; + // only need run the sql on the current node + session.create_query_context_with_cluster(Arc::new(Cluster { + unassign: false, + local_id: self.node_id.clone(), + nodes: vec![], + })) + } + + pub async fn lasted_task_run(&self, task_name: &str) -> Result> { + let blocks = self + .execute_sql( + None, + &format!( + "SELECT + task_id, + task_name, + query_text, + when_condition, + after, + comment, + owner, + owner_user, + warehouse_name, + using_warehouse_size, + schedule_type, + interval, + interval_milliseconds, + cron, + time_zone, + run_id, + attempt_number, + state, + error_code, + 
error_message, + root_task_id, + scheduled_at, + completed_at, + next_scheduled_at, + error_integration, + status, + created_at, + updated_at, + session_params, + last_suspended_at, + suspend_task_after_num_failures + FROM system_task.task_run WHERE task_name = '{task_name}' ORDER BY run_id DESC LIMIT 1;" + ), + ) + .await?; + + let Some(block) = blocks.first() else { + return Ok(None); + }; + if block.num_rows() == 0 { + return Ok(None); + } + Ok(Self::block2task_run(block, 0)) + } + + pub async fn update_or_create_task_run(&self, task_run: &TaskRun) -> Result<()> { + let state = match task_run.state { + State::Scheduled => "SCHEDULED".to_string(), + State::Executing => "EXECUTING".to_string(), + State::Succeeded => "SUCCEEDED".to_string(), + State::Failed => "FAILED".to_string(), + State::Cancelled => "CANCELLED".to_string(), + }; + let scheduled_at = task_run.scheduled_at.timestamp(); + let completed_at = task_run + .completed_at + .map(|time| time.timestamp().to_string()) + .unwrap_or_else(|| "null".to_string()); + let error_message = task_run + .error_message + .as_ref() + .map(|s| format!("'{s}'")) + .unwrap_or_else(|| "null".to_string()); + let root_task_id = task_run.root_task_id; + + let is_exists = self.execute_sql(None, &format!("UPDATE system_task.task_run SET run_id = {}, state = '{}', scheduled_at = {}, completed_at = {}, error_message = {}, root_task_id = {} WHERE task_name = '{}' AND run_id = {}", task_run.run_id, state, scheduled_at, completed_at, error_message, root_task_id, task_run.task.task_name, task_run.run_id)).await? + .first() + .and_then(|block| { + block.get_by_offset(0).index(0).and_then(|s| s.as_number().and_then(|n| n.as_u_int64().cloned())) + }) + .map(|v| v > 0) + .unwrap_or(false); + + if !is_exists { + self.execute_sql(None, &Self::task_run2insert(task_run)?) 
+ .await?; + } + Ok(()) + } + + fn task_run2insert(task_run: &TaskRun) -> Result { + let task = &task_run.task; + + let sql = format!( + "INSERT INTO system_task.task_run (\ + task_id, + task_name, + query_text, + when_condition, + after, + comment, + owner, + owner_user, + warehouse_name, + using_warehouse_size, + schedule_type, + interval, + interval_milliseconds, + cron, + time_zone, + run_id, + attempt_number, + state, + error_code, + error_message, + root_task_id, + scheduled_at, + completed_at, + next_scheduled_at, + error_integration, + status, + created_at, + updated_at, + session_params, + last_suspended_at, + suspend_task_after_num_failures + ) values ( + {}, + '{}', + '{}', + {}, + {}, + {}, + '{}', + '{}', + {}, + {}, + {}, + {}, + {}, + {}, + {}, + {}, + {}, + '{}', + {}, + {}, + {}, + {}, + {}, + {}, + {}, + '{}', + {}, + {}, + {}, + {}, + {} + );", + task.task_id, + task.task_name, + task.query_text.replace('\'', "''"), + task.when_condition + .as_ref() + .map(|s| format!("'{s}'").replace('\'', "''")) + .unwrap_or_else(|| "null".to_string()), + if !task.after.is_empty() { + format!("'{}'", task.after.join(", ")) + } else { + "null".to_string() + }, + task.comment + .as_ref() + .map(|s| format!("'{s}'")) + .unwrap_or_else(|| "null".to_string()), + task.owner, + task.owner_user.replace('\'', "''"), + task.warehouse_options + .as_ref() + .and_then(|w| w.warehouse.as_ref()) + .map(|s| format!("'{s}'")) + .unwrap_or_else(|| "null".to_string()), + task.warehouse_options + .as_ref() + .and_then(|w| w.using_warehouse_size.as_ref()) + .map(|s| format!("'{s}'")) + .unwrap_or_else(|| "null".to_string()), + task.schedule_options + .as_ref() + .map(|s| match s.schedule_type { + ScheduleType::IntervalType => "0".to_string(), + ScheduleType::CronType => "1".to_string(), + }) + .unwrap_or_else(|| "null".to_string()), + task.schedule_options + .as_ref() + .and_then(|s| s.interval) + .map(|v| v.to_string()) + .unwrap_or_else(|| "null".to_string()), + 
task.schedule_options + .as_ref() + .and_then(|s| s.milliseconds_interval) + .map(|v| v.to_string()) + .unwrap_or_else(|| "null".to_string()), + task.schedule_options + .as_ref() + .and_then(|s| s.cron.as_ref()) + .map(|s| format!("'{s}'")) + .unwrap_or_else(|| "null".to_string()), + task.schedule_options + .as_ref() + .and_then(|s| s.time_zone.as_ref()) + .map(|s| format!("'{s}'")) + .unwrap_or_else(|| "null".to_string()), + task_run.run_id, + task_run.attempt_number, + match task_run.state { + State::Scheduled => "SCHEDULED".to_string(), + State::Executing => "EXECUTING".to_string(), + State::Succeeded => "SUCCEEDED".to_string(), + State::Failed => "FAILED".to_string(), + State::Cancelled => "CANCELLED".to_string(), + }, + task_run.error_code, + task_run + .error_message + .as_ref() + .map(|s| format!("'{s}'")) + .unwrap_or_else(|| "null".to_string()), + task_run.root_task_id, + task_run.scheduled_at.timestamp(), + task_run + .completed_at + .as_ref() + .map(|d| d.to_string()) + .unwrap_or_else(|| "null".to_string()), + task.next_scheduled_at + .as_ref() + .map(|d| d.timestamp().to_string()) + .unwrap_or_else(|| "null".to_string()), + task.error_integration + .as_ref() + .map(|s| format!("'{s}'")) + .unwrap_or_else(|| "null".to_string()), + match task.status { + Status::Suspended => "SUSPENDED".to_string(), + Status::Started => "STARTED".to_string(), + }, + task.created_at.timestamp(), + task.updated_at.timestamp(), + serde_json::to_string(&task.session_params).map(|s| format!("'{s}'"))?, + task.last_suspended_at + .as_ref() + .map(|d| d.timestamp().to_string()) + .unwrap_or_else(|| "null".to_string()), + task.suspend_task_after_num_failures + .map(|s| s.to_string()) + .unwrap_or_else(|| "null".to_string()) + ); + Ok(sql) + } + + fn block2task_run(block: &DataBlock, row: usize) -> Option { + let task_id = *block + .get_by_offset(0) + .index(row)? + .as_number()? 
+ .as_u_int64()?; + let task_name = block.get_by_offset(1).index(row)?.as_string()?.to_string(); + let query_text = block.get_by_offset(2).index(row)?.as_string()?.to_string(); + let when_condition = block + .get_by_offset(3) + .index(row) + .and_then(|s| s.as_string().map(|s| s.to_string())); + let after = block + .get_by_offset(4) + .index(row)? + .as_string()? + .split(", ") + .map(str::to_string) + .collect::>(); + let comment = block + .get_by_offset(5) + .index(row) + .and_then(|s| s.as_string().map(|s| s.to_string())); + let owner = block.get_by_offset(6).index(row)?.as_string()?.to_string(); + let owner_user = block.get_by_offset(7).index(row)?.as_string()?.to_string(); + let warehouse_name = block + .get_by_offset(8) + .index(row) + .and_then(|s| s.as_string().map(|s| s.to_string())); + let using_warehouse_size = block + .get_by_offset(9) + .index(row) + .and_then(|s| s.as_string().map(|s| s.to_string())); + let schedule_type = block + .get_by_offset(10) + .index(row) + .and_then(|s| s.as_number().and_then(|n| n.as_int32()).cloned()); + let interval = block + .get_by_offset(11) + .index(row) + .and_then(|s| s.as_number().and_then(|n| n.as_int32()).cloned()); + let milliseconds_interval = block + .get_by_offset(12) + .index(row) + .and_then(|s| s.as_number().and_then(|n| n.as_u_int64()).cloned()); + let cron = block + .get_by_offset(13) + .index(row) + .and_then(|s| s.as_string().map(|s| s.to_string())); + let time_zone = block + .get_by_offset(14) + .index(row) + .and_then(|s| s.as_string().map(|s| s.to_string())); + let run_id = *block + .get_by_offset(15) + .index(row)? + .as_number()? + .as_u_int64()?; + let attempt_number = block + .get_by_offset(16) + .index(row) + .and_then(|s| s.as_number().and_then(|n| n.as_int32()).cloned()); + let state = block.get_by_offset(17).index(row)?.as_string()?.to_string(); + let error_code = *block + .get_by_offset(18) + .index(row)? + .as_number()? 
+ .as_int64()?; + let error_message = block + .get_by_offset(19) + .index(row) + .and_then(|s| s.as_string().map(|s| s.to_string())); + let root_task_id = *block + .get_by_offset(20) + .index(row)? + .as_number()? + .as_u_int64()?; + let scheduled_at = *block.get_by_offset(21).index(row)?.as_timestamp()?; + let completed_at = block + .get_by_offset(22) + .index(row) + .and_then(|s| s.as_timestamp().cloned()); + let next_scheduled_at = block + .get_by_offset(23) + .index(row) + .and_then(|s| s.as_timestamp().cloned()); + let error_integration = block + .get_by_offset(24) + .index(row) + .and_then(|s| s.as_string().map(|s| s.to_string())); + let status = block.get_by_offset(25).index(row)?.as_string()?.to_string(); + let created_at = *block.get_by_offset(26).index(row)?.as_timestamp()?; + let updated_at = *block.get_by_offset(27).index(row)?.as_timestamp()?; + let session_params = block.get_by_offset(28).index(row).and_then(|s| { + s.as_variant() + .and_then(|bytes| serde_json::from_slice::>(bytes).ok()) + })?; + let last_suspended_at = block + .get_by_offset(29) + .index(row) + .and_then(|s| s.as_timestamp().cloned()); + + let schedule_options = if let Some(s) = schedule_type { + let schedule_type = match s { + 0 => ScheduleType::IntervalType, + 1 => ScheduleType::CronType, + _ => { + return None; + } + }; + Some(ScheduleOptions { + interval, + cron, + time_zone, + schedule_type, + milliseconds_interval, + }) + } else { + None + }; + + let warehouse_options = if warehouse_name.is_some() && using_warehouse_size.is_some() { + Some(WarehouseOptions { + warehouse: warehouse_name, + using_warehouse_size, + }) + } else { + None + }; + let task = Task { + task_id, + task_name, + query_text, + when_condition, + after, + comment, + owner, + owner_user, + schedule_options, + warehouse_options, + next_scheduled_at: next_scheduled_at + .and_then(|i| DateTime::::from_timestamp(i, 0)), + suspend_task_after_num_failures: attempt_number.map(|i| i as u64), + error_integration, + 
status: match status.as_str() { + "SUSPENDED" => Status::Suspended, + "STARTED" => Status::Started, + _ => return None, + }, + created_at: DateTime::::from_timestamp(created_at, 0)?, + updated_at: DateTime::::from_timestamp(updated_at, 0)?, + last_suspended_at: last_suspended_at + .and_then(|i| DateTime::::from_timestamp(i, 0)), + session_params, + }; + + Some(TaskRun { + task, + run_id, + attempt_number: attempt_number.unwrap_or_default(), + state: match state.as_str() { + "SCHEDULED" => State::Scheduled, + "EXECUTING" => State::Executing, + "SUCCEEDED" => State::Succeeded, + "FAILED" => State::Failed, + "CANCELLED" => State::Cancelled, + _ => return None, + }, + scheduled_at: DateTime::::from_timestamp(scheduled_at, 0)?, + completed_at: completed_at.and_then(|i| DateTime::::from_timestamp(i, 0)), + error_code, + error_message, + root_task_id, + }) + } + + async fn execute_sql(&self, other_user: Option, sql: &str) -> Result> { + let context = self.create_context(other_user).await?; + + let mut planner = Planner::new(context.clone()); + let (plan, _) = planner.plan_sql(sql).await?; + let executor = InterpreterFactory::get(context.clone(), &plan).await?; + let stream = executor.execute(context).await?; + stream.try_collect::>().await + } + + fn make_run_id() -> u64 { + Utc::now().timestamp_millis() as u64 + } +} diff --git a/src/query/service/src/task_service.rs b/src/query/service/src/task_service.rs deleted file mode 100644 index 7e4b51da2cef6..0000000000000 --- a/src/query/service/src/task_service.rs +++ /dev/null @@ -1,322 +0,0 @@ -// Copyright 2021 Datafuse Labs -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -use std::collections::HashMap; -use std::str::FromStr; -use std::sync::Arc; -use std::time::Duration; - -use chrono::Utc; -use chrono_tz::Tz; -use cron::Schedule; -use databend_common_ast::ast::AlterTaskOptions; -use databend_common_base::runtime::GlobalQueryRuntime; -use databend_common_base::runtime::TrySpawn; -use databend_common_catalog::session_type::SessionType; -use databend_common_exception::ErrorCode; -use databend_common_exception::Result; -use databend_common_expression::DataBlock; -use databend_common_management::task::TaskChannel; -use databend_common_management::task::TaskMessage; -use databend_common_meta_app::principal::task::AfterTaskInfo; -use databend_common_meta_app::principal::task::EMPTY_TASK_ID; -use databend_common_meta_app::principal::AfterTaskState; -use databend_common_meta_app::principal::ScheduleType; -use databend_common_meta_app::principal::State; -use databend_common_meta_app::principal::Status; -use databend_common_meta_app::principal::Task; -use databend_common_meta_app::principal::TaskRun; -use databend_common_meta_app::principal::UserIdentity; -use databend_common_meta_app::principal::UserInfo; -use databend_common_meta_app::schema::CreateOption; -use databend_common_meta_app::tenant::Tenant; -use databend_common_sql::Planner; -use databend_common_users::UserApiProvider; -use futures_util::lock::Mutex; -use futures_util::TryStreamExt; -use tokio::sync::mpsc::Receiver; -use tokio::time::sleep; -use tokio_util::sync::CancellationToken; - -use crate::interpreters::InterpreterFactory; -use 
crate::sessions::SessionManager; - -pub struct TaskService; - -impl TaskService { - pub fn init(mut task_rx: Receiver, tenant: Tenant) -> Result<()> { - GlobalQueryRuntime::instance() - .runtime() - .try_spawn(async move { - let mut scheduled_tasks: HashMap = HashMap::new(); - let task_mgr = UserApiProvider::instance().task_api(&tenant); - - // If `task_c` is defined as `AFTER task_a, task_b`, then: - // - task_after_infos["task_c"] = AfterTaskInfo { afters: ["task_a, task_b"] } - let mut task_after_infos = HashMap::::new(); - // - task_dep_infos["task_a"]["task_c"] = AfterTaskInfo { ... } - // - task_dep_infos["task_b"]["task_c"] = AfterTaskInfo { ... } - let mut task_dep_infos = HashMap::>::new(); - // after task state => [task1 name, task2 name], if task succeeded then remove task name on after task state - let task_deps= Arc::new(Mutex::new(HashMap::::new())); - - while let Some(task) = task_rx.recv().await { - match task { - TaskMessage::ScheduleTask(mut task) => { - debug_assert!(task.schedule_options.is_some()); - if let Some(schedule_options) = &task.schedule_options { - // clean old task if alter - if let Some(token) = scheduled_tasks.remove(&task.task_name) { - token.cancel(); - } - match task.status { - Status::Suspended => continue, - Status::Started => () - } - - let token = CancellationToken::new(); - let child_token = token.child_token(); - let task_name = task.task_name.to_string(); - let task_mgr = task_mgr.clone(); - - task_mgr.update_task_run(TaskRun { - task: task.clone(), - run_id: Self::make_run_id(), - attempt_number: task.suspend_task_after_num_failures.unwrap_or(0) as i32, - state: State::Scheduled, - scheduled_at: Utc::now(), - completed_at: None, - error_code: 0, - error_message: None, - root_task_id: EMPTY_TASK_ID, - }).await??; - - match schedule_options.schedule_type { - ScheduleType::IntervalType => { - let mut duration = Duration::from_secs(schedule_options.interval.unwrap() as u64); - if let Some(ms) = 
&schedule_options.milliseconds_interval { - duration += Duration::from_millis(*ms); - } - - GlobalQueryRuntime::instance() - .runtime() - .spawn(async move { - task.next_scheduled_at = Some(Utc::now() + duration); - loop { - task_mgr.create_task(task.clone(), &CreateOption::CreateOrReplace).await??; - tokio::select! { - _ = sleep(duration) => { - let _ = TaskChannel::instance().send(TaskMessage::ExecuteTask(task.clone())); - } - _ = child_token.cancelled() => { - break; - } - } - } - // TODO: log error - Result::Ok(()) - }); - } - ScheduleType::CronType => { - // SAFETY: check on CreateTask - let cron_expr = schedule_options.cron.as_ref().unwrap(); - let tz = schedule_options.time_zone.as_ref().unwrap().parse::().unwrap(); - let schedule = Schedule::from_str(cron_expr).unwrap(); - - GlobalQueryRuntime::instance() - .runtime() - .spawn(async move { - let mut upcoming = schedule.upcoming(tz); - - for next_time in upcoming { - let now = Utc::now(); - let duration = (next_time.with_timezone(&Utc) - now) - .to_std() - .unwrap_or(Duration::ZERO); - - task.next_scheduled_at = Some(Utc::now() + duration); - tokio::select! { - _ = sleep(duration) => { - let _ = TaskChannel::instance().send(TaskMessage::ExecuteTask(task.clone())); - } - _ = child_token.cancelled() => { - break; - } - } - } - }); - } - } - let _ = scheduled_tasks.insert(task_name, token); - } - } - TaskMessage::ExecuteTask(task) => { - let task_name = task.task_name.clone(); - - // TODO: Meta control query is executed serially through watch key - - let mut task_run = task_mgr.lasted_task_run(&task_name).await?? 
- .unwrap_or_else(|| TaskRun { - task: task.clone(), - run_id: Self::make_run_id(), - attempt_number: task.suspend_task_after_num_failures.unwrap_or(0) as i32, - state: State::Executing, - scheduled_at: Utc::now(), - completed_at: None, - error_code: 0, - error_message: None, - root_task_id: EMPTY_TASK_ID, - }); - task_mgr.update_task_run(task_run.clone()).await??; - - let task_mgr = task_mgr.clone(); - let tenant = tenant.clone(); - let task_dep_infos = task_dep_infos.clone(); - let task_deps = task_deps.clone(); - let owner = Self::get_task_owner(&task, &tenant).await?; - GlobalQueryRuntime::instance() - .runtime() - .try_spawn(async move { - while task_run.attempt_number >= 0 { - let task_result = Self::spawn_task(task.clone(), owner.clone()).await; - - match task_result { - Ok(()) => { - task_run.state = State::Succeeded; - task_run.completed_at = Some(Utc::now()); - task_mgr.update_task_run(task_run.clone()).await??; - - if let Some(info) = task_dep_infos.get(&task_name) { - let mut guard = task_deps.lock().await; - - for (dep_name, dep_info) in info { - if let Some(after_state) = guard.get_mut(dep_info) { - if after_state.completed_task(&task_name) { - *after_state = AfterTaskState::from(dep_info); - let dep_task = task_mgr.describe_task(dep_name).await?? 
- .ok_or_else(|| ErrorCode::UnknownTask(dep_name.clone()))?; - - let _ = TaskChannel::instance().send(TaskMessage::ExecuteTask(dep_task)); - } - } - } - } - break; - } - Err(err) => { - task_run.state = State::Failed; - task_run.completed_at = Some(Utc::now()); - task_run.attempt_number -= 1; - task_run.error_code = err.code() as i64; - task_run.error_message = Some(err.message()); - task_mgr.update_task_run(task_run.clone()).await??; - task_run.run_id = Self::make_run_id(); - } - } - task_mgr.alter_task(&task.task_name, &AlterTaskOptions::Suspend).await??; - } - - // TODO: log error - Result::Ok(()) - }, None)?; - } - TaskMessage::DeleteTask(task_name) => { - if let Some(deps) = task_dep_infos.get(&task_name) { - if !deps.is_empty() { - continue - // TODO: return delete failed error - } - } - if let Some(token) = scheduled_tasks.remove(&task_name) { - token.cancel(); - } - } - TaskMessage::AfterTask(task) => { - match task.status { - Status::Suspended => continue, - Status::Started => (), - } - // after info - if let Some(info) = task_after_infos.remove(&task.task_name) { - // dep info - for after in info.afters.iter() { - if let Some(dep_tasks) = task_dep_infos.get_mut(after) { - dep_tasks.remove(&task.task_name); - } - } - task_deps - .lock() - .await - .remove(&info); - } - if task.after.is_empty() { - continue; - } - let task_name = task.task_name.clone(); - let info = AfterTaskInfo::from(&task); - // after info - task_after_infos.insert(task_name.clone(), info.clone()); - // dep info - for after_task in task.after.iter() { - task_dep_infos - .entry(after_task.clone()) - .or_default() - .insert(task_name.clone(), info.clone()); - - task_deps - .lock() - .await - .insert(info.clone(), AfterTaskState::from(&info)); - } - } - } - } - - // TODO: log error - Result::Ok(()) - }, None)?; - Ok(()) - } - - async fn get_task_owner(task: &Task, tenant: &Tenant) -> Result { - UserApiProvider::instance() - .get_user( - tenant, - 
UserIdentity::parse(&task.owner_user).map_err(|e| { - ErrorCode::MetaServiceError(format!("Failed to parse UserIdentity: {}", e)) - })?, - ) - .await - } - - async fn spawn_task(task: Task, user: UserInfo) -> Result<()> { - let session = SessionManager::instance() - .create_session(SessionType::Local) - .await?; - session.set_authed_user(user, None).await?; - let context = Arc::new(session).create_query_context().await?; - - let mut planner = Planner::new(context.clone()); - let (plan, _) = planner.plan_sql(&task.query_text).await?; - let executor = InterpreterFactory::get(context.clone(), &plan).await?; - let stream = executor.execute(context).await?; - let _ = stream.try_collect::>().await?; - - Ok(()) - } - - fn make_run_id() -> u64 { - Utc::now().timestamp_millis() as u64 - } -} diff --git a/src/query/users/src/user_api.rs b/src/query/users/src/user_api.rs index 3bd32249fb2ed..15967fd738178 100644 --- a/src/query/users/src/user_api.rs +++ b/src/query/users/src/user_api.rs @@ -153,9 +153,9 @@ impl UserApiProvider { let task = task?; if task.schedule_options.is_some() { - let _ = task_tx.send(TaskMessage::ScheduleTask(task)); + let _ = task_tx.send(TaskMessage::ScheduleTask(task)).await; } else if !task.after.is_empty() { - let _ = task_tx.send(TaskMessage::AfterTask(task)); + let _ = task_tx.send(TaskMessage::AfterTask(task)).await; } } } From 6baff0a5e77373aba4fe0cf823f1bbdd1272c3d6 Mon Sep 17 00:00:00 2001 From: Kould Date: Wed, 9 Jul 2025 19:21:18 +0800 Subject: [PATCH 03/25] feat: use Meta's Watch mechanism to distribute TaskMessage --- src/meta/api/src/kv_pb_api/mod.rs | 2 +- src/meta/app/src/principal/mod.rs | 3 +- src/meta/app/src/principal/task.rs | 37 ++++ ...ask_run_ident.rs => task_message_ident.rs} | 18 +- src/meta/cache/src/lib.rs | 2 +- .../src/task_from_to_protobuf_impl.rs | 99 ++++----- src/meta/protos/proto/task.proto | 22 +- src/query/management/src/lib.rs | 1 + src/query/management/src/task/mod.rs | 2 - 
src/query/management/src/task/task_mgr.rs | 194 ++++++++---------- src/query/service/src/global_services.rs | 6 +- src/query/service/src/task/meta.rs | 4 + src/query/service/src/task/mod.rs | 4 +- .../src/task/{task_service.rs => service.rs} | 80 ++++++-- src/query/users/src/user_api.rs | 22 -- 15 files changed, 262 insertions(+), 234 deletions(-) rename src/meta/app/src/principal/{task_run_ident.rs => task_message_ident.rs} (69%) rename src/query/service/src/task/{task_service.rs => service.rs} (91%) diff --git a/src/meta/api/src/kv_pb_api/mod.rs b/src/meta/api/src/kv_pb_api/mod.rs index 71e7a5356a8d4..ab46c591e140d 100644 --- a/src/meta/api/src/kv_pb_api/mod.rs +++ b/src/meta/api/src/kv_pb_api/mod.rs @@ -43,7 +43,7 @@ use futures::TryStreamExt; use itertools::Itertools; pub(crate) use self::codec::decode_non_empty_item; -pub(crate) use self::codec::decode_seqv; +pub use self::codec::decode_seqv; pub(crate) use self::codec::decode_transition; pub(crate) use self::codec::encode_operation; pub use self::upsert_pb::UpsertPB; diff --git a/src/meta/app/src/principal/mod.rs b/src/meta/app/src/principal/mod.rs index 43ad14e83a8d8..51361d1c036df 100644 --- a/src/meta/app/src/principal/mod.rs +++ b/src/meta/app/src/principal/mod.rs @@ -50,7 +50,7 @@ pub mod procedure_name_ident; pub mod stage_file_ident; pub mod task; pub mod task_ident; -pub mod task_run_ident; +pub mod task_message_ident; pub mod tenant_ownership_object_ident; pub mod tenant_user_ident; pub mod user_defined_file_format_ident; @@ -95,6 +95,7 @@ pub use task::ScheduleType; pub use task::State; pub use task::Status; pub use task::Task; +pub use task::TaskMessage; pub use task::TaskRun; pub use task::WarehouseOptions; pub use task_ident::TaskIdent; diff --git a/src/meta/app/src/principal/task.rs b/src/meta/app/src/principal/task.rs index 26a49f010a9f7..2dc73cf277512 100644 --- a/src/meta/app/src/principal/task.rs +++ b/src/meta/app/src/principal/task.rs @@ -102,6 +102,43 @@ impl TaskRun { } } 
+#[derive(Debug, Clone, PartialEq)] +pub enum TaskMessage { + ExecuteTask(Task), + ScheduleTask(Task), + DeleteTask(String), + AfterTask(Task), +} + +impl TaskMessage { + pub fn task_name(&self) -> &str { + match self { + TaskMessage::ExecuteTask(task) + | TaskMessage::ScheduleTask(task) + | TaskMessage::AfterTask(task) => task.task_name.as_str(), + TaskMessage::DeleteTask(task_name) => task_name.as_str(), + } + } + + pub fn key(&self) -> String { + let ty = match self { + TaskMessage::ExecuteTask(_) => 0, + TaskMessage::ScheduleTask(_) => 1, + TaskMessage::DeleteTask(_) => 2, + TaskMessage::AfterTask(_) => 3, + }; + format!("{}-{}-{}", TaskMessage::prefix(), ty, self.task_name()) + } + + pub fn prefix() -> i64 { + 0 + } + + pub fn prefix_range() -> (i64, i64) { + (0, 1) + } +} + #[derive(Debug, Clone, PartialEq, Eq, Hash)] pub struct AfterTaskInfo { pub afters: Arc>, diff --git a/src/meta/app/src/principal/task_run_ident.rs b/src/meta/app/src/principal/task_message_ident.rs similarity index 69% rename from src/meta/app/src/principal/task_run_ident.rs rename to src/meta/app/src/principal/task_message_ident.rs index 34faa0e3acdfc..c4360ebdfb67b 100644 --- a/src/meta/app/src/principal/task_run_ident.rs +++ b/src/meta/app/src/principal/task_message_ident.rs @@ -14,29 +14,29 @@ use crate::tenant_key::ident::TIdent; -pub type TaskRunIdent = TIdent; +pub type TaskMessageIdent = TIdent; -pub type TaskRunIdentRaw = TIdent; +pub type TaskMessageIdentRaw = TIdent; pub use kvapi_impl::Resource; mod kvapi_impl { use databend_common_meta_kvapi::kvapi; - use crate::principal::task_run_ident::TaskRunIdent; - use crate::principal::TaskRun; + use crate::principal::task::TaskMessage; + use crate::principal::task_message_ident::TaskMessageIdent; use crate::tenant_key::resource::TenantResource; pub struct Resource; impl TenantResource for Resource { - const PREFIX: &'static str = "__fd_task_runs"; - const TYPE: &'static str = "TaskRunIdent"; + const PREFIX: &'static str = 
"__fd_task_messages"; + const TYPE: &'static str = "TaskMessageIdent"; const HAS_TENANT: bool = true; - type ValueType = TaskRun; + type ValueType = TaskMessage; } - impl kvapi::Value for TaskRun { - type KeyType = TaskRunIdent; + impl kvapi::Value for TaskMessage { + type KeyType = TaskMessageIdent; fn dependency_keys(&self, _key: &Self::KeyType) -> impl IntoIterator { [] diff --git a/src/meta/cache/src/lib.rs b/src/meta/cache/src/lib.rs index d2f52b73d9942..efcc947f7469c 100644 --- a/src/meta/cache/src/lib.rs +++ b/src/meta/cache/src/lib.rs @@ -96,6 +96,6 @@ mod cache; mod meta_cache_types; -mod meta_client_source; +pub mod meta_client_source; pub use cache::Cache; diff --git a/src/meta/proto-conv/src/task_from_to_protobuf_impl.rs b/src/meta/proto-conv/src/task_from_to_protobuf_impl.rs index 614f757b09536..577317727b31c 100644 --- a/src/meta/proto-conv/src/task_from_to_protobuf_impl.rs +++ b/src/meta/proto-conv/src/task_from_to_protobuf_impl.rs @@ -17,6 +17,7 @@ use chrono::Utc; use databend_common_meta_app::principal as mt; use databend_common_meta_app::principal::task::Status; use databend_common_protos::pb; +use databend_common_protos::pb::task_message::Message; use crate::reader_check_msg; use crate::FromToProto; @@ -63,13 +64,12 @@ impl FromToProto for mt::Task { } } }; - let warehouse = match p.warehouse_options { - None => None, - Some(ref w) => Some(mt::WarehouseOptions { - warehouse: w.warehouse.clone(), - using_warehouse_size: w.using_warehouse_size.clone(), - }), - }; + + + let warehouse = p.warehouse_options.as_ref().map(|w| mt::WarehouseOptions { + warehouse: w.warehouse.clone(), + using_warehouse_size: w.using_warehouse_size.clone(), + }); Ok(Self { task_id: p.task_id, task_name: p.task_name, @@ -107,23 +107,17 @@ impl FromToProto for mt::Task { query_text: self.query_text.clone(), comment: self.comment.clone(), owner: self.owner.clone(), - schedule_options: match &self.schedule_options { - None => None, - Some(s) => Some(pb::ScheduleOptions { - 
interval: s.interval, - cron: s.cron.clone(), - time_zone: s.time_zone.clone(), - schedule_type: s.schedule_type as i32, - milliseconds_interval: s.milliseconds_interval, - }), - }, - warehouse_options: match &self.warehouse_options { - None => None, - Some(w) => Some(pb::WarehouseOptions { - warehouse: w.warehouse.clone(), - using_warehouse_size: w.using_warehouse_size.clone(), - }), - }, + schedule_options: self.schedule_options.as_ref().map(|s| pb::ScheduleOptions { + interval: s.interval, + cron: s.cron.clone(), + time_zone: s.time_zone.clone(), + schedule_type: s.schedule_type as i32, + milliseconds_interval: s.milliseconds_interval, + }), + warehouse_options: self.warehouse_options.as_ref().map(|w| pb::WarehouseOptions { + warehouse: w.warehouse.clone(), + using_warehouse_size: w.using_warehouse_size.clone(), + }), next_scheduled_at: match &self.next_scheduled_at { None => None, Some(d) => Some(d.to_pb()?), @@ -145,54 +139,41 @@ impl FromToProto for mt::Task { } } -impl FromToProto for mt::TaskRun { - type PB = pb::TaskRun; +impl FromToProto for mt::TaskMessage { + type PB = pb::TaskMessage; fn get_pb_ver(p: &Self::PB) -> u64 { p.ver } fn from_pb(p: Self::PB) -> Result where Self: Sized { - Ok(mt::TaskRun { - task: mt::Task::from_pb( - p.task - .ok_or_else(|| Incompatible::new("State can not be empty"))?, - )?, - run_id: p.run_id, - attempt_number: p.attempt_number, - state: match p.state { - 0 => mt::State::Scheduled, - 1 => mt::State::Executing, - 2 => mt::State::Succeeded, - 3 => mt::State::Failed, - 4 => mt::State::Cancelled, - n => return Err(Incompatible::new(format!("State can not be {n}"))), + Ok(match p.message { + None => return Err(Incompatible::new("message can not be empty")), + Some(message) => match message { + Message::ExecuteTask(task) => { + mt::TaskMessage::ExecuteTask(mt::Task::from_pb(task)?) + } + Message::ScheduleTask(task) => { + mt::TaskMessage::ScheduleTask(mt::Task::from_pb(task)?) 
+ } + Message::DeleteTask(task_name) => mt::TaskMessage::DeleteTask(task_name), + Message::AfterTask(task) => mt::TaskMessage::AfterTask(mt::Task::from_pb(task)?), }, - scheduled_at: DateTime::::from_pb(p.scheduled_at)?, - completed_at: p.completed_at.map(DateTime::::from_pb).transpose()?, - error_code: p.error_code, - error_message: p.error_message, - root_task_id: p.root_task_id, }) } fn to_pb(&self) -> Result { - Ok(pb::TaskRun { + let message = match self { + mt::TaskMessage::ExecuteTask(task) => Message::ExecuteTask(task.to_pb()?), + mt::TaskMessage::ScheduleTask(task) => Message::ScheduleTask(task.to_pb()?), + mt::TaskMessage::DeleteTask(task_name) => Message::DeleteTask(task_name.clone()), + mt::TaskMessage::AfterTask(task) => Message::AfterTask(task.to_pb()?), + }; + + Ok(pb::TaskMessage { ver: VER, min_reader_ver: MIN_READER_VER, - run_id: self.run_id, - attempt_number: self.attempt_number, - state: self.state as i32, - scheduled_at: self.scheduled_at.to_pb()?, - completed_at: self - .completed_at - .as_ref() - .map(DateTime::::to_pb) - .transpose()?, - error_code: self.error_code, - error_message: self.error_message.clone(), - root_task_id: self.root_task_id, - task: Some(self.task.to_pb()?), + message: Some(message), }) } } diff --git a/src/meta/protos/proto/task.proto b/src/meta/protos/proto/task.proto index 2947efdc53b11..157023fe4d36a 100644 --- a/src/meta/protos/proto/task.proto +++ b/src/meta/protos/proto/task.proto @@ -33,26 +33,16 @@ message ScheduleOptions { optional uint64 milliseconds_interval = 5; // milliseconds level interval } -message TaskRun { +message TaskMessage { uint64 ver = 100; uint64 min_reader_ver = 101; - enum State { - Scheduled = 0; - Executing = 1; - Succeeded = 2; - Failed = 3; - Cancelled = 4; + oneof message { + Task execute_task = 1; + Task schedule_task = 2; + string delete_task = 3; + Task after_task = 4; } - uint64 run_id = 1; - int32 attempt_number = 2; - State state = 3; - string scheduled_at = 4; - optional string 
completed_at = 5; - int64 error_code = 6; - optional string error_message = 7; - uint64 root_task_id = 8; - Task task = 9; } message Task { diff --git a/src/query/management/src/lib.rs b/src/query/management/src/lib.rs index bfabce3a9eaa1..d336a44a9e10c 100644 --- a/src/query/management/src/lib.rs +++ b/src/query/management/src/lib.rs @@ -51,6 +51,7 @@ pub use serde::serialize_struct; pub use setting::SettingMgr; pub use stage::StageApi; pub use stage::StageMgr; +pub use task::TaskMgr; pub use user::UserApi; pub use user::UserMgr; pub use warehouse::SelectedNode; diff --git a/src/query/management/src/task/mod.rs b/src/query/management/src/task/mod.rs index 1d254702fc89a..1820c819ad612 100644 --- a/src/query/management/src/task/mod.rs +++ b/src/query/management/src/task/mod.rs @@ -17,6 +17,4 @@ mod task_mgr; pub use errors::TaskApiError; pub use errors::TaskError; -pub use task_mgr::TaskChannel; -pub use task_mgr::TaskMessage; pub use task_mgr::TaskMgr; diff --git a/src/query/management/src/task/task_mgr.rs b/src/query/management/src/task/task_mgr.rs index 96fef417d031b..fd4c558f23493 100644 --- a/src/query/management/src/task/task_mgr.rs +++ b/src/query/management/src/task/task_mgr.rs @@ -12,7 +12,6 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-use std::ops::Deref; use std::str::FromStr; use std::sync::Arc; @@ -21,71 +20,35 @@ use chrono_tz::Tz; use cron::Schedule; use databend_common_ast::ast::AlterTaskOptions; use databend_common_ast::ast::ScheduleOptions; -use databend_common_base::base::tokio::sync::mpsc; -use databend_common_base::base::GlobalInstance; -use databend_common_exception::ErrorCode; +use databend_common_meta_api::kv_pb_api::errors::PbApiReadError; use databend_common_meta_api::kv_pb_api::KVPbApi; use databend_common_meta_api::kv_pb_api::UpsertPB; use databend_common_meta_app::principal::task; -use databend_common_meta_app::principal::task_run_ident::TaskRunIdent; +use databend_common_meta_app::principal::task::TaskMessage; +use databend_common_meta_app::principal::task_message_ident::TaskMessageIdent; use databend_common_meta_app::principal::ScheduleType; use databend_common_meta_app::principal::Status; use databend_common_meta_app::principal::Task; use databend_common_meta_app::principal::TaskIdent; -use databend_common_meta_app::principal::TaskRun; use databend_common_meta_app::schema::CreateOption; use databend_common_meta_app::tenant::Tenant; use databend_common_meta_kvapi::kvapi; use databend_common_meta_kvapi::kvapi::DirName; +use databend_common_meta_kvapi::kvapi::Key; use databend_common_meta_types::MatchSeq; use databend_common_meta_types::MetaError; use databend_common_meta_types::With; -use futures::StreamExt; use futures::TryStreamExt; use crate::task::errors::TaskApiError; use crate::task::errors::TaskError; -pub enum TaskMessage { - ExecuteTask(Task), - ScheduleTask(Task), - DeleteTask(String), - AfterTask(Task), -} - -pub struct TaskChannel { - tx: mpsc::Sender, -} - #[derive(Clone)] pub struct TaskMgr { kv_api: Arc>, tenant: Tenant, } -impl TaskChannel { - pub fn instance() -> Arc { - GlobalInstance::get() - } -} - -impl Deref for TaskChannel { - type Target = mpsc::Sender; - - fn deref(&self) -> &Self::Target { - &self.tx - } -} - -impl TaskChannel { - pub fn init(len: 
usize) -> Result, ErrorCode> { - let (tx, rx) = mpsc::channel(len); - - GlobalInstance::set(Arc::new(TaskChannel { tx })); - Ok(rx) - } -} - impl TaskMgr { pub fn create(kv_api: Arc>, tenant: &Tenant) -> Self { TaskMgr { @@ -113,65 +76,6 @@ impl TaskMgr { .await } - async fn create_task_inner( - &self, - task: Task, - create_option: &CreateOption, - without_schedule: bool, - ) -> Result, TaskApiError> { - assert!(task.after.is_empty() || task.schedule_options.is_none()); - // check - if let Some(schedule_options) = &task.schedule_options { - match schedule_options.schedule_type { - ScheduleType::IntervalType => (), - ScheduleType::CronType => { - if let Err(e) = schedule_options.time_zone.as_ref().unwrap().parse::() { - return Ok(Err(TaskError::InvalidTimezone { - tenant: self.tenant.tenant_name().to_string(), - name: task.task_name.to_string(), - reason: e.to_string(), - })); - } - if let Err(e) = Schedule::from_str(schedule_options.cron.as_ref().unwrap()) { - return Ok(Err(TaskError::InvalidCron { - tenant: self.tenant.tenant_name().to_string(), - name: task.task_name.to_string(), - reason: e.to_string(), - })); - } - } - } - } - - let seq = MatchSeq::from(*create_option); - - let key = TaskIdent::new(&self.tenant, &task.task_name); - let req = UpsertPB::insert(key, task.clone()).with(seq); - let res = self.kv_api.upsert_pb(&req).await?; - - if let CreateOption::Create = create_option { - if res.prev.is_some() { - let err = TaskError::Exists { - tenant: self.tenant.tenant_name().to_string(), - name: task.task_name.to_string(), - reason: "".to_string(), - }; - return Ok(Err(err)); - } - } - if !task.after.is_empty() { - let _ = TaskChannel::instance() - .send(TaskMessage::AfterTask(task)) - .await; - } else if task.schedule_options.is_some() && !without_schedule { - let _ = TaskChannel::instance() - .send(TaskMessage::ScheduleTask(task)) - .await; - } - - Ok(Ok(())) - } - #[async_backtrace::framed] #[fastrace::trace] pub async fn execute_task( @@ -186,9 +90,8 @@ 
impl TaskMgr { context: "while execute task".to_string(), })); }; - let _ = TaskChannel::instance() - .send(TaskMessage::ExecuteTask(Task::clone(&task))) - .await; + self.send(TaskMessage::ExecuteTask(Task::clone(&task))) + .await?; Ok(Ok(())) } @@ -230,7 +133,7 @@ impl TaskMgr { task.schedule_options = schedule.clone().map(Self::make_schedule_options); task.comment = comments.clone(); task.warehouse_options = Some(Self::make_warehouse_options(warehouse.clone())); - task.suspend_task_after_num_failures = suspend_task_after_num_failures.clone(); + task.suspend_task_after_num_failures = *suspend_task_after_num_failures; task.error_integration = error_integration.clone(); if let Some(session_parameters) = session_parameters { task.session_params = session_parameters.clone(); @@ -253,7 +156,7 @@ impl TaskMgr { })); } for after in afters { - if task.after.contains(&after) { + if task.after.contains(after) { continue; } task.after.push(after.clone()); @@ -295,6 +198,7 @@ impl TaskMgr { Ok(Ok(task.map(|task| Task::clone(&task)))) } + #[allow(clippy::useless_asref)] #[async_backtrace::framed] #[fastrace::trace] pub async fn drop_task(&self, task_name: &str) -> Result, MetaError> { @@ -336,6 +240,86 @@ impl TaskMgr { Ok(tasks) } + #[async_backtrace::framed] + #[fastrace::trace] + pub async fn send(&self, message: TaskMessage) -> Result<(), MetaError> { + let key = TaskMessageIdent::new(&self.tenant, message.key()); + let seq = MatchSeq::from(CreateOption::CreateOrReplace); + + let req = UpsertPB::insert(key, message).with(seq); + let _ = self.kv_api.upsert_pb(&req).await?; + + Ok(()) + } + + /// mark the corresponding execute task as accepted and delete it from the queue + #[async_backtrace::framed] + #[fastrace::trace] + pub async fn execute_accept(&self, key: &str) -> Result<(), MetaError> { + let key = TaskMessageIdent::from_str_key(key).map_err(PbApiReadError::from)?; + + let req = UpsertPB::delete(key).with(MatchSeq::GE(1)); + let _ = 
self.kv_api.upsert_pb(&req).await?; + + Ok(()) + } + + async fn create_task_inner( + &self, + task: Task, + create_option: &CreateOption, + without_schedule: bool, + ) -> Result, TaskApiError> { + assert!(task.after.is_empty() || task.schedule_options.is_none()); + // check + if let Some(schedule_options) = &task.schedule_options { + match schedule_options.schedule_type { + ScheduleType::IntervalType => (), + ScheduleType::CronType => { + if let Err(e) = schedule_options.time_zone.as_ref().unwrap().parse::() { + return Ok(Err(TaskError::InvalidTimezone { + tenant: self.tenant.tenant_name().to_string(), + name: task.task_name.to_string(), + reason: e.to_string(), + })); + } + if let Err(e) = Schedule::from_str(schedule_options.cron.as_ref().unwrap()) { + return Ok(Err(TaskError::InvalidCron { + tenant: self.tenant.tenant_name().to_string(), + name: task.task_name.to_string(), + reason: e.to_string(), + })); + } + } + } + } + + let seq = MatchSeq::from(*create_option); + + let key = TaskIdent::new(&self.tenant, &task.task_name); + let req = UpsertPB::insert(key, task.clone()).with(seq); + let res = self.kv_api.upsert_pb(&req).await?; + + if let CreateOption::Create = create_option { + if res.prev.is_some() { + let err = TaskError::Exists { + tenant: self.tenant.tenant_name().to_string(), + name: task.task_name.to_string(), + reason: "".to_string(), + }; + return Ok(Err(err)); + } + } + if !task.after.is_empty() { + self.send(TaskMessage::AfterTask(task)).await?; + } else if task.schedule_options.is_some() && !without_schedule { + self.send(TaskMessage::ScheduleTask(task)) + .await?; + } + + Ok(Ok(())) + } + pub fn make_schedule_options(opt: ScheduleOptions) -> task::ScheduleOptions { match opt { ScheduleOptions::IntervalSecs(secs, ms) => { diff --git a/src/query/service/src/global_services.rs b/src/query/service/src/global_services.rs index 43d8393ffe46d..a991904ad1483 100644 --- a/src/query/service/src/global_services.rs +++ 
b/src/query/service/src/global_services.rs @@ -27,7 +27,6 @@ use databend_common_config::InnerConfig; use databend_common_exception::ErrorCode; use databend_common_exception::Result; use databend_common_exception::StackTrace; -use databend_common_management::task::TaskChannel; use databend_common_management::WorkloadGroupResourceManager; use databend_common_management::WorkloadMgr; use databend_common_meta_app::schema::CatalogType; @@ -57,7 +56,7 @@ use crate::servers::http::v1::ClientSessionManager; use crate::servers::http::v1::HttpQueryManager; use crate::sessions::QueriesQueueManager; use crate::sessions::SessionManager; -use crate::task::task_service::TaskService; +use crate::task::service::TaskService; pub struct GlobalServices; @@ -123,7 +122,6 @@ impl GlobalServices { LockManager::init()?; AuthMgr::init(config)?; - let task_rx = TaskChannel::init(GlobalConfig::instance().query.tasks_channel_len)?; // Init user manager. // Builtin users and udfs are created here. { @@ -177,7 +175,7 @@ impl GlobalServices { if config.log.history.on { GlobalHistoryLog::init(config).await?; } - TaskService::init(task_rx, config)?; + TaskService::init(config)?; GLOBAL_QUERIES_MANAGER.set_gc_handle(memory_gc_handle); diff --git a/src/query/service/src/task/meta.rs b/src/query/service/src/task/meta.rs index d4215d9538263..364ce3b0f8acd 100644 --- a/src/query/service/src/task/meta.rs +++ b/src/query/service/src/task/meta.rs @@ -67,6 +67,10 @@ impl TaskMetaHandle { } } + pub fn meta_client(&self) -> &Arc { + &self.meta_client + } + pub async fn acquire(&self, meta_key: &str, interval: u64) -> Result> { let acquired_guard = Semaphore::new_acquired( self.meta_client.clone(), diff --git a/src/query/service/src/task/mod.rs b/src/query/service/src/task/mod.rs index a6d9ab8d99131..ca4084fdfc1d8 100644 --- a/src/query/service/src/task/mod.rs +++ b/src/query/service/src/task/mod.rs @@ -13,7 +13,7 @@ // limitations under the License. 
mod meta; +pub mod service; mod session; -pub mod task_service; -pub use task_service::TaskService; +pub use service::TaskService; diff --git a/src/query/service/src/task/task_service.rs b/src/query/service/src/task/service.rs similarity index 91% rename from src/query/service/src/task/task_service.rs rename to src/query/service/src/task/service.rs index 96f3c3ac889b0..e7f29a9523c6b 100644 --- a/src/query/service/src/task/task_service.rs +++ b/src/query/service/src/task/service.rs @@ -14,6 +14,7 @@ use std::collections::BTreeMap; use std::collections::HashMap; +use std::ops::Deref; use std::str::FromStr; use std::sync::atomic::AtomicBool; use std::sync::atomic::Ordering; @@ -33,10 +34,11 @@ use databend_common_config::InnerConfig; use databend_common_exception::ErrorCode; use databend_common_exception::Result; use databend_common_expression::DataBlock; -use databend_common_management::task::TaskChannel; -use databend_common_management::task::TaskMessage; +use databend_common_meta_api::kv_pb_api::decode_seqv; use databend_common_meta_app::principal::task::AfterTaskInfo; +use databend_common_meta_app::principal::task::TaskMessage; use databend_common_meta_app::principal::task::EMPTY_TASK_ID; +use databend_common_meta_app::principal::task_message_ident::TaskMessageIdent; use databend_common_meta_app::principal::AfterTaskState; use databend_common_meta_app::principal::ScheduleOptions; use databend_common_meta_app::principal::ScheduleType; @@ -49,13 +51,18 @@ use databend_common_meta_app::principal::UserInfo; use databend_common_meta_app::principal::WarehouseOptions; use databend_common_meta_app::tenant::Tenant; use databend_common_meta_client::MetaGrpcClient; +use databend_common_meta_kvapi::kvapi::Key; +use databend_common_meta_types::protobuf::WatchRequest; +use databend_common_meta_types::protobuf::WatchResponse; +use databend_common_meta_types::MetaError; use databend_common_sql::Planner; use databend_common_users::UserApiProvider; use 
databend_common_users::BUILTIN_ROLE_ACCOUNT_ADMIN; use futures_util::lock::Mutex; +use futures_util::stream::BoxStream; use futures_util::TryStreamExt; -use tokio::sync::mpsc::Receiver; use tokio::time::sleep; +use tokio_stream::StreamExt; use tokio_util::sync::CancellationToken; use crate::interpreters::InterpreterFactory; @@ -64,10 +71,15 @@ use crate::task::meta::TaskMetaHandle; use crate::task::session::create_session; use crate::task::session::get_task_user; +pub type TaskMessageStream = BoxStream<'static, Result<(String, TaskMessage)>>; + +/// - Multiple Query nodes can send the same Task at the same time, and the key will be distinguished by node_id +/// - Each Query node will grab the corresponding task through acquire task_name. +/// - Tasks with the same task_name cannot be executed at the same time. pub struct TaskService { initialized: AtomicBool, interval: u64, - tenant_id: String, + tenant: Tenant, node_id: String, cluster_id: String, meta_handle: TaskMetaHandle, @@ -80,7 +92,7 @@ impl TaskService { } pub async fn prepare(&self) -> Result<()> { - let prepare_key = format!("{}/task_run_prepare/lock", self.tenant_id); + let prepare_key = format!("{}/task_run_prepare/lock", self.tenant.tenant_name()); let _guard = self.meta_handle.acquire(&prepare_key, 0).await?; let create_db = "CREATE DATABASE IF NOT EXISTS system_task"; self.execute_sql(None, create_db).await?; @@ -128,7 +140,7 @@ impl TaskService { self.initialized.store(true, Ordering::SeqCst); } - pub fn init(mut task_rx: Receiver, cfg: &InnerConfig) -> Result<()> { + pub fn init(cfg: &InnerConfig) -> Result<()> { let tenant = cfg.query.tenant_id.clone(); let meta_client = MetaGrpcClient::try_new(&cfg.meta.to_meta_grpc_client_conf()) .map_err(|_e| ErrorCode::Internal("Create MetaClient failed for Task"))?; @@ -141,7 +153,7 @@ impl TaskService { let instance = TaskService { initialized: AtomicBool::new(false), interval: 2, - tenant_id: cfg.query.tenant_id.tenant_name().to_string(), + tenant: 
cfg.query.tenant_id.clone(), node_id: cfg.query.node_id.clone(), cluster_id: cfg.query.cluster_id.clone(), meta_handle, @@ -172,7 +184,10 @@ impl TaskService { // after task state => [task1 name, task2 name], if task succeeded then remove task name on after task state let task_deps = Arc::new(Mutex::new(HashMap::::new())); - while let Some(task) = task_rx.recv().await { + let mut steam = task_service.subscribe().await?; + + while let Some(result) = steam.next().await { + let (key, task) = result?; match task { TaskMessage::ScheduleTask(mut task) => { debug_assert!(task.schedule_options.is_some()); @@ -218,7 +233,7 @@ impl TaskService { loop { tokio::select! { _ = sleep(duration) => { - let _ = TaskChannel::instance().send(TaskMessage::ExecuteTask(task.clone())).await; + task_mgr.send(TaskMessage::ExecuteTask(task.clone())).await?; } _ = child_token.cancelled() => { break; @@ -230,6 +245,7 @@ impl TaskService { }); } ScheduleType::CronType => { + let task_mgr = task_mgr.clone(); // SAFETY: check on CreateTask let cron_expr = schedule_options.cron.as_ref().unwrap(); let tz = schedule_options.time_zone.as_ref().unwrap().parse::().unwrap(); @@ -248,13 +264,14 @@ impl TaskService { task.next_scheduled_at = Some(Utc::now() + duration); tokio::select! 
{ _ = sleep(duration) => { - let _ = TaskChannel::instance().send(TaskMessage::ExecuteTask(task.clone())).await; + task_mgr.send(TaskMessage::ExecuteTask(task.clone())).await?; } _ = child_token.cancelled() => { break; } } } + Result::Ok(()) }); } } @@ -262,6 +279,13 @@ impl TaskService { } } TaskMessage::ExecuteTask(task) => { + let may_permit = task_service + .meta_handle + .acquire_with_guard(&format!("{}/lock", key), task_service.interval) + .await?; + let Some(_guard) = may_permit else { + continue; + }; let task_name = task.task_name.clone(); let task_service = TaskService::instance(); @@ -286,6 +310,7 @@ impl TaskService { let task_dep_infos = task_dep_infos.clone(); let task_deps = task_deps.clone(); let owner = Self::get_task_owner(&task, &tenant).await?; + task_mgr.execute_accept(&key).await?; runtime .try_spawn(async move { while task_run.attempt_number >= 0 { @@ -307,7 +332,7 @@ impl TaskService { let dep_task = task_mgr.describe_task(dep_name).await?? .ok_or_else(|| ErrorCode::UnknownTask(dep_name.clone()))?; - let _ = TaskChannel::instance().send(TaskMessage::ExecuteTask(dep_task)).await; + task_mgr.send(TaskMessage::ExecuteTask(dep_task)).await?; } } } @@ -389,6 +414,37 @@ impl TaskService { Ok(()) } + pub async fn subscribe(&self) -> Result { + let (min, max) = TaskMessage::prefix_range(); + let left = TaskMessageIdent::new(&self.tenant, min).to_string_key(); + let right = TaskMessageIdent::new(&self.tenant, max).to_string_key(); + + let watch = WatchRequest::new(left, Some(right)).with_initial_flush(true); + let stream = self + .meta_handle + .meta_client() + .watch_with_initialization(watch) + .await?; + + Ok(Box::pin(stream.filter_map(|result| { + result + .map(Self::decode) + .map_err(|_| ErrorCode::MetaServiceError("task watch-stream closed")) + .flatten() + .transpose() + }))) + } + + fn decode(resp: WatchResponse) -> Result> { + let Some((key, _, Some(value))) = resp.unpack() else { + return Ok(None); + }; + let message = decode_seqv::(value, 
|| format!("decode value of {}", key)) + .map_err(MetaError::from)?; + + Ok(Some((key, TaskMessage::clone(message.deref())))) + } + async fn get_task_owner(task: &Task, tenant: &Tenant) -> Result { UserApiProvider::instance() .get_user( @@ -444,7 +500,7 @@ impl TaskService { (other_user, None) } else { ( - get_task_user(&self.tenant_id, &self.cluster_id), + get_task_user(self.tenant.tenant_name(), &self.cluster_id), Some(BUILTIN_ROLE_ACCOUNT_ADMIN.to_string()), ) }; diff --git a/src/query/users/src/user_api.rs b/src/query/users/src/user_api.rs index 15967fd738178..9151644e12c6b 100644 --- a/src/query/users/src/user_api.rs +++ b/src/query/users/src/user_api.rs @@ -23,8 +23,6 @@ use databend_common_config::GlobalConfig; use databend_common_exception::ErrorCode; use databend_common_exception::Result; use databend_common_grpc::RpcClientConf; -use databend_common_management::task::TaskChannel; -use databend_common_management::task::TaskMessage; use databend_common_management::task::TaskMgr; use databend_common_management::udf::UdfMgr; use databend_common_management::ClientSessionMgr; @@ -41,21 +39,17 @@ use databend_common_management::StageApi; use databend_common_management::StageMgr; use databend_common_management::UserApi; use databend_common_management::UserMgr; -use databend_common_meta_api::kv_pb_api::KVPbApi; use databend_common_meta_app::principal::AuthInfo; use databend_common_meta_app::principal::RoleInfo; -use databend_common_meta_app::principal::TaskIdent; use databend_common_meta_app::principal::UserDefinedFunction; use databend_common_meta_app::tenant::Tenant; use databend_common_meta_app::tenant::TenantQuota; use databend_common_meta_cache::Cache; use databend_common_meta_kvapi::kvapi; -use databend_common_meta_kvapi::kvapi::DirName; use databend_common_meta_store::MetaStore; use databend_common_meta_store::MetaStoreProvider; use databend_common_meta_types::MatchSeq; use databend_common_meta_types::MetaError; -use futures::stream::StreamExt; use 
log::debug; use crate::builtin::BuiltIn; @@ -144,22 +138,6 @@ impl UserApiProvider { user_mgr.add_role(tenant, public, true).await?; } - { - let task_tx = TaskChannel::instance(); - let key = DirName::new(TaskIdent::new(tenant, "")); - let mut stream = user_mgr.client.list_pb_values(&key).await?; - - while let Some(task) = stream.next().await { - let task = task?; - - if task.schedule_options.is_some() { - let _ = task_tx.send(TaskMessage::ScheduleTask(task)).await; - } else if !task.after.is_empty() { - let _ = task_tx.send(TaskMessage::AfterTask(task)).await; - } - } - } - Ok(Arc::new(user_mgr)) } From c42aac3c92dc4182b8c90d0db503007063f46775 Mon Sep 17 00:00:00 2001 From: Kould Date: Fri, 11 Jul 2025 13:29:32 +0800 Subject: [PATCH 04/25] refactor: Using SQL to simplify DAG --- src/meta/app/src/principal/mod.rs | 1 - src/meta/app/src/principal/task.rs | 34 --- .../src/task_from_to_protobuf_impl.rs | 14 +- src/query/management/src/task/task_mgr.rs | 5 +- src/query/service/src/task/meta.rs | 6 +- src/query/service/src/task/service.rs | 199 +++++++++++------- src/query/service/src/task/session.rs | 4 +- 7 files changed, 139 insertions(+), 124 deletions(-) diff --git a/src/meta/app/src/principal/mod.rs b/src/meta/app/src/principal/mod.rs index 51361d1c036df..b0402f5eefe27 100644 --- a/src/meta/app/src/principal/mod.rs +++ b/src/meta/app/src/principal/mod.rs @@ -89,7 +89,6 @@ pub use role_info::RoleInfo; pub use role_info::RoleInfoSerdeError; pub use stage_file_ident::StageFileIdent; pub use stage_file_path::StageFilePath; -pub use task::AfterTaskState; pub use task::ScheduleOptions; pub use task::ScheduleType; pub use task::State; diff --git a/src/meta/app/src/principal/task.rs b/src/meta/app/src/principal/task.rs index 2dc73cf277512..6929e3b3dcc34 100644 --- a/src/meta/app/src/principal/task.rs +++ b/src/meta/app/src/principal/task.rs @@ -13,8 +13,6 @@ // limitations under the License. 
use std::collections::BTreeMap; -use std::collections::HashSet; -use std::sync::Arc; use chrono::DateTime; use chrono::Utc; @@ -138,35 +136,3 @@ impl TaskMessage { (0, 1) } } - -#[derive(Debug, Clone, PartialEq, Eq, Hash)] -pub struct AfterTaskInfo { - pub afters: Arc>, -} - -pub struct AfterTaskState { - waiting: HashSet, -} - -impl From<&Task> for AfterTaskInfo { - fn from(value: &Task) -> Self { - AfterTaskInfo { - afters: Arc::new(value.after.clone()), - } - } -} - -impl AfterTaskState { - pub fn completed_task(&mut self, task_name: &str) -> bool { - self.waiting.remove(task_name); - self.waiting.is_empty() - } -} - -impl From<&AfterTaskInfo> for AfterTaskState { - fn from(value: &AfterTaskInfo) -> Self { - Self { - waiting: HashSet::from_iter(value.afters.to_vec()), - } - } -} diff --git a/src/meta/proto-conv/src/task_from_to_protobuf_impl.rs b/src/meta/proto-conv/src/task_from_to_protobuf_impl.rs index 577317727b31c..bc8ea4558330d 100644 --- a/src/meta/proto-conv/src/task_from_to_protobuf_impl.rs +++ b/src/meta/proto-conv/src/task_from_to_protobuf_impl.rs @@ -64,8 +64,7 @@ impl FromToProto for mt::Task { } } }; - - + let warehouse = p.warehouse_options.as_ref().map(|w| mt::WarehouseOptions { warehouse: w.warehouse.clone(), using_warehouse_size: w.using_warehouse_size.clone(), @@ -114,10 +113,13 @@ impl FromToProto for mt::Task { schedule_type: s.schedule_type as i32, milliseconds_interval: s.milliseconds_interval, }), - warehouse_options: self.warehouse_options.as_ref().map(|w| pb::WarehouseOptions { - warehouse: w.warehouse.clone(), - using_warehouse_size: w.using_warehouse_size.clone(), - }), + warehouse_options: self + .warehouse_options + .as_ref() + .map(|w| pb::WarehouseOptions { + warehouse: w.warehouse.clone(), + using_warehouse_size: w.using_warehouse_size.clone(), + }), next_scheduled_at: match &self.next_scheduled_at { None => None, Some(d) => Some(d.to_pb()?), diff --git a/src/query/management/src/task/task_mgr.rs 
b/src/query/management/src/task/task_mgr.rs index fd4c558f23493..ddb74c1de1ed2 100644 --- a/src/query/management/src/task/task_mgr.rs +++ b/src/query/management/src/task/task_mgr.rs @@ -206,6 +206,8 @@ impl TaskMgr { let req = UpsertPB::delete(key).with(MatchSeq::GE(1)); let res = self.kv_api.upsert_pb(&req).await?; + self.send(TaskMessage::DeleteTask(task_name.to_string())) + .await?; if res.is_changed() { Ok(res.prev.as_ref().map(|prev| Task::clone(prev))) } else { @@ -313,8 +315,7 @@ impl TaskMgr { if !task.after.is_empty() { self.send(TaskMessage::AfterTask(task)).await?; } else if task.schedule_options.is_some() && !without_schedule { - self.send(TaskMessage::ScheduleTask(task)) - .await?; + self.send(TaskMessage::ScheduleTask(task)).await?; } Ok(Ok(())) diff --git a/src/query/service/src/task/meta.rs b/src/query/service/src/task/meta.rs index 364ce3b0f8acd..8f19a9f9e9306 100644 --- a/src/query/service/src/task/meta.rs +++ b/src/query/service/src/task/meta.rs @@ -71,7 +71,7 @@ impl TaskMetaHandle { &self.meta_client } - pub async fn acquire(&self, meta_key: &str, interval: u64) -> Result> { + pub async fn acquire(&self, meta_key: &str, interval_millis: u64) -> Result> { let acquired_guard = Semaphore::new_acquired( self.meta_client.clone(), meta_key, @@ -81,7 +81,7 @@ impl TaskMetaHandle { ) .await .map_err(|_e| "acquire semaphore failed from TaskService")?; - if interval == 0 { + if interval_millis == 0 { return Ok(Some(acquired_guard)); } if match self @@ -92,7 +92,7 @@ impl TaskMetaHandle { Some(v) => { let last: u64 = serde_json::from_slice(&v.data)?; chrono::Utc::now().timestamp_millis() as u64 - - Duration::from_secs(interval).as_millis() as u64 + - Duration::from_millis(interval_millis).as_millis() as u64 > last } None => true, diff --git a/src/query/service/src/task/service.rs b/src/query/service/src/task/service.rs index e7f29a9523c6b..22b038578b4c2 100644 --- a/src/query/service/src/task/service.rs +++ b/src/query/service/src/task/service.rs @@ -21,6 
+21,7 @@ use std::sync::atomic::Ordering; use std::sync::Arc; use std::time::Duration; +use async_stream::stream; use chrono::DateTime; use chrono::Utc; use chrono_tz::Tz; @@ -35,11 +36,9 @@ use databend_common_exception::ErrorCode; use databend_common_exception::Result; use databend_common_expression::DataBlock; use databend_common_meta_api::kv_pb_api::decode_seqv; -use databend_common_meta_app::principal::task::AfterTaskInfo; use databend_common_meta_app::principal::task::TaskMessage; use databend_common_meta_app::principal::task::EMPTY_TASK_ID; use databend_common_meta_app::principal::task_message_ident::TaskMessageIdent; -use databend_common_meta_app::principal::AfterTaskState; use databend_common_meta_app::principal::ScheduleOptions; use databend_common_meta_app::principal::ScheduleType; use databend_common_meta_app::principal::State; @@ -58,14 +57,16 @@ use databend_common_meta_types::MetaError; use databend_common_sql::Planner; use databend_common_users::UserApiProvider; use databend_common_users::BUILTIN_ROLE_ACCOUNT_ADMIN; -use futures_util::lock::Mutex; +use futures::Stream; use futures_util::stream::BoxStream; use futures_util::TryStreamExt; +use itertools::Itertools; use tokio::time::sleep; use tokio_stream::StreamExt; use tokio_util::sync::CancellationToken; use crate::interpreters::InterpreterFactory; +use crate::schedulers::ServiceQueryExecutor; use crate::sessions::QueryContext; use crate::task::meta::TaskMetaHandle; use crate::task::session::create_session; @@ -97,7 +98,7 @@ impl TaskService { let create_db = "CREATE DATABASE IF NOT EXISTS system_task"; self.execute_sql(None, create_db).await?; - let create_table = "CREATE TABLE IF NOT EXISTS system_task.task_run(\ + let create_task_run_table = "CREATE TABLE IF NOT EXISTS system_task.task_run(\ task_id UINT64,\ task_name TEXT NOT NULL,\ query_text TEXT NOT NULL,\ @@ -131,7 +132,13 @@ impl TaskService { last_suspended_at TIMESTAMP,\ suspend_task_after_num_failures INTEGER\ );"; - 
self.execute_sql(None, create_table).await?; + self.execute_sql(None, create_task_run_table).await?; + + let create_task_after_table = "CREATE TABLE IF NOT EXISTS system_task.task_after(\ + task_name STRING NOT NULL,\ + next_task STRING NOT NULL\ + );"; + self.execute_sql(None, create_task_after_table).await?; Ok(()) } @@ -152,7 +159,7 @@ impl TaskService { let instance = TaskService { initialized: AtomicBool::new(false), - interval: 2, + interval: 200, tenant: cfg.query.tenant_id.clone(), node_id: cfg.query.node_id.clone(), cluster_id: cfg.query.cluster_id.clone(), @@ -175,20 +182,19 @@ impl TaskService { let mut scheduled_tasks: HashMap = HashMap::new(); let task_mgr = UserApiProvider::instance().task_api(&tenant); - // If `task_c` is defined as `AFTER task_a, task_b`, then: - // - task_after_infos["task_c"] = AfterTaskInfo { afters: ["task_a, task_b"] } - let mut task_after_infos = HashMap::::new(); - // - task_dep_infos["task_a"]["task_c"] = AfterTaskInfo { ... } - // - task_dep_infos["task_b"]["task_c"] = AfterTaskInfo { ... } - let mut task_dep_infos = HashMap::>::new(); - // after task state => [task1 name, task2 name], if task succeeded then remove task name on after task state - let task_deps = Arc::new(Mutex::new(HashMap::::new())); - let mut steam = task_service.subscribe().await?; + let fn_lock = async |task_service: &TaskService, key: &str| { + task_service + .meta_handle + .acquire_with_guard(&format!("{}/lock", key), task_service.interval) + .await + }; + while let Some(result) = steam.next().await { - let (key, task) = result?; - match task { + let (task_key, task_message) = result?; + match task_message { + // ScheduleTask is always monitored by all Query nodes, and ExecuteTask is sent serially to avoid repeated sending. 
TaskMessage::ScheduleTask(mut task) => { debug_assert!(task.schedule_options.is_some()); if let Some(schedule_options) = &task.schedule_options { @@ -204,6 +210,7 @@ impl TaskService { let token = CancellationToken::new(); let child_token = token.child_token(); let task_name = task.task_name.to_string(); + let task_name_clone = task_name.clone(); let task_service = TaskService::instance(); task_service.update_or_create_task_run(&TaskRun { @@ -233,6 +240,9 @@ impl TaskService { loop { tokio::select! { _ = sleep(duration) => { + let Some(_guard) = fn_lock(&task_service, &task_name).await? else { + continue; + }; task_mgr.send(TaskMessage::ExecuteTask(task.clone())).await?; } _ = child_token.cancelled() => { @@ -250,6 +260,7 @@ impl TaskService { let cron_expr = schedule_options.cron.as_ref().unwrap(); let tz = schedule_options.time_zone.as_ref().unwrap().parse::().unwrap(); let schedule = Schedule::from_str(cron_expr).unwrap(); + let task_name = task_name.clone(); runtime .spawn(async move { @@ -264,6 +275,9 @@ impl TaskService { task.next_scheduled_at = Some(Utc::now() + duration); tokio::select! { _ = sleep(duration) => { + let Some(_guard) = fn_lock(&task_service, &task_name).await? else { + continue; + }; task_mgr.send(TaskMessage::ExecuteTask(task.clone())).await?; } _ = child_token.cancelled() => { @@ -275,15 +289,11 @@ impl TaskService { }); } } - let _ = scheduled_tasks.insert(task_name, token); + let _ = scheduled_tasks.insert(task_name_clone, token); } } TaskMessage::ExecuteTask(task) => { - let may_permit = task_service - .meta_handle - .acquire_with_guard(&format!("{}/lock", key), task_service.interval) - .await?; - let Some(_guard) = may_permit else { + let Some(_guard) = fn_lock(&task_service, &task.task_name).await? 
else { continue; }; let task_name = task.task_name.clone(); @@ -307,10 +317,8 @@ impl TaskService { let task_mgr = task_mgr.clone(); let tenant = tenant.clone(); - let task_dep_infos = task_dep_infos.clone(); - let task_deps = task_deps.clone(); let owner = Self::get_task_owner(&task, &tenant).await?; - task_mgr.execute_accept(&key).await?; + runtime .try_spawn(async move { while task_run.attempt_number >= 0 { @@ -322,21 +330,16 @@ impl TaskService { task_run.completed_at = Some(Utc::now()); task_service.update_or_create_task_run(&task_run).await?; - if let Some(info) = task_dep_infos.get(&task_name) { - let mut guard = task_deps.lock().await; + let mut stream = Box::pin(task_service.check_next_tasks(&task_name)); - for (dep_name, dep_info) in info { - if let Some(after_state) = guard.get_mut(dep_info) { - if after_state.completed_task(&task_name) { - *after_state = AfterTaskState::from(dep_info); - let dep_task = task_mgr.describe_task(dep_name).await?? - .ok_or_else(|| ErrorCode::UnknownTask(dep_name.clone()))?; + while let Some(next_task) = stream.next().await { + let next_task = next_task?; + let dep_task = task_mgr.describe_task(&next_task).await?? + .ok_or_else(|| ErrorCode::UnknownTask(next_task))?; - task_mgr.send(TaskMessage::ExecuteTask(dep_task)).await?; - } - } - } + task_mgr.send(TaskMessage::ExecuteTask(dep_task)).await?; } + task_mgr.execute_accept(&task_key).await?; break; } Err(err) => { @@ -349,6 +352,7 @@ impl TaskService { task_run.run_id = Self::make_run_id(); } } + task_mgr.execute_accept(&task_key).await?; task_mgr.alter_task(&task.task_name, &AlterTaskOptions::Suspend).await??; } @@ -357,53 +361,22 @@ impl TaskService { }, None)?; } TaskMessage::DeleteTask(task_name) => { - if let Some(deps) = task_dep_infos.get(&task_name) { - if !deps.is_empty() { - continue; - // TODO: return delete failed error - } + if let Some(_guard) = fn_lock(&task_service, &task_name).await? 
{ + task_service.clean_task_afters(&task_name).await?; } if let Some(token) = scheduled_tasks.remove(&task_name) { token.cancel(); } } TaskMessage::AfterTask(task) => { + let Some(_guard) = fn_lock(&task_service, &task.task_name).await? else { + continue; + }; match task.status { Status::Suspended => continue, Status::Started => (), } - // after info - if let Some(info) = task_after_infos.remove(&task.task_name) { - // dep info - for after in info.afters.iter() { - if let Some(dep_tasks) = task_dep_infos.get_mut(after) { - dep_tasks.remove(&task.task_name); - } - } - task_deps - .lock() - .await - .remove(&info); - } - if task.after.is_empty() { - continue; - } - let task_name = task.task_name.clone(); - let info = AfterTaskInfo::from(&task); - // after info - task_after_infos.insert(task_name.clone(), info.clone()); - // dep info - for after_task in task.after.iter() { - task_dep_infos - .entry(after_task.clone()) - .or_default() - .insert(task_name.clone(), info.clone()); - - task_deps - .lock() - .await - .insert(info.clone(), AfterTaskState::from(&info)); - } + task_service.update_task_afters(&task).await?; } } } @@ -599,6 +572,75 @@ impl TaskService { Ok(()) } + pub fn check_next_tasks<'a>( + &'a self, + task_name: &'a str, + ) -> impl Stream> + '_ { + stream! 
{ + let check = format!("WITH latest_task_run AS ( \ + SELECT \ + ranked.task_name, \ + ranked.state, \ + ranked.completed_at \ + FROM ( \ + SELECT \ + task_name, \ + state, \ + completed_at, \ + ROW_NUMBER() OVER (PARTITION BY task_name ORDER BY completed_at DESC) AS rn \ + FROM system_task.task_run \ + ) AS ranked \ + WHERE ranked.rn = 1 \ + ) \ + SELECT ta.next_task \ + FROM system_task.task_after ta \ + JOIN system_task.task_after ta2 \ + ON ta.next_task = ta2.next_task \ + LEFT JOIN latest_task_run tr \ + ON ta2.task_name = tr.task_name \ + AND tr.state = 'SUCCEEDED' \ + AND tr.completed_at IS NOT NULL \ + WHERE ta.task_name = '{task_name}' \ + GROUP BY ta.next_task \ + HAVING COUNT(DISTINCT ta2.task_name) = COUNT(DISTINCT tr.task_name);"); + if let Some(next_task) = self.execute_sql(None, &check).await?.first().and_then(|block| block.columns()[0].index(0).and_then(|scalar| { scalar.as_string().map(|s| s.to_string()) })) { + yield Result::Ok(next_task); + } + } + } + + pub async fn clean_task_afters(&self, task_name: &str) -> Result<()> { + self.execute_sql( + None, + &format!( + "DELETE FROM system_task.task_after WHERE next_task = '{}'", + task_name + ), + ) + .await?; + + Ok(()) + } + + pub async fn update_task_afters(&self, task: &Task) -> Result<()> { + self.clean_task_afters(&task.task_name).await?; + let values = task + .after + .iter() + .map(|after| format!("('{}', '{}')", after, task.task_name)) + .join(", "); + self.execute_sql( + None, + &format!( + "INSERT INTO system_task.task_after (task_name, next_task) VALUES {}", + values + ), + ) + .await?; + + Ok(()) + } + fn task_run2insert(task_run: &TaskRun) -> Result { let task = &task_run.task; @@ -950,7 +992,12 @@ impl TaskService { async fn execute_sql(&self, other_user: Option, sql: &str) -> Result> { let context = self.create_context(other_user).await?; - let mut planner = Planner::new(context.clone()); + let mut planner = Planner::new_with_query_executor( + context.clone(), + 
Arc::new(ServiceQueryExecutor::new(QueryContext::create_from( + context.as_ref(), + ))), + ); let (plan, _) = planner.plan_sql(sql).await?; let executor = InterpreterFactory::get(context.clone(), &plan).await?; let stream = executor.execute(context).await?; diff --git a/src/query/service/src/task/session.rs b/src/query/service/src/task/session.rs index 1b364d619735c..dbed311cb993b 100644 --- a/src/query/service/src/task/session.rs +++ b/src/query/service/src/task/session.rs @@ -42,8 +42,8 @@ pub async fn create_session( restricted_role: Option, ) -> Result> { let session_manager = SessionManager::instance(); - let dummy_session = session_manager.create_session(SessionType::Dummy).await?; - let session = session_manager.register_session(dummy_session)?; + let session = session_manager.create_session(SessionType::MySQL).await?; + let session = session_manager.register_session(session)?; session.set_authed_user(user, restricted_role).await?; Ok(session) } From 5fe7a8bca00357a08491c60c8d897dd68feaf971 Mon Sep 17 00:00:00 2001 From: Kould Date: Fri, 11 Jul 2025 14:17:47 +0800 Subject: [PATCH 05/25] chore: fix meta_store init --- Cargo.lock | 1 - src/query/service/src/global_services.rs | 2 +- src/query/service/src/task/service.rs | 13 +++++++++---- src/query/users/Cargo.toml | 1 - 4 files changed, 10 insertions(+), 7 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 37e6bcb3a3e82..e4de983dbcaa6 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4790,7 +4790,6 @@ dependencies = [ "databend-common-meta-store", "databend-common-meta-types", "enumflags2", - "futures", "itertools 0.13.0", "jwt-simple", "log", diff --git a/src/query/service/src/global_services.rs b/src/query/service/src/global_services.rs index a991904ad1483..9fcbfdf41c3d5 100644 --- a/src/query/service/src/global_services.rs +++ b/src/query/service/src/global_services.rs @@ -175,7 +175,7 @@ impl GlobalServices { if config.log.history.on { GlobalHistoryLog::init(config).await?; } - TaskService::init(config)?; 
+ TaskService::init(config).await?; GLOBAL_QUERIES_MANAGER.set_gc_handle(memory_gc_handle); diff --git a/src/query/service/src/task/service.rs b/src/query/service/src/task/service.rs index 22b038578b4c2..e19f79d41189b 100644 --- a/src/query/service/src/task/service.rs +++ b/src/query/service/src/task/service.rs @@ -49,8 +49,8 @@ use databend_common_meta_app::principal::UserIdentity; use databend_common_meta_app::principal::UserInfo; use databend_common_meta_app::principal::WarehouseOptions; use databend_common_meta_app::tenant::Tenant; -use databend_common_meta_client::MetaGrpcClient; use databend_common_meta_kvapi::kvapi::Key; +use databend_common_meta_store::MetaStoreProvider; use databend_common_meta_types::protobuf::WatchRequest; use databend_common_meta_types::protobuf::WatchResponse; use databend_common_meta_types::MetaError; @@ -147,10 +147,15 @@ impl TaskService { self.initialized.store(true, Ordering::SeqCst); } - pub fn init(cfg: &InnerConfig) -> Result<()> { + pub async fn init(cfg: &InnerConfig) -> Result<()> { let tenant = cfg.query.tenant_id.clone(); - let meta_client = MetaGrpcClient::try_new(&cfg.meta.to_meta_grpc_client_conf()) - .map_err(|_e| ErrorCode::Internal("Create MetaClient failed for Task"))?; + let meta_store = MetaStoreProvider::new(cfg.meta.to_meta_grpc_client_conf()) + .create_meta_store() + .await + .map_err(|e| { + ErrorCode::MetaServiceError(format!("Failed to create meta store: {}", e)) + })?; + let meta_client = meta_store.deref().clone(); let meta_handle = TaskMetaHandle::new(meta_client, cfg.query.node_id.clone()); let runtime = Arc::new(Runtime::with_worker_threads( 4, diff --git a/src/query/users/Cargo.toml b/src/query/users/Cargo.toml index 23ece2e843a86..ab8100292b179 100644 --- a/src/query/users/Cargo.toml +++ b/src/query/users/Cargo.toml @@ -27,7 +27,6 @@ databend-common-meta-kvapi = { workspace = true } databend-common-meta-store = { workspace = true } databend-common-meta-types = { workspace = true } enumflags2 = { 
workspace = true } -futures = { workspace = true } itertools = { workspace = true } jwt-simple = { workspace = true } log = { workspace = true } From b4eb4bd4189dd3afe7a3e80de50d5e0e00023bbf Mon Sep 17 00:00:00 2001 From: Kould Date: Fri, 11 Jul 2025 15:04:36 +0800 Subject: [PATCH 06/25] chore: log error on spawn --- .../service/src/interpreters/task/mod.rs | 5 +- src/query/service/src/task/service.rs | 302 ++++++++++-------- 2 files changed, 177 insertions(+), 130 deletions(-) diff --git a/src/query/service/src/interpreters/task/mod.rs b/src/query/service/src/interpreters/task/mod.rs index 9d18aefbf52f6..3599d664f72d2 100644 --- a/src/query/service/src/interpreters/task/mod.rs +++ b/src/query/service/src/interpreters/task/mod.rs @@ -15,6 +15,7 @@ use std::sync::Arc; use databend_common_cloud_control::task_utils; +use databend_common_config::GlobalConfig; use databend_common_exception::Result; use databend_common_sql::plans::AlterTaskPlan; use databend_common_sql::plans::CreateTaskPlan; @@ -34,9 +35,7 @@ pub(crate) struct TaskInterpreterFactory; impl TaskInterpreterFactory { pub fn build() -> TaskInterpreterImpl { - // TODO: for test - if true { - // if GlobalConfig::instance().query.enable_private_task { + if GlobalConfig::instance().query.enable_private_task { return TaskInterpreterImpl::Private(PrivateTaskInterpreter); } TaskInterpreterImpl::Cloud(CloudTaskInterpreter) diff --git a/src/query/service/src/task/service.rs b/src/query/service/src/task/service.rs index e19f79d41189b..e999a0b6da63e 100644 --- a/src/query/service/src/task/service.rs +++ b/src/query/service/src/task/service.rs @@ -61,6 +61,7 @@ use futures::Stream; use futures_util::stream::BoxStream; use futures_util::TryStreamExt; use itertools::Itertools; +use log::error; use tokio::time::sleep; use tokio_stream::StreamExt; use tokio_util::sync::CancellationToken; @@ -173,73 +174,90 @@ impl TaskService { }; GlobalInstance::set(Arc::new(instance)); - runtime.clone().try_spawn(async move { - let 
task_service = TaskService::instance(); - loop { - if !task_service.initialized.load(Ordering::SeqCst) { - tokio::time::sleep(Duration::from_secs(1)).await; - } else { - break; + runtime.clone().try_spawn( + async move { + let task_service = TaskService::instance(); + loop { + if !task_service.initialized.load(Ordering::SeqCst) { + tokio::time::sleep(Duration::from_secs(1)).await; + } else { + break; + } } - } - task_service.prepare().await?; + if let Err(err) = task_service.prepare().await { + error!("[PRIVATE-TASKS] prepare failed due to {}", err); + } + if let Err(err) = task_service.work(&tenant, runtime).await { + error!("[PRIVATE-TASKS] work failed due to {}", err); + } + }, + None, + )?; + Ok(()) + } - let mut scheduled_tasks: HashMap = HashMap::new(); - let task_mgr = UserApiProvider::instance().task_api(&tenant); + async fn work(&self, tenant: &Tenant, runtime: Arc) -> Result<()> { + let mut scheduled_tasks: HashMap = HashMap::new(); + let task_mgr = UserApiProvider::instance().task_api(tenant); - let mut steam = task_service.subscribe().await?; + let mut steam = self.subscribe().await?; - let fn_lock = async |task_service: &TaskService, key: &str| { - task_service - .meta_handle - .acquire_with_guard(&format!("{}/lock", key), task_service.interval) - .await - }; + let fn_lock = async |task_service: &TaskService, key: &str| { + task_service + .meta_handle + .acquire_with_guard(&format!("{}/lock", key), task_service.interval) + .await + }; - while let Some(result) = steam.next().await { - let (task_key, task_message) = result?; - match task_message { - // ScheduleTask is always monitored by all Query nodes, and ExecuteTask is sent serially to avoid repeated sending.
- TaskMessage::ScheduleTask(mut task) => { - debug_assert!(task.schedule_options.is_some()); - if let Some(schedule_options) = &task.schedule_options { - // clean old task if alter - if let Some(token) = scheduled_tasks.remove(&task.task_name) { - token.cancel(); - } - match task.status { - Status::Suspended => continue, - Status::Started => () - } + while let Some(result) = steam.next().await { + let (task_key, task_message) = result?; + match task_message { + // ScheduleTask is always monitored by all Query nodes, and ExecuteTask is sent serially to avoid repeated sending. + TaskMessage::ScheduleTask(mut task) => { + debug_assert!(task.schedule_options.is_some()); + if let Some(schedule_options) = &task.schedule_options { + // clean old task if alter + if let Some(token) = scheduled_tasks.remove(&task.task_name) { + token.cancel(); + } + match task.status { + Status::Suspended => continue, + Status::Started => (), + } - let token = CancellationToken::new(); - let child_token = token.child_token(); - let task_name = task.task_name.to_string(); - let task_name_clone = task_name.clone(); - let task_service = TaskService::instance(); + let token = CancellationToken::new(); + let child_token = token.child_token(); + let task_name = task.task_name.to_string(); + let task_name_clone = task_name.clone(); + let task_service = TaskService::instance(); - task_service.update_or_create_task_run(&TaskRun { + task_service + .update_or_create_task_run(&TaskRun { task: task.clone(), run_id: Self::make_run_id(), - attempt_number: task.suspend_task_after_num_failures.unwrap_or(0) as i32, + attempt_number: task.suspend_task_after_num_failures.unwrap_or(0) + as i32, state: State::Scheduled, scheduled_at: Utc::now(), completed_at: None, error_code: 0, error_message: None, root_task_id: EMPTY_TASK_ID, - }).await?; - - match schedule_options.schedule_type { - ScheduleType::IntervalType => { - let task_mgr = task_mgr.clone(); - let mut duration = 
Duration::from_secs(schedule_options.interval.unwrap() as u64); - if let Some(ms) = &schedule_options.milliseconds_interval { - duration += Duration::from_millis(*ms); - } + }) + .await?; + + match schedule_options.schedule_type { + ScheduleType::IntervalType => { + let task_mgr = task_mgr.clone(); + let mut duration = + Duration::from_secs(schedule_options.interval.unwrap() as u64); + if let Some(ms) = &schedule_options.milliseconds_interval { + duration += Duration::from_millis(*ms); + } - runtime - .spawn(async move { + runtime + .spawn(async move { + let mut fn_work = async move || { task.next_scheduled_at = Some(Utc::now() + duration); task_mgr.update_task(task.clone()).await??; loop { @@ -255,20 +273,29 @@ impl TaskService { } } } - // TODO: log error Result::Ok(()) - }); - } - ScheduleType::CronType => { - let task_mgr = task_mgr.clone(); - // SAFETY: check on CreateTask - let cron_expr = schedule_options.cron.as_ref().unwrap(); - let tz = schedule_options.time_zone.as_ref().unwrap().parse::().unwrap(); - let schedule = Schedule::from_str(cron_expr).unwrap(); - let task_name = task_name.clone(); - - runtime - .spawn(async move { + }; + if let Err(err) = fn_work().await { + error!("[PRIVATE-TASKS] interval schedule failed due to {}", err); + } + }); + } + ScheduleType::CronType => { + let task_mgr = task_mgr.clone(); + // SAFETY: check on CreateTask + let cron_expr = schedule_options.cron.as_ref().unwrap(); + let tz = schedule_options + .time_zone + .as_ref() + .unwrap() + .parse::() + .unwrap(); + let schedule = Schedule::from_str(cron_expr).unwrap(); + let task_name = task_name.clone(); + + runtime + .spawn(async move { + let mut fn_work = async move || { let upcoming = schedule.upcoming(tz); for next_time in upcoming { @@ -291,58 +318,74 @@ impl TaskService { } } Result::Ok(()) - }); - } + }; + if let Err(err) = fn_work().await { + error!("[PRIVATE-TASKS] cron schedule failed due to {}", err); + } + }); } - let _ = scheduled_tasks.insert(task_name_clone, 
token); } + let _ = scheduled_tasks.insert(task_name_clone, token); } - TaskMessage::ExecuteTask(task) => { - let Some(_guard) = fn_lock(&task_service, &task.task_name).await? else { - continue; - }; - let task_name = task.task_name.clone(); - let task_service = TaskService::instance(); - - // TODO: Meta control query is executed serially through watch key - - let mut task_run = task_service.lasted_task_run(&task_name).await? - .unwrap_or_else(|| TaskRun { - task: task.clone(), - run_id: Self::make_run_id(), - attempt_number: task.suspend_task_after_num_failures.unwrap_or(0) as i32, - state: State::Executing, - scheduled_at: Utc::now(), - completed_at: None, - error_code: 0, - error_message: None, - root_task_id: EMPTY_TASK_ID, - }); - task_service.update_or_create_task_run(&task_run).await?; - - let task_mgr = task_mgr.clone(); - let tenant = tenant.clone(); - let owner = Self::get_task_owner(&task, &tenant).await?; - - runtime - .try_spawn(async move { + } + TaskMessage::ExecuteTask(task) => { + let Some(_guard) = fn_lock(self, &task.task_name).await? else { + continue; + }; + let task_name = task.task_name.clone(); + let task_service = TaskService::instance(); + + let mut task_run = task_service + .lasted_task_run(&task_name) + .await? 
+ .unwrap_or_else(|| TaskRun { + task: task.clone(), + run_id: Self::make_run_id(), + attempt_number: task.suspend_task_after_num_failures.unwrap_or(0) + as i32, + state: State::Executing, + scheduled_at: Utc::now(), + completed_at: None, + error_code: 0, + error_message: None, + root_task_id: EMPTY_TASK_ID, + }); + task_service.update_or_create_task_run(&task_run).await?; + + let task_mgr = task_mgr.clone(); + let tenant = tenant.clone(); + let owner = Self::get_task_owner(&task, &tenant).await?; + + runtime.try_spawn( + async move { + let mut fn_work = async move || { while task_run.attempt_number >= 0 { - let task_result = Self::spawn_task(task.clone(), owner.clone()).await; + let task_result = + Self::spawn_task(task.clone(), owner.clone()).await; match task_result { Ok(()) => { task_run.state = State::Succeeded; task_run.completed_at = Some(Utc::now()); - task_service.update_or_create_task_run(&task_run).await?; + task_service + .update_or_create_task_run(&task_run) + .await?; - let mut stream = Box::pin(task_service.check_next_tasks(&task_name)); + let mut stream = + Box::pin(task_service.check_next_tasks(&task_name)); while let Some(next_task) = stream.next().await { let next_task = next_task?; - let dep_task = task_mgr.describe_task(&next_task).await?? - .ok_or_else(|| ErrorCode::UnknownTask(next_task))?; - - task_mgr.send(TaskMessage::ExecuteTask(dep_task)).await?; + let dep_task = task_mgr + .describe_task(&next_task) + .await?? 
+ .ok_or_else(|| { + ErrorCode::UnknownTask(next_task) + })?; + + task_mgr + .send(TaskMessage::ExecuteTask(dep_task)) + .await?; } task_mgr.execute_accept(&task_key).await?; break; @@ -353,42 +396,47 @@ impl TaskService { task_run.attempt_number -= 1; task_run.error_code = err.code() as i64; task_run.error_message = Some(err.message()); - task_service.update_or_create_task_run(&task_run).await?; + task_service + .update_or_create_task_run(&task_run) + .await?; task_run.run_id = Self::make_run_id(); } } task_mgr.execute_accept(&task_key).await?; - task_mgr.alter_task(&task.task_name, &AlterTaskOptions::Suspend).await??; + task_mgr + .alter_task(&task.task_name, &AlterTaskOptions::Suspend) + .await??; } - // TODO: log error Result::Ok(()) - }, None)?; + }; + if let Err(err) = fn_work().await { + error!("[PRIVATE-TASKS] execute failed due to {}", err); + } + }, + None, + )?; + } + TaskMessage::DeleteTask(task_name) => { + if let Some(_guard) = fn_lock(self, &task_name).await? { + self.clean_task_afters(&task_name).await?; } - TaskMessage::DeleteTask(task_name) => { - if let Some(_guard) = fn_lock(&task_service, &task_name).await? { - task_service.clean_task_afters(&task_name).await?; - } - if let Some(token) = scheduled_tasks.remove(&task_name) { - token.cancel(); - } + if let Some(token) = scheduled_tasks.remove(&task_name) { + token.cancel(); } - TaskMessage::AfterTask(task) => { - let Some(_guard) = fn_lock(&task_service, &task.task_name).await? else { - continue; - }; - match task.status { - Status::Suspended => continue, - Status::Started => (), - } - task_service.update_task_afters(&task).await?; + } + TaskMessage::AfterTask(task) => { + let Some(_guard) = fn_lock(self, &task.task_name).await? 
else { + continue; + }; + match task.status { + Status::Suspended => continue, + Status::Started => (), } + self.update_task_afters(&task).await?; } } - - // TODO: log error - Result::Ok(()) - }, None)?; + } Ok(()) } From d9b45763eea779e72ceae59e0530643cce656c0c Mon Sep 17 00:00:00 2001 From: Kould Date: Mon, 14 Jul 2025 17:33:55 +0800 Subject: [PATCH 07/25] test: add test for private task --- .github/actions/test_tasks/action.yml | 39 +++++ .github/workflows/reuse.linux.yml | 18 +++ src/binaries/query/entry.rs | 4 +- src/query/config/src/config.rs | 45 ++++-- src/query/config/src/inner.rs | 12 +- src/query/config/src/mask.rs | 1 + src/query/management/src/task/task_mgr.rs | 8 +- src/query/service/src/global_services.rs | 4 +- .../service/src/interpreters/task/mod.rs | 2 +- src/query/service/src/task/service.rs | 91 ++++++----- tests/task/private_task.toml | 2 + tests/task/test-private-task.sh | 149 ++++++++++++++++++ 12 files changed, 313 insertions(+), 62 deletions(-) create mode 100644 .github/actions/test_tasks/action.yml create mode 100644 tests/task/private_task.toml create mode 100644 tests/task/test-private-task.sh diff --git a/.github/actions/test_tasks/action.yml b/.github/actions/test_tasks/action.yml new file mode 100644 index 0000000000000..9084b6cd1ef40 --- /dev/null +++ b/.github/actions/test_tasks/action.yml @@ -0,0 +1,39 @@ +name: "Test task for databend query" +description: "Test task for databend query" +runs: + using: "composite" + steps: + - uses: ./.github/actions/setup_test + + - name: Install lsof + shell: bash + run: sudo apt-get update -yq && sudo apt-get install -yq lsof + + - name: Minio Setup for (ubuntu-latest only) + shell: bash + run: | + docker run -d --network host --name minio \ + -e "MINIO_ACCESS_KEY=minioadmin" \ + -e "MINIO_SECRET_KEY=minioadmin" \ + -e "MINIO_ADDRESS=:9900" \ + -v /tmp/data:/data \ + -v /tmp/config:/root/.minio \ + minio/minio server /data + + export AWS_ACCESS_KEY_ID=minioadmin + export 
AWS_SECRET_ACCESS_KEY=minioadmin + export AWS_EC2_METADATA_DISABLED=true + + aws --endpoint-url http://127.0.0.1:9900/ s3 mb s3://testbucket + aws --endpoint-url http://127.0.0.1:9900/ s3 cp tests/data s3://testbucket/data --recursive --no-progress + + - name: Run Task Tests + shell: bash + run: | + bash ./tests/task/test-private-task.sh + + - name: Upload failure + if: failure() + uses: ./.github/actions/artifact_failure + with: + name: test-tasks diff --git a/.github/workflows/reuse.linux.yml b/.github/workflows/reuse.linux.yml index 6b6ffc0e65069..5d7f97fee0282 100644 --- a/.github/workflows/reuse.linux.yml +++ b/.github/workflows/reuse.linux.yml @@ -183,6 +183,24 @@ jobs: - uses: ./.github/actions/test_logs timeout-minutes: 20 + test_tasks: + needs: [ build, check ] + runs-on: + - self-hosted + - X64 + - Linux + - 2c8g + - "${{ inputs.runner_provider }}" + - "${{ inputs.runner_capacity }}" + steps: + - uses: actions/checkout@v4 + - uses: ./.github/actions/setup_license + with: + runner_provider: ${{ inputs.runner_provider }} + type: ${{ inputs.license_type }} + - uses: ./.github/actions/test_tasks + timeout-minutes: 20 + test_meta_cluster: needs: [build, check] runs-on: diff --git a/src/binaries/query/entry.rs b/src/binaries/query/entry.rs index fec1cfdde5559..e2a7f56927ff6 100644 --- a/src/binaries/query/entry.rs +++ b/src/binaries/query/entry.rs @@ -303,7 +303,9 @@ pub async fn start_services(conf: &InnerConfig) -> Result<(), MainError> { } println!(" system history tables: {}", conf.log.history); } - TaskService::instance().initialized(); + if conf.task.on { + TaskService::instance().initialized(); + } println!(); println!( diff --git a/src/query/config/src/config.rs b/src/query/config/src/config.rs index cab5f61b368fe..06178f91cc207 100644 --- a/src/query/config/src/config.rs +++ b/src/query/config/src/config.rs @@ -72,6 +72,7 @@ use super::inner::InnerConfig; use super::inner::LocalConfig as InnerLocalConfig; use super::inner::MetaConfig as 
InnerMetaConfig; use super::inner::QueryConfig as InnerQueryConfig; +use super::inner::TaskConfig as InnerTaskConfig; use crate::builtin::BuiltInConfig; use crate::builtin::UDFConfig; use crate::builtin::UserConfig; @@ -114,6 +115,9 @@ pub struct Config { #[clap(flatten)] pub log: LogConfig, + #[clap(flatten)] + pub task: TaskConfig, + // Meta Service config. #[clap(flatten)] pub meta: MetaConfig, @@ -1937,12 +1941,6 @@ pub struct QueryConfig { #[clap(long, value_name = "VALUE", default_value = "50")] pub max_cached_queries_profiles: usize, - #[clap(long, value_name = "VALUE", default_value = "false")] - pub enable_private_task: bool, - - #[clap(long, value_name = "VALUE", default_value = "1024")] - pub tasks_channel_len: usize, - /// A list of network that not to be checked by network policy. #[clap(long, value_name = "VALUE")] pub network_policy_whitelist: Vec, @@ -2049,8 +2047,6 @@ impl TryInto for QueryConfig { cloud_control_grpc_server_address: self.cloud_control_grpc_server_address, cloud_control_grpc_timeout: self.cloud_control_grpc_timeout, max_cached_queries_profiles: self.max_cached_queries_profiles, - enable_private_task: self.enable_private_task, - tasks_channel_len: self.tasks_channel_len, network_policy_whitelist: self.network_policy_whitelist, settings: self .settings @@ -2164,8 +2160,6 @@ impl From for QueryConfig { cloud_control_grpc_server_address: inner.cloud_control_grpc_server_address, cloud_control_grpc_timeout: inner.cloud_control_grpc_timeout, max_cached_queries_profiles: inner.max_cached_queries_profiles, - enable_private_task: inner.enable_private_task, - tasks_channel_len: inner.tasks_channel_len, network_policy_whitelist: inner.network_policy_whitelist, settings: HashMap::new(), resources_management: None, @@ -2353,6 +2347,34 @@ impl From for LogConfig { } } +#[derive(Clone, Debug, PartialEq, Eq, Default, Serialize, Deserialize, Args)] +#[serde(default)] +pub struct TaskConfig { + #[clap( + long = "private-task-on", value_name = "VALUE", 
default_value = "false", action = ArgAction::Set, num_args = 0..=1, require_equals = true, default_missing_value = "true" + )] + #[serde(rename = "on")] + pub private_task_on: bool, +} + +impl TryInto for TaskConfig { + type Error = ErrorCode; + + fn try_into(self) -> Result { + Ok(InnerTaskConfig { + on: self.private_task_on, + }) + } +} + +impl From for TaskConfig { + fn from(inner: InnerTaskConfig) -> Self { + TaskConfig { + private_task_on: inner.on, + } + } +} + #[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize, Args)] #[serde(default)] pub struct FileLogConfig { @@ -3565,6 +3587,7 @@ mod cache_config_converters { config_file: inner.config_file, query: inner.query.into(), log: inner.log.into(), + task: inner.task.into(), meta: inner.meta.into(), storage: inner.storage.into(), catalog: HiveCatalogConfig::default(), @@ -3589,6 +3612,7 @@ mod cache_config_converters { config_file, query, log, + task, meta, storage, catalog, @@ -3619,6 +3643,7 @@ mod cache_config_converters { config_file, query: query.try_into()?, log: log.try_into()?, + task: task.try_into()?, meta: meta.try_into()?, storage: storage.try_into()?, catalogs, diff --git a/src/query/config/src/inner.rs b/src/query/config/src/inner.rs index 2578de5e54a33..bada25c04b482 100644 --- a/src/query/config/src/inner.rs +++ b/src/query/config/src/inner.rs @@ -53,6 +53,8 @@ pub struct InnerConfig { pub log: LogConfig, + pub task: TaskConfig, + // Meta Service config. 
pub meta: MetaConfig, @@ -252,9 +254,6 @@ pub struct QueryConfig { pub cloud_control_grpc_timeout: u64, pub max_cached_queries_profiles: usize, - pub enable_private_task: bool, - pub tasks_channel_len: usize, - pub network_policy_whitelist: Vec, pub settings: HashMap, @@ -345,8 +344,6 @@ impl Default for QueryConfig { cloud_control_grpc_timeout: 0, data_retention_time_in_days_max: 90, max_cached_queries_profiles: 50, - enable_private_task: false, - tasks_channel_len: 1024, network_policy_whitelist: Vec::new(), settings: HashMap::new(), resources_management: None, @@ -505,6 +502,11 @@ pub enum CatalogConfig { Hive(CatalogHiveConfig), } +#[derive(Clone, Debug, PartialEq, Eq, Default)] +pub struct TaskConfig { + pub on: bool, +} + // TODO: add compat protocol support #[derive(Clone, Debug, PartialEq, Eq)] pub enum ThriftProtocol { diff --git a/src/query/config/src/mask.rs b/src/query/config/src/mask.rs index 85fcb1dcec3b0..e9747e3ac3dea 100644 --- a/src/query/config/src/mask.rs +++ b/src/query/config/src/mask.rs @@ -48,6 +48,7 @@ impl Config { config_file: self.config_file, query: self.query.mask_display(), log: self.log, + task: self.task, meta: self.meta.mask_display(), storage: self.storage.mask_display(), catalog: self.catalog, diff --git a/src/query/management/src/task/task_mgr.rs b/src/query/management/src/task/task_mgr.rs index ddb74c1de1ed2..5c08e089631b3 100644 --- a/src/query/management/src/task/task_mgr.rs +++ b/src/query/management/src/task/task_mgr.rs @@ -20,7 +20,6 @@ use chrono_tz::Tz; use cron::Schedule; use databend_common_ast::ast::AlterTaskOptions; use databend_common_ast::ast::ScheduleOptions; -use databend_common_meta_api::kv_pb_api::errors::PbApiReadError; use databend_common_meta_api::kv_pb_api::KVPbApi; use databend_common_meta_api::kv_pb_api::UpsertPB; use databend_common_meta_app::principal::task; @@ -34,7 +33,6 @@ use databend_common_meta_app::schema::CreateOption; use databend_common_meta_app::tenant::Tenant; use 
databend_common_meta_kvapi::kvapi; use databend_common_meta_kvapi::kvapi::DirName; -use databend_common_meta_kvapi::kvapi::Key; use databend_common_meta_types::MatchSeq; use databend_common_meta_types::MetaError; use databend_common_meta_types::With; @@ -257,10 +255,8 @@ impl TaskMgr { /// mark the corresponding execute task as accepted and delete it from the queue #[async_backtrace::framed] #[fastrace::trace] - pub async fn execute_accept(&self, key: &str) -> Result<(), MetaError> { - let key = TaskMessageIdent::from_str_key(key).map_err(PbApiReadError::from)?; - - let req = UpsertPB::delete(key).with(MatchSeq::GE(1)); + pub async fn execute_accept(&self, key: &TaskMessageIdent) -> Result<(), MetaError> { + let req = UpsertPB::delete(key.clone()).with(MatchSeq::GE(1)); let _ = self.kv_api.upsert_pb(&req).await?; Ok(()) diff --git a/src/query/service/src/global_services.rs b/src/query/service/src/global_services.rs index 9fcbfdf41c3d5..8243b630aa357 100644 --- a/src/query/service/src/global_services.rs +++ b/src/query/service/src/global_services.rs @@ -175,7 +175,9 @@ impl GlobalServices { if config.log.history.on { GlobalHistoryLog::init(config).await?; } - TaskService::init(config).await?; + if config.task.on { + TaskService::init(config).await?; + } GLOBAL_QUERIES_MANAGER.set_gc_handle(memory_gc_handle); diff --git a/src/query/service/src/interpreters/task/mod.rs b/src/query/service/src/interpreters/task/mod.rs index 3599d664f72d2..0f80032ba71f8 100644 --- a/src/query/service/src/interpreters/task/mod.rs +++ b/src/query/service/src/interpreters/task/mod.rs @@ -35,7 +35,7 @@ pub(crate) struct TaskInterpreterFactory; impl TaskInterpreterFactory { pub fn build() -> TaskInterpreterImpl { - if GlobalConfig::instance().query.enable_private_task { + if GlobalConfig::instance().task.on { return TaskInterpreterImpl::Private(PrivateTaskInterpreter); } TaskInterpreterImpl::Cloud(CloudTaskInterpreter) diff --git a/src/query/service/src/task/service.rs 
b/src/query/service/src/task/service.rs index e999a0b6da63e..800afab407ea5 100644 --- a/src/query/service/src/task/service.rs +++ b/src/query/service/src/task/service.rs @@ -202,7 +202,7 @@ impl TaskService { let mut steam = self.subscribe().await?; - let fn_lock = async |task_service: &TaskService, key: &str| { + let fn_lock = async |task_service: &TaskService, key: &TaskMessageIdent| { task_service .meta_handle .acquire_with_guard(&format!("{}/lock", key), task_service.interval) @@ -211,6 +211,7 @@ impl TaskService { while let Some(result) = steam.next().await { let (task_key, task_message) = result?; + let task_key = TaskMessageIdent::new(tenant, task_key); match task_message { // ScheduleTask is always monitored by all Query nodes, and ExecuteTask is sent serially to avoid repeated sending. TaskMessage::ScheduleTask(mut task) => { @@ -263,7 +264,7 @@ impl TaskService { loop { tokio::select! { _ = sleep(duration) => { - let Some(_guard) = fn_lock(&task_service, &task_name).await? else { + let Some(_guard) = fn_lock(&task_service, &task_key).await? else { continue; }; task_mgr.send(TaskMessage::ExecuteTask(task.clone())).await?; @@ -291,7 +292,6 @@ impl TaskService { .parse::() .unwrap(); let schedule = Schedule::from_str(cron_expr).unwrap(); - let task_name = task_name.clone(); runtime .spawn(async move { @@ -307,7 +307,7 @@ impl TaskService { task.next_scheduled_at = Some(Utc::now() + duration); tokio::select! { _ = sleep(duration) => { - let Some(_guard) = fn_lock(&task_service, &task_name).await? else { + let Some(_guard) = fn_lock(&task_service, &task_key).await? else { continue; }; task_mgr.send(TaskMessage::ExecuteTask(task.clone())).await?; @@ -329,7 +329,7 @@ impl TaskService { } } TaskMessage::ExecuteTask(task) => { - let Some(_guard) = fn_lock(self, &task.task_name).await? else { + let Some(_guard) = fn_lock(self, &task_key).await? 
else { continue; }; let task_name = task.task_name.clone(); @@ -376,16 +376,17 @@ impl TaskService { while let Some(next_task) = stream.next().await { let next_task = next_task?; - let dep_task = task_mgr + let next_task = task_mgr .describe_task(&next_task) .await?? .ok_or_else(|| { ErrorCode::UnknownTask(next_task) })?; - - task_mgr - .send(TaskMessage::ExecuteTask(dep_task)) - .await?; + if let Some(_guard) = fn_lock(&TaskService::instance(), &TaskMessageIdent::new(tenant.clone(), format!("check_{}", task_name))).await? { + task_mgr + .send(TaskMessage::ExecuteTask(next_task)) + .await?; + } } task_mgr.execute_accept(&task_key).await?; break; @@ -418,7 +419,7 @@ impl TaskService { )?; } TaskMessage::DeleteTask(task_name) => { - if let Some(_guard) = fn_lock(self, &task_name).await? { + if let Some(_guard) = fn_lock(self, &task_key).await? { self.clean_task_afters(&task_name).await?; } if let Some(token) = scheduled_tasks.remove(&task_name) { @@ -426,7 +427,7 @@ impl TaskService { } } TaskMessage::AfterTask(task) => { - let Some(_guard) = fn_lock(self, &task.task_name).await? else { + let Some(_guard) = fn_lock(self, &task_key).await? else { continue; }; match task.status { @@ -630,32 +631,46 @@ impl TaskService { task_name: &'a str, ) -> impl Stream> + '_ { stream! 
{ - let check = format!("WITH latest_task_run AS ( \ - SELECT \ - ranked.task_name, \ - ranked.state, \ - ranked.completed_at \ - FROM ( \ - SELECT \ - task_name, \ - state, \ - completed_at, \ - ROW_NUMBER() OVER (PARTITION BY task_name ORDER BY completed_at DESC) AS rn \ - FROM system_task.task_run \ - ) AS ranked \ - WHERE ranked.rn = 1 \ - ) \ - SELECT ta.next_task \ - FROM system_task.task_after ta \ - JOIN system_task.task_after ta2 \ - ON ta.next_task = ta2.next_task \ - LEFT JOIN latest_task_run tr \ - ON ta2.task_name = tr.task_name \ - AND tr.state = 'SUCCEEDED' \ - AND tr.completed_at IS NOT NULL \ - WHERE ta.task_name = '{task_name}' \ - GROUP BY ta.next_task \ - HAVING COUNT(DISTINCT ta2.task_name) = COUNT(DISTINCT tr.task_name);"); + let check = format!(" + WITH latest_task_run AS ( + SELECT + task_name, + state, + completed_at + FROM ( + SELECT + task_name, + state, + completed_at, + ROW_NUMBER() OVER (PARTITION BY task_name ORDER BY completed_at DESC) AS rn + FROM system_task.task_run + ) ranked + WHERE rn = 1 +), +next_task_time AS ( + SELECT + task_name AS next_task, + completed_at + FROM latest_task_run +) +SELECT DISTINCT ta.next_task +FROM system_task.task_after ta +WHERE ta.task_name = '{task_name}' + AND NOT EXISTS ( + SELECT 1 + FROM system_task.task_after ta_dep + LEFT JOIN latest_task_run tr + ON ta_dep.task_name = tr.task_name + LEFT JOIN next_task_time nt + ON ta_dep.next_task = nt.next_task + WHERE ta_dep.next_task = ta.next_task + AND ( + tr.task_name IS NULL + OR tr.state != 'SUCCEEDED' + OR tr.completed_at IS NULL + OR (nt.completed_at IS NOT NULL AND tr.completed_at <= nt.completed_at) + ) + );"); if let Some(next_task) = self.execute_sql(None, &check).await?.first().and_then(|block| block.columns()[0].index(0).and_then(|scalar| { scalar.as_string().map(|s| s.to_string()) })) { yield Result::Ok(next_task); } diff --git a/tests/task/private_task.toml b/tests/task/private_task.toml new file mode 100644 index 
0000000000000..6cee0d80ac5b8 --- /dev/null +++ b/tests/task/private_task.toml @@ -0,0 +1,2 @@ +[task] +on = true \ No newline at end of file diff --git a/tests/task/test-private-task.sh b/tests/task/test-private-task.sh new file mode 100644 index 0000000000000..a2db185529645 --- /dev/null +++ b/tests/task/test-private-task.sh @@ -0,0 +1,149 @@ +#!/bin/bash + +set -e + +BUILD_PROFILE="${BUILD_PROFILE:-debug}" +SCRIPT_PATH="$(cd "$(dirname "$0")" >/dev/null 2>&1 && pwd)" +cd "$SCRIPT_PATH/../../" || exit + +echo "Cleaning up previous runs" + +killall -9 databend-query || true +killall -9 databend-meta || true +rm -rf .databend + +echo "Starting Databend Query cluster with 2 nodes enable private task" + +for node in 1 2; do + CONFIG_FILE="./scripts/ci/deploy/config/databend-query-node-${node}.toml" + + echo "Appending history table config to node-${node}" + cat ./tests/task/private_task.toml >> "$CONFIG_FILE" +done + +# Start meta cluster (3 nodes - needed for HA) +echo 'Start Meta service HA cluster(3 nodes)...' + +mkdir -p ./.databend/ + +nohup ./target/${BUILD_PROFILE}/databend-meta -c scripts/ci/deploy/config/databend-meta-node-1.toml >./.databend/meta-1.out 2>&1 & +python3 scripts/ci/wait_tcp.py --timeout 30 --port 9191 + +sleep 1 + +nohup ./target/${BUILD_PROFILE}/databend-meta -c scripts/ci/deploy/config/databend-meta-node-2.toml >./.databend/meta-2.out 2>&1 & +python3 scripts/ci/wait_tcp.py --timeout 30 --port 28202 + +sleep 1 + +nohup ./target/${BUILD_PROFILE}/databend-meta -c scripts/ci/deploy/config/databend-meta-node-3.toml >./.databend/meta-3.out 2>&1 & +python3 scripts/ci/wait_tcp.py --timeout 30 --port 28302 + +sleep 1 + +# Start only 2 query nodes +echo 'Start databend-query node-1' +nohup env RUST_BACKTRACE=1 target/${BUILD_PROFILE}/databend-query -c scripts/ci/deploy/config/databend-query-node-1.toml --internal-enable-sandbox-tenant >./.databend/query-1.out 2>&1 & + +echo "Waiting on node-1..." 
+python3 scripts/ci/wait_tcp.py --timeout 30 --port 9091 + +echo 'Start databend-query node-2' +env "RUST_BACKTRACE=1" nohup target/${BUILD_PROFILE}/databend-query -c scripts/ci/deploy/config/databend-query-node-2.toml --internal-enable-sandbox-tenant >./.databend/query-2.out 2>&1 & + +echo "Waiting on node-2..." +python3 scripts/ci/wait_tcp.py --timeout 30 --port 9092 + +echo "Started 2-node cluster with private task enabled..." + +response=$(curl -s -u root: -XPOST "http://localhost:8000/v1/query" -H 'Content-Type: application/json' -d "{\"sql\": \"CREATE TABLE t1 (c1 int)\"}") +create_table_query_id=$(echo $response | jq -r '.id') +echo "Create Table Query ID: $create_table_query_id" + +response1=$(curl -s -u root: -XPOST "http://localhost:8000/v1/query" -H 'Content-Type: application/json' -d "{\"sql\": \"CREATE TASK my_task_1 WAREHOUSE = 'mywh' SCHEDULE = 1 SECOND AS insert into t1 values(0)\"}") +create_task_1_query_id=$(echo $response1 | jq -r '.id') +echo "Create Task 1 Query ID: $create_task_1_query_id" + +response2=$(curl -s -u root: -XPOST "http://localhost:8000/v1/query" -H 'Content-Type: application/json' -d "{\"sql\": \"CREATE TASK my_task_2 WAREHOUSE = 'mywh' SCHEDULE = 5 SECOND AS insert into t1 values(1)\"}") +create_task_2_query_id=$(echo $response2 | jq -r '.id') +echo "Create Task 2 ID: $create_task_2_query_id" + +response3=$(curl -s -u root: -XPOST "http://localhost:8000/v1/query" -H 'Content-Type: application/json' -d "{\"sql\": \"CREATE TASK my_task_3 WAREHOUSE = 'mywh' AFTER 'my_task_1', 'my_task_2' AS insert into t1 values(2)\"}") +create_task_3_query_id=$(echo $response3 | jq -r '.id') +echo "Create Task 3 ID: $create_task_3_query_id" + +sleep 1 + +response4=$(curl -s -u root: -XPOST "http://localhost:8000/v1/query" -H 'Content-Type: application/json' -d "{\"sql\": \"ALTER TASK my_task_3 RESUME\"}") +alter_task_3_query_id=$(echo $response4 | jq -r '.id') +echo "Resume Task 3 ID: $alter_task_3_query_id" + +sleep 1 + +# Check Task Afters 1 + 
+response5=$(curl -s -u root: -XPOST "http://localhost:8000/v1/query" -H 'Content-Type: application/json' -d "{\"sql\": \"EXECUTE TASK my_task_1\"}") +execute_task_1_query_id=$(echo $response5 | jq -r '.id') +echo "Execute Task 1 ID: $execute_task_1_query_id" + +response6=$(curl -s -u root: -XPOST "http://localhost:8000/v1/query" -H 'Content-Type: application/json' -d "{\"sql\": \"EXECUTE TASK my_task_2\"}") +execute_task_2_query_id=$(echo $response6 | jq -r '.id') +echo "Execute Task 2 ID: $execute_task_2_query_id" + +sleep 5 + +response7=$(curl -s -u root: -XPOST "http://localhost:8000/v1/query" -H 'Content-Type: application/json' -d "{\"sql\": \"SELECT c1 FROM t1 ORDER BY c1\"}") + +actual=$(echo "$response7" | jq -c '.data') +expected='[["0"],["1"],["2"]]' + +if [ "$actual" = "$expected" ]; then + echo "✅ Query result matches expected" +else + echo "❌ Mismatch" + echo "Expected: $expected" + echo "Actual : $actual" + exit 1 +fi + +# Check Task Afters With Schedule Root + +response5=$(curl -s -u root: -XPOST "http://localhost:8000/v1/query" -H 'Content-Type: application/json' -d "{\"sql\": \"EXECUTE TASK my_task_1\"}") +execute_task_1_query_id=$(echo $response5 | jq -r '.id') +echo "Execute Task 1 ID: $execute_task_1_query_id" + +sleep 5 + +response7=$(curl -s -u root: -XPOST "http://localhost:8000/v1/query" -H 'Content-Type: application/json' -d "{\"sql\": \"SELECT c1 FROM t1 ORDER BY c1\"}") + +actual=$(echo "$response7" | jq -c '.data') +expected='[["0"],["0"],["1"],["2"]]' + +if [ "$actual" = "$expected" ]; then + echo "✅ Query result matches expected" +else + echo "❌ Mismatch" + echo "Expected: $expected" + echo "Actual : $actual" + exit 1 +fi + +response8=$(curl -s -u root: -XPOST "http://localhost:8000/v1/query" -H 'Content-Type: application/json' -d "{\"sql\": \"ALTER TASK my_task_2 RESUME\"}") +alter_task_2_query_id=$(echo $response8 | jq -r '.id') +echo "Resume Task 2 ID: $alter_task_2_query_id" + +sleep 10 + +response9=$(curl -s -u root: -XPOST 
"http://localhost:8000/v1/query" -H 'Content-Type: application/json' -d "{\"sql\": \"SELECT c1 FROM t1 ORDER BY c1\"}") + +actual=$(echo "$response9" | jq -c '.data') +expected='[["0"],["0"],["1"],["1"],["2"],["2"]]' + +if [ "$actual" = "$expected" ]; then + echo "✅ Query result matches expected" +else + echo "❌ Mismatch" + echo "Expected: $expected" + echo "Actual : $actual" + exit 1 +fi \ No newline at end of file From 51bcf06b06758b5d4afa9211eb321b472bfc06d1 Mon Sep 17 00:00:00 2001 From: Kould Date: Mon, 14 Jul 2025 17:58:43 +0800 Subject: [PATCH 08/25] chore: add license for private task --- src/common/license/src/license.rs | 9 +++++++++ .../src/interpreters/interpreter_task_alter.rs | 2 +- .../src/interpreters/interpreter_task_create.rs | 2 +- .../src/interpreters/interpreter_task_describe.rs | 2 +- .../src/interpreters/interpreter_task_drop.rs | 2 +- .../src/interpreters/interpreter_task_execute.rs | 2 +- .../src/interpreters/interpreter_tasks_show.rs | 2 +- src/query/service/src/interpreters/task/mod.rs | 13 ++++++++++--- src/query/service/src/task/service.rs | 10 +++++++++- 9 files changed, 34 insertions(+), 10 deletions(-) diff --git a/src/common/license/src/license.rs b/src/common/license/src/license.rs index 95d46156a74ee..dd744874d2d22 100644 --- a/src/common/license/src/license.rs +++ b/src/common/license/src/license.rs @@ -83,6 +83,8 @@ pub enum Feature { SystemHistory, #[serde(alias = "vector_index", alias = "VECTOR_INDEX")] VectorIndex, + #[serde(alias = "private_task", alias = "PRIVATE_TASK")] + PrivateTask, #[serde(other)] Unknown, } @@ -134,6 +136,7 @@ impl fmt::Display for Feature { Feature::WorkloadGroup => write!(f, "workload_group"), Feature::SystemHistory => write!(f, "system_history"), Feature::VectorIndex => write!(f, "vector_index"), + Feature::PrivateTask => write!(f, "private_task"), Feature::Unknown => write!(f, "unknown"), } } @@ -372,6 +375,11 @@ mod tests { serde_json::from_str::("\"VectorIndex\"").unwrap() ); + assert_eq!( + 
Feature::PrivateTask, + serde_json::from_str::("\"private_task\"").unwrap() + ); + assert_eq!( Feature::Unknown, serde_json::from_str::("\"ssss\"").unwrap() @@ -408,6 +416,7 @@ mod tests { Feature::NgramIndex, Feature::WorkloadGroup, Feature::SystemHistory, + Feature::PrivateTask, ]), }; diff --git a/src/query/service/src/interpreters/interpreter_task_alter.rs b/src/query/service/src/interpreters/interpreter_task_alter.rs index 71eabd5a6261d..fa9fdf0f79cca 100644 --- a/src/query/service/src/interpreters/interpreter_task_alter.rs +++ b/src/query/service/src/interpreters/interpreter_task_alter.rs @@ -50,7 +50,7 @@ impl Interpreter for AlterTaskInterpreter { #[fastrace::trace] #[async_backtrace::framed] async fn execute2(&self) -> Result { - TaskInterpreterFactory::build() + TaskInterpreterFactory::build(&self.ctx)? .alter_task(&self.ctx, &self.plan) .await?; diff --git a/src/query/service/src/interpreters/interpreter_task_create.rs b/src/query/service/src/interpreters/interpreter_task_create.rs index 88bd453bc0967..ca9408afd942b 100644 --- a/src/query/service/src/interpreters/interpreter_task_create.rs +++ b/src/query/service/src/interpreters/interpreter_task_create.rs @@ -48,7 +48,7 @@ impl Interpreter for CreateTaskInterpreter { #[fastrace::trace] #[async_backtrace::framed] async fn execute2(&self) -> Result { - TaskInterpreterFactory::build() + TaskInterpreterFactory::build(&self.ctx)? 
.create_task(&self.ctx, &self.plan) .await?; diff --git a/src/query/service/src/interpreters/interpreter_task_describe.rs b/src/query/service/src/interpreters/interpreter_task_describe.rs index e0814a0486495..3e40cc297cc44 100644 --- a/src/query/service/src/interpreters/interpreter_task_describe.rs +++ b/src/query/service/src/interpreters/interpreter_task_describe.rs @@ -49,7 +49,7 @@ impl Interpreter for DescribeTaskInterpreter { #[fastrace::trace] #[async_backtrace::framed] async fn execute2(&self) -> Result { - let Some(task) = TaskInterpreterFactory::build() + let Some(task) = TaskInterpreterFactory::build(&self.ctx)? .describe_task(&self.ctx, &self.plan) .await? else { diff --git a/src/query/service/src/interpreters/interpreter_task_drop.rs b/src/query/service/src/interpreters/interpreter_task_drop.rs index ca4f5554ae4aa..e4beca7f11d07 100644 --- a/src/query/service/src/interpreters/interpreter_task_drop.rs +++ b/src/query/service/src/interpreters/interpreter_task_drop.rs @@ -48,7 +48,7 @@ impl Interpreter for DropTaskInterpreter { #[fastrace::trace] #[async_backtrace::framed] async fn execute2(&self) -> Result { - TaskInterpreterFactory::build() + TaskInterpreterFactory::build(&self.ctx)? .drop_task(&self.ctx, &self.plan) .await?; diff --git a/src/query/service/src/interpreters/interpreter_task_execute.rs b/src/query/service/src/interpreters/interpreter_task_execute.rs index 23ccc1a33bb41..37017b7b548f1 100644 --- a/src/query/service/src/interpreters/interpreter_task_execute.rs +++ b/src/query/service/src/interpreters/interpreter_task_execute.rs @@ -48,7 +48,7 @@ impl Interpreter for ExecuteTaskInterpreter { #[fastrace::trace] #[async_backtrace::framed] async fn execute2(&self) -> Result { - TaskInterpreterFactory::build() + TaskInterpreterFactory::build(&self.ctx)? 
.execute_task(&self.ctx, &self.plan) .await?; diff --git a/src/query/service/src/interpreters/interpreter_tasks_show.rs b/src/query/service/src/interpreters/interpreter_tasks_show.rs index 62e58ee3badc6..d6f5a6231c764 100644 --- a/src/query/service/src/interpreters/interpreter_tasks_show.rs +++ b/src/query/service/src/interpreters/interpreter_tasks_show.rs @@ -51,7 +51,7 @@ impl Interpreter for ShowTasksInterpreter { #[fastrace::trace] #[async_backtrace::framed] async fn execute2(&self) -> Result { - let tasks = TaskInterpreterFactory::build() + let tasks = TaskInterpreterFactory::build(&self.ctx)? .show_tasks(&self.ctx, &self.plan) .await?; diff --git a/src/query/service/src/interpreters/task/mod.rs b/src/query/service/src/interpreters/task/mod.rs index 0f80032ba71f8..db37b9f72b8d0 100644 --- a/src/query/service/src/interpreters/task/mod.rs +++ b/src/query/service/src/interpreters/task/mod.rs @@ -14,9 +14,12 @@ use std::sync::Arc; +use databend_common_catalog::table_context::TableContext; use databend_common_cloud_control::task_utils; use databend_common_config::GlobalConfig; use databend_common_exception::Result; +use databend_common_license::license::Feature; +use databend_common_license::license_manager::LicenseManagerSwitch; use databend_common_sql::plans::AlterTaskPlan; use databend_common_sql::plans::CreateTaskPlan; use databend_common_sql::plans::DescribeTaskPlan; @@ -34,11 +37,15 @@ mod private; pub(crate) struct TaskInterpreterFactory; impl TaskInterpreterFactory { - pub fn build() -> TaskInterpreterImpl { + pub fn build(ctx: &QueryContext) -> Result { if GlobalConfig::instance().task.on { - return TaskInterpreterImpl::Private(PrivateTaskInterpreter); + LicenseManagerSwitch::instance().check_enterprise_enabled( + ctx.get_settings().get_enterprise_license(), + Feature::PrivateTask, + )?; + return Ok(TaskInterpreterImpl::Private(PrivateTaskInterpreter)); } - TaskInterpreterImpl::Cloud(CloudTaskInterpreter) + 
Ok(TaskInterpreterImpl::Cloud(CloudTaskInterpreter)) } } diff --git a/src/query/service/src/task/service.rs b/src/query/service/src/task/service.rs index 800afab407ea5..2cc17e6779491 100644 --- a/src/query/service/src/task/service.rs +++ b/src/query/service/src/task/service.rs @@ -382,7 +382,15 @@ impl TaskService { .ok_or_else(|| { ErrorCode::UnknownTask(next_task) })?; - if let Some(_guard) = fn_lock(&TaskService::instance(), &TaskMessageIdent::new(tenant.clone(), format!("check_{}", task_name))).await? { + if let Some(_guard) = fn_lock( + &TaskService::instance(), + &TaskMessageIdent::new( + tenant.clone(), + format!("check_{}", task_name), + ), + ) + .await? + { task_mgr .send(TaskMessage::ExecuteTask(next_task)) .await?; From cc8056466a8610ea6b30a4894da615ff7acbc07e Mon Sep 17 00:00:00 2001 From: Kould Date: Mon, 14 Jul 2025 18:16:22 +0800 Subject: [PATCH 09/25] chore: fix test_display_license_info --- src/common/license/src/license.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/common/license/src/license.rs b/src/common/license/src/license.rs index dd744874d2d22..6eb748a6416ac 100644 --- a/src/common/license/src/license.rs +++ b/src/common/license/src/license.rs @@ -421,7 +421,7 @@ mod tests { }; assert_eq!( - "LicenseInfo{ type: enterprise, org: databend, tenants: [databend_tenant,foo], features: [aggregate_index,amend_table,attach_table,compute_quota(threads_num: 1, memory_usage: 1),computed_column,data_mask,hilbert_clustering,inverted_index,license_info,ngram_index,storage_encryption,storage_quota(storage_usage: 1),stream,system_history,vacuum,virtual_column,workload_group] }", + "LicenseInfo{ type: enterprise, org: databend, tenants: [databend_tenant,foo], features: [aggregate_index,amend_table,attach_table,compute_quota(threads_num: 1, memory_usage: 1),computed_column,data_mask,hilbert_clustering,inverted_index,license_info,ngram_index,private_task,storage_encryption,storage_quota(storage_usage: 
1),stream,system_history,vacuum,virtual_column,workload_group] }", license_info.to_string() ); } From 181a43856796b2d9a020a7c5b11db256b7420be8 Mon Sep 17 00:00:00 2001 From: Kould Date: Tue, 15 Jul 2025 10:24:18 +0800 Subject: [PATCH 10/25] chore: codefmt --- .../src/task_from_to_protobuf_impl.rs | 43 +++++----- src/meta/proto-conv/src/util.rs | 1 + src/meta/proto-conv/tests/it/main.rs | 1 + src/meta/proto-conv/tests/it/v135_add_task.rs | 78 +++++++++++++++++++ .../service/src/interpreters/task/private.rs | 28 ++++--- src/query/users/src/lib.rs | 1 - src/query/users/src/user_task.rs | 78 ------------------- 7 files changed, 118 insertions(+), 112 deletions(-) create mode 100644 src/meta/proto-conv/tests/it/v135_add_task.rs delete mode 100644 src/query/users/src/user_task.rs diff --git a/src/meta/proto-conv/src/task_from_to_protobuf_impl.rs b/src/meta/proto-conv/src/task_from_to_protobuf_impl.rs index bc8ea4558330d..4f2914d890202 100644 --- a/src/meta/proto-conv/src/task_from_to_protobuf_impl.rs +++ b/src/meta/proto-conv/src/task_from_to_protobuf_impl.rs @@ -40,30 +40,27 @@ impl FromToProto for mt::Task { return Err(Incompatible::new(format!("Status can not be {s}"))); } }; - let schedule = match p.schedule_options { - None => None, - Some(ref s) => { - if !p.after.is_empty() { - None - } else { - let schedule_type = match s.schedule_type { - 0 => mt::ScheduleType::IntervalType, - 1 => mt::ScheduleType::CronType, - s => { - return Err(Incompatible::new(format!("ScheduleType can not be {s}"))); - } - }; + let schedule = p + .schedule_options + .as_ref() + .map(|s| { + let schedule_type = match s.schedule_type { + 0 => mt::ScheduleType::IntervalType, + 1 => mt::ScheduleType::CronType, + s => { + return Err(Incompatible::new(format!("ScheduleType can not be {s}"))); + } + }; - Some(mt::ScheduleOptions { - interval: s.interval, - cron: s.cron.clone(), - time_zone: s.time_zone.clone(), - schedule_type, - milliseconds_interval: s.milliseconds_interval, - }) - } - } - }; + 
Ok(mt::ScheduleOptions { + interval: s.interval, + cron: s.cron.clone(), + time_zone: s.time_zone.clone(), + schedule_type, + milliseconds_interval: s.milliseconds_interval, + }) + }) + .transpose()?; let warehouse = p.warehouse_options.as_ref().map(|w| mt::WarehouseOptions { warehouse: w.warehouse.clone(), diff --git a/src/meta/proto-conv/src/util.rs b/src/meta/proto-conv/src/util.rs index 9a92d06d6d999..29d6dfa907a03 100644 --- a/src/meta/proto-conv/src/util.rs +++ b/src/meta/proto-conv/src/util.rs @@ -165,6 +165,7 @@ const META_CHANGE_LOG: &[(u64, &str)] = &[ (133, "2025-06-25: Add: Add new StageFileCompression Zip"), (134, "2025-06-27: Add: SequenceMeta.storage_version"), (135, "2025-07-16: Add: UDFServer.immutable, UDFScript.immutable"), + (136, "2025-07-17: Add: Task"), // Dear developer: // If you're gonna add a new metadata version, you'll have to add a test for it. // You could just copy an existing test file(e.g., `../tests/it/v024_table_meta.rs`) diff --git a/src/meta/proto-conv/tests/it/main.rs b/src/meta/proto-conv/tests/it/main.rs index d3071819f5e76..4405a09b49683 100644 --- a/src/meta/proto-conv/tests/it/main.rs +++ b/src/meta/proto-conv/tests/it/main.rs @@ -127,3 +127,4 @@ mod v132_remove_sequence_meta_start; mod v133_stage_file_compression; mod v134_add_sequence_meta_storage_version; mod v135_udf_immutable; +mod v135_add_task; diff --git a/src/meta/proto-conv/tests/it/v135_add_task.rs b/src/meta/proto-conv/tests/it/v135_add_task.rs new file mode 100644 index 0000000000000..df8f7730e58ca --- /dev/null +++ b/src/meta/proto-conv/tests/it/v135_add_task.rs @@ -0,0 +1,78 @@ +// Copyright 2023 Datafuse Labs. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use chrono::DateTime; +use databend_common_meta_app::principal as mt; +use databend_common_meta_app::principal::ScheduleOptions; +use databend_common_meta_app::principal::ScheduleType; +use databend_common_meta_app::principal::WarehouseOptions; +use fastrace::func_name; +use maplit::btreemap; + +use crate::common; + +#[test] +fn test_decode_v135_add_task() -> anyhow::Result<()> { + let sequence_meta_v134 = vec![ + 8, 11, 18, 6, 116, 97, 115, 107, 95, 99, 34, 16, 83, 69, 76, 69, 67, 84, 32, 42, 32, 70, + 82, 79, 77, 32, 116, 49, 42, 7, 99, 111, 109, 109, 101, 110, 116, 50, 6, 112, 117, 98, 108, + 105, 99, 58, 22, 8, 11, 18, 11, 51, 48, 32, 49, 50, 32, 42, 32, 42, 32, 42, 26, 3, 85, 84, + 67, 40, 11, 66, 17, 10, 11, 119, 97, 114, 101, 104, 111, 117, 115, 101, 95, 97, 18, 2, 49, + 48, 74, 23, 49, 57, 55, 48, 45, 48, 49, 45, 48, 49, 32, 48, 48, 58, 48, 48, 58, 49, 48, 32, + 85, 84, 67, 80, 10, 114, 23, 49, 57, 55, 48, 45, 48, 49, 45, 48, 49, 32, 48, 48, 58, 48, + 48, 58, 49, 49, 32, 85, 84, 67, 122, 23, 49, 57, 55, 48, 45, 48, 49, 45, 48, 49, 32, 48, + 48, 58, 48, 48, 58, 49, 50, 32, 85, 84, 67, 130, 1, 23, 49, 57, 55, 48, 45, 48, 49, 45, 48, + 49, 32, 48, 48, 58, 48, 48, 58, 49, 51, 32, 85, 84, 67, 138, 1, 6, 116, 97, 115, 107, 95, + 97, 138, 1, 6, 116, 97, 115, 107, 95, 98, 146, 1, 6, 99, 49, 32, 62, 32, 49, 154, 1, 6, 10, + 1, 97, 18, 1, 98, 170, 1, 2, 109, 101, 160, 6, 135, 1, 168, 6, 24, + ]; + + let want = || mt::Task { + task_id: 11, + task_name: "task_c".to_string(), + query_text: "SELECT * FROM t1".to_string(), + when_condition: Some("c1 > 
1".to_string()), + after: vec!["task_a".to_string(), "task_b".to_string()], + comment: Some("comment".to_string()), + owner: "public".to_string(), + owner_user: "me".to_string(), + schedule_options: Some(ScheduleOptions { + interval: Some(11), + cron: Some("30 12 * * *".to_string()), + time_zone: Some("UTC".to_string()), + schedule_type: ScheduleType::IntervalType, + milliseconds_interval: Some(11), + }), + warehouse_options: Some(WarehouseOptions { + warehouse: Some("warehouse_a".to_string()), + using_warehouse_size: Some("10".to_string()), + }), + next_scheduled_at: Some(DateTime::from_timestamp(10, 0).unwrap()), + suspend_task_after_num_failures: Some(10), + error_integration: None, + status: mt::Status::Suspended, + created_at: DateTime::from_timestamp(11, 0).unwrap(), + updated_at: DateTime::from_timestamp(12, 0).unwrap(), + last_suspended_at: Some(DateTime::from_timestamp(13, 0).unwrap()), + session_params: btreemap! { s("a") => s("b") }, + }; + common::test_pb_from_to(func_name!(), want())?; + common::test_load_old(func_name!(), sequence_meta_v134.as_slice(), 135, want())?; + + Ok(()) +} + +fn s(ss: impl ToString) -> String { + ss.to_string() +} diff --git a/src/query/service/src/interpreters/task/private.rs b/src/query/service/src/interpreters/task/private.rs index 0322ddeb0ff86..72415d3b3aeff 100644 --- a/src/query/service/src/interpreters/task/private.rs +++ b/src/query/service/src/interpreters/task/private.rs @@ -115,24 +115,27 @@ impl TaskInterpreter for PrivateTaskInterpreter { session_params: plan.session_parameters.clone(), }; UserApiProvider::instance() - .create_task(&plan.tenant, task, &plan.create_option) - .await?; + .task_api(&plan.tenant) + .create_task(task, &plan.create_option) + .await??; Ok(()) } async fn execute_task(&self, _ctx: &Arc, plan: &ExecuteTaskPlan) -> Result<()> { UserApiProvider::instance() - .execute_task(&plan.tenant, &plan.task_name) - .await?; + .task_api(&plan.tenant) + .execute_task(&plan.task_name) + .await??; Ok(()) } 
async fn alter_task(&self, _ctx: &Arc, plan: &AlterTaskPlan) -> Result<()> { UserApiProvider::instance() - .alter_task(&plan.tenant, &plan.task_name, &plan.alter_options) - .await?; + .task_api(&plan.tenant) + .alter_task(&plan.task_name, &plan.alter_options) + .await??; Ok(()) } @@ -143,14 +146,16 @@ impl TaskInterpreter for PrivateTaskInterpreter { plan: &DescribeTaskPlan, ) -> Result> { let task = UserApiProvider::instance() - .describe_task(&plan.tenant, &plan.task_name) - .await?; + .task_api(&plan.tenant) + .describe_task(&plan.task_name) + .await??; task.map(Self::task_trans).transpose() } async fn drop_task(&self, _ctx: &Arc, plan: &DropTaskPlan) -> Result<()> { UserApiProvider::instance() - .drop_task(&plan.tenant, &plan.task_name) + .task_api(&plan.tenant) + .drop_task(&plan.task_name) .await?; Ok(()) @@ -161,7 +166,10 @@ impl TaskInterpreter for PrivateTaskInterpreter { _ctx: &Arc, plan: &ShowTasksPlan, ) -> Result> { - let tasks = UserApiProvider::instance().show_tasks(&plan.tenant).await?; + let tasks = UserApiProvider::instance() + .task_api(&plan.tenant) + .list_task() + .await?; tasks.into_iter().map(Self::task_trans).try_collect() } diff --git a/src/query/users/src/lib.rs b/src/query/users/src/lib.rs index 181309778098d..19f9d595ce55e 100644 --- a/src/query/users/src/lib.rs +++ b/src/query/users/src/lib.rs @@ -33,7 +33,6 @@ pub mod connection; pub mod file_format; pub mod role_cache_mgr; pub mod role_util; -mod user_task; pub use jwt::*; pub use password_policy::*; diff --git a/src/query/users/src/user_task.rs b/src/query/users/src/user_task.rs deleted file mode 100644 index 58fa175e0dc6b..0000000000000 --- a/src/query/users/src/user_task.rs +++ /dev/null @@ -1,78 +0,0 @@ -// Copyright 2021 Datafuse Labs -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -use databend_common_ast::ast::AlterTaskOptions; -use databend_common_exception::Result; -use databend_common_meta_app::principal::Task; -use databend_common_meta_app::schema::CreateOption; -use databend_common_meta_app::tenant::Tenant; - -use crate::UserApiProvider; - -impl UserApiProvider { - // Add a new Task. - #[async_backtrace::framed] - pub async fn create_task( - &self, - tenant: &Tenant, - task: Task, - create_option: &CreateOption, - ) -> Result<()> { - let task_api = self.task_api(tenant); - task_api.create_task(task, create_option).await??; - Ok(()) - } - - #[async_backtrace::framed] - pub async fn execute_task(&self, tenant: &Tenant, task_name: &str) -> Result<()> { - let task_api = self.task_api(tenant); - task_api.execute_task(task_name).await??; - Ok(()) - } - - #[async_backtrace::framed] - pub async fn alter_task( - &self, - tenant: &Tenant, - task_name: &str, - alter_options: &AlterTaskOptions, - ) -> Result<()> { - let task_api = self.task_api(tenant); - task_api.alter_task(task_name, alter_options).await??; - Ok(()) - } - - #[async_backtrace::framed] - pub async fn describe_task(&self, tenant: &Tenant, task_name: &str) -> Result> { - let task_api = self.task_api(tenant); - let task = task_api.describe_task(task_name).await??; - Ok(task) - } - - #[async_backtrace::framed] - pub async fn drop_task(&self, tenant: &Tenant, task_name: &str) -> Result<()> { - let task_api = self.task_api(tenant); - task_api.drop_task(task_name).await?; - - Ok(()) - } - - #[async_backtrace::framed] - pub async fn show_tasks(&self, tenant: &Tenant) -> 
Result> { - let task_api = self.task_api(tenant); - let tasks = task_api.list_task().await?; - - Ok(tasks) - } -} From 3a8a3dce23448bbef175c98ba5aa4623181519b3 Mon Sep 17 00:00:00 2001 From: Kould Date: Tue, 15 Jul 2025 10:42:25 +0800 Subject: [PATCH 11/25] chore: add `accept` for Delete & After --- src/meta/app/src/principal/task.rs | 4 ++++ src/query/management/src/task/task_mgr.rs | 2 +- src/query/service/src/task/service.rs | 12 ++++++++++-- 3 files changed, 15 insertions(+), 3 deletions(-) diff --git a/src/meta/app/src/principal/task.rs b/src/meta/app/src/principal/task.rs index 6929e3b3dcc34..08109fccfd4b0 100644 --- a/src/meta/app/src/principal/task.rs +++ b/src/meta/app/src/principal/task.rs @@ -128,6 +128,10 @@ impl TaskMessage { format!("{}-{}-{}", TaskMessage::prefix(), ty, self.task_name()) } + pub fn schedule_key(task_name: &str) -> String { + format!("{}-1-{task_name}", TaskMessage::prefix()) + } + pub fn prefix() -> i64 { 0 } diff --git a/src/query/management/src/task/task_mgr.rs b/src/query/management/src/task/task_mgr.rs index 5c08e089631b3..39f6f0c27b3e7 100644 --- a/src/query/management/src/task/task_mgr.rs +++ b/src/query/management/src/task/task_mgr.rs @@ -255,7 +255,7 @@ impl TaskMgr { /// mark the corresponding execute task as accepted and delete it from the queue #[async_backtrace::framed] #[fastrace::trace] - pub async fn execute_accept(&self, key: &TaskMessageIdent) -> Result<(), MetaError> { + pub async fn accept(&self, key: &TaskMessageIdent) -> Result<(), MetaError> { let req = UpsertPB::delete(key.clone()).with(MatchSeq::GE(1)); let _ = self.kv_api.upsert_pb(&req).await?; diff --git a/src/query/service/src/task/service.rs b/src/query/service/src/task/service.rs index 2cc17e6779491..db43d1aaed11f 100644 --- a/src/query/service/src/task/service.rs +++ b/src/query/service/src/task/service.rs @@ -396,7 +396,7 @@ impl TaskService { .await?; } } - task_mgr.execute_accept(&task_key).await?; + task_mgr.accept(&task_key).await?; break; } 
Err(err) => { @@ -411,7 +411,7 @@ impl TaskService { task_run.run_id = Self::make_run_id(); } } - task_mgr.execute_accept(&task_key).await?; + task_mgr.accept(&task_key).await?; task_mgr .alter_task(&task.task_name, &AlterTaskOptions::Suspend) .await??; @@ -433,6 +433,13 @@ impl TaskService { if let Some(token) = scheduled_tasks.remove(&task_name) { token.cancel(); } + task_mgr.accept(&task_key).await?; + task_mgr + .accept(&TaskMessageIdent::new( + tenant, + TaskMessage::schedule_key(&task_name), + )) + .await?; } TaskMessage::AfterTask(task) => { let Some(_guard) = fn_lock(self, &task_key).await? else { @@ -443,6 +450,7 @@ impl TaskService { Status::Started => (), } self.update_task_afters(&task).await?; + task_mgr.accept(&task_key).await?; } } } From d7ba36b45e1c158403c55bd847b971e6124a5588 Mon Sep 17 00:00:00 2001 From: Kould Date: Tue, 15 Jul 2025 14:29:08 +0800 Subject: [PATCH 12/25] chore: add restart test --- src/query/service/src/task/service.rs | 16 +++++++------ tests/task/test-private-task.sh | 34 +++++++++++++++++++++++++++ 2 files changed, 43 insertions(+), 7 deletions(-) diff --git a/src/query/service/src/task/service.rs b/src/query/service/src/task/service.rs index db43d1aaed11f..b2541a1e48363 100644 --- a/src/query/service/src/task/service.rs +++ b/src/query/service/src/task/service.rs @@ -23,6 +23,7 @@ use std::time::Duration; use async_stream::stream; use chrono::DateTime; +use chrono::Local; use chrono::Utc; use chrono_tz::Tz; use cron::Schedule; @@ -75,9 +76,10 @@ use crate::task::session::get_task_user; pub type TaskMessageStream = BoxStream<'static, Result<(String, TaskMessage)>>; -/// - Multiple Query nodes can send the same Task at the same time, and the key will be distinguished by node_id -/// - Each Query node will grab the corresponding task through acquire task_name. -/// - Tasks with the same task_name cannot be executed at the same time. +/// Currently, query uses the watch in meta to imitate channel to obtain tasks. 
When task messages are sent to channels, they are stored in meta using TaskMessage::key. +/// TaskMessage::key is divided into only 4 types of keys that will overwrite each other to avoid repeated storage and repeated processing. +/// Whenever a new key is inserted for overwriting, each query will receive the corresponding key change and process it, thus realizing the channel +/// The init type key of watch is used to let the Service load the Schedule, and TaskService will delete the corresponding key (TaskMgr::accept) when processing Execute & After & Delete TaskMessage to avoid repeated processing pub struct TaskService { initialized: AtomicBool, interval: u64, @@ -165,7 +167,7 @@ impl TaskService { let instance = TaskService { initialized: AtomicBool::new(false), - interval: 200, + interval: 300, tenant: cfg.query.tenant_id.clone(), node_id: cfg.query.node_id.clone(), cluster_id: cfg.query.cluster_id.clone(), @@ -210,8 +212,8 @@ impl TaskService { }; while let Some(result) = steam.next().await { - let (task_key, task_message) = result?; - let task_key = TaskMessageIdent::new(tenant, task_key); + let (_, task_message) = result?; + let task_key = TaskMessageIdent::new(tenant, task_message.key()); match task_message { // ScheduleTask is always monitored by all Query nodes, and ExecuteTask is sent serially to avoid repeated sending. 
TaskMessage::ScheduleTask(mut task) => { @@ -411,10 +413,10 @@ impl TaskService { task_run.run_id = Self::make_run_id(); } } - task_mgr.accept(&task_key).await?; task_mgr .alter_task(&task.task_name, &AlterTaskOptions::Suspend) .await??; + task_mgr.accept(&task_key).await?; } Result::Ok(()) diff --git a/tests/task/test-private-task.sh b/tests/task/test-private-task.sh index a2db185529645..21376230b04c3 100644 --- a/tests/task/test-private-task.sh +++ b/tests/task/test-private-task.sh @@ -139,6 +139,40 @@ response9=$(curl -s -u root: -XPOST "http://localhost:8000/v1/query" -H 'Content actual=$(echo "$response9" | jq -c '.data') expected='[["0"],["0"],["1"],["1"],["2"],["2"]]' +if [ "$actual" = "$expected" ]; then + echo "✅ Query result matches expected" +else + echo "❌ Mismatch" + echo "Expected: $expected" + echo "Actual : $actual" + exit 1 +fi + +# Test whether the schedule can be restored after restart + +killall -9 databend-query || true + +echo 'Start databend-query node-1' +nohup env RUST_BACKTRACE=1 target/${BUILD_PROFILE}/databend-query -c scripts/ci/deploy/config/databend-query-node-1.toml --internal-enable-sandbox-tenant >./.databend/query-1.out 2>&1 & + +echo "Waiting on node-1..." +python3 scripts/ci/wait_tcp.py --timeout 30 --port 9091 + +echo 'Start databend-query node-2' +env "RUST_BACKTRACE=1" nohup target/${BUILD_PROFILE}/databend-query -c scripts/ci/deploy/config/databend-query-node-2.toml --internal-enable-sandbox-tenant >./.databend/query-2.out 2>&1 & + +echo "Waiting on node-2..." +python3 scripts/ci/wait_tcp.py --timeout 30 --port 9092 + +echo "Started 2-node cluster with private task enabled..." 
+ +sleep 9 + +response9=$(curl -s -u root: -XPOST "http://localhost:8000/v1/query" -H 'Content-Type: application/json' -d "{\"sql\": \"SELECT c1 FROM t1 ORDER BY c1\"}") + +actual=$(echo "$response9" | jq -c '.data') +expected='[["0"],["0"],["1"],["1"],["1"],["2"],["2"]]' + if [ "$actual" = "$expected" ]; then echo "✅ Query result matches expected" else From f0721b1ca3f608d6b10f05a6baeae34d221399b9 Mon Sep 17 00:00:00 2001 From: Kould Date: Tue, 15 Jul 2025 15:12:33 +0800 Subject: [PATCH 13/25] chore: codefmt --- src/query/service/src/interpreters/interpreter_task_alter.rs | 4 ++-- src/query/service/src/interpreters/interpreter_task_create.rs | 4 ++-- .../service/src/interpreters/interpreter_task_describe.rs | 4 ++-- src/query/service/src/interpreters/interpreter_task_drop.rs | 4 ++-- .../service/src/interpreters/interpreter_task_execute.rs | 4 ++-- src/query/service/src/interpreters/interpreter_tasks_show.rs | 4 ++-- src/query/service/src/interpreters/task/mod.rs | 4 ++-- src/query/service/src/task/service.rs | 1 - 8 files changed, 14 insertions(+), 15 deletions(-) diff --git a/src/query/service/src/interpreters/interpreter_task_alter.rs b/src/query/service/src/interpreters/interpreter_task_alter.rs index fa9fdf0f79cca..98d8911ad5bd9 100644 --- a/src/query/service/src/interpreters/interpreter_task_alter.rs +++ b/src/query/service/src/interpreters/interpreter_task_alter.rs @@ -18,7 +18,7 @@ use databend_common_exception::Result; use databend_common_sql::plans::AlterTaskPlan; use crate::interpreters::task::TaskInterpreter; -use crate::interpreters::task::TaskInterpreterFactory; +use crate::interpreters::task::TaskInterpreterManager; use crate::interpreters::Interpreter; use crate::pipelines::PipelineBuildResult; use crate::sessions::QueryContext; @@ -50,7 +50,7 @@ impl Interpreter for AlterTaskInterpreter { #[fastrace::trace] #[async_backtrace::framed] async fn execute2(&self) -> Result { - TaskInterpreterFactory::build(&self.ctx)? 
+ TaskInterpreterManager::build(&self.ctx)? .alter_task(&self.ctx, &self.plan) .await?; diff --git a/src/query/service/src/interpreters/interpreter_task_create.rs b/src/query/service/src/interpreters/interpreter_task_create.rs index ca9408afd942b..d6a31f17007f5 100644 --- a/src/query/service/src/interpreters/interpreter_task_create.rs +++ b/src/query/service/src/interpreters/interpreter_task_create.rs @@ -18,7 +18,7 @@ use databend_common_exception::Result; use databend_common_sql::plans::CreateTaskPlan; use crate::interpreters::task::TaskInterpreter; -use crate::interpreters::task::TaskInterpreterFactory; +use crate::interpreters::task::TaskInterpreterManager; use crate::interpreters::Interpreter; use crate::pipelines::PipelineBuildResult; use crate::sessions::QueryContext; @@ -48,7 +48,7 @@ impl Interpreter for CreateTaskInterpreter { #[fastrace::trace] #[async_backtrace::framed] async fn execute2(&self) -> Result { - TaskInterpreterFactory::build(&self.ctx)? + TaskInterpreterManager::build(&self.ctx)? 
.create_task(&self.ctx, &self.plan) .await?; diff --git a/src/query/service/src/interpreters/interpreter_task_describe.rs b/src/query/service/src/interpreters/interpreter_task_describe.rs index 3e40cc297cc44..a7796259cec2f 100644 --- a/src/query/service/src/interpreters/interpreter_task_describe.rs +++ b/src/query/service/src/interpreters/interpreter_task_describe.rs @@ -19,7 +19,7 @@ use databend_common_sql::plans::DescribeTaskPlan; use databend_common_storages_system::parse_tasks_to_datablock; use crate::interpreters::task::TaskInterpreter; -use crate::interpreters::task::TaskInterpreterFactory; +use crate::interpreters::task::TaskInterpreterManager; use crate::interpreters::Interpreter; use crate::pipelines::PipelineBuildResult; use crate::sessions::QueryContext; @@ -49,7 +49,7 @@ impl Interpreter for DescribeTaskInterpreter { #[fastrace::trace] #[async_backtrace::framed] async fn execute2(&self) -> Result { - let Some(task) = TaskInterpreterFactory::build(&self.ctx)? + let Some(task) = TaskInterpreterManager::build(&self.ctx)? .describe_task(&self.ctx, &self.plan) .await? else { diff --git a/src/query/service/src/interpreters/interpreter_task_drop.rs b/src/query/service/src/interpreters/interpreter_task_drop.rs index e4beca7f11d07..6713c5a11725d 100644 --- a/src/query/service/src/interpreters/interpreter_task_drop.rs +++ b/src/query/service/src/interpreters/interpreter_task_drop.rs @@ -18,7 +18,7 @@ use databend_common_exception::Result; use databend_common_sql::plans::DropTaskPlan; use crate::interpreters::task::TaskInterpreter; -use crate::interpreters::task::TaskInterpreterFactory; +use crate::interpreters::task::TaskInterpreterManager; use crate::interpreters::Interpreter; use crate::pipelines::PipelineBuildResult; use crate::sessions::QueryContext; @@ -48,7 +48,7 @@ impl Interpreter for DropTaskInterpreter { #[fastrace::trace] #[async_backtrace::framed] async fn execute2(&self) -> Result { - TaskInterpreterFactory::build(&self.ctx)? 
+ TaskInterpreterManager::build(&self.ctx)? .drop_task(&self.ctx, &self.plan) .await?; diff --git a/src/query/service/src/interpreters/interpreter_task_execute.rs b/src/query/service/src/interpreters/interpreter_task_execute.rs index 37017b7b548f1..70c12e67e4849 100644 --- a/src/query/service/src/interpreters/interpreter_task_execute.rs +++ b/src/query/service/src/interpreters/interpreter_task_execute.rs @@ -18,7 +18,7 @@ use databend_common_exception::Result; use databend_common_sql::plans::ExecuteTaskPlan; use crate::interpreters::task::TaskInterpreter; -use crate::interpreters::task::TaskInterpreterFactory; +use crate::interpreters::task::TaskInterpreterManager; use crate::interpreters::Interpreter; use crate::pipelines::PipelineBuildResult; use crate::sessions::QueryContext; @@ -48,7 +48,7 @@ impl Interpreter for ExecuteTaskInterpreter { #[fastrace::trace] #[async_backtrace::framed] async fn execute2(&self) -> Result { - TaskInterpreterFactory::build(&self.ctx)? + TaskInterpreterManager::build(&self.ctx)? .execute_task(&self.ctx, &self.plan) .await?; diff --git a/src/query/service/src/interpreters/interpreter_tasks_show.rs b/src/query/service/src/interpreters/interpreter_tasks_show.rs index d6f5a6231c764..1cc8e514ae474 100644 --- a/src/query/service/src/interpreters/interpreter_tasks_show.rs +++ b/src/query/service/src/interpreters/interpreter_tasks_show.rs @@ -19,7 +19,7 @@ use databend_common_sql::plans::ShowTasksPlan; use databend_common_storages_system::parse_tasks_to_datablock; use crate::interpreters::task::TaskInterpreter; -use crate::interpreters::task::TaskInterpreterFactory; +use crate::interpreters::task::TaskInterpreterManager; use crate::interpreters::Interpreter; use crate::pipelines::PipelineBuildResult; use crate::sessions::QueryContext; @@ -51,7 +51,7 @@ impl Interpreter for ShowTasksInterpreter { #[fastrace::trace] #[async_backtrace::framed] async fn execute2(&self) -> Result { - let tasks = TaskInterpreterFactory::build(&self.ctx)? 
+ let tasks = TaskInterpreterManager::build(&self.ctx)? .show_tasks(&self.ctx, &self.plan) .await?; diff --git a/src/query/service/src/interpreters/task/mod.rs b/src/query/service/src/interpreters/task/mod.rs index db37b9f72b8d0..3f15576aa00b2 100644 --- a/src/query/service/src/interpreters/task/mod.rs +++ b/src/query/service/src/interpreters/task/mod.rs @@ -34,9 +34,9 @@ use crate::sessions::QueryContext; mod cloud; mod private; -pub(crate) struct TaskInterpreterFactory; +pub(crate) struct TaskInterpreterManager; -impl TaskInterpreterFactory { +impl TaskInterpreterManager { pub fn build(ctx: &QueryContext) -> Result { if GlobalConfig::instance().task.on { LicenseManagerSwitch::instance().check_enterprise_enabled( diff --git a/src/query/service/src/task/service.rs b/src/query/service/src/task/service.rs index b2541a1e48363..49c38c72670f9 100644 --- a/src/query/service/src/task/service.rs +++ b/src/query/service/src/task/service.rs @@ -23,7 +23,6 @@ use std::time::Duration; use async_stream::stream; use chrono::DateTime; -use chrono::Local; use chrono::Utc; use chrono_tz::Tz; use cron::Schedule; From 0b8b72fec3e1c28d72bbdac610f30e1401144f77 Mon Sep 17 00:00:00 2001 From: Kould Date: Tue, 15 Jul 2025 17:51:19 +0800 Subject: [PATCH 14/25] chore: add `system.task_history` for Private Task & use `TaskMgr::accept` replace `TaskMetaHandle::acquire_with_guard` --- src/query/management/src/task/task_mgr.rs | 6 +- .../src/databases/system/system_database.rs | 9 +- .../table_functions/table_function_factory.rs | 29 ++-- src/query/service/src/task/meta.rs | 4 +- src/query/service/src/task/service.rs | 53 +++----- src/query/storages/system/src/lib.rs | 2 + .../system/src/private_task_history_table.rs | 128 ++++++++++++++++++ tests/task/test-private-task.sh | 51 ++++++- 8 files changed, 227 insertions(+), 55 deletions(-) create mode 100644 src/query/storages/system/src/private_task_history_table.rs diff --git a/src/query/management/src/task/task_mgr.rs 
b/src/query/management/src/task/task_mgr.rs index 39f6f0c27b3e7..22c8d1b915539 100644 --- a/src/query/management/src/task/task_mgr.rs +++ b/src/query/management/src/task/task_mgr.rs @@ -255,11 +255,11 @@ impl TaskMgr { /// mark the corresponding execute task as accepted and delete it from the queue #[async_backtrace::framed] #[fastrace::trace] - pub async fn accept(&self, key: &TaskMessageIdent) -> Result<(), MetaError> { + pub async fn accept(&self, key: &TaskMessageIdent) -> Result { let req = UpsertPB::delete(key.clone()).with(MatchSeq::GE(1)); - let _ = self.kv_api.upsert_pb(&req).await?; + let change = self.kv_api.upsert_pb(&req).await?; - Ok(()) + Ok(change.is_changed()) } async fn create_task_inner( diff --git a/src/query/service/src/databases/system/system_database.rs b/src/query/service/src/databases/system/system_database.rs index 55e6e614f7aab..36cf36da60243 100644 --- a/src/query/service/src/databases/system/system_database.rs +++ b/src/query/service/src/databases/system/system_database.rs @@ -49,6 +49,7 @@ use databend_common_storages_system::NotificationHistoryTable; use databend_common_storages_system::NotificationsTable; use databend_common_storages_system::OneTable; use databend_common_storages_system::PasswordPoliciesTable; +use databend_common_storages_system::PrivateTaskHistoryTable; use databend_common_storages_system::ProceduresTable; use databend_common_storages_system::ProcessesTable; use databend_common_storages_system::QueryCacheTable; @@ -153,8 +154,6 @@ impl SystemDatabase { IndexesTable::create(sys_db_meta.next_table_id()), BacktraceTable::create(sys_db_meta.next_table_id()), TempFilesTable::create(sys_db_meta.next_table_id()), - TasksTable::create(sys_db_meta.next_table_id()), - TaskHistoryTable::create(sys_db_meta.next_table_id()), LocksTable::create(sys_db_meta.next_table_id(), ctl_name), NotificationsTable::create(sys_db_meta.next_table_id()), NotificationHistoryTable::create(sys_db_meta.next_table_id()), @@ -166,6 +165,12 @@ impl 
SystemDatabase { config.query.max_query_log_size, )), ]); + if config.task.on { + table_list.push(PrivateTaskHistoryTable::create(sys_db_meta.next_table_id())); + } else { + table_list.push(TasksTable::create(sys_db_meta.next_table_id())); + table_list.push(TaskHistoryTable::create(sys_db_meta.next_table_id())); + } disable_system_table_load = config.query.disable_system_table_load; } else { disable_system_table_load = false; diff --git a/src/query/service/src/table_functions/table_function_factory.rs b/src/query/service/src/table_functions/table_function_factory.rs index 1fbc25766c94b..423caad2a817f 100644 --- a/src/query/service/src/table_functions/table_function_factory.rs +++ b/src/query/service/src/table_functions/table_function_factory.rs @@ -16,6 +16,7 @@ use std::collections::HashMap; use std::sync::Arc; use databend_common_catalog::table_args::TableArgs; +use databend_common_config::GlobalConfig; use databend_common_exception::ErrorCode; use databend_common_exception::Result; use databend_common_meta_types::MetaId; @@ -295,26 +296,28 @@ impl TableFunctionFactory { ), ); - creators.insert( - "task_dependents".to_string(), - (next_id(), Arc::new(TaskDependentsTable::create)), - ); + if !GlobalConfig::instance().task.on { + creators.insert( + "task_dependents".to_string(), + (next_id(), Arc::new(TaskDependentsTable::create)), + ); - creators.insert( - "task_dependents_enable".to_string(), - (next_id(), Arc::new(TaskDependentsEnableTable::create)), - ); + creators.insert( + "task_dependents_enable".to_string(), + (next_id(), Arc::new(TaskDependentsEnableTable::create)), + ); + + creators.insert( + "task_history".to_string(), + (next_id(), Arc::new(TaskHistoryTable::create)), + ); + } creators.insert( "show_grants".to_string(), (next_id(), Arc::new(ShowGrants::create)), ); - creators.insert( - "task_history".to_string(), - (next_id(), Arc::new(TaskHistoryTable::create)), - ); - creators.insert( "show_variables".to_string(), (next_id(), 
Arc::new(ShowVariables::create)), diff --git a/src/query/service/src/task/meta.rs b/src/query/service/src/task/meta.rs index 8f19a9f9e9306..6d76b873f05f8 100644 --- a/src/query/service/src/task/meta.rs +++ b/src/query/service/src/task/meta.rs @@ -107,9 +107,9 @@ impl TaskMetaHandle { pub async fn acquire_with_guard( &self, meta_key: &str, - interval: u64, + interval_millis: u64, ) -> Result> { - if let Some(permit) = self.acquire(meta_key, interval).await? { + if let Some(permit) = self.acquire(meta_key, interval_millis).await? { Ok(Some(PermitGuard::new( permit, Arc::new(TaskMetaHandle { diff --git a/src/query/service/src/task/service.rs b/src/query/service/src/task/service.rs index 49c38c72670f9..479a398498ef5 100644 --- a/src/query/service/src/task/service.rs +++ b/src/query/service/src/task/service.rs @@ -81,7 +81,6 @@ pub type TaskMessageStream = BoxStream<'static, Result<(String, TaskMessage)>>; /// The init type key of watch is used to let the Service load the Schedule, and TaskService will delete the corresponding key (TaskMgr::accept) when processing Execute & After & Delete TaskMessage to avoid repeated processing pub struct TaskService { initialized: AtomicBool, - interval: u64, tenant: Tenant, node_id: String, cluster_id: String, @@ -166,7 +165,6 @@ impl TaskService { let instance = TaskService { initialized: AtomicBool::new(false), - interval: 300, tenant: cfg.query.tenant_id.clone(), node_id: cfg.query.node_id.clone(), cluster_id: cfg.query.cluster_id.clone(), @@ -203,13 +201,6 @@ impl TaskService { let mut steam = self.subscribe().await?; - let fn_lock = async |task_service: &TaskService, key: &TaskMessageIdent| { - task_service - .meta_handle - .acquire_with_guard(&format!("{}/lock", key), task_service.interval) - .await - }; - while let Some(result) = steam.next().await { let (_, task_message) = result?; let task_key = TaskMessageIdent::new(tenant, task_message.key()); @@ -233,6 +224,16 @@ impl TaskService { let task_name_clone = task_name.clone(); 
let task_service = TaskService::instance(); + let fn_lock = + async |task_service: &TaskService, + key: &TaskMessageIdent, + interval_millis: u64| { + task_service + .meta_handle + .acquire_with_guard(&format!("{}/lock", key), interval_millis) + .await + }; + task_service .update_or_create_task_run(&TaskRun { task: task.clone(), @@ -265,7 +266,7 @@ impl TaskService { loop { tokio::select! { _ = sleep(duration) => { - let Some(_guard) = fn_lock(&task_service, &task_key).await? else { + let Some(_guard) = fn_lock(&task_service, &task_key, duration.as_millis() as u64).await? else { continue; }; task_mgr.send(TaskMessage::ExecuteTask(task.clone())).await?; @@ -308,7 +309,7 @@ impl TaskService { task.next_scheduled_at = Some(Utc::now() + duration); tokio::select! { _ = sleep(duration) => { - let Some(_guard) = fn_lock(&task_service, &task_key).await? else { + let Some(_guard) = fn_lock(&task_service, &task_key, duration.as_millis() as u64).await? else { continue; }; task_mgr.send(TaskMessage::ExecuteTask(task.clone())).await?; @@ -330,9 +331,9 @@ impl TaskService { } } TaskMessage::ExecuteTask(task) => { - let Some(_guard) = fn_lock(self, &task_key).await? else { + if !task_mgr.accept(&task_key).await? { continue; - }; + } let task_name = task.task_name.clone(); let task_service = TaskService::instance(); @@ -383,21 +384,10 @@ impl TaskService { .ok_or_else(|| { ErrorCode::UnknownTask(next_task) })?; - if let Some(_guard) = fn_lock( - &TaskService::instance(), - &TaskMessageIdent::new( - tenant.clone(), - format!("check_{}", task_name), - ), - ) - .await? 
- { - task_mgr - .send(TaskMessage::ExecuteTask(next_task)) - .await?; - } + task_mgr + .send(TaskMessage::ExecuteTask(next_task)) + .await?; } - task_mgr.accept(&task_key).await?; break; } Err(err) => { @@ -415,7 +405,6 @@ impl TaskService { task_mgr .alter_task(&task.task_name, &AlterTaskOptions::Suspend) .await??; - task_mgr.accept(&task_key).await?; } Result::Ok(()) @@ -428,13 +417,12 @@ impl TaskService { )?; } TaskMessage::DeleteTask(task_name) => { - if let Some(_guard) = fn_lock(self, &task_key).await? { + if task_mgr.accept(&task_key).await? { self.clean_task_afters(&task_name).await?; } if let Some(token) = scheduled_tasks.remove(&task_name) { token.cancel(); } - task_mgr.accept(&task_key).await?; task_mgr .accept(&TaskMessageIdent::new( tenant, @@ -443,15 +431,14 @@ impl TaskService { .await?; } TaskMessage::AfterTask(task) => { - let Some(_guard) = fn_lock(self, &task_key).await? else { + if !task_mgr.accept(&task_key).await? { continue; - }; + } match task.status { Status::Suspended => continue, Status::Started => (), } self.update_task_afters(&task).await?; - task_mgr.accept(&task_key).await?; } } } diff --git a/src/query/storages/system/src/lib.rs b/src/query/storages/system/src/lib.rs index a03c00f73f1a2..c5a2a5e7a365e 100644 --- a/src/query/storages/system/src/lib.rs +++ b/src/query/storages/system/src/lib.rs @@ -46,6 +46,7 @@ mod notification_history_table; mod notifications_table; mod one_table; mod password_policies_table; +mod private_task_history_table; mod procedures_table; mod processes_table; mod query_cache_table; @@ -98,6 +99,7 @@ pub use notifications_table::parse_notifications_to_datablock; pub use notifications_table::NotificationsTable; pub use one_table::OneTable; pub use password_policies_table::PasswordPoliciesTable; +pub use private_task_history_table::PrivateTaskHistoryTable; pub use procedures_table::ProceduresTable; pub use processes_table::ProcessesTable; pub use query_cache_table::QueryCacheTable; diff --git 
a/src/query/storages/system/src/private_task_history_table.rs b/src/query/storages/system/src/private_task_history_table.rs new file mode 100644 index 0000000000000..495866f2bb14e --- /dev/null +++ b/src/query/storages/system/src/private_task_history_table.rs @@ -0,0 +1,128 @@ +// Copyright 2021 Datafuse Labs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use std::collections::BTreeMap; +use std::sync::Arc; + +use databend_common_catalog::catalog::CATALOG_DEFAULT; +use databend_common_catalog::table::Table; +use databend_common_meta_app::schema::CatalogInfo; +use databend_common_meta_app::schema::CatalogNameIdent; +use databend_common_meta_app::schema::TableIdent; +use databend_common_meta_app::schema::TableInfo; +use databend_common_meta_app::schema::TableMeta; +use databend_common_meta_app::tenant::Tenant; +use databend_common_storages_view::view_table::ViewTable; +use databend_common_storages_view::view_table::QUERY; + +use crate::generate_catalog_meta; + +pub struct PrivateTaskHistoryTable {} + +impl PrivateTaskHistoryTable { + // desc system.task_history; + // +------------------------------+---------------------+------+-----+---------------------------+--------+ + // | Field | Type | Null | Key | Default | Extra | + // +------------------------------+---------------------+------+-----+---------------------------+--------+ + // | task_id | bigint unsigned | YES | | NULL | | + // | task_name | text | NO | | NULL | | + // | query_text | text | NO | | 
NULL | | + // | when_condition | text | YES | | NULL | | + // | after | text | YES | | NULL | | + // | comment | text | YES | | NULL | | + // | owner | text | YES | | NULL | | + // | owner_user | text | YES | | NULL | | + // | warehouse_name | text | YES | | NULL | | + // | using_warehouse_size | text | YES | | NULL | | + // | schedule_type | integer | YES | | NULL | | + // | interval | integer | YES | | NULL | | + // | interval_secs | integer | YES | | NULL | | + // | interval_milliseconds | bigint unsigned | YES | | NULL | | + // | cron | text | YES | | NULL | | + // | time_zone | text | YES | | 'UTC' | | + // | run_id | bigint unsigned | YES | | NULL | | + // | attempt_number | integer | YES | | NULL | | + // | state | text | NO | | 'SCHEDULED' | | + // | error_code | bigint | YES | | NULL | | + // | error_message | text | YES | | NULL | | + // | root_task_id | bigint unsigned | YES | | NULL | | + // | scheduled_at | timestamp | YES | | CURRENT_TIMESTAMP | | + // | completed_at | timestamp | YES | | NULL | | + // | next_scheduled_at | timestamp | YES | | CURRENT_TIMESTAMP | | + // | error_integration | text | YES | | NULL | | + // | status | text | YES | | NULL | | + // | created_at | timestamp | YES | | NULL | | + // | updated_at | timestamp | YES | | NULL | | + // | session_params | variant | YES | | NULL | | + // | last_suspended_at | timestamp | YES | | NULL | | + // | suspend_task_after_num_failures | integer | YES | | NULL | | + // +------------------------------+---------------------+------+-----+---------------------------+--------+ + pub fn create(table_id: u64) -> Arc { + let query = "SELECT + task_id, + task_name, + query_text, + when_condition, + after, + comment, + owner, + owner_user, + warehouse_name, + using_warehouse_size, + schedule_type, + interval, + interval_milliseconds, + cron, + time_zone, + run_id, + attempt_number, + state, + error_code, + error_message, + root_task_id, + scheduled_at, + completed_at, + next_scheduled_at, + 
error_integration, + status, + created_at, + updated_at, + session_params, + last_suspended_at, + suspend_task_after_num_failures + FROM system_task.task_run ORDER BY run_id DESC;"; + + let mut options = BTreeMap::new(); + options.insert(QUERY.to_string(), query.to_string()); + let table_info = TableInfo { + desc: "'information_schema'.'task_history'".to_string(), + name: "task_history".to_string(), + ident: TableIdent::new(table_id, 0), + meta: TableMeta { + options, + engine: "VIEW".to_string(), + ..Default::default() + }, + catalog_info: Arc::new(CatalogInfo { + name_ident: CatalogNameIdent::new(Tenant::new_literal("dummy"), CATALOG_DEFAULT) + .into(), + meta: generate_catalog_meta(CATALOG_DEFAULT), + ..Default::default() + }), + ..Default::default() + }; + + ViewTable::create(table_info) + } +} diff --git a/tests/task/test-private-task.sh b/tests/task/test-private-task.sh index 21376230b04c3..173a0f729bc48 100644 --- a/tests/task/test-private-task.sh +++ b/tests/task/test-private-task.sh @@ -166,7 +166,7 @@ python3 scripts/ci/wait_tcp.py --timeout 30 --port 9092 echo "Started 2-node cluster with private task enabled..." 
-sleep 9 +sleep 7 response9=$(curl -s -u root: -XPOST "http://localhost:8000/v1/query" -H 'Content-Type: application/json' -d "{\"sql\": \"SELECT c1 FROM t1 ORDER BY c1\"}") @@ -180,4 +180,51 @@ else echo "Expected: $expected" echo "Actual : $actual" exit 1 -fi \ No newline at end of file +fi + +# Show Task +response10=$(curl -s -u root: -XPOST "http://localhost:8000/v1/query" -H 'Content-Type: application/json' -d "{\"sql\": \"Describe Task my_task_1\"}") +state10=$(echo "$response10" | jq -r '.state') +if [ "$state10" != "Succeeded" ]; then + echo "❌ Failed" + exit 1 +fi +actual=$(echo "$response10" | jq -c '.data') +echo "\n\nDescribe Task my_task_1: $actual" + +response11=$(curl -s -u root: -XPOST "http://localhost:8000/v1/query" -H 'Content-Type: application/json' -d "{\"sql\": \"SHOW TASKS\"}") +state11=$(echo "$response11" | jq -r '.state') +if [ "$state11" != "Succeeded" ]; then + echo "❌ Failed" + exit 1 +fi +actual=$(echo "$response11" | jq -c '.data') +echo "\n\nSHOW TASKS: $actual" + +response12=$(curl -s -u root: -XPOST "http://localhost:8000/v1/query" -H 'Content-Type: application/json' -d "{\"sql\": \"SELECT * FROM system.task_history\"}") +state12=$(echo "$response12" | jq -r '.state') +if [ "$state10" != "Succeeded" ]; then + echo "❌ Failed" + exit 1 +fi +actual=$(echo "$response12" | jq -c '.data') +echo "\n\nSELECT * FROM system.task_history: $actual" + +# Drop Task +response13=$(curl -s -u root: -XPOST "http://localhost:8000/v1/query" -H 'Content-Type: application/json' -d "{\"sql\": \"DROP TASK my_task_1\"}") +state13=$(echo "$response13" | jq -r '.state') +if [ "$state13" != "Succeeded" ]; then + echo "❌ Failed" + exit 1 +else + echo "✅ Passed" +fi + +response14=$(curl -s -u root: -XPOST "http://localhost:8000/v1/query" -H 'Content-Type: application/json' -d "{\"sql\": \"EXECUTE TASK my_task_1\"}") +state14=$(echo "$response14" | jq -r '.state') +if [ "$state14" = "Succeeded" ]; then + echo "❌ Failed" + exit 1 +else + echo "✅ Passed" +fi From
c15189b709e61aafbddce818eb1caf4b1fd94efc Mon Sep 17 00:00:00 2001 From: Kould Date: Wed, 16 Jul 2025 11:15:57 +0800 Subject: [PATCH 15/25] fix: TableFunctionFactory create fail --- src/query/service/src/catalogs/default/database_catalog.rs | 2 +- .../service/src/table_functions/table_function_factory.rs | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/query/service/src/catalogs/default/database_catalog.rs b/src/query/service/src/catalogs/default/database_catalog.rs index 2438bc3bc6e9c..26ac70b4cc5c6 100644 --- a/src/query/service/src/catalogs/default/database_catalog.rs +++ b/src/query/service/src/catalogs/default/database_catalog.rs @@ -138,10 +138,10 @@ impl Debug for DatabaseCatalog { impl DatabaseCatalog { #[async_backtrace::framed] pub async fn try_create_with_config(conf: InnerConfig) -> Result { + let table_function_factory = TableFunctionFactory::create(&conf); let immutable_catalog = ImmutableCatalog::try_create_with_config(Some(&conf), None)?; let mutable_catalog = MutableCatalog::try_create_with_config(conf).await?; let session_catalog = SessionCatalog::create(mutable_catalog, SessionState::default()); - let table_function_factory = TableFunctionFactory::create(); let res = DatabaseCatalog { immutable_catalog: Arc::new(immutable_catalog), mutable_catalog: Arc::new(session_catalog), diff --git a/src/query/service/src/table_functions/table_function_factory.rs b/src/query/service/src/table_functions/table_function_factory.rs index 423caad2a817f..e393bcf902298 100644 --- a/src/query/service/src/table_functions/table_function_factory.rs +++ b/src/query/service/src/table_functions/table_function_factory.rs @@ -16,7 +16,7 @@ use std::collections::HashMap; use std::sync::Arc; use databend_common_catalog::table_args::TableArgs; -use databend_common_config::GlobalConfig; +use databend_common_config::InnerConfig; use databend_common_exception::ErrorCode; use databend_common_exception::Result; use databend_common_meta_types::MetaId; @@ 
-99,7 +99,7 @@ pub struct TableFunctionFactory { } impl TableFunctionFactory { - pub fn create() -> Self { + pub fn create(config: &InnerConfig) -> Self { let mut id = SYS_TBL_FUNC_ID_BEGIN; let mut next_id = || -> MetaId { if id >= SYS_TBL_FUC_ID_END { @@ -296,7 +296,7 @@ impl TableFunctionFactory { ), ); - if !GlobalConfig::instance().task.on { + if !config.task.on { creators.insert( "task_dependents".to_string(), (next_id(), Arc::new(TaskDependentsTable::create)), From 739b8bef0564515a2de07a7df63f6877b545c27c Mon Sep 17 00:00:00 2001 From: Kould Date: Wed, 16 Jul 2025 13:22:41 +0800 Subject: [PATCH 16/25] ci: rename to private task test --- .../actions/{test_tasks => test_private_tasks}/action.yml | 6 +++--- .github/workflows/reuse.linux.yml | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) rename .github/actions/{test_tasks => test_private_tasks}/action.yml (89%) diff --git a/.github/actions/test_tasks/action.yml b/.github/actions/test_private_tasks/action.yml similarity index 89% rename from .github/actions/test_tasks/action.yml rename to .github/actions/test_private_tasks/action.yml index 9084b6cd1ef40..545a2435ada45 100644 --- a/.github/actions/test_tasks/action.yml +++ b/.github/actions/test_private_tasks/action.yml @@ -1,5 +1,5 @@ -name: "Test task for databend query" -description: "Test task for databend query" +name: "Test private task for databend query" +description: "Test private task for databend query" runs: using: "composite" steps: @@ -27,7 +27,7 @@ runs: aws --endpoint-url http://127.0.0.1:9900/ s3 mb s3://testbucket aws --endpoint-url http://127.0.0.1:9900/ s3 cp tests/data s3://testbucket/data --recursive --no-progress - - name: Run Task Tests + - name: Run Private Task Tests shell: bash run: | bash ./tests/task/test-private-task.sh diff --git a/.github/workflows/reuse.linux.yml b/.github/workflows/reuse.linux.yml index 5d7f97fee0282..d809f71ef7309 100644 --- a/.github/workflows/reuse.linux.yml +++ b/.github/workflows/reuse.linux.yml @@ 
-183,7 +183,7 @@ jobs: - uses: ./.github/actions/test_logs timeout-minutes: 20 - test_tasks: + test_private_tasks: needs: [ build, check ] runs-on: - self-hosted @@ -198,7 +198,7 @@ jobs: with: runner_provider: ${{ inputs.runner_provider }} type: ${{ inputs.license_type }} - - uses: ./.github/actions/test_tasks + - uses: ./.github/actions/test_private_tasks timeout-minutes: 20 test_meta_cluster: From 2e9d55ae0489868a0b2f44a5737c4cef5b35b928 Mon Sep 17 00:00:00 2001 From: Kould Date: Wed, 16 Jul 2025 14:44:12 +0800 Subject: [PATCH 17/25] chore: add cron test --- src/query/management/src/task/task_mgr.rs | 14 ++++++----- src/query/service/src/task/service.rs | 6 ++--- tests/task/test-private-task.sh | 30 +++++++++++++++++++++++ 3 files changed, 41 insertions(+), 9 deletions(-) diff --git a/src/query/management/src/task/task_mgr.rs b/src/query/management/src/task/task_mgr.rs index 22c8d1b915539..5982022cf9af3 100644 --- a/src/query/management/src/task/task_mgr.rs +++ b/src/query/management/src/task/task_mgr.rs @@ -274,12 +274,14 @@ impl TaskMgr { match schedule_options.schedule_type { ScheduleType::IntervalType => (), ScheduleType::CronType => { - if let Err(e) = schedule_options.time_zone.as_ref().unwrap().parse::() { - return Ok(Err(TaskError::InvalidTimezone { - tenant: self.tenant.tenant_name().to_string(), - name: task.task_name.to_string(), - reason: e.to_string(), - })); + if let Some(tz) = &schedule_options.time_zone { + if let Err(e) = tz.parse::() { + return Ok(Err(TaskError::InvalidTimezone { + tenant: self.tenant.tenant_name().to_string(), + name: task.task_name.to_string(), + reason: e.to_string(), + })); + } } if let Err(e) = Schedule::from_str(schedule_options.cron.as_ref().unwrap()) { return Ok(Err(TaskError::InvalidCron { diff --git a/src/query/service/src/task/service.rs b/src/query/service/src/task/service.rs index 479a398498ef5..400d9816ad800 100644 --- a/src/query/service/src/task/service.rs +++ b/src/query/service/src/task/service.rs @@ -290,9 
+290,9 @@ impl TaskService { let tz = schedule_options .time_zone .as_ref() - .unwrap() - .parse::() - .unwrap(); + .map(|tz| tz.parse::()) + .transpose()? + .unwrap_or(Tz::UCT); let schedule = Schedule::from_str(cron_expr).unwrap(); runtime diff --git a/tests/task/test-private-task.sh b/tests/task/test-private-task.sh index 173a0f729bc48..aa3fe7582871f 100644 --- a/tests/task/test-private-task.sh +++ b/tests/task/test-private-task.sh @@ -228,3 +228,33 @@ if [ "$state14" = "Succeeded" ]; then else echo "✅ Passed" fi + +response15=$(curl -s -u root: -XPOST "http://localhost:8000/v1/query" -H 'Content-Type: application/json' -d "{\"sql\": \"CREATE TABLE t2 (c1 int)\"}") +create_table_query_id_1=$(echo $response15 | jq -r '.id') +echo "Create Table Query ID: $create_table_query_id_1" + +response16=$(curl -s -u root: -XPOST "http://localhost:8000/v1/query" -H 'Content-Type: application/json' -d "{\"sql\": \"CREATE TASK my_task_4 WAREHOUSE = 'mywh' SCHEDULE = USING CRON '*/5 * * * * ?' AS insert into t2 values(0)\"}") +create_task_4_query_id=$(echo $response16 | jq -r '.id') +echo "Create Task 4 Query ID: $create_task_4_query_id" + +sleep 1 + +response17=$(curl -s -u root: -XPOST "http://localhost:8000/v1/query" -H 'Content-Type: application/json' -d "{\"sql\": \"ALTER TASK my_task_4 RESUME\"}") +alter_task_4_query_id=$(echo $response17 | jq -r '.id') +echo "Resume Task 4 ID: $alter_task_4_query_id" + +sleep 11 + +response18=$(curl -s -u root: -XPOST "http://localhost:8000/v1/query" -H 'Content-Type: application/json' -d "{\"sql\": \"SELECT c1 FROM t2 ORDER BY c1\"}") + +actual=$(echo "$response18" | jq -c '.data') +expected='[["0"],["0"]]' + +if [ "$actual" = "$expected" ]; then + echo "✅ Query result matches expected" +else + echo "❌ Mismatch" + echo "Expected: $expected" + echo "Actual : $actual" + exit 1 +fi From a5f49314d901bc43ca92bad0396dfd81b7c959c9 Mon Sep 17 00:00:00 2001 From: Kould Date: Wed, 16 Jul 2025 17:02:45 +0800 Subject: [PATCH 18/25] feat: add system 
table: `system.tasks` --- .../src/databases/system/system_database.rs | 2 + .../service/src/interpreters/task/private.rs | 54 +------ src/query/storages/system/src/lib.rs | 2 + .../system/src/private_tasks_table.rs | 138 ++++++++++++++++++ tests/task/test-private-task.sh | 11 +- 5 files changed, 158 insertions(+), 49 deletions(-) create mode 100644 src/query/storages/system/src/private_tasks_table.rs diff --git a/src/query/service/src/databases/system/system_database.rs b/src/query/service/src/databases/system/system_database.rs index 36cf36da60243..52cd2b78e6319 100644 --- a/src/query/service/src/databases/system/system_database.rs +++ b/src/query/service/src/databases/system/system_database.rs @@ -50,6 +50,7 @@ use databend_common_storages_system::NotificationsTable; use databend_common_storages_system::OneTable; use databend_common_storages_system::PasswordPoliciesTable; use databend_common_storages_system::PrivateTaskHistoryTable; +use databend_common_storages_system::PrivateTasksTable; use databend_common_storages_system::ProceduresTable; use databend_common_storages_system::ProcessesTable; use databend_common_storages_system::QueryCacheTable; @@ -166,6 +167,7 @@ impl SystemDatabase { )), ]); if config.task.on { + table_list.push(PrivateTasksTable::create(sys_db_meta.next_table_id())); table_list.push(PrivateTaskHistoryTable::create(sys_db_meta.next_table_id())); } else { table_list.push(TasksTable::create(sys_db_meta.next_table_id())); diff --git a/src/query/service/src/interpreters/task/private.rs b/src/query/service/src/interpreters/task/private.rs index 72415d3b3aeff..50ed7c43da273 100644 --- a/src/query/service/src/interpreters/task/private.rs +++ b/src/query/service/src/interpreters/task/private.rs @@ -17,19 +17,18 @@ use std::sync::Arc; use chrono::Utc; use databend_common_ast::ast::TaskSql; use databend_common_catalog::table_context::TableContext; -use databend_common_cloud_control::pb; use databend_common_cloud_control::task_utils; use 
databend_common_exception::Result; use databend_common_management::task::TaskMgr; use databend_common_meta_app::principal::task::EMPTY_TASK_ID; use databend_common_meta_app::principal::Status; -use databend_common_meta_app::principal::Task; use databend_common_sql::plans::AlterTaskPlan; use databend_common_sql::plans::CreateTaskPlan; use databend_common_sql::plans::DescribeTaskPlan; use databend_common_sql::plans::DropTaskPlan; use databend_common_sql::plans::ExecuteTaskPlan; use databend_common_sql::plans::ShowTasksPlan; +use databend_common_storages_system::PrivateTasksTable; use databend_common_users::UserApiProvider; use crate::interpreters::task::TaskInterpreter; @@ -37,50 +36,6 @@ use crate::sessions::QueryContext; pub(crate) struct PrivateTaskInterpreter; -impl PrivateTaskInterpreter { - fn task_trans(task: Task) -> Result { - Ok(task_utils::Task { - task_id: task.task_id, - task_name: task.task_name, - query_text: task.query_text, - condition_text: task.when_condition.unwrap_or_default(), - after: task.after, - comment: task.comment, - owner: task.owner, - schedule_options: task - .schedule_options - .map(|schedule_options| { - let options = pb::ScheduleOptions { - interval: schedule_options.interval, - cron: schedule_options.cron, - time_zone: schedule_options.time_zone, - schedule_type: schedule_options.schedule_type as i32, - milliseconds_interval: schedule_options.milliseconds_interval, - }; - task_utils::format_schedule_options(&options) - }) - .transpose()?, - warehouse_options: task.warehouse_options.map(|warehouse_options| { - pb::WarehouseOptions { - warehouse: warehouse_options.warehouse, - using_warehouse_size: warehouse_options.using_warehouse_size, - } - }), - next_scheduled_at: task.next_scheduled_at, - suspend_task_after_num_failures: task.suspend_task_after_num_failures.map(|i| i as i32), - error_integration: task.error_integration, - status: match task.status { - Status::Suspended => task_utils::Status::Suspended, - Status::Started => 
task_utils::Status::Started, - }, - created_at: task.created_at, - updated_at: task.updated_at, - last_suspended_at: task.last_suspended_at, - session_params: task.session_params, - }) - } -} - impl TaskInterpreter for PrivateTaskInterpreter { async fn create_task(&self, ctx: &Arc, plan: &CreateTaskPlan) -> Result<()> { let plan = plan.clone(); @@ -149,7 +104,7 @@ impl TaskInterpreter for PrivateTaskInterpreter { .task_api(&plan.tenant) .describe_task(&plan.task_name) .await??; - task.map(Self::task_trans).transpose() + task.map(PrivateTasksTable::task_trans).transpose() } async fn drop_task(&self, _ctx: &Arc, plan: &DropTaskPlan) -> Result<()> { @@ -171,6 +126,9 @@ impl TaskInterpreter for PrivateTaskInterpreter { .list_task() .await?; - tasks.into_iter().map(Self::task_trans).try_collect() + tasks + .into_iter() + .map(PrivateTasksTable::task_trans) + .try_collect() } } diff --git a/src/query/storages/system/src/lib.rs b/src/query/storages/system/src/lib.rs index c5a2a5e7a365e..a4d5723f701e5 100644 --- a/src/query/storages/system/src/lib.rs +++ b/src/query/storages/system/src/lib.rs @@ -47,6 +47,7 @@ mod notifications_table; mod one_table; mod password_policies_table; mod private_task_history_table; +mod private_tasks_table; mod procedures_table; mod processes_table; mod query_cache_table; @@ -100,6 +101,7 @@ pub use notifications_table::NotificationsTable; pub use one_table::OneTable; pub use password_policies_table::PasswordPoliciesTable; pub use private_task_history_table::PrivateTaskHistoryTable; +pub use private_tasks_table::PrivateTasksTable; pub use procedures_table::ProceduresTable; pub use processes_table::ProcessesTable; pub use query_cache_table::QueryCacheTable; diff --git a/src/query/storages/system/src/private_tasks_table.rs b/src/query/storages/system/src/private_tasks_table.rs new file mode 100644 index 0000000000000..76ca8db802316 --- /dev/null +++ b/src/query/storages/system/src/private_tasks_table.rs @@ -0,0 +1,138 @@ +// Copyright 2021 
Datafuse Labs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use std::sync::Arc; + +use databend_common_catalog::plan::PushDownInfo; +use databend_common_catalog::table::Table; +use databend_common_catalog::table_context::TableContext; +use databend_common_cloud_control::pb; +use databend_common_cloud_control::task_utils; +use databend_common_exception::Result; +use databend_common_expression::infer_table_schema; +use databend_common_expression::DataBlock; +use databend_common_meta_app::principal::Status; +use databend_common_meta_app::principal::Task; +use databend_common_meta_app::schema::TableIdent; +use databend_common_meta_app::schema::TableInfo; +use databend_common_meta_app::schema::TableMeta; +use databend_common_sql::plans::task_schema; +use databend_common_users::UserApiProvider; +use itertools::Itertools; + +use crate::parse_tasks_to_datablock; +use crate::table::AsyncOneBlockSystemTable; +use crate::table::AsyncSystemTable; + +pub struct PrivateTasksTable { + table_info: TableInfo, +} + +#[async_trait::async_trait] +impl AsyncSystemTable for PrivateTasksTable { + const NAME: &'static str = "system.tasks"; + + fn get_table_info(&self) -> &TableInfo { + &self.table_info + } + + async fn get_full_data( + &self, + ctx: Arc, + push_downs: Option, + ) -> Result { + let tenant = ctx.get_tenant(); + + let tasks = UserApiProvider::instance() + .task_api(&tenant) + .list_task() + .await?; + let tasks_len = tasks.len(); + let trans_tasks = tasks + 
.into_iter() + .take( + push_downs + .as_ref() + .and_then(|v| v.limit) + .unwrap_or(tasks_len), + ) + .map(Self::task_trans) + .try_collect()?; + + parse_tasks_to_datablock(trans_tasks) + } +} + +impl PrivateTasksTable { + pub fn create(table_id: u64) -> Arc { + let schema = infer_table_schema(&task_schema()).expect("failed to parse task table schema"); + + let table_info = TableInfo { + desc: "'system'.'tasks'".to_string(), + name: "tasks".to_string(), + ident: TableIdent::new(table_id, 0), + meta: TableMeta { + schema, + engine: "SystemTasks".to_string(), + + ..Default::default() + }, + ..Default::default() + }; + + AsyncOneBlockSystemTable::create(Self { table_info }) + } + + pub fn task_trans(task: Task) -> Result { + Ok(task_utils::Task { + task_id: task.task_id, + task_name: task.task_name, + query_text: task.query_text, + condition_text: task.when_condition.unwrap_or_default(), + after: task.after, + comment: task.comment, + owner: task.owner, + schedule_options: task + .schedule_options + .map(|schedule_options| { + let options = pb::ScheduleOptions { + interval: schedule_options.interval, + cron: schedule_options.cron, + time_zone: schedule_options.time_zone, + schedule_type: schedule_options.schedule_type as i32, + milliseconds_interval: schedule_options.milliseconds_interval, + }; + task_utils::format_schedule_options(&options) + }) + .transpose()?, + warehouse_options: task.warehouse_options.map(|warehouse_options| { + pb::WarehouseOptions { + warehouse: warehouse_options.warehouse, + using_warehouse_size: warehouse_options.using_warehouse_size, + } + }), + next_scheduled_at: task.next_scheduled_at, + suspend_task_after_num_failures: task.suspend_task_after_num_failures.map(|i| i as i32), + error_integration: task.error_integration, + status: match task.status { + Status::Suspended => task_utils::Status::Suspended, + Status::Started => task_utils::Status::Started, + }, + created_at: task.created_at, + updated_at: task.updated_at, + last_suspended_at: 
task.last_suspended_at, + session_params: task.session_params, + }) + } +} diff --git a/tests/task/test-private-task.sh b/tests/task/test-private-task.sh index aa3fe7582871f..c68000da9b324 100644 --- a/tests/task/test-private-task.sh +++ b/tests/task/test-private-task.sh @@ -203,7 +203,7 @@ echo "\n\nSHOW TASKS: $actual" response12=$(curl -s -u root: -XPOST "http://localhost:8000/v1/query" -H 'Content-Type: application/json' -d "{\"sql\": \"SELECT * FROM system.task_history\"}") state12=$(echo "$response12" | jq -r '.state') -if [ "$state10" != "Succeeded" ]; then +if [ "$state12" != "Succeeded" ]; then echo "❌ Failed" exit 1 fi @@ -258,3 +258,12 @@ else echo "Actual : $actual" exit 1 fi + +response19=$(curl -s -u root: -XPOST "http://localhost:8000/v1/query" -H 'Content-Type: application/json' -d "{\"sql\": \"SELECT * FROM system.tasks\"}") +state19=$(echo "$response19" | jq -r '.state') +if [ "$state19" != "Succeeded" ]; then + echo "❌ Failed" + exit 1 +fi +actual=$(echo "$response19" | jq -c '.data') +echo "\n\nSELECT * FROM system.tasks: $actual" From 863f1209620cdfe97b330a91d7ce54495219e78f Mon Sep 17 00:00:00 2001 From: Kould Date: Wed, 16 Jul 2025 17:20:52 +0800 Subject: [PATCH 19/25] chore: fix `update_or_create_task_run` correct on `ScheduleTask` --- src/meta/app/src/principal/task.rs | 9 +++++++ src/query/service/src/task/service.rs | 35 +++++++++++++++------------ 2 files changed, 29 insertions(+), 15 deletions(-) diff --git a/src/meta/app/src/principal/task.rs b/src/meta/app/src/principal/task.rs index 08109fccfd4b0..e840f6951c599 100644 --- a/src/meta/app/src/principal/task.rs +++ b/src/meta/app/src/principal/task.rs @@ -136,6 +136,15 @@ impl TaskMessage { 0 } + /// Returns the inclusive range of key prefixes used by `TaskMessage`. + /// + /// This range can be used to scan all keys generated by `TaskMessage::key()` + /// and related methods (e.g., `schedule_key`). 
The prefix `0` is prepended + /// to all task-related keys to group them under the same prefix range, + /// enabling efficient key scanning or iteration. + /// + /// The returned range is (0, 1), which includes all keys starting with `0-` + /// (as produced by `TaskMessage::prefix()`), and excludes any other unrelated prefixes. pub fn prefix_range() -> (i64, i64) { (0, 1) } diff --git a/src/query/service/src/task/service.rs b/src/query/service/src/task/service.rs index 400d9816ad800..b0c3c1551ed01 100644 --- a/src/query/service/src/task/service.rs +++ b/src/query/service/src/task/service.rs @@ -234,21 +234,24 @@ impl TaskService { .await }; - task_service - .update_or_create_task_run(&TaskRun { - task: task.clone(), - run_id: Self::make_run_id(), - attempt_number: task.suspend_task_after_num_failures.unwrap_or(0) - as i32, - state: State::Scheduled, - scheduled_at: Utc::now(), - completed_at: None, - error_code: 0, - error_message: None, - root_task_id: EMPTY_TASK_ID, - }) - .await?; - + let fn_new_task_run = async |task_service: &TaskService, task: &Task| { + task_service + .update_or_create_task_run(&TaskRun { + task: task.clone(), + run_id: Self::make_run_id(), + attempt_number: task + .suspend_task_after_num_failures + .unwrap_or(0) + as i32, + state: State::Scheduled, + scheduled_at: Utc::now(), + completed_at: None, + error_code: 0, + error_message: None, + root_task_id: EMPTY_TASK_ID, + }) + .await + }; match schedule_options.schedule_type { ScheduleType::IntervalType => { let task_mgr = task_mgr.clone(); @@ -269,6 +272,7 @@ impl TaskService { let Some(_guard) = fn_lock(&task_service, &task_key, duration.as_millis() as u64).await? else { continue; }; + fn_new_task_run(&task_service, &task).await?; task_mgr.send(TaskMessage::ExecuteTask(task.clone())).await?; } _ = child_token.cancelled() => { @@ -312,6 +316,7 @@ impl TaskService { let Some(_guard) = fn_lock(&task_service, &task_key, duration.as_millis() as u64).await? 
else { continue; }; + fn_new_task_run(&task_service, &task).await?; task_mgr.send(TaskMessage::ExecuteTask(task.clone())).await?; } _ = child_token.cancelled() => { From 547aed3ba39d76874bb901fca1f6e779c58020ef Mon Sep 17 00:00:00 2001 From: Kould Date: Thu, 17 Jul 2025 14:54:48 +0800 Subject: [PATCH 20/25] chore: add Task when test on `test-private-task.sh` --- src/query/service/src/task/service.rs | 94 +++++++++++++--------- tests/task/test-private-task.sh | 111 +++++++++++++++++++++++++- 2 files changed, 167 insertions(+), 38 deletions(-) diff --git a/src/query/service/src/task/service.rs b/src/query/service/src/task/service.rs index b0c3c1551ed01..a34015d142a14 100644 --- a/src/query/service/src/task/service.rs +++ b/src/query/service/src/task/service.rs @@ -223,6 +223,7 @@ impl TaskService { let task_name = task.task_name.to_string(); let task_name_clone = task_name.clone(); let task_service = TaskService::instance(); + let owner = Self::get_task_owner(&task, &tenant).await?; let fn_lock = async |task_service: &TaskService, @@ -272,6 +273,9 @@ impl TaskService { let Some(_guard) = fn_lock(&task_service, &task_key, duration.as_millis() as u64).await? else { continue; }; + if !Self::check_when(&task, &owner, &task_service).await.unwrap() { + continue; + } fn_new_task_run(&task_service, &task).await?; task_mgr.send(TaskMessage::ExecuteTask(task.clone())).await?; } @@ -311,11 +315,15 @@ impl TaskService { .unwrap_or(Duration::ZERO); task.next_scheduled_at = Some(Utc::now() + duration); + task_mgr.update_task(task.clone()).await??; tokio::select! { _ = sleep(duration) => { let Some(_guard) = fn_lock(&task_service, &task_key, duration.as_millis() as u64).await? else { continue; }; + if !Self::check_when(&task, &owner, &task_service).await? 
{ + continue; + } fn_new_task_run(&task_service, &task).await?; task_mgr.send(TaskMessage::ExecuteTask(task.clone())).await?; } @@ -389,9 +397,20 @@ impl TaskService { .ok_or_else(|| { ErrorCode::UnknownTask(next_task) })?; - task_mgr - .send(TaskMessage::ExecuteTask(next_task)) - .await?; + let next_owner = + Self::get_task_owner(&next_task, &tenant) + .await?; + if Self::check_when( + &next_task, + &next_owner, + &task_service, + ) + .await? + { + task_mgr + .send(TaskMessage::ExecuteTask(next_task)) + .await?; + } } break; } @@ -422,12 +441,12 @@ impl TaskService { )?; } TaskMessage::DeleteTask(task_name) => { - if task_mgr.accept(&task_key).await? { - self.clean_task_afters(&task_name).await?; - } if let Some(token) = scheduled_tasks.remove(&task_name) { token.cancel(); } + if task_mgr.accept(&task_key).await? { + self.clean_task_afters(&task_name).await?; + } task_mgr .accept(&TaskMessageIdent::new( tenant, @@ -495,35 +514,6 @@ impl TaskService { async fn spawn_task(task: Task, user: UserInfo) -> Result<()> { let task_service = TaskService::instance(); - if let Some(when_condition) = &task.when_condition { - let result = task_service - .execute_sql(Some(user.clone()), &format!("SELECT {when_condition}")) - .await?; - let is_met = result - .first() - .and_then(|block| block.get_by_offset(0).index(0)) - .and_then(|scalar| { - scalar - .as_boolean() - .cloned() - .map(Ok) - .or_else(|| scalar.as_string().map(|str| str.trim().parse::())) - }) - .transpose() - .map_err(|err| { - ErrorCode::TaskWhenConditionNotMet(format!( - "when condition error for task: {}, {}", - task.task_name, err - )) - })? 
- .unwrap_or(false); - if !is_met { - return Err(ErrorCode::TaskWhenConditionNotMet(format!( - "when condition not met for task: {}", - task.task_name - ))); - } - } task_service .execute_sql(Some(user), &task.query_text) .await?; @@ -531,6 +521,38 @@ impl TaskService { Ok(()) } + async fn check_when( + task: &Task, + user: &UserInfo, + task_service: &Arc, + ) -> Result { + let Some(when_condition) = &task.when_condition else { + return Ok(true); + }; + let result = task_service + .execute_sql(Some(user.clone()), &format!("SELECT {when_condition}")) + .await + .unwrap(); + Ok(result + .first() + .and_then(|block| block.get_by_offset(0).index(0)) + .and_then(|scalar| { + scalar + .as_boolean() + .cloned() + .map(Ok) + .or_else(|| scalar.as_string().map(|str| str.trim().parse::())) + }) + .transpose() + .map_err(|err| { + ErrorCode::TaskWhenConditionNotMet(format!( + "when condition error for task: {}, {}", + task.task_name, err + )) + })? + .unwrap_or(false)) + } + pub async fn create_context(&self, other_user: Option) -> Result> { let (user, role) = if let Some(other_user) = other_user { (other_user, None) @@ -792,7 +814,7 @@ WHERE ta.task_name = '{task_name}' task.query_text.replace('\'', "''"), task.when_condition .as_ref() - .map(|s| format!("'{s}'").replace('\'', "''")) + .map(|s| format!("'{}'", s.replace('\'', "''"))) .unwrap_or_else(|| "null".to_string()), if !task.after.is_empty() { format!("'{}'", task.after.join(", ")) diff --git a/tests/task/test-private-task.sh b/tests/task/test-private-task.sh index c68000da9b324..7f446fe998f84 100644 --- a/tests/task/test-private-task.sh +++ b/tests/task/test-private-task.sh @@ -132,7 +132,7 @@ response8=$(curl -s -u root: -XPOST "http://localhost:8000/v1/query" -H 'Content alter_task_2_query_id=$(echo $response8 | jq -r '.id') echo "Resume Task 2 ID: $alter_task_2_query_id" -sleep 10 +sleep 8 response9=$(curl -s -u root: -XPOST "http://localhost:8000/v1/query" -H 'Content-Type: application/json' -d "{\"sql\": 
\"SELECT c1 FROM t1 ORDER BY c1\"}") @@ -166,7 +166,7 @@ python3 scripts/ci/wait_tcp.py --timeout 30 --port 9092 echo "Started 2-node cluster with private task enabled..." -sleep 7 +sleep 9 response9=$(curl -s -u root: -XPOST "http://localhost:8000/v1/query" -H 'Content-Type: application/json' -d "{\"sql\": \"SELECT c1 FROM t1 ORDER BY c1\"}") @@ -267,3 +267,110 @@ if [ "$state19" != "Succeeded" ]; then fi actual=$(echo "$response19" | jq -c '.data') echo "\n\nSELECT * FROM system.tasks: $actual" + +# Test Task When on After & Schedule & Execute +response20=$(curl -s -u root: -XPOST "http://localhost:8000/v1/query" -H 'Content-Type: application/json' -d "{\"sql\": \"CREATE TABLE t3 (c1 int, c2 int)\"}") +create_table_query_id_2=$(echo $response20 | jq -r '.id') +echo "Create Table Query ID: $create_table_query_id_2" + +response21=$(curl -s -u root: -XPOST "http://localhost:8000/v1/query" -H 'Content-Type: application/json' -d "{\"sql\": \"CREATE TASK my_task_5 WAREHOUSE = 'mywh' SCHEDULE = 3 SECOND WHEN EXISTS (SELECT 1 FROM t3 WHERE c2 = 1) AS insert into t3 values(1, 0)\"}") +create_task_5_query_id=$(echo $response21 | jq -r '.id') +echo "Create Task 5 Query ID: $create_task_5_query_id" + +response22=$(curl -s -u root: -XPOST "http://localhost:8000/v1/query" -H 'Content-Type: application/json' -d "{\"sql\": \"CREATE TASK my_task_6 WAREHOUSE = 'mywh' SCHEDULE = 5 SECOND WHEN EXISTS (SELECT 1 FROM t3 WHERE c2 = 1) AS insert into t3 values(2, 0)\"}") +create_task_6_query_id=$(echo $response22 | jq -r '.id') +echo "Create Task 6 Query ID: $create_task_6_query_id" + +response23=$(curl -s -u root: -XPOST "http://localhost:8000/v1/query" -H 'Content-Type: application/json' -d "{\"sql\": \"CREATE TASK my_task_7 WAREHOUSE = 'mywh' AFTER 'my_task_5', 'my_task_6' WHEN EXISTS (SELECT 1 FROM t3 WHERE c2 = 2) AS insert into t3 values(3, 0)\"}") +create_task_7_query_id=$(echo $response23 | jq -r '.id') +echo "Create Task 7 Query ID: $create_task_7_query_id" + +sleep 1 + 
+response24=$(curl -s -u root: -XPOST "http://localhost:8000/v1/query" -H 'Content-Type: application/json' -d "{\"sql\": \"ALTER TASK my_task_5 RESUME\"}") +alter_task_5_query_id=$(echo $response24 | jq -r '.id') +echo "Resume Task 5 ID: $alter_task_5_query_id" + +response25=$(curl -s -u root: -XPOST "http://localhost:8000/v1/query" -H 'Content-Type: application/json' -d "{\"sql\": \"ALTER TASK my_task_6 RESUME\"}") +alter_task_6_query_id=$(echo $response25 | jq -r '.id') +echo "Resume Task 6 ID: $alter_task_6_query_id" + +response26=$(curl -s -u root: -XPOST "http://localhost:8000/v1/query" -H 'Content-Type: application/json' -d "{\"sql\": \"ALTER TASK my_task_7 RESUME\"}") +alter_task_7_query_id=$(echo $response26 | jq -r '.id') +echo "Resume Task 7 ID: $alter_task_7_query_id" + +sleep 6 + +response27=$(curl -s -u root: -XPOST "http://localhost:8000/v1/query" -H 'Content-Type: application/json' -d "{\"sql\": \"SELECT c1 FROM t3 ORDER BY c1\"}") + +actual=$(echo "$response27" | jq -c '.data') +expected='[]' + +if [ "$actual" = "$expected" ]; then + echo "✅ Query result matches expected" +else + echo "❌ Mismatch" + echo "Expected: $expected" + echo "Actual : $actual" + exit 1 +fi + +response28=$(curl -s -u root: -XPOST "http://localhost:8000/v1/query" -H 'Content-Type: application/json' -d "{\"sql\": \"INSERT INTO t3 VALUES (1, 1)\"}") +insert_t3_query_id=$(echo $response28 | jq -r '.id') +echo "INSERT T3 (1, 1) ID: $insert_t3_query_id" + +sleep 5 + +response29=$(curl -s -u root: -XPOST "http://localhost:8000/v1/query" -H 'Content-Type: application/json' -d "{\"sql\": \"SELECT c1, c2 FROM t3 ORDER BY c1, c2\"}") + +actual=$(echo "$response29" | jq -c '.data') +expected='[["1","0"],["1","1"],["2","0"]]' + +if [ "$actual" = "$expected" ]; then + echo "✅ Query result matches expected" +else + echo "❌ Mismatch" + echo "Expected: $expected" + echo "Actual : $actual" + exit 1 +fi + +response30=$(curl -s -u root: -XPOST "http://localhost:8000/v1/query" -H 'Content-Type: 
application/json' -d "{\"sql\": \"EXECUTE TASK my_task_7\"}") +execute_task_7_query_id=$(echo $response30 | jq -r '.id') +echo "Execute Task 7 ID: $execute_task_7_query_id" + +sleep 1 + +response31=$(curl -s -u root: -XPOST "http://localhost:8000/v1/query" -H 'Content-Type: application/json' -d "{\"sql\": \"SELECT c1, c2 FROM t3 ORDER BY c1, c2\"}") + +actual=$(echo "$response31" | jq -c '.data') +expected='[["1","0"],["1","1"],["2","0"],["3","0"]]' + +if [ "$actual" = "$expected" ]; then + echo "✅ Query result matches expected" +else + echo "❌ Mismatch" + echo "Expected: $expected" + echo "Actual : $actual" + exit 1 +fi + +response32=$(curl -s -u root: -XPOST "http://localhost:8000/v1/query" -H 'Content-Type: application/json' -d "{\"sql\": \"INSERT INTO t3 VALUES (2, 2)\"}") +insert_t3_query_id_1=$(echo $response32 | jq -r '.id') +echo "INSERT T3 (2, 2) ID: $insert_t3_query_id_1" + +sleep 6 + +response33=$(curl -s -u root: -XPOST "http://localhost:8000/v1/query" -H 'Content-Type: application/json' -d "{\"sql\": \"SELECT c1, c2 FROM t3 ORDER BY c1, c2\"}") + +actual=$(echo "$response33" | jq -c '.data') +expected='[["1","0"],["1","0"],["1","0"],["1","1"],["2","0"],["2","0"],["2","2"],["3","0"],["3","0"],["3","0"]]' + +if [ "$actual" = "$expected" ]; then + echo "✅ Query result matches expected" +else + echo "❌ Mismatch" + echo "Expected: $expected" + echo "Actual : $actual" + exit 1 +fi From fc5fe535c5d6fcf031a3b60a653fddd5f46ca628 Mon Sep 17 00:00:00 2001 From: Kould Date: Thu, 17 Jul 2025 15:12:58 +0800 Subject: [PATCH 21/25] chore: add private task config check on `GlobalServices::init_with` --- src/query/service/src/global_services.rs | 5 +++++ tests/task/test-private-task.sh | 1 + 2 files changed, 6 insertions(+) diff --git a/src/query/service/src/global_services.rs b/src/query/service/src/global_services.rs index 8243b630aa357..195a29a40974d 100644 --- a/src/query/service/src/global_services.rs +++ b/src/query/service/src/global_services.rs @@ -176,6 +176,11 
@@ impl GlobalServices { GlobalHistoryLog::init(config).await?; } if config.task.on { + if config.query.cloud_control_grpc_server_address.is_some() { + return Err(ErrorCode::InvalidConfig( + "Private Task is enabled but `cloud_control_grpc_server_address` is not empty", + )); + } TaskService::init(config).await?; } diff --git a/tests/task/test-private-task.sh b/tests/task/test-private-task.sh index 7f446fe998f84..23e5fd13e5981 100644 --- a/tests/task/test-private-task.sh +++ b/tests/task/test-private-task.sh @@ -19,6 +19,7 @@ for node in 1 2; do echo "Appending history table config to node-${node}" cat ./tests/task/private_task.toml >> "$CONFIG_FILE" + sed -i '/^cloud_control_grpc_server_address/d' $CONFIG_FILE done # Start meta cluster (3 nodes - needed for HA) From 22ea27f426fbf1e37ec7baf5171b247071bbf11d Mon Sep 17 00:00:00 2001 From: Kould Date: Thu, 17 Jul 2025 15:19:34 +0800 Subject: [PATCH 22/25] chore: update Task version on Meta --- src/meta/proto-conv/tests/it/main.rs | 2 +- .../tests/it/{v135_add_task.rs => v136_add_task.rs} | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) rename src/meta/proto-conv/tests/it/{v135_add_task.rs => v136_add_task.rs} (93%) diff --git a/src/meta/proto-conv/tests/it/main.rs b/src/meta/proto-conv/tests/it/main.rs index 4405a09b49683..8d227caabe162 100644 --- a/src/meta/proto-conv/tests/it/main.rs +++ b/src/meta/proto-conv/tests/it/main.rs @@ -127,4 +127,4 @@ mod v132_remove_sequence_meta_start; mod v133_stage_file_compression; mod v134_add_sequence_meta_storage_version; mod v135_udf_immutable; -mod v135_add_task; +mod v136_add_task; diff --git a/src/meta/proto-conv/tests/it/v135_add_task.rs b/src/meta/proto-conv/tests/it/v136_add_task.rs similarity index 93% rename from src/meta/proto-conv/tests/it/v135_add_task.rs rename to src/meta/proto-conv/tests/it/v136_add_task.rs index df8f7730e58ca..f45479ffc957a 100644 --- a/src/meta/proto-conv/tests/it/v135_add_task.rs +++ b/src/meta/proto-conv/tests/it/v136_add_task.rs 
@@ -23,8 +23,8 @@ use maplit::btreemap; use crate::common; #[test] -fn test_decode_v135_add_task() -> anyhow::Result<()> { - let sequence_meta_v134 = vec![ +fn test_decode_v136_add_task() -> anyhow::Result<()> { + let task_v136 = vec![ 8, 11, 18, 6, 116, 97, 115, 107, 95, 99, 34, 16, 83, 69, 76, 69, 67, 84, 32, 42, 32, 70, 82, 79, 77, 32, 116, 49, 42, 7, 99, 111, 109, 109, 101, 110, 116, 50, 6, 112, 117, 98, 108, 105, 99, 58, 22, 8, 11, 18, 11, 51, 48, 32, 49, 50, 32, 42, 32, 42, 32, 42, 26, 3, 85, 84, @@ -35,7 +35,7 @@ fn test_decode_v135_add_task() -> anyhow::Result<()> { 48, 58, 48, 48, 58, 49, 50, 32, 85, 84, 67, 130, 1, 23, 49, 57, 55, 48, 45, 48, 49, 45, 48, 49, 32, 48, 48, 58, 48, 48, 58, 49, 51, 32, 85, 84, 67, 138, 1, 6, 116, 97, 115, 107, 95, 97, 138, 1, 6, 116, 97, 115, 107, 95, 98, 146, 1, 6, 99, 49, 32, 62, 32, 49, 154, 1, 6, 10, - 1, 97, 18, 1, 98, 170, 1, 2, 109, 101, 160, 6, 135, 1, 168, 6, 24, + 1, 97, 18, 1, 98, 170, 1, 2, 109, 101, 160, 6, 136, 1, 168, 6, 24, ]; let want = || mt::Task { @@ -68,7 +68,7 @@ fn test_decode_v135_add_task() -> anyhow::Result<()> { session_params: btreemap! 
{ s("a") => s("b") }, }; common::test_pb_from_to(func_name!(), want())?; - common::test_load_old(func_name!(), sequence_meta_v134.as_slice(), 135, want())?; + common::test_load_old(func_name!(), task_v136.as_slice(), 136, want())?; Ok(()) } From 578694f794736396aed950f2c1c649aff0988e1b Mon Sep 17 00:00:00 2001 From: Kould Date: Thu, 17 Jul 2025 15:43:59 +0800 Subject: [PATCH 23/25] chore: codefmt --- src/query/service/src/task/service.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/query/service/src/task/service.rs b/src/query/service/src/task/service.rs index a34015d142a14..508cdbc8cb447 100644 --- a/src/query/service/src/task/service.rs +++ b/src/query/service/src/task/service.rs @@ -223,7 +223,7 @@ impl TaskService { let task_name = task.task_name.to_string(); let task_name_clone = task_name.clone(); let task_service = TaskService::instance(); - let owner = Self::get_task_owner(&task, &tenant).await?; + let owner = Self::get_task_owner(&task, tenant).await?; let fn_lock = async |task_service: &TaskService, From 98addfde119eb83154bb6a8580c0ac867be652d4 Mon Sep 17 00:00:00 2001 From: Kould Date: Fri, 18 Jul 2025 00:36:44 +0800 Subject: [PATCH 24/25] chore: remove `TaskMgr::list_task_fallback` --- src/query/management/src/task/task_mgr.rs | 20 +------------------- 1 file changed, 1 insertion(+), 19 deletions(-) diff --git a/src/query/management/src/task/task_mgr.rs b/src/query/management/src/task/task_mgr.rs index 5982022cf9af3..abb4e3b8ec5b7 100644 --- a/src/query/management/src/task/task_mgr.rs +++ b/src/query/management/src/task/task_mgr.rs @@ -219,25 +219,7 @@ impl TaskMgr { let key = DirName::new(TaskIdent::new(&self.tenant, "")); let strm = self.kv_api.list_pb_values(&key).await?; - match strm.try_collect().await { - Ok(tasks) => Ok(tasks), - Err(_) => self.list_task_fallback().await, - } - } - - #[async_backtrace::framed] - #[fastrace::trace] - pub async fn list_task_fallback(&self) -> Result, MetaError> { - let key = 
TaskIdent::new(&self.tenant, "dummy"); - let dir = DirName::new(key); - let tasks = self - .kv_api - .list_pb_values(&dir) - .await? - .try_collect::>() - .await?; - - Ok(tasks) + strm.try_collect().await } #[async_backtrace::framed] From 6848da557c0ff6e933eb5132dc9c0029987b1a26 Mon Sep 17 00:00:00 2001 From: Kould Date: Sat, 19 Jul 2025 18:01:54 +0800 Subject: [PATCH 25/25] chore: codefmt --- src/meta/app/src/principal/task.rs | 46 +++++++++++++++++---------- src/query/service/src/task/service.rs | 3 +- 2 files changed, 32 insertions(+), 17 deletions(-) diff --git a/src/meta/app/src/principal/task.rs b/src/meta/app/src/principal/task.rs index e840f6951c599..f30e921bd2408 100644 --- a/src/meta/app/src/principal/task.rs +++ b/src/meta/app/src/principal/task.rs @@ -94,17 +94,26 @@ pub struct TaskRun { pub root_task_id: u64, } -impl TaskRun { - pub fn key(&self) -> String { - format!("{}@{}", self.task.task_name, self.run_id) - } +#[derive(Debug, Clone, PartialEq)] +pub enum TaskMessageType { + Execute, + Schedule, + Delete, + After, } #[derive(Debug, Clone, PartialEq)] pub enum TaskMessage { + // Execute Task immediately. If an error occurs, if it is a SQL error in the task, + // it will be recorded in the error message of the task run. ExecuteTask(Task), + // Schedule Task will try to spawn a thread in Query to continue running according to the time set in schedule ScheduleTask(Task), + // Delete the task information and try to cancel the scheduled task in the query. DeleteTask(String), + // After Task will bind Task to the tasks in Task.afters. + // When Execute Task is executed, after all the after tasks of Task are completed, + // the execution will continue. 
AfterTask(Task), } @@ -118,22 +127,27 @@ impl TaskMessage { } } - pub fn key(&self) -> String { - let ty = match self { - TaskMessage::ExecuteTask(_) => 0, - TaskMessage::ScheduleTask(_) => 1, - TaskMessage::DeleteTask(_) => 2, - TaskMessage::AfterTask(_) => 3, - }; - format!("{}-{}-{}", TaskMessage::prefix(), ty, self.task_name()) + pub fn ty(&self) -> TaskMessageType { + match self { + TaskMessage::ExecuteTask(_) => TaskMessageType::Execute, + TaskMessage::ScheduleTask(_) => TaskMessageType::Schedule, + TaskMessage::DeleteTask(_) => TaskMessageType::Delete, + TaskMessage::AfterTask(_) => TaskMessageType::After, + } } - pub fn schedule_key(task_name: &str) -> String { - format!("{}-1-{task_name}", TaskMessage::prefix()) + pub fn key(&self) -> String { + Self::key_with_type(self.ty(), self.task_name()) } - pub fn prefix() -> i64 { - 0 + pub fn key_with_type(ty: TaskMessageType, task_name: &str) -> String { + let ty_num = match ty { + TaskMessageType::Execute => 0, + TaskMessageType::Schedule => 1, + TaskMessageType::Delete => 2, + TaskMessageType::After => 3, + }; + format!("{}-{}-{}", TaskMessage::prefix_range().0, ty_num, task_name) } /// Returns the inclusive range of key prefixes used by `TaskMessage`. 
diff --git a/src/query/service/src/task/service.rs b/src/query/service/src/task/service.rs index 508cdbc8cb447..fd69e554e256c 100644 --- a/src/query/service/src/task/service.rs +++ b/src/query/service/src/task/service.rs @@ -37,6 +37,7 @@ use databend_common_exception::Result; use databend_common_expression::DataBlock; use databend_common_meta_api::kv_pb_api::decode_seqv; use databend_common_meta_app::principal::task::TaskMessage; +use databend_common_meta_app::principal::task::TaskMessageType; use databend_common_meta_app::principal::task::EMPTY_TASK_ID; use databend_common_meta_app::principal::task_message_ident::TaskMessageIdent; use databend_common_meta_app::principal::ScheduleOptions; @@ -450,7 +451,7 @@ impl TaskService { task_mgr .accept(&TaskMessageIdent::new( tenant, - TaskMessage::schedule_key(&task_name), + TaskMessage::key_with_type(TaskMessageType::Schedule, &task_name), )) .await?; }