diff --git a/components/cluster/command/template.go b/components/cluster/command/template.go
index c7b9abd9c9..e51acfcb93 100644
--- a/components/cluster/command/template.go
+++ b/components/cluster/command/template.go
@@ -35,6 +35,7 @@ type TemplateOptions struct {
 type LocalTemplate struct {
 	GlobalUser        string // global.user in yaml template
 	GlobalGroup       string // global.group in yaml template
+	GlobalSystemdMode string // global.systemd_mode in yaml template
 	GlobalSSHPort     int    // global.ssh_port in yaml template
 	GlobalDeployDir   string // global.deploy_dir in yaml template
 	GlobalDataDir     string // global.data_dir in yaml template
@@ -122,6 +123,7 @@ func newTemplateCmd() *cobra.Command {
 	// template values for rendering
 	cmd.Flags().StringVar(&localOpt.GlobalUser, "user", "tidb", "The user who runs the tidb cluster.")
 	cmd.Flags().StringVar(&localOpt.GlobalGroup, "group", "", "group is used to specify the group name the user belong to if it's not the same as user.")
+	cmd.Flags().StringVar(&localOpt.GlobalSystemdMode, "systemd_mode", "system", "systemd_mode is used to select whether to use sudo permissions. When set to user, there is no need to add the user to sudoers. The default value is system.")
 	cmd.Flags().IntVar(&localOpt.GlobalSSHPort, "ssh-port", 22, "SSH port of servers in the managed cluster.")
 	cmd.Flags().StringVar(&localOpt.GlobalDeployDir, "deploy-dir", "/tidb-deploy", "Storage directory for cluster deployment files, startup scripts, and configuration files.")
 	cmd.Flags().StringVar(&localOpt.GlobalDataDir, "data-dir", "/tidb-data", "TiDB Cluster data storage directory.")
diff --git a/components/dm/command/template.go b/components/dm/command/template.go
index 96450e3fa0..abbc6ef5b1 100644
--- a/components/dm/command/template.go
+++ b/components/dm/command/template.go
@@ -33,6 +33,7 @@ type TemplateOptions struct {
 type LocalTemplate struct {
 	GlobalUser        string // global.user in yaml template
 	GlobalGroup       string // global.group in yaml template
+	GlobalSystemdMode string // global.systemd_mode in yaml template
 	GlobalSSHPort     int    // global.ssh_port in yaml template
 	GlobalDeployDir   string // global.deploy_dir in yaml template
 	GlobalDataDir     string // global.data_dir in yaml template
@@ -113,6 +114,7 @@ func newTemplateCmd() *cobra.Command {
 	// template values for rendering
 	cmd.Flags().StringVar(&localOpt.GlobalUser, "user", "tidb", "The user who runs the tidb cluster.")
 	cmd.Flags().StringVar(&localOpt.GlobalGroup, "group", "", "group is used to specify the group name the user belong to if it's not the same as user.")
+	cmd.Flags().StringVar(&localOpt.GlobalSystemdMode, "systemd_mode", "system", "systemd_mode is used to select whether to use sudo permissions. When set to user, there is no need to add the user to sudoers. The default value is system.")
 	cmd.Flags().IntVar(&localOpt.GlobalSSHPort, "ssh-port", 22, "SSH port of servers in the managed cluster.")
 	cmd.Flags().StringVar(&localOpt.GlobalDeployDir, "deploy-dir", "/tidb-deploy", "Storage directory for cluster deployment files, startup scripts, and configuration files.")
 	cmd.Flags().StringVar(&localOpt.GlobalDataDir, "data-dir", "/tidb-data", "TiDB Cluster data storage directory.")
diff --git a/embed/examples/cluster/local.tpl b/embed/examples/cluster/local.tpl
index a5b0679551..f6f017fbb4 100644
--- a/embed/examples/cluster/local.tpl
+++ b/embed/examples/cluster/local.tpl
@@ -10,6 +10,10 @@ global:
   # group is used to specify the group name the user belong to if it's not the same as user.
   group: "{{ .GlobalGroup }}"
   {{- end }}
+  {{- if .GlobalSystemdMode }}
+  # systemd_mode is used to select whether to use sudo permissions.
+  systemd_mode: "{{ .GlobalSystemdMode }}"
+  {{- end }}
   # # SSH port of servers in the managed cluster.
   ssh_port: {{ .GlobalSSHPort }}
   # # Storage directory for cluster deployment files, startup scripts, and configuration files.
@@ -62,4 +66,4 @@ alertmanager_servers:
 {{- range .AlertManagerServers }}
   - host: {{ . }}
 {{- end }}
-{{ end }}
\ No newline at end of file
+{{ end }}
diff --git a/embed/examples/cluster/minimal.yaml b/embed/examples/cluster/minimal.yaml
index 7d469a3758..521ec68b2f 100644
--- a/embed/examples/cluster/minimal.yaml
+++ b/embed/examples/cluster/minimal.yaml
@@ -5,6 +5,8 @@ global:
   user: "tidb"
   # # group is used to specify the group name the user belong to if it's not the same as user.
   # group: "tidb"
+  # # systemd_mode is used to select whether to use sudo permissions. When its value is set to user, there is no need to add global.user to sudoers. The default value is system.
+  # systemd_mode: "system"
   # # SSH port of servers in the managed cluster.
   ssh_port: 22
   # # Storage directory for cluster deployment files, startup scripts, and configuration files.
   deploy_dir: "/tidb-deploy"
diff --git a/embed/examples/cluster/multi-dc.yaml b/embed/examples/cluster/multi-dc.yaml
index 3fcb2bb7af..03e2b0f021 100644
--- a/embed/examples/cluster/multi-dc.yaml
+++ b/embed/examples/cluster/multi-dc.yaml
@@ -6,6 +6,8 @@ global:
   # # group is used to specify the group name the user belong to,if it's not the same as user.
   # group: "tidb"
+  # # systemd_mode is used to select whether to use sudo permissions. When its value is set to user, there is no need to add global.user to sudoers. The default value is system.
+  # systemd_mode: "system"
   # # SSH port of servers in the managed cluster.
   ssh_port: 22
   # # Storage directory for cluster deployment files, startup scripts, and configuration files.
   deploy_dir: "/tidb-deploy"
diff --git a/embed/examples/cluster/topology.example.yaml b/embed/examples/cluster/topology.example.yaml
index 72998cc847..0af84a658e 100644
--- a/embed/examples/cluster/topology.example.yaml
+++ b/embed/examples/cluster/topology.example.yaml
@@ -6,6 +6,8 @@ global:
   # # group is used to specify the group name the user belong to,if it's not the same as user.
   # group: "tidb"
+  # # systemd_mode is used to select whether to use sudo permissions. When its value is set to user, there is no need to add global.user to sudoers. The default value is system.
+  # systemd_mode: "system"
   # # SSH port of servers in the managed cluster.
   ssh_port: 22
   # # Storage directory for cluster deployment files, startup scripts, and configuration files.
   deploy_dir: "/tidb-deploy"
diff --git a/embed/examples/dm/local.tpl b/embed/examples/dm/local.tpl
index 8a3f5bf00d..9cfa78b60f 100644
--- a/embed/examples/dm/local.tpl
+++ b/embed/examples/dm/local.tpl
@@ -11,6 +11,10 @@ global:
   # group is used to specify the group name the user belong to if it's not the same as user.
   group: "{{ .GlobalGroup }}"
   {{- end }}
+  {{- if .GlobalSystemdMode }}
+  # systemd_mode is used to select whether to use sudo permissions.
+  systemd_mode: "{{ .GlobalSystemdMode }}"
+  {{- end }}
   # # SSH port of servers in the managed cluster.
   ssh_port: {{ .GlobalSSHPort }}
   # # Storage directory for cluster deployment files, startup scripts, and configuration files.
@@ -51,4 +55,4 @@ alertmanager_servers:
 {{- range .AlertManagerServers }}
   - host: {{ . }}
 {{- end }}
-{{ end }}
\ No newline at end of file
+{{ end }}
diff --git a/embed/examples/dm/minimal.yaml b/embed/examples/dm/minimal.yaml
index ab8300b10e..acfdf84538 100644
--- a/embed/examples/dm/minimal.yaml
+++ b/embed/examples/dm/minimal.yaml
@@ -6,6 +6,7 @@
 ---
 global:
   user: "tidb"
+  # systemd_mode: "system"
   ssh_port: 22
   deploy_dir: "/home/tidb/dm/deploy"
   data_dir: "/home/tidb/dm/data"
diff --git a/embed/examples/dm/topology.example.yaml b/embed/examples/dm/topology.example.yaml
index b6faa986a5..24e7b20eec 100644
--- a/embed/examples/dm/topology.example.yaml
+++ b/embed/examples/dm/topology.example.yaml
@@ -3,6 +3,7 @@
 # them if the specific deployment value missing.
 global:
   user: "tidb"
+  # systemd_mode: "system"
   ssh_port: 22
   deploy_dir: "/dm-deploy"
   data_dir: "/dm-data"
diff --git a/embed/templates/systemd/system.service.tpl b/embed/templates/systemd/system.service.tpl
index 3d9966ab13..eea588f017 100644
--- a/embed/templates/systemd/system.service.tpl
+++ b/embed/templates/systemd/system.service.tpl
@@ -24,7 +24,9 @@ LimitSTACK=10485760
 {{- if .GrantCapNetRaw}}
 AmbientCapabilities=CAP_NET_RAW
 {{- end}}
+{{- if eq .SystemdMode "system"}}
 User={{.User}}
+{{- end}}
 ExecStart=/bin/bash -c '{{.DeployDir}}/scripts/run_{{.ServiceName}}.sh'
 {{- if eq .ServiceName "prometheus"}}
 ExecReload=/bin/bash -c 'kill -HUP $MAINPID $(pidof {{.DeployDir}}/bin/ng-monitoring-server)'
diff --git a/pkg/cluster/ctxt/context.go b/pkg/cluster/ctxt/context.go
index dce2be3337..84481b97b8 100644
--- a/pkg/cluster/ctxt/context.go
+++ b/pkg/cluster/ctxt/context.go
@@ -37,9 +37,9 @@ const (
 type (
 	// Executor is the executor interface for TiUP, all tasks will in the end
-	// be passed to a executor and then be actually performed.
+	// be passed to an executor and then be actually performed.
 	Executor interface {
-		// Execute run the command, then return it's stdout and stderr
+		// Execute runs the command, then returns its stdout and stderr
 		// NOTE: stdin is not supported as it seems we don't need it (for now). If
 		// at some point in the future we need to pass stdin to a command, we'll
 		// need to refactor this function and its implementations.
diff --git a/pkg/cluster/executor/ssh.go b/pkg/cluster/executor/ssh.go
index 805013bcf7..fcb9462bdb 100644
--- a/pkg/cluster/executor/ssh.go
+++ b/pkg/cluster/executor/ssh.go
@@ -143,8 +143,8 @@ func (e *EasySSHExecutor) Execute(ctx context.Context, cmd string, sudo bool, ti
 	}
 
 	// set a basic PATH in case it's empty on login
-	cmd = fmt.Sprintf("PATH=$PATH:/bin:/sbin:/usr/bin:/usr/sbin %s", cmd)
-
+	cmd = fmt.Sprintf("PATH=$PATH:/bin:/sbin:/usr/bin:/usr/sbin; %s", cmd)
+
 	if e.Locale != "" {
 		cmd = fmt.Sprintf("export LANG=%s; %s", e.Locale, cmd)
 	}
@@ -182,6 +182,7 @@ func (e *EasySSHExecutor) Execute(ctx context.Context, cmd string, sudo bool, ti
 			e.Config.Server,
 			color.YellowString(output)))
 	}
+
 	return []byte(stdout), []byte(stderr), baseErr
 }
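Before the manager changes, a note on the mental model: every SystemCtl task below now receives the topology's systemd_mode as a plain scope string. A minimal, runnable sketch of the command shape this implies — renderSystemctl is a hypothetical helper and the exact flag layout is an assumption, not code from this patch:

package main

import "fmt"

// renderSystemctl sketches how a scope of "user" is expected to change the
// generated command: the unit runs in the deploy user's systemd instance,
// so --user is added and sudo is dropped (assumed, based on the module's
// sudo handling elsewhere in this patch).
func renderSystemctl(scope, action, unit string) string {
	if scope == "user" {
		return fmt.Sprintf("systemctl --user %s %s", action, unit)
	}
	return fmt.Sprintf("sudo systemctl %s %s", action, unit)
}

func main() {
	fmt.Println(renderSystemctl("user", "reload", "prometheus-9090.service"))
	fmt.Println(renderSystemctl("system", "restart", "grafana-3000.service"))
}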
diff --git a/pkg/cluster/manager/builder.go b/pkg/cluster/manager/builder.go
index a945a664f4..0714e6f925 100644
--- a/pkg/cluster/manager/builder.go
+++ b/pkg/cluster/manager/builder.go
@@ -52,6 +52,7 @@ func buildReloadPromAndGrafanaTasks(
 	}
 	var tasks []*task.StepDisplay
 	deletedNodes := set.NewStringSet(nodes...)
+	systemdMode := topo.BaseTopo().GlobalOptions.SystemdMode
 	for _, inst := range instances {
 		if deletedNodes.Exist(inst.ID()) {
 			continue
@@ -60,10 +61,10 @@ func buildReloadPromAndGrafanaTasks(
 		}
 		t := task.NewBuilder(logger)
 		if inst.ComponentName() == spec.ComponentPrometheus {
 			// reload Prometheus
-			t = t.SystemCtl(inst.GetManageHost(), inst.ServiceName(), "reload", true, true)
+			t = t.SystemCtl(inst.GetManageHost(), inst.ServiceName(), "reload", true, true, string(systemdMode))
 		} else {
 			// restart grafana
-			t = t.SystemCtl(inst.GetManageHost(), inst.ServiceName(), "restart", true, false)
+			t = t.SystemCtl(inst.GetManageHost(), inst.ServiceName(), "restart", true, false, string(systemdMode))
 		}
 
 		tasks = append(tasks, t.BuildAsStep(fmt.Sprintf(" - Reload %s -> %s", inst.ComponentName(), inst.ID())))
@@ -99,6 +100,14 @@ func buildScaleOutTask(
 		return nil, err
 	}
 
+	var sudo bool
+	systemdMode := topo.BaseTopo().GlobalOptions.SystemdMode
+	if systemdMode == spec.UserMode {
+		sudo = false
+	} else {
+		sudo = true
+	}
+
 	// Initialize the environments
 	initializedHosts := set.NewStringSet()
 	metadata.GetTopology().IterInstance(func(instance spec.Instance) {
@@ -151,9 +160,10 @@ func buildScaleOutTask(
 			gOpt.SSHProxyTimeout,
 			gOpt.SSHType,
 			globalOptions.SSHType,
+			opt.User != "root" && systemdMode != spec.UserMode,
 		).
-			EnvInit(instance.GetManageHost(), base.User, base.Group, opt.SkipCreateUser || globalOptions.User == opt.User).
-			Mkdir(globalOptions.User, instance.GetManageHost(), dirs...).
+			EnvInit(instance.GetManageHost(), base.User, base.Group, opt.SkipCreateUser || globalOptions.User == opt.User, sudo).
+			Mkdir(globalOptions.User, instance.GetManageHost(), sudo, dirs...).
 			BuildAsStep(fmt.Sprintf(" - Initialized host %s ", host))
 		envInitTasks = append(envInitTasks, t)
 	})
@@ -186,9 +196,9 @@ func buildScaleOutTask(
 	}
 	// Deploy component
 	tb := task.NewSimpleUerSSH(m.logger, inst.GetManageHost(), inst.GetSSHPort(), base.User, gOpt, p, sshType).
-		Mkdir(base.User, inst.GetManageHost(), deployDirs...).
-		Mkdir(base.User, inst.GetManageHost(), dataDirs...).
-		Mkdir(base.User, inst.GetManageHost(), logDir)
+		Mkdir(base.User, inst.GetManageHost(), sudo, deployDirs...).
+		Mkdir(base.User, inst.GetManageHost(), sudo, dataDirs...).
+		Mkdir(base.User, inst.GetManageHost(), sudo, logDir)
 
 	srcPath := ""
 	if patchedComponents.Exist(inst.ComponentName()) {
@@ -424,7 +434,7 @@ type hostInfo struct {
 func buildMonitoredDeployTask(
 	m *Manager,
 	uniqueHosts map[string]hostInfo, // host -> ssh-port, os, arch
-	noAgentHosts set.StringSet, // hosts that do not deploy monitor agents
+	noAgentHosts set.StringSet, //  hosts that do not deploy monitor agents
 	globalOptions *spec.GlobalOptions,
 	monitoredOptions *spec.MonitoredOptions,
 	gOpt operator.Options,
@@ -477,7 +487,7 @@ func buildMonitoredDeployTask(
 
 			// Deploy component
 			tb := task.NewSimpleUerSSH(m.logger, host, info.ssh, globalOptions.User, gOpt, p, globalOptions.SSHType).
-				Mkdir(globalOptions.User, host, deployDirs...).
+				Mkdir(globalOptions.User, host, globalOptions.SystemdMode != spec.UserMode, deployDirs...).
 				CopyComponent(
 					comp,
 					info.os,
@@ -498,7 +508,7 @@ func buildMonitoredCertificateTasks(
 	m *Manager,
 	name string,
 	uniqueHosts map[string]hostInfo, // host -> ssh-port, os, arch
-	noAgentHosts set.StringSet, // hosts that do not deploy monitor agents
+	noAgentHosts set.StringSet, //  hosts that do not deploy monitor agents
 	globalOptions *spec.GlobalOptions,
 	monitoredOptions *spec.MonitoredOptions,
 	gOpt operator.Options,
@@ -524,7 +534,7 @@ func buildMonitoredCertificateTasks(
 
 			// Deploy component
 			tb := task.NewSimpleUerSSH(m.logger, host, info.ssh, globalOptions.User, gOpt, p, globalOptions.SSHType).
-				Mkdir(globalOptions.User, host, tlsDir)
+				Mkdir(globalOptions.User, host, globalOptions.SystemdMode != spec.UserMode, tlsDir)
 
 			if comp == spec.ComponentBlackboxExporter {
 				ca, innerr := crypto.ReadCA(
@@ -604,6 +614,7 @@ func buildInitMonitoredConfigTasks(
 					Log:   logDir,
 					Cache: specManager.Path(name, spec.TempConfigPath),
 				},
+				globalOptions.SystemdMode,
 			).
 			BuildAsStep(fmt.Sprintf(" - Generate config %s -> %s", comp, host))
 		tasks = append(tasks, t)
@@ -802,7 +813,7 @@ func buildTLSTask(
 	// cleanup tls files only in tls disable
 	if !topo.BaseTopo().GlobalOptions.TLSEnabled {
 		builder.Func("Cleanup TLS files", func(ctx context.Context) error {
-			return operator.CleanupComponent(ctx, delFileMap)
+			return operator.CleanupComponent(ctx, delFileMap, topo.BaseTopo().GlobalOptions.SystemdMode != spec.UserMode)
 		})
 	}
 
@@ -842,7 +853,7 @@ func buildCertificateTasks(
 			tlsDir := filepath.Join(deployDir, spec.TLSCertKeyDir)
 			tb := task.NewSimpleUerSSH(m.logger, inst.GetManageHost(), inst.GetSSHPort(), base.User, gOpt, p, topo.BaseTopo().GlobalOptions.SSHType).
-				Mkdir(base.User, inst.GetManageHost(), deployDir, tlsDir)
+				Mkdir(base.User, inst.GetManageHost(), topo.BaseTopo().GlobalOptions.SystemdMode != spec.UserMode, deployDir, tlsDir)
 
 			ca, err := crypto.ReadCA(
 				name,
diff --git a/pkg/cluster/manager/check.go b/pkg/cluster/manager/check.go
index ab2d59dfc0..72a4d5997e 100644
--- a/pkg/cluster/manager/check.go
+++ b/pkg/cluster/manager/check.go
@@ -124,7 +124,14 @@ func (m *Manager) CheckCluster(clusterOrTopoName, scaleoutTopo string, opt Check
 		}
 	}
 
-	if err := m.fillHost(sshConnProps, sshProxyProps, &topo, &gOpt, opt.User); err != nil {
+	var sudo bool
+	if topo.BaseTopo().GlobalOptions.SystemdMode == spec.UserMode {
+		sudo = false
+	} else {
+		sudo = opt.User != "root"
+	}
+
+	if err := m.fillHost(sshConnProps, sshProxyProps, &topo, &gOpt, opt.User, sudo); err != nil {
 		return err
 	}
 
@@ -193,6 +200,12 @@ func checkSystemInfo(
 	components := topo.ComponentsByStartOrder()
 	components = operator.FilterComponent(components, roleFilter)
 
+	systemdDir := "/etc/systemd/system/"
+	systemdMode := topo.BaseTopo().GlobalOptions.SystemdMode
+	if systemdMode == spec.UserMode {
+		systemdDir = "~/.config/systemd/user/"
+	}
+
 	for _, comp := range components {
 		instances := operator.FilterInstance(comp.Instances(), nodeFilter)
 		if len(instances) < 1 {
@@ -249,7 +262,7 @@ func checkSystemInfo(
 				).
 				CheckSys(
 					inst.GetManageHost(),
-					fmt.Sprintf("/etc/systemd/system/%s-%d.service", inst.ComponentName(), inst.GetPort()),
+					fmt.Sprintf("%s%s-%d.service", systemdDir, inst.ComponentName(), inst.GetPort()),
 					task.ChecktypeIsExist,
 					topo,
 					opt.Opr,
@@ -321,8 +334,9 @@ func checkSystemInfo(
 				gOpt.SSHProxyTimeout,
 				gOpt.SSHType,
 				topo.GlobalOptions.SSHType,
+				opt.User != "root" && systemdMode != spec.UserMode,
 			).
-				Mkdir(opt.User, inst.GetManageHost(), filepath.Join(task.CheckToolsPathDir, "bin")).
+				Mkdir(opt.User, inst.GetManageHost(), systemdMode != spec.UserMode, filepath.Join(task.CheckToolsPathDir, "bin")).
 				CopyComponent(
 					spec.ComponentCheckCollector,
 					inst.OS(),
@@ -360,6 +374,7 @@ func checkSystemInfo(
 				gOpt.SSHProxyTimeout,
 				gOpt.SSHType,
 				topo.GlobalOptions.SSHType,
+				opt.User != "root" && systemdMode != spec.UserMode,
 			).
 				Rmdir(inst.GetManageHost(), task.CheckToolsPathDir).
 				BuildAsStep(" - Cleanup check files on " + utils.JoinHostPort(inst.GetManageHost(), inst.GetSSHPort()))
@@ -416,7 +431,7 @@ func checkSystemInfo(
 				host,
 				"sysctl -a",
 				"",
-				true,
+				systemdMode != spec.UserMode,
 			).
 			CheckSys(
 				host,
@@ -507,8 +522,9 @@ func checkSystemInfo(
 			gOpt.SSHProxyTimeout,
 			gOpt.SSHType,
 			topo.GlobalOptions.SSHType,
+			opt.User != "root" && systemdMode != spec.UserMode,
 		)
-		res, err := handleCheckResults(ctx, host, opt, tf)
+		res, err := handleCheckResults(ctx, host, opt, tf, string(topo.BaseTopo().GlobalOptions.SystemdMode))
 		if err != nil {
 			continue
 		}
@@ -561,7 +577,7 @@ func checkSystemInfo(
 }
 
 // handleCheckResults parses the result of checks
-func handleCheckResults(ctx context.Context, host string, opt *CheckOptions, t *task.Builder) ([]HostCheckResult, error) {
+func handleCheckResults(ctx context.Context, host string, opt *CheckOptions, t *task.Builder, systemdMode string) ([]HostCheckResult, error) {
 	rr, _ := ctxt.GetInner(ctx).GetCheckResults(host)
 	if len(rr) < 1 {
 		return nil, fmt.Errorf("no check results found for %s", host)
@@ -585,7 +601,7 @@ func handleCheckResults(ctx context.Context, host string, opt *CheckOptions, t *
 			items = append(items, item)
 			continue
 		}
-		msg, err := fixFailedChecks(host, r, t)
+		msg, err := fixFailedChecks(host, r, t, systemdMode)
 		if err != nil {
 			ctx.Value(logprinter.ContextKeyLogger).(*logprinter.Logger).
 				Debugf("%s: fail to apply fix to %s (%s)", host, r.Name, err)
@@ -627,8 +643,9 @@ func formatHostCheckResults(results []HostCheckResult) [][]string {
 }
 
 // fixFailedChecks tries to automatically apply changes to fix failed checks
-func fixFailedChecks(host string, res *operator.CheckResult, t *task.Builder) (string, error) {
+func fixFailedChecks(host string, res *operator.CheckResult, t *task.Builder, systemdMode string) (string, error) {
 	msg := ""
+	sudo := systemdMode != string(spec.UserMode)
 	switch res.Name {
 	case operator.CheckNameSysService:
 		if strings.Contains(res.Msg, "not found") {
@@ -638,21 +655,21 @@ func fixFailedChecks(host string, res *operator.CheckResult, t *task.Builder) (s
 		if len(fields) < 2 {
 			return "", fmt.Errorf("can not perform action of service, %s", res.Msg)
 		}
-		t.SystemCtl(host, fields[1], fields[0], false, false)
+		t.SystemCtl(host, fields[1], fields[0], false, false, systemdMode)
 		msg = fmt.Sprintf("will try to '%s'", color.HiBlueString(res.Msg))
 	case operator.CheckNameSysctl:
 		fields := strings.Fields(res.Msg)
 		if len(fields) < 3 {
 			return "", fmt.Errorf("can not set kernel parameter, %s", res.Msg)
 		}
-		t.Sysctl(host, fields[0], fields[2])
+		t.Sysctl(host, fields[0], fields[2], sudo)
 		msg = fmt.Sprintf("will try to set '%s'", color.HiBlueString(res.Msg))
 	case operator.CheckNameLimits:
 		fields := strings.Fields(res.Msg)
 		if len(fields) < 4 {
 			return "", fmt.Errorf("can not set limits, %s", res.Msg)
 		}
-		t.Limit(host, fields[0], fields[1], fields[2], fields[3])
+		t.Limit(host, fields[0], fields[1], fields[2], fields[3], sudo)
 		msg = fmt.Sprintf("will try to set '%s'", color.HiBlueString(res.Msg))
 	case operator.CheckNameSELinux:
 		t.Shell(host,
@@ -662,13 +679,13 @@ func fixFailedChecks(host string, res *operator.CheckResult, t *task.Builder) (s
 				"setenforce 0",
 			),
 			"",
-			true)
+			sudo)
 		msg = fmt.Sprintf("will try to %s, reboot might be needed", color.HiBlueString("disable SELinux"))
 	case operator.CheckNameTHP:
 		t.Shell(host,
 			fmt.Sprintf(`if [ -d %[1]s ]; then echo never > %[1]s/enabled; fi`, "/sys/kernel/mm/transparent_hugepage"),
 			"",
-			true)
+			sudo)
 		msg = fmt.Sprintf("will try to %s, please check again after reboot", color.HiBlueString("disable THP"))
 	case operator.CheckNameSwap:
 		// not applying swappiness setting here, it should be fixed
@@ -676,7 +693,7 @@ func fixFailedChecks(host string, res *operator.CheckResult, t *task.Builder) (s
 		// t.Sysctl(host, "vm.swappiness", "0")
 		t.Shell(host,
 			"swapoff -a || exit 0", // ignore failure
-			"", true,
+			"", sudo,
 		)
 		msg = "will try to disable swap, please also check /etc/fstab manually"
 	default:
diff --git a/pkg/cluster/manager/cleanup.go b/pkg/cluster/manager/cleanup.go
index 0cff6c2390..de60fcac94 100644
--- a/pkg/cluster/manager/cleanup.go
+++ b/pkg/cluster/manager/cleanup.go
@@ -67,6 +67,10 @@ func (m *Manager) CleanCluster(name string, gOpt operator.Options, cleanOpt oper
 
 	m.logger.Infof("Cleanup cluster...")
 
+	sudo := true
+	if topo.BaseTopo().GlobalOptions.SystemdMode == spec.UserMode {
+		sudo = false
+	}
 	b, err := m.sshTaskBuilder(name, topo, base.User, gOpt)
 	if err != nil {
 		return err
@@ -82,7 +86,7 @@ func (m *Manager) CleanCluster(name string, gOpt operator.Options, cleanOpt oper
 			)
 		}).
 		Func("CleanupCluster", func(ctx context.Context) error {
-			return operator.CleanupComponent(ctx, delFileMap)
+			return operator.CleanupComponent(ctx, delFileMap, sudo)
 		}).
 		Build()
BuildAsStep(fmt.Sprintf(" - Prepare %s:%d", host, hostInfo.ssh)) envInitTasks = append(envInitTasks, t) } @@ -265,8 +284,8 @@ func (m *Manager) Deploy( } t := task.NewSimpleUerSSH(m.logger, inst.GetManageHost(), inst.GetSSHPort(), globalOptions.User, gOpt, sshProxyProps, globalOptions.SSHType). - Mkdir(globalOptions.User, inst.GetManageHost(), deployDirs...). - Mkdir(globalOptions.User, inst.GetManageHost(), dataDirs...) + Mkdir(globalOptions.User, inst.GetManageHost(), sudo, deployDirs...). + Mkdir(globalOptions.User, inst.GetManageHost(), sudo, dataDirs...) if deployerInstance, ok := inst.(DeployerInstance); ok { deployerInstance.Deploy(t, "", deployDir, version, name, clusterVersion) diff --git a/pkg/cluster/manager/display.go b/pkg/cluster/manager/display.go index ce0986f6ba..5fabc8072e 100644 --- a/pkg/cluster/manager/display.go +++ b/pkg/cluster/manager/display.go @@ -586,7 +586,7 @@ func (m *Manager) GetClusterTopology(dopt DisplayOption, opt operator.Options) ( } clusterInstInfos := []InstInfo{} - + systemdMode := string(topo.BaseTopo().GlobalOptions.SystemdMode) topo.IterInstance(func(ins spec.Instance) { // apply role filter if len(filterRoles) > 0 && !filterRoles.Exist(ins.Role()) { @@ -630,7 +630,7 @@ func (m *Manager) GetClusterTopology(dopt DisplayOption, opt operator.Options) ( var active string var systemdSince time.Duration nctx := checkpoint.NewContext(ctx) - active, memory, systemdSince, _ = operator.GetServiceStatus(nctx, e, ins.ServiceName()) + active, memory, systemdSince, _ = operator.GetServiceStatus(nctx, e, ins.ServiceName(), systemdMode, systemdMode) if status == "-" { if active == "active" { status = "Up" diff --git a/pkg/cluster/manager/manager.go b/pkg/cluster/manager/manager.go index bfe15feddd..d443683d7e 100644 --- a/pkg/cluster/manager/manager.go +++ b/pkg/cluster/manager/manager.go @@ -180,16 +180,16 @@ func (m *Manager) sshTaskBuilder(name string, topo spec.Topology, user string, g } // fillHost full host cpu-arch and kernel-name -func (m *Manager) fillHost(s, p *tui.SSHConnectionProps, topo spec.Topology, gOpt *operator.Options, user string) error { - if err := m.fillHostArchOrOS(s, p, topo, gOpt, user, spec.FullArchType); err != nil { +func (m *Manager) fillHost(s, p *tui.SSHConnectionProps, topo spec.Topology, gOpt *operator.Options, user string, sudo bool) error { + if err := m.fillHostArchOrOS(s, p, topo, gOpt, user, spec.FullArchType, sudo); err != nil { return err } - return m.fillHostArchOrOS(s, p, topo, gOpt, user, spec.FullOSType) + return m.fillHostArchOrOS(s, p, topo, gOpt, user, spec.FullOSType, sudo) } // fillHostArchOrOS full host cpu-arch or kernel-name -func (m *Manager) fillHostArchOrOS(s, p *tui.SSHConnectionProps, topo spec.Topology, gOpt *operator.Options, user string, fullType spec.FullHostType) error { +func (m *Manager) fillHostArchOrOS(s, p *tui.SSHConnectionProps, topo spec.Topology, gOpt *operator.Options, user string, fullType spec.FullHostType, sudo bool) error { globalSSHType := topo.BaseTopo().GlobalOptions.SSHType hostArchOrOS := map[string]string{} var detectTasks []*task.StepDisplay @@ -227,6 +227,7 @@ func (m *Manager) fillHostArchOrOS(s, p *tui.SSHConnectionProps, topo spec.Topol gOpt.SSHProxyTimeout, gOpt.SSHType, globalSSHType, + sudo, ) switch fullType { diff --git a/pkg/cluster/manager/scale_out.go b/pkg/cluster/manager/scale_out.go index 01d551502a..43c6a0ae45 100644 --- a/pkg/cluster/manager/scale_out.go +++ b/pkg/cluster/manager/scale_out.go @@ -121,7 +121,23 @@ func (m *Manager) ScaleOut( } } - if err := 
m.fillHost(sshConnProps, sshProxyProps, newPart, &gOpt, opt.User); err != nil { + var sudo bool + if topo.BaseTopo().GlobalOptions.SystemdMode == spec.UserMode { + sudo = false + hint := fmt.Sprintf("loginctl enable-linger %s", opt.User) + msg := "The value of systemd_mode is set to `user` in the topology, please note that you'll need to manually execute the following command using root or sudo on the host(s) to enable lingering for the systemd user instance.\n" + msg += color.GreenString(hint) + msg += "\nYou can read the systemd documentation for reference: https://wiki.archlinux.org/title/Systemd/User#Automatic_start-up_of_systemd_user_instances." + m.logger.Warnf(msg) + err = tui.PromptForConfirmOrAbortError("Do you want to continue? [y/N]: ") + if err != nil { + return err + } + } else { + sudo = opt.User != "root" + } + + if err := m.fillHost(sshConnProps, sshProxyProps, newPart, &gOpt, opt.User, sudo); err != nil { return err } diff --git a/pkg/cluster/manager/upgrade.go b/pkg/cluster/manager/upgrade.go index a2d8aba901..65eaffd83e 100644 --- a/pkg/cluster/manager/upgrade.go +++ b/pkg/cluster/manager/upgrade.go @@ -157,7 +157,7 @@ This operation will upgrade %s %s cluster %s to %s:%s`, // for some component, dataDirs might need to be created due to upgrade // eg: TiCDC support DataDir since v4.0.13 - tb = tb.Mkdir(topo.BaseTopo().GlobalOptions.User, inst.GetManageHost(), dataDirs...) + tb = tb.Mkdir(topo.BaseTopo().GlobalOptions.User, inst.GetManageHost(), topo.BaseTopo().GlobalOptions.SystemdMode != spec.UserMode, dataDirs...) if inst.IsImported() { switch inst.ComponentName() { diff --git a/pkg/cluster/module/systemd.go b/pkg/cluster/module/systemd.go index 076497e376..f38387b676 100644 --- a/pkg/cluster/module/systemd.go +++ b/pkg/cluster/module/systemd.go @@ -39,6 +39,7 @@ type SystemdModuleConfig struct { Force bool // add the `--force` arg to systemctl command Signal string // specify the signal to send to process Timeout time.Duration // timeout to execute the command + SystemdMode string } // SystemdModule is the module used to control systemd units @@ -53,7 +54,9 @@ type SystemdModule struct { func NewSystemdModule(config SystemdModuleConfig) *SystemdModule { systemctl := "systemctl" sudo := true - + if config.SystemdMode == "user" { + sudo = false + } if config.Force { systemctl = fmt.Sprintf("%s --force", systemctl) } diff --git a/pkg/cluster/operation/action.go b/pkg/cluster/operation/action.go index 54ad1434a9..797f4a26f7 100644 --- a/pkg/cluster/operation/action.go +++ b/pkg/cluster/operation/action.go @@ -61,7 +61,7 @@ func Enable( components = FilterComponent(components, roleFilter) monitoredOptions := cluster.GetMonitoredOptions() noAgentHosts := set.NewStringSet() - + systemdMode := string(cluster.BaseTopo().GlobalOptions.SystemdMode) instCount := map[string]int{} cluster.IterInstance(func(inst spec.Instance) { if inst.IgnoreMonitorAgent() { @@ -73,7 +73,7 @@ func Enable( for _, comp := range components { insts := FilterInstance(comp.Instances(), nodeFilter) - err := EnableComponent(ctx, insts, noAgentHosts, options, isEnable) + err := EnableComponent(ctx, insts, noAgentHosts, options, isEnable, systemdMode) if err != nil { return errors.Annotatef(err, "failed to enable/disable %s", comp.Name()) } @@ -98,7 +98,7 @@ func Enable( hosts = append(hosts, host) } - return EnableMonitored(ctx, hosts, noAgentHosts, monitoredOptions, options.OptTimeout, isEnable) + return EnableMonitored(ctx, hosts, noAgentHosts, monitoredOptions, options.OptTimeout, isEnable, systemdMode) 
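SystemdModuleConfig now carries two distinct knobs: Scope selects which systemd manager systemctl addresses, while SystemdMode decides whether the command is wrapped in sudo (see the CheckServices hunk later, which deliberately passes an empty scope with a real mode). A self-contained sketch of how the two combine — the struct and render function here are stand-ins, not the module's real API:

package main

import "fmt"

// config mirrors the two knobs added to SystemdModuleConfig (illustrative only).
type config struct {
	Unit        string
	Action      string
	Scope       string // "" or "system" talks to the system manager, "user" adds --user
	SystemdMode string // "user" drops sudo, anything else keeps it
}

func render(c config) string {
	systemctl := "systemctl"
	if c.Scope == "user" {
		systemctl += " --user"
	}
	cmd := fmt.Sprintf("%s %s %s", systemctl, c.Action, c.Unit)
	if c.SystemdMode != "user" {
		cmd = "sudo " + cmd
	}
	return cmd
}

func main() {
	// CheckServices-style call: system scope, but no sudo under user mode.
	fmt.Println(render(config{Unit: "firewalld.service", Action: "status", Scope: "", SystemdMode: "user"}))
	// Regular instance control in user mode.
	fmt.Println(render(config{Unit: "pd-2379.service", Action: "start", Scope: "user", SystemdMode: "user"}))
}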
diff --git a/pkg/cluster/operation/action.go b/pkg/cluster/operation/action.go
index 54ad1434a9..797f4a26f7 100644
--- a/pkg/cluster/operation/action.go
+++ b/pkg/cluster/operation/action.go
@@ -61,7 +61,7 @@ func Enable(
 	components = FilterComponent(components, roleFilter)
 	monitoredOptions := cluster.GetMonitoredOptions()
 	noAgentHosts := set.NewStringSet()
-
+	systemdMode := string(cluster.BaseTopo().GlobalOptions.SystemdMode)
 	instCount := map[string]int{}
 	cluster.IterInstance(func(inst spec.Instance) {
 		if inst.IgnoreMonitorAgent() {
@@ -73,7 +73,7 @@ func Enable(
 
 	for _, comp := range components {
 		insts := FilterInstance(comp.Instances(), nodeFilter)
-		err := EnableComponent(ctx, insts, noAgentHosts, options, isEnable)
+		err := EnableComponent(ctx, insts, noAgentHosts, options, isEnable, systemdMode)
 		if err != nil {
 			return errors.Annotatef(err, "failed to enable/disable %s", comp.Name())
 		}
@@ -98,7 +98,7 @@ func Enable(
 		hosts = append(hosts, host)
 	}
 
-	return EnableMonitored(ctx, hosts, noAgentHosts, monitoredOptions, options.OptTimeout, isEnable)
+	return EnableMonitored(ctx, hosts, noAgentHosts, monitoredOptions, options.OptTimeout, isEnable, systemdMode)
 }
 
 // Start the cluster.
@@ -116,7 +116,7 @@ func Start(
 	components = FilterComponent(components, roleFilter)
 	monitoredOptions := cluster.GetMonitoredOptions()
 	noAgentHosts := set.NewStringSet()
-
+	systemdMode := string(cluster.BaseTopo().GlobalOptions.SystemdMode)
 	cluster.IterInstance(func(inst spec.Instance) {
 		if inst.IgnoreMonitorAgent() {
 			noAgentHosts.Insert(inst.GetManageHost())
@@ -125,7 +125,7 @@ func Start(
 
 	for _, comp := range components {
 		insts := FilterInstance(comp.Instances(), nodeFilter)
-		err := StartComponent(ctx, insts, noAgentHosts, options, tlsCfg)
+		err := StartComponent(ctx, insts, noAgentHosts, options, tlsCfg, systemdMode)
 		if err != nil {
 			return errors.Annotatef(err, "failed to start %s", comp.Name())
 		}
@@ -164,7 +164,7 @@ func Start(
 	for host := range uniqueHosts {
 		hosts = append(hosts, host)
 	}
-	return StartMonitored(ctx, hosts, noAgentHosts, monitoredOptions, options.OptTimeout)
+	return StartMonitored(ctx, hosts, noAgentHosts, monitoredOptions, options.OptTimeout, systemdMode)
 }
 
 // Stop the cluster.
@@ -181,7 +181,7 @@ func Stop(
 	components = FilterComponent(components, roleFilter)
 	monitoredOptions := cluster.GetMonitoredOptions()
 	noAgentHosts := set.NewStringSet()
-
+	systemdMode := string(cluster.BaseTopo().GlobalOptions.SystemdMode)
 	instCount := map[string]int{}
 	cluster.IterInstance(func(inst spec.Instance) {
 		if inst.IgnoreMonitorAgent() {
@@ -225,7 +225,7 @@ func Stop(
 		hosts = append(hosts, host)
 	}
 
-	if err := StopMonitored(ctx, hosts, noAgentHosts, monitoredOptions, options.OptTimeout); err != nil && !options.Force {
+	if err := StopMonitored(ctx, hosts, noAgentHosts, monitoredOptions, options.OptTimeout, systemdMode); err != nil && !options.Force {
 		return err
 	}
 	return nil
@@ -278,36 +278,36 @@ func Restart(
 }
 
 // StartMonitored start BlackboxExporter and NodeExporter
-func StartMonitored(ctx context.Context, hosts []string, noAgentHosts set.StringSet, options *spec.MonitoredOptions, timeout uint64) error {
-	return systemctlMonitor(ctx, hosts, noAgentHosts, options, "start", timeout)
+func StartMonitored(ctx context.Context, hosts []string, noAgentHosts set.StringSet, options *spec.MonitoredOptions, timeout uint64, systemdMode string) error {
+	return systemctlMonitor(ctx, hosts, noAgentHosts, options, "start", timeout, systemdMode)
 }
 
 // StopMonitored stop BlackboxExporter and NodeExporter
-func StopMonitored(ctx context.Context, hosts []string, noAgentHosts set.StringSet, options *spec.MonitoredOptions, timeout uint64) error {
-	return systemctlMonitor(ctx, hosts, noAgentHosts, options, "stop", timeout)
+func StopMonitored(ctx context.Context, hosts []string, noAgentHosts set.StringSet, options *spec.MonitoredOptions, timeout uint64, systemdMode string) error {
+	return systemctlMonitor(ctx, hosts, noAgentHosts, options, "stop", timeout, systemdMode)
 }
 
 // RestartMonitored stop BlackboxExporter and NodeExporter
-func RestartMonitored(ctx context.Context, hosts []string, noAgentHosts set.StringSet, options *spec.MonitoredOptions, timeout uint64) error {
-	err := StopMonitored(ctx, hosts, noAgentHosts, options, timeout)
+func RestartMonitored(ctx context.Context, hosts []string, noAgentHosts set.StringSet, options *spec.MonitoredOptions, timeout uint64, systemdMode string) error {
+	err := StopMonitored(ctx, hosts, noAgentHosts, options, timeout, systemdMode)
 	if err != nil {
 		return err
 	}
-	return StartMonitored(ctx, hosts, noAgentHosts, options, timeout)
+	return StartMonitored(ctx, hosts, noAgentHosts, options, timeout, systemdMode)
 }
 
 // EnableMonitored enable/disable monitor service in a cluster
-func EnableMonitored(ctx context.Context, hosts []string, noAgentHosts set.StringSet, options *spec.MonitoredOptions, timeout uint64, isEnable bool) error {
+func EnableMonitored(ctx context.Context, hosts []string, noAgentHosts set.StringSet, options *spec.MonitoredOptions, timeout uint64, isEnable bool, systemdMode string) error {
 	action := "disable"
 	if isEnable {
 		action = "enable"
 	}
 
-	return systemctlMonitor(ctx, hosts, noAgentHosts, options, action, timeout)
+	return systemctlMonitor(ctx, hosts, noAgentHosts, options, action, timeout, systemdMode)
 }
 
-func systemctlMonitor(ctx context.Context, hosts []string, noAgentHosts set.StringSet, options *spec.MonitoredOptions, action string, timeout uint64) error {
+func systemctlMonitor(ctx context.Context, hosts []string, noAgentHosts set.StringSet, options *spec.MonitoredOptions, action string, timeout uint64, systemdMode string) error {
 	logger := ctx.Value(logprinter.ContextKeyLogger).(*logprinter.Logger)
 	ports := monitorPortMap(options)
 	for _, comp := range []string{spec.ComponentNodeExporter, spec.ComponentBlackboxExporter} {
@@ -326,7 +326,7 @@ func systemctlMonitor(ctx context.Context, hosts []string, noAgentHosts set.Stri
 				e := ctxt.GetInner(nctx).Get(host)
 				service := fmt.Sprintf("%s-%d.service", comp, ports[comp])
 
-				if err := systemctl(nctx, e, service, action, timeout); err != nil {
+				if err := systemctl(nctx, e, service, action, timeout, systemdMode); err != nil {
 					return toFailedActionError(err, action, host, service, "")
 				}
 
@@ -353,12 +353,12 @@ func systemctlMonitor(ctx context.Context, hosts []string, noAgentHosts set.Stri
 	return nil
 }
 
-func restartInstance(ctx context.Context, ins spec.Instance, timeout uint64, tlsCfg *tls.Config) error {
+func restartInstance(ctx context.Context, ins spec.Instance, timeout uint64, tlsCfg *tls.Config, systemdMode string) error {
 	e := ctxt.GetInner(ctx).Get(ins.GetManageHost())
 	logger := ctx.Value(logprinter.ContextKeyLogger).(*logprinter.Logger)
 	logger.Infof("\tRestarting instance %s", ins.ID())
 
-	if err := systemctl(ctx, e, ins.ServiceName(), "restart", timeout); err != nil {
+	if err := systemctl(ctx, e, ins.ServiceName(), "restart", timeout, systemdMode); err != nil {
 		return toFailedActionError(err, "restart", ins.GetManageHost(), ins.ServiceName(), ins.LogDir())
 	}
 
@@ -372,7 +372,7 @@ func restartInstance(ctx context.Context, ins spec.Instance, timeout uint64, tls
 	return nil
 }
 
-func enableInstance(ctx context.Context, ins spec.Instance, timeout uint64, isEnable bool) error {
+func enableInstance(ctx context.Context, ins spec.Instance, timeout uint64, isEnable bool, systemdMode string) error {
 	e := ctxt.GetInner(ctx).Get(ins.GetManageHost())
 	logger := ctx.Value(logprinter.ContextKeyLogger).(*logprinter.Logger)
 
@@ -383,7 +383,7 @@ func enableInstance(ctx context.Context, ins spec.Instance, timeout uint64, isEn
 	logger.Infof("\t%s instance %s", actionPrevMsgs[action], ins.ID())
 
 	// Enable/Disable by systemd.
-	if err := systemctl(ctx, e, ins.ServiceName(), action, timeout); err != nil {
+	if err := systemctl(ctx, e, ins.ServiceName(), action, timeout, systemdMode); err != nil {
 		return toFailedActionError(err, action, ins.GetManageHost(), ins.ServiceName(), ins.LogDir())
 	}
 
@@ -392,12 +392,12 @@ func enableInstance(ctx context.Context, ins spec.Instance, timeout uint64, isEn
 	return nil
 }
 
-func startInstance(ctx context.Context, ins spec.Instance, timeout uint64, tlsCfg *tls.Config) error {
+func startInstance(ctx context.Context, ins spec.Instance, timeout uint64, tlsCfg *tls.Config, systemdMode string) error {
 	e := ctxt.GetInner(ctx).Get(ins.GetManageHost())
 	logger := ctx.Value(logprinter.ContextKeyLogger).(*logprinter.Logger)
 	logger.Infof("\tStarting instance %s", ins.ID())
 
-	if err := systemctl(ctx, e, ins.ServiceName(), "start", timeout); err != nil {
+	if err := systemctl(ctx, e, ins.ServiceName(), "start", timeout, systemdMode); err != nil {
 		return toFailedActionError(err, "start", ins.GetManageHost(), ins.ServiceName(), ins.LogDir())
 	}
 
@@ -411,13 +411,15 @@ func startInstance(ctx context.Context, ins spec.Instance, timeout uint64, tlsCf
 	return nil
 }
 
-func systemctl(ctx context.Context, executor ctxt.Executor, service string, action string, timeout uint64) error {
+func systemctl(ctx context.Context, executor ctxt.Executor, service string, action string, timeout uint64, scope string) error {
 	logger := ctx.Value(logprinter.ContextKeyLogger).(*logprinter.Logger)
 	c := module.SystemdModuleConfig{
 		Unit:         service,
 		ReloadDaemon: true,
 		Action:       action,
 		Timeout:      time.Second * time.Duration(timeout),
+		Scope:        scope,
+		SystemdMode:  scope, // callers pass global.systemd_mode here, so scope doubles as the sudo switch
 	}
 	systemd := module.NewSystemdModule(c)
 	stdout, stderr, err := systemd.Execute(ctx, executor)
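For reference while reading these systemctl call sites: the unit file rendered from embed/templates/systemd/system.service.tpl (changed near the top of this patch) only emits a User= line in system mode. A runnable approximation of that guard using Go's text/template; the template text is abbreviated to the relevant lines, not the full file:

package main

import (
	"os"
	"text/template"
)

const unit = `[Service]
{{- if eq .SystemdMode "system"}}
User={{.User}}
{{- end}}
ExecStart=/bin/bash -c '{{.DeployDir}}/scripts/run_{{.ServiceName}}.sh'
`

func main() {
	t := template.Must(template.New("unit").Parse(unit))
	// In user mode the process already runs inside the deploy user's own
	// systemd instance, so no User= line is emitted.
	_ = t.Execute(os.Stdout, map[string]string{
		"SystemdMode": "user",
		"User":        "tidb",
		"DeployDir":   "/tidb-deploy/pd-2379",
		"ServiceName": "pd",
	})
}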
@@ -443,7 +444,7 @@ func systemctl(ctx context.Context, executor ctxt.Executor, service string, acti
 }
 
 // EnableComponent enable/disable the instances
-func EnableComponent(ctx context.Context, instances []spec.Instance, noAgentHosts set.StringSet, options Options, isEnable bool) error {
+func EnableComponent(ctx context.Context, instances []spec.Instance, noAgentHosts set.StringSet, options Options, isEnable bool, systemdMode string) error {
 	if len(instances) == 0 {
 		return nil
 	}
@@ -476,7 +477,7 @@ func EnableComponent(ctx context.Context, instances []spec.Instance, noAgentHost
 		// of checkpoint context every time put it into a new goroutine.
 		nctx := checkpoint.NewContext(ctx)
 		errg.Go(func() error {
-			err := enableInstance(nctx, ins, options.OptTimeout, isEnable)
+			err := enableInstance(nctx, ins, options.OptTimeout, isEnable, systemdMode)
 			if err != nil {
 				return err
 			}
@@ -488,7 +489,7 @@ func EnableComponent(ctx context.Context, instances []spec.Instance, noAgentHost
 }
 
 // StartComponent start the instances.
-func StartComponent(ctx context.Context, instances []spec.Instance, noAgentHosts set.StringSet, options Options, tlsCfg *tls.Config) error {
+func StartComponent(ctx context.Context, instances []spec.Instance, noAgentHosts set.StringSet, options Options, tlsCfg *tls.Config, systemdMode string) error {
 	if len(instances) == 0 {
 		return nil
 	}
@@ -506,7 +507,7 @@ func StartComponent(ctx context.Context, instances []spec.Instance, noAgentHosts
 		case spec.ComponentPD,
 			spec.ComponentTiFlash,
 			spec.ComponentDMMaster:
-			return serialStartInstances(ctx, instances, options, tlsCfg)
+			return serialStartInstances(ctx, instances, options, tlsCfg, systemdMode)
 		}
 	}
@@ -531,31 +532,31 @@ func StartComponent(ctx context.Context, instances []spec.Instance, noAgentHosts
 			if err := ins.PrepareStart(nctx, tlsCfg); err != nil {
 				return err
 			}
-			return startInstance(nctx, ins, options.OptTimeout, tlsCfg)
+			return startInstance(nctx, ins, options.OptTimeout, tlsCfg, systemdMode)
 		})
 	}
 
 	return errg.Wait()
 }
 
-func serialStartInstances(ctx context.Context, instances []spec.Instance, options Options, tlsCfg *tls.Config) error {
+func serialStartInstances(ctx context.Context, instances []spec.Instance, options Options, tlsCfg *tls.Config, systemdMode string) error {
 	for _, ins := range instances {
 		if err := ins.PrepareStart(ctx, tlsCfg); err != nil {
 			return err
 		}
-		if err := startInstance(ctx, ins, options.OptTimeout, tlsCfg); err != nil {
+		if err := startInstance(ctx, ins, options.OptTimeout, tlsCfg, systemdMode); err != nil {
 			return err
 		}
 	}
 	return nil
 }
 
-func stopInstance(ctx context.Context, ins spec.Instance, timeout uint64) error {
+func stopInstance(ctx context.Context, ins spec.Instance, timeout uint64, systemdMode string) error {
 	e := ctxt.GetInner(ctx).Get(ins.GetManageHost())
 	logger := ctx.Value(logprinter.ContextKeyLogger).(*logprinter.Logger)
 	logger.Infof("\tStopping instance %s", ins.GetManageHost())
 
-	if err := systemctl(ctx, e, ins.ServiceName(), "stop", timeout); err != nil {
+	if err := systemctl(ctx, e, ins.ServiceName(), "stop", timeout, systemdMode); err != nil {
 		return toFailedActionError(err, "stop", ins.GetManageHost(), ins.ServiceName(), ins.LogDir())
 	}
 
@@ -581,7 +582,7 @@ func StopComponent(ctx context.Context,
 	logger := ctx.Value(logprinter.ContextKeyLogger).(*logprinter.Logger)
 	name := instances[0].ComponentName()
 	logger.Infof("Stopping component %s", name)
-
+	systemdMode := string(topo.BaseTopo().GlobalOptions.SystemdMode)
 	errg, _ := errgroup.WithContext(ctx)
 
 	for _, ins := range instances {
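The destroy, check, and InitConfig hunks that follow all recompute the same pair of values inline. A sketch of the invariant — unitDirAndSudo is a hypothetical helper, but the two paths and the sudo pairing are taken verbatim from the patch:

package main

import "fmt"

// unitDirAndSudo captures the rule repeated in the later hunks: user-mode
// units live in the per-user unit directory and never need sudo; system-mode
// units live under /etc and do.
func unitDirAndSudo(mode string) (dir string, sudo bool) {
	if mode == "user" {
		return "~/.config/systemd/user/", false
	}
	return "/etc/systemd/system/", true
}

func main() {
	dir, sudo := unitDirAndSudo("user")
	fmt.Printf("unit dir %s, sudo=%v\n", dir, sudo) // unit dir ~/.config/systemd/user/, sudo=false
}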
@@ -628,7 +629,7 @@ func StopComponent(ctx context.Context, } } } - err := stopInstance(nctx, ins, options.OptTimeout) + err := stopInstance(nctx, ins, options.OptTimeout, systemdMode) if err != nil { return err } diff --git a/pkg/cluster/operation/check.go b/pkg/cluster/operation/check.go index cc2d2227fa..f849c8b356 100644 --- a/pkg/cluster/operation/check.go +++ b/pkg/cluster/operation/check.go @@ -504,7 +504,7 @@ func CheckKernelParameters(opt *CheckOptions, p []byte) []*CheckResult { } // CheckServices checks if a service is running on the host -func CheckServices(ctx context.Context, e ctxt.Executor, host, service string, disable bool) *CheckResult { +func CheckServices(ctx context.Context, e ctxt.Executor, host, service string, disable bool, systemdMode spec.SystemdMode) *CheckResult { result := &CheckResult{ Name: CheckNameSysService, } @@ -514,7 +514,7 @@ func CheckServices(ctx context.Context, e ctxt.Executor, host, service string, d ctx, fmt.Sprintf( "systemctl list-unit-files --type service | grep -i %s.service | wc -l", service), - true) + systemdMode != spec.UserMode) if err != nil { result.Err = err return result @@ -526,8 +526,8 @@ func CheckServices(ctx context.Context, e ctxt.Executor, host, service string, d result.Msg = fmt.Sprintf("service %s not found, ignore", service) return result } - - active, _, _, err := GetServiceStatus(ctx, e, service+".service") + // The service checked here needs to use systemctl in system mode, so the value passed by scope is empty. + active, _, _, err := GetServiceStatus(ctx, e, service+".service", "", string(systemdMode)) if err != nil { result.Err = err } @@ -549,14 +549,14 @@ func CheckServices(ctx context.Context, e ctxt.Executor, host, service string, d } // CheckSELinux checks if SELinux is enabled on the host -func CheckSELinux(ctx context.Context, e ctxt.Executor) *CheckResult { +func CheckSELinux(ctx context.Context, e ctxt.Executor, sudo bool) *CheckResult { result := &CheckResult{ Name: CheckNameSELinux, } m := module.NewShellModule(module.ShellModuleConfig{ // ignore grep errors, the file may not exist for some systems Command: "grep -E '^\\s*SELINUX=enforcing' /etc/selinux/config 2>/dev/null | wc -l", - Sudo: true, + Sudo: sudo, }) stdout, stderr, err := m.Execute(ctx, e) if err != nil { @@ -825,14 +825,14 @@ func CheckFIOResult(rr, rw, lat []byte) []*CheckResult { } // CheckTHP checks THP in /sys/kernel/mm/transparent_hugepage/enabled -func CheckTHP(ctx context.Context, e ctxt.Executor) *CheckResult { +func CheckTHP(ctx context.Context, e ctxt.Executor, sudo bool) *CheckResult { result := &CheckResult{ Name: CheckNameTHP, } m := module.NewShellModule(module.ShellModuleConfig{ Command: fmt.Sprintf(`if [ -d %[1]s ]; then cat %[1]s/enabled; fi`, "/sys/kernel/mm/transparent_hugepage"), - Sudo: true, + Sudo: sudo, }) stdout, stderr, err := m.Execute(ctx, e) if err != nil { diff --git a/pkg/cluster/operation/destroy.go b/pkg/cluster/operation/destroy.go index 1860ab438a..08e53b9e8d 100644 --- a/pkg/cluster/operation/destroy.go +++ b/pkg/cluster/operation/destroy.go @@ -59,7 +59,7 @@ func Destroy( instCount[inst.GetManageHost()]-- if instCount[inst.GetManageHost()] == 0 { if cluster.GetMonitoredOptions() != nil { - if err := DestroyMonitored(ctx, inst, cluster.GetMonitoredOptions(), options.OptTimeout); err != nil && !options.Force { + if err := DestroyMonitored(ctx, inst, cluster.GetMonitoredOptions(), options.OptTimeout, cluster.BaseTopo().GlobalOptions.SystemdMode); err != nil && !options.Force { return err } } @@ -137,13 +137,13 
@@ func StopAndDestroyInstance( monitoredOptions := cluster.GetMonitoredOptions() if monitoredOptions != nil && !instance.IgnoreMonitorAgent() { - if err := StopMonitored(ctx, []string{instance.GetManageHost()}, noAgentHosts, monitoredOptions, options.OptTimeout); err != nil { + if err := StopMonitored(ctx, []string{instance.GetManageHost()}, noAgentHosts, monitoredOptions, options.OptTimeout, string(cluster.BaseTopo().GlobalOptions.SystemdMode)); err != nil { if !ignoreErr { return perrs.Annotatef(err, "failed to stop monitor") } logger.Warnf("failed to stop %s: %v", "monitor", err) } - if err := DestroyMonitored(ctx, instance, monitoredOptions, options.OptTimeout); err != nil { + if err := DestroyMonitored(ctx, instance, monitoredOptions, options.OptTimeout, cluster.BaseTopo().GlobalOptions.SystemdMode); err != nil { if !ignoreErr { return perrs.Annotatef(err, "failed to destroy monitor") } @@ -241,7 +241,7 @@ func DeletePublicKey(ctx context.Context, host string) error { } // DestroyMonitored destroy the monitored service. -func DestroyMonitored(ctx context.Context, inst spec.Instance, options *spec.MonitoredOptions, timeout uint64) error { +func DestroyMonitored(ctx context.Context, inst spec.Instance, options *spec.MonitoredOptions, timeout uint64, systemdMode spec.SystemdMode) error { e := ctxt.GetInner(ctx).Get(inst.GetManageHost()) logger := ctx.Value(logprinter.ContextKeyLogger).(*logprinter.Logger) @@ -262,13 +262,18 @@ func DestroyMonitored(ctx context.Context, inst spec.Instance, options *spec.Mon logger.Warnf("Monitored deploy dir %s not deleted for TiDB-Ansible imported instance %s.", options.DeployDir, inst.InstanceName()) } - - delPaths = append(delPaths, fmt.Sprintf("/etc/systemd/system/%s-%d.service", spec.ComponentNodeExporter, options.NodeExporterPort)) - delPaths = append(delPaths, fmt.Sprintf("/etc/systemd/system/%s-%d.service", spec.ComponentBlackboxExporter, options.BlackboxExporterPort)) + systemdDir := "/etc/systemd/system/" + sudo := true + if systemdMode == spec.UserMode { + systemdDir = "~/.config/systemd/user/" + sudo = false + } + delPaths = append(delPaths, fmt.Sprintf("%s%s-%d.service", systemdDir, spec.ComponentNodeExporter, options.NodeExporterPort)) + delPaths = append(delPaths, fmt.Sprintf("%s%s-%d.service", systemdDir, spec.ComponentBlackboxExporter, options.BlackboxExporterPort)) c := module.ShellModuleConfig{ Command: fmt.Sprintf("rm -rf %s;", strings.Join(delPaths, " ")), - Sudo: true, // the .service files are in a directory owned by root + Sudo: sudo, // the .service files are in a directory owned by root Chdir: "", UseShell: false, } @@ -303,7 +308,7 @@ func DestroyMonitored(ctx context.Context, inst spec.Instance, options *spec.Mon } // CleanupComponent cleanup the instances -func CleanupComponent(ctx context.Context, delFileMaps map[string]set.StringSet) error { +func CleanupComponent(ctx context.Context, delFileMaps map[string]set.StringSet, sudo bool) error { logger := ctx.Value(logprinter.ContextKeyLogger).(*logprinter.Logger) for host, delFiles := range delFileMaps { e := ctxt.GetInner(ctx).Get(host) @@ -311,7 +316,7 @@ func CleanupComponent(ctx context.Context, delFileMaps map[string]set.StringSet) logger.Debugf("Deleting paths on %s: %s", host, strings.Join(delFiles.Slice(), " ")) c := module.ShellModuleConfig{ Command: fmt.Sprintf("rm -rf %s;", strings.Join(delFiles.Slice(), " ")), - Sudo: true, // the .service files are in a directory owned by root + Sudo: sudo, // the .service files are in a directory owned by root Chdir: "", UseShell: 
true, } @@ -431,14 +436,21 @@ func DestroyComponent(ctx context.Context, instances []spec.Instance, cls spec.T delPaths.Insert(deployDir) } + systemdDir := "/etc/systemd/system/" + sudo := true + if cls.BaseTopo().GlobalOptions.SystemdMode == spec.UserMode { + systemdDir = "~/.config/systemd/user/" + sudo = false + } + if svc := ins.ServiceName(); svc != "" { - delPaths.Insert(fmt.Sprintf("/etc/systemd/system/%s", svc)) + delPaths.Insert(fmt.Sprintf("%s%s", systemdDir, svc)) } logger.Debugf("Deleting paths on %s: %s", ins.GetManageHost(), strings.Join(delPaths.Slice(), " ")) for _, delPath := range delPaths.Slice() { c := module.ShellModuleConfig{ Command: fmt.Sprintf("rm -rf %s;", delPath), - Sudo: true, // the .service files are in a directory owned by root + Sudo: sudo, // the .service files are in a directory owned by root Chdir: "", UseShell: false, } diff --git a/pkg/cluster/operation/systemd.go b/pkg/cluster/operation/systemd.go index 8c5ba4e2cc..bcad41824d 100644 --- a/pkg/cluster/operation/systemd.go +++ b/pkg/cluster/operation/systemd.go @@ -36,10 +36,12 @@ import ( Mar 09 13:56:19 ip-172-16-5-70 systemd[1]: Started drainer-8249 service. */ -func GetServiceStatus(ctx context.Context, e ctxt.Executor, name string) (active, memory string, since time.Duration, err error) { +func GetServiceStatus(ctx context.Context, e ctxt.Executor, name string, scope string, systemdMode string) (active, memory string, since time.Duration, err error) { c := module.SystemdModuleConfig{ - Unit: name, - Action: "status", + Unit: name, + Action: "status", + Scope: scope, + SystemdMode: systemdMode, } systemd := module.NewSystemdModule(c) // ignore error since stopped service returns exit code 3 diff --git a/pkg/cluster/operation/upgrade.go b/pkg/cluster/operation/upgrade.go index 5f4ab04b3f..5c4c420858 100644 --- a/pkg/cluster/operation/upgrade.go +++ b/pkg/cluster/operation/upgrade.go @@ -53,6 +53,7 @@ func Upgrade( components := topo.ComponentsByUpdateOrder(currentVersion) components = FilterComponent(components, roleFilter) logger := ctx.Value(logprinter.ContextKeyLogger).(*logprinter.Logger) + systemdMode := string(topo.BaseTopo().GlobalOptions.SystemdMode) noAgentHosts := set.NewStringSet() uniqueHosts := set.NewStringSet() @@ -214,7 +215,7 @@ func Upgrade( return nil } - return RestartMonitored(ctx, uniqueHosts.Slice(), noAgentHosts, topo.GetMonitoredOptions(), options.OptTimeout) + return RestartMonitored(ctx, uniqueHosts.Slice(), noAgentHosts, topo.GetMonitoredOptions(), options.OptTimeout, systemdMode) } func upgradeInstance( @@ -252,8 +253,8 @@ func upgradeInstance( return err } } - - if err := restartInstance(ctx, instance, options.OptTimeout, tlsCfg); err != nil && !options.Force { + systemdMode := string(topo.BaseTopo().GlobalOptions.SystemdMode) + if err := restartInstance(ctx, instance, options.OptTimeout, tlsCfg, systemdMode); err != nil && !options.Force { return err } diff --git a/pkg/cluster/spec/instance.go b/pkg/cluster/spec/instance.go index eb7027d9b1..83bd66aa48 100644 --- a/pkg/cluster/spec/instance.go +++ b/pkg/cluster/spec/instance.go @@ -184,13 +184,19 @@ func (i *BaseInstance) InitConfig(ctx context.Context, e ctxt.Executor, opt Glob return nil } + systemdMode := opt.SystemdMode + if len(systemdMode) == 0 { + systemdMode = SystemMode + } + resource := MergeResourceControl(opt.ResourceControl, i.ResourceControl()) systemCfg := system.NewConfig(comp, user, paths.Deploy). WithMemoryLimit(resource.MemoryLimit). WithCPUQuota(resource.CPUQuota). WithLimitCORE(resource.LimitCORE). 
WithIOReadBandwidthMax(resource.IOReadBandwidthMax). - WithIOWriteBandwidthMax(resource.IOWriteBandwidthMax) + WithIOWriteBandwidthMax(resource.IOWriteBandwidthMax). + WithSystemdMode(string(systemdMode)) // For not auto start if using binlogctl to offline. // bad design @@ -205,8 +211,17 @@ func (i *BaseInstance) InitConfig(ctx context.Context, e ctxt.Executor, opt Glob if err := e.Transfer(ctx, sysCfg, tgt, false, 0, false); err != nil { return errors.Annotatef(err, "transfer from %s to %s failed", sysCfg, tgt) } - cmd := fmt.Sprintf("mv %s /etc/systemd/system/%s-%d.service", tgt, comp, port) - if _, _, err := e.Execute(ctx, cmd, true); err != nil { + systemdDir := "/etc/systemd/system/" + sudo := true + if opt.SystemdMode == UserMode { + systemdDir = "~/.config/systemd/user/" + if err := os.MkdirAll(systemdDir, os.ModePerm); err != nil { + return err + } + sudo = false + } + cmd := fmt.Sprintf("mv %s %s%s-%d.service", tgt, systemdDir, comp, port) + if _, _, err := e.Execute(ctx, cmd, sudo); err != nil { return errors.Annotatef(err, "execute: %s", cmd) } diff --git a/pkg/cluster/spec/spec.go b/pkg/cluster/spec/spec.go index 0ddfbee29b..bd5b99fbdb 100644 --- a/pkg/cluster/spec/spec.go +++ b/pkg/cluster/spec/spec.go @@ -55,6 +55,16 @@ const ( FullOSType FullHostType = "OS" ) +// SystemdMode is the mode used by systemctl +type SystemdMode string + +const ( + // SystemMode system mode + SystemMode SystemdMode = "system" + // UserMode user mode + UserMode SystemdMode = "user" +) + // general role names var ( RoleMonitor = "monitor" @@ -90,6 +100,7 @@ type ( OS string `yaml:"os,omitempty" default:"linux"` Arch string `yaml:"arch,omitempty"` Custom any `yaml:"custom,omitempty" validate:"custom:ignore"` + SystemdMode SystemdMode `yaml:"systemd_mode,omitempty" default:"system"` } // MonitoredOptions represents the monitored node configuration diff --git a/pkg/cluster/spec/tispark.go b/pkg/cluster/spec/tispark.go index bf0105ace5..bee1f1e3c9 100644 --- a/pkg/cluster/spec/tispark.go +++ b/pkg/cluster/spec/tispark.go @@ -256,8 +256,14 @@ func (i *TiSparkMasterInstance) InitConfig( if err := e.Transfer(ctx, sysCfg, tgt, false, 0, false); err != nil { return errors.Annotatef(err, "transfer from %s to %s failed", sysCfg, tgt) } - cmd := fmt.Sprintf("mv %s /etc/systemd/system/%s-%d.service", tgt, comp, port) - if _, _, err := e.Execute(ctx, cmd, true); err != nil { + systemdDir := "/etc/systemd/system/" + sudo := true + if i.topo.BaseTopo().GlobalOptions.SystemdMode == UserMode { + systemdDir = "~/.config/systemd/user/" + sudo = false + } + cmd := fmt.Sprintf("mv %s %s%s-%d.service", tgt, systemdDir, comp, port) + if _, _, err := e.Execute(ctx, cmd, sudo); err != nil { return errors.Annotatef(err, "execute: %s", cmd) } @@ -435,8 +441,14 @@ func (i *TiSparkWorkerInstance) InitConfig( if err := e.Transfer(ctx, sysCfg, tgt, false, 0, false); err != nil { return errors.Annotatef(err, "transfer from %s to %s failed", sysCfg, tgt) } - cmd := fmt.Sprintf("mv %s /etc/systemd/system/%s-%d.service", tgt, comp, port) - if _, _, err := e.Execute(ctx, cmd, true); err != nil { + systemdDir := "/etc/systemd/system/" + sudo := true + if i.topo.BaseTopo().GlobalOptions.SystemdMode == UserMode { + systemdDir = "~/.config/systemd/user/" + sudo = false + } + cmd := fmt.Sprintf("mv %s %s%s-%d.service", tgt, systemdDir, comp, port) + if _, _, err := e.Execute(ctx, cmd, sudo); err != nil { return errors.Annotatef(err, "execute: %s", cmd) } diff --git a/pkg/cluster/task/builder.go b/pkg/cluster/task/builder.go index 
diff --git a/pkg/cluster/task/builder.go b/pkg/cluster/task/builder.go
index 96e74cbb09..3d001e7517 100644
--- a/pkg/cluster/task/builder.go
+++ b/pkg/cluster/task/builder.go
@@ -46,7 +46,7 @@ func NewBuilder(logger *logprinter.Logger) *Builder {
 func (b *Builder) RootSSH(
 	host string, port int, user, password, keyFile, passphrase string, sshTimeout, exeTimeout uint64,
 	proxyHost string, proxyPort int, proxyUser, proxyPassword, proxyKeyFile, proxyPassphrase string, proxySSHTimeout uint64,
-	sshType, defaultSSHType executor.SSHType,
+	sshType, defaultSSHType executor.SSHType, sudo bool,
 ) *Builder {
 	if sshType == "" {
 		sshType = defaultSSHType
@@ -68,6 +68,7 @@ func (b *Builder) RootSSH(
 		proxyPassphrase: proxyPassphrase,
 		proxyTimeout:    proxySSHTimeout,
 		sshType:         sshType,
+		sudo:            sudo,
 	})
 	return b
 }
@@ -292,16 +293,17 @@ func (b *Builder) ScaleConfig(clusterName, clusterVersion string, specManager *s
 }
 
 // MonitoredConfig appends a MonitoredConfig task to the current task collection
-func (b *Builder) MonitoredConfig(name, comp, host string, globResCtl meta.ResourceControl, options *spec.MonitoredOptions, deployUser string, tlsEnabled bool, paths meta.DirPaths) *Builder {
+func (b *Builder) MonitoredConfig(name, comp, host string, globResCtl meta.ResourceControl, options *spec.MonitoredOptions, deployUser string, tlsEnabled bool, paths meta.DirPaths, systemdMode spec.SystemdMode) *Builder {
 	b.tasks = append(b.tasks, &MonitoredConfig{
-		name:       name,
-		component:  comp,
-		host:       host,
-		globResCtl: globResCtl,
-		options:    options,
-		deployUser: deployUser,
-		tlsEnabled: tlsEnabled,
-		paths:      paths,
+		name:        name,
+		component:   comp,
+		host:        host,
+		globResCtl:  globResCtl,
+		options:     options,
+		deployUser:  deployUser,
+		tlsEnabled:  tlsEnabled,
+		paths:       paths,
+		systemdMode: systemdMode,
 	})
 	return b
 }
@@ -324,12 +326,13 @@ func (b *Builder) SSHKeySet(privKeyPath, pubKeyPath string) *Builder {
 }
 
 // EnvInit appends an EnvInit task to the current task collection
-func (b *Builder) EnvInit(host, deployUser string, userGroup string, skipCreateUser bool) *Builder {
+func (b *Builder) EnvInit(host, deployUser string, userGroup string, skipCreateUser bool, sudo bool) *Builder {
 	b.tasks = append(b.tasks, &EnvInit{
 		host:           host,
 		deployUser:     deployUser,
 		userGroup:      userGroup,
 		skipCreateUser: skipCreateUser,
+		sudo:           sudo,
 	})
 	return b
 }
@@ -363,11 +366,12 @@ func (b *Builder) ClusterOperate(
 }
 
 // Mkdir appends a Mkdir task to the current task collection
-func (b *Builder) Mkdir(user, host string, dirs ...string) *Builder {
+func (b *Builder) Mkdir(user, host string, sudo bool, dirs ...string) *Builder {
 	b.tasks = append(b.tasks, &Mkdir{
 		user: user,
 		host: host,
 		dirs: dirs,
+		sudo: sudo,
 	})
 	return b
 }
@@ -393,35 +397,38 @@ func (b *Builder) Shell(host, command, cmdID string, sudo bool) *Builder {
 }
 
 // SystemCtl run systemctl on host
-func (b *Builder) SystemCtl(host, unit, action string, daemonReload, checkActive bool) *Builder {
+func (b *Builder) SystemCtl(host, unit, action string, daemonReload, checkActive bool, scope string) *Builder {
 	b.tasks = append(b.tasks, &SystemCtl{
 		host:         host,
 		unit:         unit,
 		action:       action,
 		daemonReload: daemonReload,
 		checkactive:  checkActive,
+		scope:        scope,
 	})
 	return b
 }
 
 // Sysctl set a kernel parameter
-func (b *Builder) Sysctl(host, key, val string) *Builder {
+func (b *Builder) Sysctl(host, key, val string, sudo bool) *Builder {
 	b.tasks = append(b.tasks, &Sysctl{
 		host: host,
 		key:  key,
 		val:  val,
+		sudo: sudo,
 	})
 	return b
 }
 
 // Limit set a system limit
-func (b *Builder) Limit(host, domain, limit, item, value string) *Builder {
+func (b *Builder) Limit(host, domain, limit, item, value string, sudo bool) *Builder {
 	b.tasks = append(b.tasks, &Limit{
 		host:   host,
 		domain: domain,
 		limit:  limit,
 		item:   item,
 		value:  value,
+		sudo:   sudo,
 	})
 	return b
 }
diff --git a/pkg/cluster/task/check.go b/pkg/cluster/task/check.go
index 181f38e53c..e08c18ce26 100644
--- a/pkg/cluster/task/check.go
+++ b/pkg/cluster/task/check.go
@@ -68,7 +68,10 @@ func (c *CheckSys) Execute(ctx context.Context) error {
 	if len(stderr) > 0 && len(stdout) == 0 {
 		return ErrNoOutput
 	}
-
+	sudo := true
+	if c.topo.BaseTopo().GlobalOptions.SystemdMode == spec.UserMode {
+		sudo = false
+	}
 	switch c.check {
 	case CheckTypeSystemInfo:
 		storeResults(ctx, c.host, operator.CheckSystemInfo(c.opt, stdout))
@@ -82,8 +85,8 @@ func (c *CheckSys) Execute(ctx context.Context) error {
 		}
 		results = append(
 			results,
-			operator.CheckSELinux(ctx, e),
-			operator.CheckTHP(ctx, e),
+			operator.CheckSELinux(ctx, e, sudo),
+			operator.CheckTHP(ctx, e, sudo),
 		)
 		storeResults(ctx, c.host, results)
 	case CheckTypePort:
@@ -98,9 +101,9 @@ func (c *CheckSys) Execute(ctx context.Context) error {
 		// check services
 		results = append(
 			results,
-			operator.CheckServices(ctx, e, c.host, "irqbalance", false),
+			operator.CheckServices(ctx, e, c.host, "irqbalance", false, c.topo.BaseTopo().GlobalOptions.SystemdMode),
 			// FIXME: set firewalld rules in deploy, and not disabling it anymore
-			operator.CheckServices(ctx, e, c.host, "firewalld", true),
+			operator.CheckServices(ctx, e, c.host, "firewalld", true, c.topo.BaseTopo().GlobalOptions.SystemdMode),
 		)
 		storeResults(ctx, c.host, results)
 	case CheckTypePackage: // check if a command present, and if a package installed
diff --git a/pkg/cluster/task/env_init.go b/pkg/cluster/task/env_init.go
index 140a09a761..af6d50f3b2 100644
--- a/pkg/cluster/task/env_init.go
+++ b/pkg/cluster/task/env_init.go
@@ -40,6 +40,7 @@ type EnvInit struct {
 	deployUser     string
 	userGroup      string
 	skipCreateUser bool
+	sudo           bool
 }
 
 // Execute implements the Task interface
@@ -77,8 +78,13 @@ func (e *EnvInit) exec(ctx context.Context) error {
 	}
 
 	// Authorize
-	cmd := `su - ` + e.deployUser + ` -c 'mkdir -p ~/.ssh && chmod 700 ~/.ssh'`
-	_, _, err = exec.Execute(ctx, cmd, true)
+	var cmd string
+	if e.sudo {
+		cmd = `su - ` + e.deployUser + ` -c 'mkdir -p ~/.ssh && chmod 700 ~/.ssh'`
+	} else {
+		cmd = `mkdir -p ~/.ssh && chmod 700 ~/.ssh`
+	}
+	_, _, err = exec.Execute(ctx, cmd, e.sudo)
 	if err != nil {
 		return wrapError(errEnvInitSubCommandFailed.
 			Wrap(err, "Failed to create '~/.ssh' directory for user '%s'", e.deployUser))
@@ -86,9 +92,15 @@
 	pk := strings.TrimSpace(string(pubKey))
 	sshAuthorizedKeys := executor.FindSSHAuthorizedKeysFile(ctx, exec)
-	cmd = fmt.Sprintf(`su - %[1]s -c 'grep $(echo %[2]s) %[3]s || echo %[2]s >> %[3]s && chmod 600 %[3]s'`,
-		e.deployUser, pk, sshAuthorizedKeys)
-	_, _, err = exec.Execute(ctx, cmd, true)
+	if e.sudo {
+		cmd = fmt.Sprintf(`su - %[1]s -c 'grep $(echo %[2]s) %[3]s || echo %[2]s >> %[3]s && chmod 600 %[3]s'`,
+			e.deployUser, pk, sshAuthorizedKeys)
+	} else {
+		cmd = fmt.Sprintf(`grep $(echo %[1]s) %[2]s || echo %[1]s >> %[2]s && chmod 600 %[2]s`,
+			pk, sshAuthorizedKeys)
+	}
+
+	_, _, err = exec.Execute(ctx, cmd, e.sudo)
 	if err != nil {
 		return wrapError(errEnvInitSubCommandFailed.
 			Wrap(err, "Failed to write public keys to '%s' for user '%s'", sshAuthorizedKeys, e.deployUser))
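With the builder methods above extended, a call site can derive the sudo decision once from the topology and thread it through every task. A sketch of such a call site follows; host names, directories, and limit values are example data, and the final Build step is assumed from the existing builder API:

    // Illustrative only: run the sudo-aware tasks added in this patch.
    sudo := topo.BaseTopo().GlobalOptions.SystemdMode != spec.UserMode
    t := task.NewBuilder(logger).
        EnvInit(host, "tidb", "", false, sudo).
        Mkdir("tidb", host, sudo, "/tidb-deploy", "/tidb-data").
        Sysctl(host, "fs.file-max", "1000000", sudo).
        Limit(host, "tidb", "nofile", "soft", "1000000", sudo).
        Build()
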
Wrap(err, "Failed to write public keys to '%s' for user '%s'", sshAuthorizedKeys, e.deployUser)) diff --git a/pkg/cluster/task/limits.go b/pkg/cluster/task/limits.go index 6e1ca36724..021049bea2 100644 --- a/pkg/cluster/task/limits.go +++ b/pkg/cluster/task/limits.go @@ -33,6 +33,7 @@ type Limit struct { limit string // limit type item string value string + sudo bool } // Execute implements the Task interface @@ -50,7 +51,7 @@ func (l *Limit) Execute(ctx context.Context) error { l.domain, l.limit, l.item, l.value, limitsFilePath), }, " && ") - stdout, stderr, err := e.Execute(ctx, cmd, true) + stdout, stderr, err := e.Execute(ctx, cmd, l.sudo) ctxt.GetInner(ctx).SetOutputs(l.host, stdout, stderr) if err != nil { return errors.Trace(err) diff --git a/pkg/cluster/task/mkdir.go b/pkg/cluster/task/mkdir.go index 466a07db49..24caae294f 100644 --- a/pkg/cluster/task/mkdir.go +++ b/pkg/cluster/task/mkdir.go @@ -27,6 +27,7 @@ type Mkdir struct { user string host string dirs []string + sudo bool } // Execute implements the Task interface @@ -53,12 +54,22 @@ func (m *Mkdir) Execute(ctx context.Context) error { if xs[i] == "" { continue } - cmd := fmt.Sprintf( - `test -d %[1]s || (mkdir -p %[1]s && chown %[2]s:$(id -g -n %[2]s) %[1]s)`, - strings.Join(xs[:i+1], "/"), - m.user, - ) - _, _, err := exec.Execute(ctx, cmd, true) // use root to create the dir + + cmd := "" + if m.sudo { + cmd = fmt.Sprintf( + `test -d %[1]s || (mkdir -p %[1]s && chown %[2]s:$(id -g -n %[2]s) %[1]s)`, + strings.Join(xs[:i+1], "/"), + m.user, + ) + } else { + cmd = fmt.Sprintf( + `test -d %[1]s || (mkdir -p %[1]s)`, + strings.Join(xs[:i+1], "/"), + ) + } + + _, _, err := exec.Execute(ctx, cmd, m.sudo) // use root to create the dir if err != nil { return errors.Trace(err) } diff --git a/pkg/cluster/task/monitored_config.go b/pkg/cluster/task/monitored_config.go index 65d91dd0e6..d5fee0c703 100644 --- a/pkg/cluster/task/monitored_config.go +++ b/pkg/cluster/task/monitored_config.go @@ -34,14 +34,15 @@ import ( // MonitoredConfig is used to generate the monitor node configuration type MonitoredConfig struct { - name string - component string - host string - globResCtl meta.ResourceControl - options *spec.MonitoredOptions - deployUser string - tlsEnabled bool - paths meta.DirPaths + name string + component string + host string + globResCtl meta.ResourceControl + options *spec.MonitoredOptions + deployUser string + tlsEnabled bool + paths meta.DirPaths + systemdMode spec.SystemdMode } // Execute implements the Task interface @@ -60,7 +61,7 @@ func (m *MonitoredConfig) Execute(ctx context.Context) error { return err } - if err := m.syncMonitoredSystemConfig(ctx, exec, m.component, ports[m.component]); err != nil { + if err := m.syncMonitoredSystemConfig(ctx, exec, m.component, ports[m.component], m.systemdMode); err != nil { return err } @@ -85,7 +86,7 @@ func (m *MonitoredConfig) Execute(ctx context.Context) error { return m.syncMonitoredScript(ctx, exec, m.component, cfg) } -func (m *MonitoredConfig) syncMonitoredSystemConfig(ctx context.Context, exec ctxt.Executor, comp string, port int) (err error) { +func (m *MonitoredConfig) syncMonitoredSystemConfig(ctx context.Context, exec ctxt.Executor, comp string, port int, systemdMode spec.SystemdMode) (err error) { sysCfg := filepath.Join(m.paths.Cache, fmt.Sprintf("%s-%s-%d.service", comp, m.host, port)) // insert checkpoint @@ -97,12 +98,17 @@ func (m *MonitoredConfig) syncMonitoredSystemConfig(ctx context.Context, exec ct return nil } + if len(systemdMode) == 0 { + systemdMode = 
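+
+	// NOTE: user-scope units only keep running after the deploy user logs
+	// out if lingering is enabled for that user (loginctl enable-linger);
+	// this is assumed to be handled by the deployment flow, not here.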
 	resource := spec.MergeResourceControl(m.globResCtl, m.options.ResourceControl)
 	systemCfg := system.NewConfig(comp, m.deployUser, m.paths.Deploy).
 		WithMemoryLimit(resource.MemoryLimit).
 		WithCPUQuota(resource.CPUQuota).
 		WithIOReadBandwidthMax(resource.IOReadBandwidthMax).
-		WithIOWriteBandwidthMax(resource.IOWriteBandwidthMax)
+		WithIOWriteBandwidthMax(resource.IOWriteBandwidthMax).
+		WithSystemdMode(string(systemdMode))
 
 	// blackbox_exporter needs cap_net_raw to send ICMP ping packets
 	if comp == spec.ComponentBlackboxExporter {
@@ -116,7 +122,13 @@ func (m *MonitoredConfig) syncMonitoredSystemConfig(ctx context.Context, exec ct
 	if err := exec.Transfer(ctx, sysCfg, tgt, false, 0, false); err != nil {
 		return err
 	}
-	if outp, errp, err := exec.Execute(ctx, fmt.Sprintf("mv %s /etc/systemd/system/%s-%d.service", tgt, comp, port), true); err != nil {
+	systemdDir := "/etc/systemd/system/"
+	sudo := true
+	if systemdMode == spec.UserMode {
+		systemdDir = "~/.config/systemd/user/"
+		sudo = false
+	}
+	if outp, errp, err := exec.Execute(ctx, fmt.Sprintf("mv %s %s%s-%d.service", tgt, systemdDir, comp, port), sudo); err != nil {
 		if len(outp) > 0 {
 			fmt.Println(string(outp))
 		}
diff --git a/pkg/cluster/task/ssh.go b/pkg/cluster/task/ssh.go
index bfe2732dd2..952aa95455 100644
--- a/pkg/cluster/task/ssh.go
+++ b/pkg/cluster/task/ssh.go
@@ -45,6 +45,7 @@ type RootSSH struct {
 	proxyPassphrase string           // passphrase of the private key file
 	proxyTimeout    uint64           // timeout in seconds when connecting via SSH
 	sshType         executor.SSHType // the type of SSH channel
+	sudo            bool
 }
 
 // Execute implements the Task interface
@@ -70,7 +71,7 @@ func (s *RootSSH) Execute(ctx context.Context) error {
 			Timeout: time.Second * time.Duration(s.proxyTimeout),
 		}
 	}
-	e, err := executor.New(s.sshType, s.user != "root", sc)
+	e, err := executor.New(s.sshType, s.sudo, sc)
 	if err != nil {
 		return err
 	}
@@ -93,7 +94,7 @@ func (s RootSSH) String() string {
 	return fmt.Sprintf("RootSSH: user=%s, host=%s, port=%d", s.user, s.host, s.port)
 }
 
-// UserSSH is used to establish a SSH connection to the target host with generated key
+// UserSSH is used to establish an SSH connection to the target host with a generated key
 type UserSSH struct {
 	host string
 	port int
diff --git a/pkg/cluster/task/sysctl.go b/pkg/cluster/task/sysctl.go
index 6d1462b5cf..249ab53bba 100644
--- a/pkg/cluster/task/sysctl.go
+++ b/pkg/cluster/task/sysctl.go
@@ -31,6 +31,7 @@ type Sysctl struct {
 	host string
 	key  string
 	val  string
+	sudo bool
 }
 
 // Execute implements the Task interface
@@ -46,8 +47,7 @@ func (s *Sysctl) Execute(ctx context.Context) error {
 		fmt.Sprintf("echo '%s=%s' >> %s", s.key, s.val, sysctlFilePath),
 		fmt.Sprintf("sysctl -p %s", sysctlFilePath),
 	}, " && ")
-
-	stdout, stderr, err := e.Execute(ctx, cmd, true)
+	stdout, stderr, err := e.Execute(ctx, cmd, s.sudo)
 	ctxt.GetInner(ctx).SetOutputs(s.host, stdout, stderr)
 	if err != nil {
 		return errors.Trace(err)
diff --git a/pkg/cluster/task/systemd.go b/pkg/cluster/task/systemd.go
index e7d15f5247..c6b6fb62b8 100644
--- a/pkg/cluster/task/systemd.go
+++ b/pkg/cluster/task/systemd.go
@@ -29,6 +29,7 @@ type SystemCtl struct {
 	action       string
 	daemonReload bool
 	checkactive  bool
+	scope        string
 }
 
 // Execute implements the Task interface
@@ -43,6 +44,7 @@ func (c *SystemCtl) Execute(ctx context.Context) error {
 		Action:       c.action,
 		ReloadDaemon: c.daemonReload,
 		CheckActive:  c.checkactive,
+		Scope:        c.scope,
 	}
 	systemd := module.NewSystemdModule(cfg)
 	stdout, stderr, err := systemd.Execute(ctx, e)
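The Scope value stored here is what ultimately switches systemctl into per-user operation. The systemd module internals are not part of this diff; conceptually, the rendered command differs roughly as in this sketch, which assumes Scope holds "system" or "user":

    // Sketch: how a user scope changes the systemctl invocation.
    cmd := "systemctl"
    if cfg.Scope == "user" {
        // per-user manager; runs as the deploy user and without sudo
        cmd += " --user"
    }
    cmd = fmt.Sprintf("%s %s %s", cmd, cfg.Action, cfg.Unit)
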
diff --git a/pkg/cluster/template/systemd/system.go b/pkg/cluster/template/systemd/system.go
index 841a15be6a..ec84b034cc 100644
--- a/pkg/cluster/template/systemd/system.go
+++ b/pkg/cluster/template/systemd/system.go
@@ -36,7 +36,8 @@ type Config struct {
 	GrantCapNetRaw bool
 	// Takes one of no, on-success, on-failure, on-abnormal, on-watchdog, on-abort, or always.
 	// The template sets it to always if this is not set.
-	Restart string
+	Restart     string
+	SystemdMode string
 }
 
 // NewConfig returns a Config with given arguments
@@ -78,6 +79,12 @@ func (c *Config) WithLimitCORE(core string) *Config {
 	return c
 }
 
+// WithSystemdMode set the SystemdMode field of Config
+func (c *Config) WithSystemdMode(mode string) *Config {
+	c.SystemdMode = mode
+	return c
+}
+
 // ConfigToFile write config content to specific path
 func (c *Config) ConfigToFile(file string) error {
 	config, err := c.Config()
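+	// Illustrative use of the new setter (component, user, paths, and
+	// values below are examples, not taken from this patch):
+	//
+	//	cfg := NewConfig("node_exporter", "tidb", "/tidb-deploy/monitor-9100").
+	//		WithMemoryLimit("256M").
+	//		WithSystemdMode("user")
+	//	err := cfg.ConfigToFile("/tmp/node_exporter-9100.service")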