From 268383d3688f64705a6f973f2ebe9c896ecd41a4 Mon Sep 17 00:00:00 2001 From: liubo Date: Wed, 27 Mar 2019 11:41:37 +0800 Subject: [PATCH] Over-upgrade from versions prior to 3.0 (#718) --- excessive_rolling_update.yml | 422 +++++++++++++++++++++++++++++++++++ 1 file changed, 422 insertions(+) create mode 100644 excessive_rolling_update.yml diff --git a/excessive_rolling_update.yml b/excessive_rolling_update.yml new file mode 100644 index 000000000..985b0b10c --- /dev/null +++ b/excessive_rolling_update.yml @@ -0,0 +1,422 @@ +--- +# Copyright 2016 PingCAP, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# See the License for the specific language governing permissions and +# limitations under the License. + +# The rolling update playbook of TiDB + +- name: check config locally + hosts: localhost + any_errors_fatal: true + tags: + - always + roles: + - check_config_static + +- name: check system environment + hosts: monitored_servers + any_errors_fatal: true + tags: + - always + roles: + - check_system_dynamic + +- name: gather all facts, and check dest + hosts: all + any_errors_fatal: true + tags: + - always + roles: + - check_config_dynamic + + +- name: rolling update PD cluster + hosts: pd_servers + any_errors_fatal: true + serial: 1 + tags: + - pd + + pre_tasks: + - set_fact: + pd_addr: "{{ ansible_host }}:{{ pd_client_port }}" + + - include_tasks: "common_tasks/get_pd_name.yml" + + - name: display PD name + debug: + var: pd_name + + - name: display PD address + debug: + var: pd_addr + + - include_tasks: "common_tasks/get_pd_leader.yml" + when: not enable_tls|default(false) + + - include_tasks: "common_tasks/get_pd_leader_tls.yml" + when: enable_tls|default(false) + + - include_tasks: "common_tasks/transfer_pd_leader.yml" + + - name: stop PD by supervise + shell: cd {{ deploy_dir }}/scripts && ./stop_pd.sh + when: process_supervision == 'supervise' + + - name: stop PD by systemd + systemd: name=pd.service state=stopped + become: true + when: process_supervision == 'systemd' + + - name: wait until the PD port is down + wait_for: + host: "{{ ansible_host }}" + port: "{{ pd_client_port }}" + state: stopped + msg: "the PD port {{ pd_client_port }} is not down" + + roles: + - pd + + post_tasks: + - name: start PD by supervise + shell: cd {{ deploy_dir }}/scripts && ./start_pd.sh + when: process_supervision == 'supervise' + + - name: start PD by systemd + systemd: name=pd-{{ pd_client_port }}.service state=started + become: true + when: process_supervision == 'systemd' + + - name: wait until the PD port is up + wait_for: + host: "{{ ansible_host }}" + port: "{{ pd_client_port }}" + state: started + msg: "the PD port {{ pd_client_port }} is not up" + + - name: wait until the PD health page is available + uri: + url: "http://{{ ansible_host }}:{{ pd_client_port }}/health" + return_content: yes + register: pd_http_result + until: pd_http_result.status == 200 and 'true' in pd_http_result.content + retries: 12 + delay: 5 + when: not enable_tls|default(false) + + - name: wait until the PD health page is available when enable_tls + uri: + url: "https://{{ ansible_host }}:{{ pd_client_port }}/health" + validate_certs: no + client_cert: "{{ pd_cert_dir }}/pd-server-{{ ansible_host }}.pem" + client_key: "{{ pd_cert_dir }}/pd-server-{{ ansible_host }}-key.pem" + return_content: yes + register: pd_https_result + until: pd_https_result.status == 200 and 'true' in pd_https_result.content + retries: 12 + delay: 5 + when: enable_tls|default(false) + + +- name: rolling update TiKV cluster + hosts: tikv_servers + any_errors_fatal: true + serial: 1 + tags: + - tikv + + pre_tasks: + - include_tasks: "common_tasks/get_pd_tikv_addr.yml" + + - include_tasks: "common_tasks/get_store_id.yml" + when: not enable_tls|default(false) + + - include_tasks: "common_tasks/get_store_id_tls.yml" + when: enable_tls|default(false) + + - include_tasks: "common_tasks/add_evict_leader_scheduler.yml" + + - name: stop TiKV by supervise + shell: cd {{ deploy_dir }}/scripts && ./stop_tikv.sh + when: process_supervision == 'supervise' + + - name: stop TiKV by systemd + systemd: name=tikv-{{ tikv_port }}.service state=stopped + become: true + when: process_supervision == 'systemd' + + - name: wait until the TiKV port is down + wait_for: + host: "{{ ansible_host }}" + port: "{{ tikv_port }}" + state: stopped + msg: "the TiKV port {{ tikv_port }} is not down" + + - command: cat {{ deploy_dir }}/status/tikv.pid + register: old_tikv_pid + ignore_errors: yes + changed_when: false + + - name: display old tikv pid + debug: + msg: "tikv binary or docker pid: {{ old_tikv_pid.stdout }}" + + roles: + - tikv + + post_tasks: + - name: start TiKV by supervise + shell: cd {{ deploy_dir }}/scripts && ./start_tikv.sh + when: process_supervision == 'supervise' + + - name: start TiKV by systemd + systemd: name=tikv-{{ tikv_port }}.service state=started + become: true + when: process_supervision == 'systemd' + + - name: wait until the TiKV port is up + wait_for: + host: "{{ ansible_host }}" + port: "{{ tikv_port }}" + state: started + msg: "the TiKV port {{ tikv_port }} is not up" + + - name: wait until the TiKV status page is available + uri: + url: "http://{{ ansible_host }}:{{ tikv_status_port }}/status" + return_content: yes + register: tikv_http_result + until: tikv_http_result.status == 200 + retries: 12 + delay: 5 + when: not enable_tls|default(false) and (tikv_metric_method is defined and tikv_metric_method != "push") + + - name: wait until the TiKV status page is available when enable_tls + uri: + url: "https://{{ ansible_host }}:{{ tikv_status_port }}/status" + validate_certs: no + client_cert: "{{ tikv_cert_dir }}/tikv-server-{{ ansible_host }}.pem" + client_key: "{{ tikv_cert_dir }}/tikv-server-{{ ansible_host }}-key.pem" + return_content: yes + register: tikv_https_result + until: tikv_https_result.status == 200 + retries: 10 + delay: 5 + when: enable_tls|default(false) and (tikv_metric_method is defined and tikv_metric_method != "push") + + - name: wait until TiKV process is up + wait_for_pid: | + pid_file={{ deploy_dir }}/status/tikv.pid timeout=300 + thread_name_regex='tikv-server' state=present + when: deployment_method == 'binary' and (tikv_metric_method is defined and tikv_metric_method == "push") + + - command: cat {{ deploy_dir }}/status/tikv.pid + register: new_tikv_pid + ignore_errors: yes + changed_when: false + + - name: display new tikv pid + debug: + msg: "tikv binary or docker pid: {{ new_tikv_pid.stdout }}" + + - include_tasks: "common_tasks/remove_evict_leader_scheduler.yml" + + +- name: rolling update pump cluster + hosts: pump_servers + any_errors_fatal: true + serial: 1 + tags: + - pump + + pre_tasks: + - name: stop pump by supervise + shell: cd {{ deploy_dir }}/scripts && ./stop_{{ item }}.sh + with_items: + - pump + when: + - enable_binlog|default(false) + - binlog_version == "cluster" + - process_supervision == 'supervise' + + - name: stop pump by systemd + systemd: name=pump-{{ pump_port }}.service state=stopped + become: true + when: + - enable_binlog|default(false) + - binlog_version == "cluster" + - process_supervision == 'systemd' + + - name: wait until the pump port is down + wait_for: + host: "{{ ansible_host }}" + port: "{{ pump_port }}" + state: stopped + msg: "the pump port {{ pump_port }} is not down" + when: + - enable_binlog|default(false) + - binlog_version == "cluster" + + roles: + - { role: pump_cluster, when: enable_binlog|default(false) and binlog_version == "cluster" } + + post_tasks: + - name: start pump by supervise + shell: cd {{ deploy_dir }}/scripts && ./start_{{ item }}.sh + when: + - enable_binlog|default(false) + - binlog_version == "cluster" + - process_supervision == 'supervise' + with_items: + - pump + + - name: start pump by systemd + systemd: name=pump-{{ pump_port }}.service state=started + become: true + when: + - enable_binlog|default(false) + - binlog_version == "cluster" + - process_supervision == 'systemd' + + - name: wait until the pump port is up + wait_for: + host: "{{ ansible_host }}" + port: "{{ pump_port }}" + state: started + msg: "the pump port {{ pump_port }} is not up" + when: + - enable_binlog|default(false) + - binlog_version == "cluster" + + +- name: rolling update TiDB cluster + hosts: tidb_servers + any_errors_fatal: true + serial: 1 + tags: + - tidb + + pre_tasks: + - name: stop TiDB by supervise + shell: cd {{ deploy_dir }}/scripts && ./stop_tidb.sh + when: process_supervision == 'supervise' + + - name: stop TiDB by systemd + systemd: name=tidb-{{ tidb_port }}.service state=stopped + become: true + when: process_supervision == 'systemd' + + - name: wait until the TiDB port is down + wait_for: + host: "{{ ansible_host }}" + port: "{{ tidb_port }}" + state: stopped + msg: "the TiDB port {{ tidb_port }} is not down" + + - name: stop pump by supervise + shell: cd {{ deploy_dir }}/scripts && ./stop_{{ item }}.sh + when: + - enable_binlog|default(false) + - binlog_version == "kafka" + - process_supervision == 'supervise' + with_items: + - pump + + - name: stop pump by systemd + systemd: name=pump-{{ pump_port }}.service state=stopped + become: true + when: + - enable_binlog|default(false) + - binlog_version == "kafka" + - process_supervision == 'systemd' + + - name: wait until the pump port is down + wait_for: + host: "{{ ansible_host }}" + port: "{{ pump_port }}" + state: stopped + msg: "the pump port {{ pump_port }} is not down" + when: + - enable_binlog|default(false) + - binlog_version == "kafka" + + roles: + - { role: pump_kafka, when: enable_binlog|default(false) and binlog_version == "kafka" } + - { role: tidb } + + post_tasks: + - name: start pump by supervise + shell: cd {{ deploy_dir }}/scripts && ./start_{{ item }}.sh + when: + - enable_binlog|default(false) + - binlog_version == "kafka" + - process_supervision == 'supervise' + with_items: + - pump + + - name: start pump by systemd + systemd: name=pump-{{ pump_port }}.service state=started enabled=no + become: true + when: + - enable_binlog|default(false) + - binlog_version == "kafka" + - process_supervision == 'systemd' + + - name: wait until the pump port is up + wait_for: + host: "{{ ansible_host }}" + port: "{{ pump_port }}" + state: started + msg: "the pump port {{ pump_port }} is not up" + when: + - enable_binlog|default(false) + - binlog_version == "kafka" + + - name: start TiDB by supervise + shell: cd {{ deploy_dir }}/scripts && ./start_tidb.sh + when: process_supervision == 'supervise' + + - name: start TiDB by systemd + systemd: name=tidb-{{ tidb_port }}.service state=started + become: true + when: process_supervision == 'systemd' + + - name: wait until the TiDB port is up + wait_for: + host: "{{ ansible_host }}" + port: "{{ tidb_port }}" + state: started + msg: "the TiDB port {{ tidb_port }} is not up" + + - name: wait until the TiDB status page is available + uri: + url: "http://{{ ansible_host }}:{{ tidb_status_port }}/status" + return_content: yes + register: tidb_http_result + until: tidb_http_result.status == 200 and 'TiDB' in tidb_http_result.content + retries: 12 + delay: 5 + when: not enable_tls|default(false) + + - name: wait until the TiDB status page is available when enable_tls + uri: + url: "https://{{ ansible_host }}:{{ tidb_status_port }}/status" + validate_certs: no + client_cert: "{{ tidb_cert_dir }}/tidb-server-{{ ansible_host }}.pem" + client_key: "{{ tidb_cert_dir }}/tidb-server-{{ ansible_host }}-key.pem" + return_content: yes + register: tidb_https_result + until: tidb_https_result.status == 200 and 'TiDB' in tidb_https_result.content + retries: 10 + delay: 5 + when: enable_tls|default(false)