forked from rancherlabs/support-tools
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathetcd-join.sh
360 lines (337 loc) · 14.3 KB
/
etcd-join.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
#!/bin/bash
# Colour escape sequences used by grecho/recho. They are only assigned when
# tput can be found; otherwise the variables stay unset and expand to nothing.
if command -v tput >/dev/null 2>&1; then
  red="$(tput setaf 1)"
  green="$(tput setaf 2)"
  reset="$(tput sgr0)"
fi
# Help text printed for -h/--help and whenever the arguments are incomplete.
USAGE='Automatic mode usage: ./etcd-join.sh <ssh user> <remote etcd IP> [path to ssh key for remote box]
Manual mode usage: ./etcd-join.sh MANUAL_MODE'
# Print a message on stdout wrapped in the green colour sequence.
# Globals:   green, reset (read; may be empty or unset)
# Arguments: $1 - message text
function grecho() {
  # printf instead of echo: when the colour variables are empty, echo would
  # treat a message such as "-n" or "-e" as an option and print nothing.
  printf '%s\n' "${green}$1${reset}"
}
# Print a message on stdout wrapped in the red colour sequence.
# Globals:   red, reset (read; may be empty or unset)
# Arguments: $1 - message text
function recho() {
  # printf instead of echo: when the colour variables are empty, echo would
  # treat a message such as "-n" or "-e" as an option and print nothing.
  printf '%s\n' "${red}$1${reset}"
}
# Run a command line as root, going through sudo when the script itself is
# not running as root.
# Globals:   EUID (read), EXITCODE (written: exit status of the command)
# Arguments: $1 - the whole command line as ONE string
# Outputs:   the command's own stdout/stderr; the sudo notice goes to stderr
# Returns:   the exit status of the command
rootcmd() {
  if [[ $EUID -ne 0 ]]; then
    # The notice is written to stderr so that callers capturing stdout, e.g.
    # "$(rootcmd "ls -A dir")", only receive the command's own output.
    grecho "Running as non root user, issuing command with sudo." >&2
    # $1 is intentionally unquoted: it is split into command + arguments, and
    # globs in it are expanded by THIS shell (before sudo runs).
    # shellcheck disable=SC2086
    sudo $1
  else
    # shellcheck disable=SC2086
    $1
  fi
  EXITCODE="$?"
  return "${EXITCODE}"
}
# Execute a command line on the remote etcd host over SSH.
# Globals:   REMOTE_SSH_USER, REMOTE_SSH_IP, REMOTE_SSH_KEY (read)
# Arguments: $1 - command line to run remotely
# Returns:   the exit status of ssh
sshcmd() {
  local -a ssh_opts=(-o StrictHostKeyChecking=no)
  # Only pass an identity file when a key path was supplied.
  if [[ -n "${REMOTE_SSH_KEY}" ]]; then
    ssh_opts+=(-i "${REMOTE_SSH_KEY}")
  fi
  ssh "${ssh_opts[@]}" -l "${REMOTE_SSH_USER}" "${REMOTE_SSH_IP}" "$1"
}
# Block until the operator types "yes" (any letter case) on stdin.
# Exits the script with status 1 once ten answers other than "yes" were read.
# Globals: response, i (written), green, reset (read)
function askcontinue() {
  shopt -s nocasematch
  response=''
  i=0
  until [[ ${response} == 'yes' ]]; do
    i=$((i + 1))
    if ((i > 10)); then
      echo "${green}Script has detected a response other than 'yes' more than ten times, aborting script!${reset}"
      exit 1
    fi
    printf "${green}Is it OK to proceed to the next step? Type 'yes' to proceed: ${reset}"
    read response
    echo
  done
  # Restore case-sensitive matching for the rest of the script.
  shopt -u nocasematch
}
# Interactively read a value from stdin into the variable named by $1 and ask
# the operator to confirm it with "yes" (any letter case). The value is asked
# for again after every answer other than "yes"; after ten such answers the
# script exits with status 1.
# Globals:   response, i (written), green, reset (read)
# Arguments: $1 - NAME of the variable to assign
function asksetvar() {
  shopt -s nocasematch
  response=''
  i=0
  while [[ ${response} != 'yes' ]]; do
    i=$((i + 1))
    if [ $i -gt 10 ]; then
      # The confirmation word is 'yes'; the message used to say 'continue'.
      grecho "Script has detected a response other than 'yes' more than ten times, aborting!"
      exit 1
    fi
    printf 'Result?: '
    # -r keeps backslashes in pasted values intact.
    read -r "$1"
    # tmp holds the variable NAME so ${!tmp} can show the value just read.
    declare tmp="$1"
    grecho "Is this correct?:${reset} ${!tmp}
${green}Type yes and press enter to continue: "
    read -r response
  done
  # Restore case-sensitive matching for the rest of the script.
  shopt -u nocasematch
  recho "$1 has been set to${green} ${!tmp}"
}
# Abort the script if the command (or one stage of the pipeline) that ran
# immediately before this call failed.
# It has to be the very next command after the one being checked, because the
# first thing it does is snapshot PIPESTATUS.
# Globals:   RC, PIPEINDEX (written), green, reset (read)
# Arguments: $1 - message to print before exiting
#            $2 - optional index of the pipeline stage to check (default 0)
function checkpipecmd() {
  RC=("${PIPESTATUS[@]}")
  PIPEINDEX=${2:-0}
  if [ "${RC[${PIPEINDEX}]}" == "0" ]; then
    return 0
  fi
  echo "${green}$1${reset}"
  exit 1
}
# Build the "etcdctl member add" command line for the remote host and store it
# in ETCD_ADD_MEMBER_CMD. Its shape depends on REQUIRE_ENDPOINT:
#   contains ":::"  -> certificate flags, no --endpoints
#   empty           -> bare command without certificate flags or --endpoints
#   anything else   -> certificate flags plus --endpoints ${REQUIRE_ENDPOINT}
# Globals: REQUIRE_ENDPOINT, ETCDCTL_CACERT, ETCDCTL_CERT, ETCDCTL_KEY,
#          ETCD_NAME, INITIAL_ADVERTISE_PEER_URL (read)
#          ETCD_ADD_MEMBER_CMD (written)
function setendpoint() {
  local tls_flags="--cacert $ETCDCTL_CACERT --cert ${ETCDCTL_CERT} --key ${ETCDCTL_KEY}"
  local add_args="add ${ETCD_NAME} --peer-urls=${INITIAL_ADVERTISE_PEER_URL}"
  case "$REQUIRE_ENDPOINT" in
    *:::*)
      grecho "etcd is listening on ${REQUIRE_ENDPOINT}, no need to pass --endpoints"
      ETCD_ADD_MEMBER_CMD="etcdctl ${tls_flags} member ${add_args}"
      ;;
    '')
      #etcd 3.4 removed netstat from the image and join cmd now fails if you pass any environment variables that are already set.
      grecho "No return on REQUIRE_ENDPOINT, no need to set any environment variables."
      ETCD_ADD_MEMBER_CMD="etcdctl member ${add_args}"
      ;;
    *)
      grecho "etcd is only listening on ${REQUIRE_ENDPOINT}, we need to pass --endpoints"
      ETCD_ADD_MEMBER_CMD="etcdctl ${tls_flags} member --endpoints ${REQUIRE_ENDPOINT} ${add_args}"
      ;;
  esac
}
# Help menu: shown when no argument is given or when -h/--help appears.
if [[ "$1" == '' || $@ =~ " -h" || $1 == "-h" || $@ =~ " --help" || $1 =~ "--help" ]]; then
  grecho "${USAGE}"
  exit 1
fi
# MANUAL_MODE needs no further arguments; automatic mode also requires the
# remote etcd IP as second argument.
if [[ $1 == 'MANUAL_MODE' ]]; then
  MANUAL_MODE=yes
elif [[ $2 == '' ]]; then
  grecho "${USAGE}"
  exit 1
fi
# Force-remove an existing container named exactly "etcd-join" (running or
# stopped) so the name is free for the container started later on.
if [[ "$(docker ps -a --filter "name=^/etcd-join$" --format '{{.Names}}')" == "etcd-join" ]]; then
  docker rm -f etcd-join
fi
# Locate the etcd data directory: the /opt/rke location is preferred, the
# plain /var/lib location is the fallback.
ETCD_DIR=''
for etcd_dir_candidate in "/opt/rke/var/lib/etcd" "/var/lib/etcd"; do
  if [[ -d "${etcd_dir_candidate}" ]]; then
    ETCD_DIR="${etcd_dir_candidate}"
    break
  fi
done
if [[ -z "${ETCD_DIR}" ]]; then
  grecho "Unable to locate an etcd directory, either move an old backup back into the normal place for your operating system or create an empty directory. RancherOS/CoreOS is usually /opt/rke/var/lib/etcd/ and everything else uses /var/lib/etcd/ by default."
  exit 1
fi
grecho "Found ${ETCD_DIR}, setting ETCD_DIR to this value"
# Locate the kubernetes certificate directory using the same preference order.
CERT_DIR=''
for cert_dir_candidate in "/opt/rke/etc/kubernetes" "/etc/kubernetes"; do
  if [[ -d "${cert_dir_candidate}" ]]; then
    CERT_DIR="${cert_dir_candidate}"
    break
  fi
done
if [[ -z "${CERT_DIR}" ]]; then
  grecho "Unable to locate the kubernetes certificate directory, exiting script!"
  exit 1
fi
grecho "Found ${CERT_DIR}, setting CERT_DIR to this value"
#check for runlike container
# The stdout of the rancher/etcd-tools container (given the docker socket and
# the argument "etcd") is kept in RUNLIKE; it is parsed further down.
grecho "Gathering information about your etcd container with runlike"
if ! RUNLIKE=$(docker run --rm -v /var/run/docker.sock:/var/run/docker.sock rancher/etcd-tools etcd); then
  grecho "runlike container failed to run, aborting script!"
  exit 1
fi
# Automatic mode only: take the SSH parameters from the command line and make
# sure a session to the remote host can be opened (by running "exit" there)
# before anything on this host is changed.
if [[ "${MANUAL_MODE}" != "yes" ]]; then
  REMOTE_SSH_USER=$1
  REMOTE_SSH_IP=$2
  REMOTE_SSH_KEY=$3
  grecho "Verifying SSH connections..."
  echo "ssh user: ${REMOTE_SSH_USER}"
  echo "ssh ip: ${REMOTE_SSH_IP}"
  echo "ssh key: ${REMOTE_SSH_KEY}"
  #echo length ${#REMOTE_SSH_KEY}
  if [[ ${#REMOTE_SSH_KEY} == 0 ]]; then
    # User and host must be separate words. Quoting them together made ssh use
    # "<user> <ip>" as the login name and "exit" as the host name.
    if ! ssh -o StrictHostKeyChecking=no -l "${REMOTE_SSH_USER}" "${REMOTE_SSH_IP}" exit; then
      grecho "Unable to connect to remote SSH host, aborting script! Did you set your ssh key\?"
      echo
      grecho "${USAGE}"
      exit 1
    fi
  else
    if ! ssh -o StrictHostKeyChecking=no -i "${REMOTE_SSH_KEY}" -l "${REMOTE_SSH_USER}" "${REMOTE_SSH_IP}" exit; then
      grecho "Unable to connect to remote SSH host, aborting script!"
      echo
      grecho "${USAGE}"
      exit 1
    fi
  fi
  grecho "SSH test succesful."
  echo
fi
# Make sure there is a running etcd container on the host we are joining:
# asked of the operator in manual mode, checked over SSH otherwise.
if [[ "${MANUAL_MODE}" == "yes" ]]; then
  grecho "MANUAL_MODE ENABLED: Please verify that etcd is running the host that you want to join before proceeding!"
  recho "Run:${reset} docker ps | grep etcd | grep -v etcd-rolling-snapshots"
  askcontinue
else
  #Check if etcd is actually running on the remote server
  grecho "Checking to see if etcd is actually running on the remote host"
  REMOTE_ETCD_RUNNING=$(sshcmd "docker ps --filter 'name=^/etcd$' --format '{{.Names}}'")
  if [[ ${REMOTE_ETCD_RUNNING} != "etcd" ]]; then
    grecho "etcd is not running on the remote host! Check that you have the correct host then try again."
    exit 1
  fi
  grecho "etcd is running on the remote host, excellent!"
  echo
fi
# Confirm the local etcd container can be inspected BEFORE reading from it.
# Running the export first meant a missing container produced empty output,
# and "export" without arguments lists every exported variable.
if ! docker inspect etcd &>/dev/null; then
  grecho "Uable to inspect the etcd container, does it still exist? Aborting script!"
  echo
  grecho "${USAGE}"
  exit 1
fi
# Export the container's environment entries (the surrounding [ ] printed by
# the Go template are stripped) into this script's environment.
ETCD_CONTAINER_ENV=$(docker inspect etcd -f '{{.Config.Env}}' | sed 's/[][]//g')
if [[ -n "${ETCD_CONTAINER_ENV}" ]]; then
  # Unquoted on purpose: every whitespace-separated NAME=value is one argument.
  # shellcheck disable=SC2086,SC2163
  export ${ETCD_CONTAINER_ENV}
fi
grecho "I was able to inspect the local etcd container! Script will proceed..."
echo
# Keep docker from restarting etcd while its data directory is being replaced.
recho "Setting etcd restart policy to never restart \"no\""
docker update --restart=no etcd
# Stop the local etcd container and move its current data out of the way into
# a timestamped sibling directory "${ETCD_DIR}-old--<timestamp>".
ETCD_BACKUP_TIME="$(date +%Y-%m-%d--%H%M%S)"
recho "Stopping etcd container"
docker stop etcd
recho "Moving old etcd data from ${ETCD_DIR} to ${ETCD_DIR}-old--${ETCD_BACKUP_TIME}"
# Every checkpipecmd below must directly follow the rootcmd it checks:
# checkpipecmd reads PIPESTATUS as its first action, so any command placed in
# between would replace the status being tested.
rootcmd "mkdir ${ETCD_DIR}-old--${ETCD_BACKUP_TIME}"
checkpipecmd "Failed to created backup etcd directory, exiting script!"
# Any output from "ls -A" (which also lists dotfiles) means there is data to move.
if [[ "$(rootcmd "ls -A ${ETCD_DIR}")" ]]; then
# Kernel release containing "rancher" (presumably RancherOS): move the whole
# directory into the backup directory, then recreate it empty with mode 700
# and owner root:root.
if uname -r | grep rancher; then
recho "${ETCD_DIR} is not empty, moving files out into ${ETCD_DIR}-old--${ETCD_BACKUP_TIME}"
rootcmd "mv ${ETCD_DIR} ${ETCD_DIR}-old--${ETCD_BACKUP_TIME}/"
checkpipecmd "Failed to move etcd directory into backup directory ${ETCD_DIR} -> ${ETCD_DIR}-old--${ETCD_BACKUP_TIME}/, exiting script!"
rootcmd "mkdir ${ETCD_DIR}"
checkpipecmd "Failed to recreate etcd directory, exiting script."
rootcmd "chmod 700 ${ETCD_DIR}"
checkpipecmd "Failed to set permissions on etcd directory to 700, exiting script."
rootcmd "chown root:root ${ETCD_DIR}"
checkpipecmd "Failed to set ownership on etcd directory to root:root, exiting script."
else
# Everything else: keep the directory itself and move only its contents.
# NOTE(review): the "/*" glob is expanded inside rootcmd by the invoking shell
# and, with default shell options, does not match dotfiles.
recho "${ETCD_DIR} is not empty, moving files out into ${ETCD_DIR}-old--${ETCD_BACKUP_TIME}"
rootcmd "mv ${ETCD_DIR}/* ${ETCD_DIR}-old--${ETCD_BACKUP_TIME}/"
checkpipecmd "Failed to move etcd data files to backup directory ${ETCD_DIR}/* -> ${ETCD_DIR}-old--${ETCD_BACKUP_TIME}/, exiting script!"
fi
else
grecho "${ETCD_DIR} is empty, no need to move any files out."
fi
# Print, for each line of ${RUNLIKE}, the text that follows the LAST occurrence
# of the marker $1 up to (not including) the next space. Lines without the
# marker are printed unchanged by sed.
# Arguments: $1 - marker text, used verbatim inside the sed pattern
runlike_value() {
  sed 's,^.*'"$1"'\([^ ]*\).*,\1,g' <<<$RUNLIKE
}
# Settings of the existing etcd container, recovered from the runlike output.
ETCD_NAME=$(runlike_value 'name=')
ETCD_HOSTNAME=$(runlike_value '--hostname=')
ETCDCTL_ENDPOINT="https://0.0.0.0:2379"
ETCDCTL_CACERT=$(runlike_value 'ETCDCTL_CACERT=')
ETCDCTL_CERT=$(runlike_value 'ETCDCTL_CERT=')
ETCDCTL_KEY=$(runlike_value 'ETCDCTL_KEY=')
ETCD_VERSION=$(runlike_value 'rancher/coreos-etcd:')
INITIAL_ADVERTISE_PEER_URL=$(runlike_value 'initial-advertise-peer-urls=')
INITIAL_CLUSTER_TOKEN=$(runlike_value 'initial-cluster-token=')
ADVERTISE_CLIENT_URLS=$(runlike_value 'advertise-client-urls=')
# One word per line, keep the --initial-cluster= word, drop the flag name.
ETCD_INITIAL_CLUSTER=$(echo $RUNLIKE \
  | sed 's/\s\+/\n/g' \
  | grep -- '--initial-cluster=' \
  | sed 's,--initial-cluster=,,g')
# The image name comes from docker directly rather than from the runlike text.
ETCD_IMAGE=$(docker inspect etcd --format='{{.Config.Image}}')
#CHECK IF WE NEED TO ADD --endpoints TO THE COMMAND
# REQUIRE_ENDPOINT is the address on which the remote etcd listens on port
# 2379: pasted by the operator in manual mode, fetched over SSH otherwise.
# setendpoint then derives ETCD_ADD_MEMBER_CMD from it.
if [[ "${MANUAL_MODE}" == "yes" ]]; then
  grecho "MANUAL_MODE ENABLED: Please run the following command on the etcd host you want to join then paste the results below."
  echo "docker exec etcd netstat -lpna | grep \:2379 | grep tcp | grep LISTEN | tr -s ' ' | cut -d' ' -f4"
  asksetvar REQUIRE_ENDPOINT
else
  REQUIRE_ENDPOINT=$(sshcmd "docker exec etcd netstat -lpna | grep \:2379 | grep tcp | grep LISTEN | tr -s ' ' | cut -d' ' -f4")
fi
setendpoint
# Register this node as a new member with the remote etcd and obtain the
# ETCD_INITIAL_CLUSTER value that the joining etcd must be started with.
if [[ "${MANUAL_MODE}" != "yes" ]]; then
recho "Connecting to remote etcd and issuing add member command"
# Runs ETCD_ADD_MEMBER_CMD inside the remote etcd container, keeps only the
# output line(s) containing "ETCD_INITIAL_CLUSTER=" and hands them to export,
# replacing the ETCD_INITIAL_CLUSTER value set earlier in this script.
# NOTE(review): the substitution is unquoted, so quote characters in the
# remote output stay part of the value, and if no line matches, export runs
# without arguments (bash then lists the exported variables) - confirm.
export $(sshcmd "docker exec etcd ${ETCD_ADD_MEMBER_CMD} | grep ETCD_INITIAL_CLUSTER=")
recho "ETCD_INITIAL_CLUSTER has been set to ${ETCD_INITIAL_CLUSTER} ${green}<-If this is blank etcd-join will fail"
else
# Manual mode: print the command for the operator to run on the remote host;
# its trailing sed strips the ETCD_INITIAL_CLUSTER="..." wrapper so that only
# the member list is pasted back.
grecho "MANUAL_MODE ENABLED: Please run the following command on the etcd host you want to join then paste the last line of the output below."
grecho "it should look something like this:"
echo "etcd-ip-172-31-11-26=https://172.31.11.26:2380,etcd-ip-172-31-14-134=https://172.31.14.134:2380"
grecho "command below:"
echo "docker exec etcd ${ETCD_ADD_MEMBER_CMD} | grep ETCD_INITIAL_CLUSTER= | sed -r 's,ETCD_INITIAL_CLUSTER=\"(.*)\",\1,g'"
asksetvar ETCD_INITIAL_CLUSTER
askcontinue
fi
# Assemble the "docker run" command line for the temporary etcd-join
# container. The string is built from single-quoted literal pieces with
# variable expansions spliced in between them ('...'$VAR'...'); the double
# quotes inside the literal pieces are stored verbatim and are only
# interpreted when eval re-parses the string below.
# The container mounts ETCD_DIR as the etcd data directory and CERT_DIR as
# /etc/kubernetes, uses host networking, and starts etcd with
# --initial-cluster-state=existing and the ETCD_INITIAL_CLUSTER value.
RESTORE_RUNLIKE='docker run
--name=etcd-join
--hostname='$ETCD_HOSTNAME'
--env="ETCDCTL_API=3"
--env="ETCDCTL_ENDPOINT='$ETCDCTL_ENDPOINT'"
--env="ETCDCTL_CACERT='$ETCDCTL_CACERT'"
--env="ETCDCTL_CERT='$ETCDCTL_CERT'"
--env="ETCDCTL_KEY='$ETCDCTL_KEY'"
--env="PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin"
--volume="'${ETCD_DIR}':/var/lib/rancher/etcd/:z"
--volume="'${CERT_DIR}':/etc/kubernetes:z"
--volume="/opt/rke:/opt/rke:z"
--network=host
--label io.rancher.rke.container.name="etcd"
--detach=true '${ETCD_IMAGE}' /usr/local/bin/etcd
--peer-client-cert-auth
--client-cert-auth
--initial-cluster='${ETCD_INITIAL_CLUSTER}'
--initial-cluster-state=existing
--trusted-ca-file='${ETCDCTL_CACERT}'
--listen-client-urls=https://0.0.0.0:2379
--initial-advertise-peer-urls='${INITIAL_ADVERTISE_PEER_URL}'
--listen-peer-urls=https://0.0.0.0:2380
--heartbeat-interval=500
--election-timeout=5000
--data-dir=/var/lib/rancher/etcd/
--initial-cluster-token='${INITIAL_CLUSTER_TOKEN}'
--peer-cert-file='${ETCDCTL_CERT}'
--peer-key-file='${ETCDCTL_KEY}'
--name='${ETCD_NAME}'
--advertise-client-urls='${ADVERTISE_CLIENT_URLS}'
--peer-trusted-ca-file='${ETCDCTL_CACERT}'
--cert-file='${ETCDCTL_CERT}'
--key-file='${ETCDCTL_KEY}''
grecho "Launching etcd-join with the following command:"
echo "${RESTORE_RUNLIKE}"
# The expansion is unquoted: the multi-line string is word-split and eval
# joins the words with spaces, so the whole text runs as ONE command.
# NOTE(review): eval re-parses every spliced value as shell syntax; a value
# containing whitespace or shell metacharacters would change the command.
eval ${RESTORE_RUNLIKE}
echo
# Let the joining etcd run for a moment; if the container is still up after
# the pause, the temporary container is removed and the regular etcd
# container is started again.
grecho "Script sleeping for 10 seconds."
sleep 10
if [[ "$(docker ps --filter "name=^/etcd-join$" --format '{{.Names}}')" != "etcd-join" ]]; then
  grecho " etcd-join is not running, something went wrong. Make sure the etcd cluster only has healthy and online members then try again."
  exit 1
fi
grecho "etcd-join appears to be running still, this is a good sign. Proceeding with cleanup."
recho "Stopping etcd-join"
docker stop etcd-join
recho "Deleting etcd-join"
docker rm etcd-join
recho "Starting etcd"
docker start etcd
if [[ "$(docker ps --filter "name=^/etcd$" --format '{{.Names}}')" != "etcd" ]]; then
  grecho "etcd is not running, something went wrong."
  exit 1
fi
grecho "etcd is running on local host."
# Show the member list of the remote etcd so the join can be verified: run
# over SSH in automatic mode, printed for the operator in manual mode.
if [[ "${MANUAL_MODE}" != "yes" ]]; then
  grecho "checking members list on remote etcd host."
else
  grecho "MANUAL_MODE ENABLED: Script has completed, please run the following command on the remote etcd host to verify members list."
fi
# Build the command once so both modes handle REQUIRE_ENDPOINT identically.
# Manual mode used to lack the empty-value case and printed an "--endpoints"
# flag with no value.
if [[ ${REQUIRE_ENDPOINT} =~ ":::" ]]; then
  grecho "etcd is listening on ${REQUIRE_ENDPOINT}, no need to pass --endpoints"
  MEMBER_LIST_CMD="docker exec etcd etcdctl member list"
elif [[ -z "${REQUIRE_ENDPOINT}" ]]; then
  #etcd 3.4 removed netstat from the image and join cmd now fails if you pass any environment variables that are already set.
  grecho "No return on REQUIRE_ENDPOINT, no need to set any environment variables."
  MEMBER_LIST_CMD="docker exec etcd etcdctl member list"
else
  grecho "etcd is only listening on ${REQUIRE_ENDPOINT}, we need to pass --endpoints"
  MEMBER_LIST_CMD="docker exec etcd etcdctl --endpoints ${REQUIRE_ENDPOINT} member list"
fi
if [[ "${MANUAL_MODE}" != "yes" ]]; then
  sshcmd "${MEMBER_LIST_CMD}"
else
  echo "${MEMBER_LIST_CMD}"
  askcontinue
fi
# Final step: re-enable automatic restarts for etcd (the policy was set to
# "no" earlier in this script).
recho "Setting etcd restart policy to always restart"
docker update --restart=always etcd
# Restart the kubelet and kube-apiserver containers; the exit status of this
# command is not checked, so a missing container does not abort the script.
recho "Restarting kubelet and kube-apiserver if they exist"
docker restart kubelet kube-apiserver
echo
grecho "Script has completed!"