Skip to content

Commit 8fb38b5

Browse files
authored
Merge pull request #336 from puppetlabs/SOLARCH-674
Fix group letter assignments during upgrade
2 parents 29a0a87 + bcfadfb commit 8fb38b5

21 files changed

+271
-84
lines changed

.github/workflows/test-failover.yaml

Lines changed: 32 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,10 @@ on:
2020
description: 'Boolean; whether or not to pause for ssh debugging'
2121
required: true
2222
default: 'false'
23+
log_level:
24+
description: 'Bolt log level'
25+
required: false
26+
default: 'debug'
2327

2428
env:
2529
HONEYCOMB_WRITEKEY: 7f3c63a70eecc61d635917de46bea4e6
@@ -37,13 +41,12 @@ jobs:
3741
strategy:
3842
fail-fast: false
3943
matrix:
40-
architecture:
41-
- "extra-large-with-dr-and-spare-replica"
4244
version:
4345
- "${{ github.event.inputs.version }}"
4446
image:
4547
- "${{ github.event.inputs.image }}"
46-
48+
architecture:
49+
- "extra-large-with-dr"
4750
steps:
4851
- name: 'Start SSH session'
4952
if: ${{ github.event.inputs.ssh-debugging == 'true' }}
@@ -89,7 +92,7 @@ jobs:
8992
echo STEP_START=$(date +%s) >> $GITHUB_ENV
9093
echo ::endgroup::
9194
92-
- name: 'Provision test cluster (specified architecture with added DR)'
95+
- name: 'Provision test cluster (XL with spare replica)'
9396
timeout-minutes: 15
9497
run: |
9598
echo ::group::prepare
@@ -106,13 +109,7 @@ jobs:
106109
--modulepath spec/fixtures/modules \
107110
provider=provision_service \
108111
image=${{ matrix.image }} \
109-
architecture=${{ matrix.architecture }}-with-dr
110-
buildevents cmd $TRACE_ID $STEP_ID 'bolt task run provision::provision_service' -- \
111-
bundle exec bolt bolt task run provision::provision_service \
112-
--modulepath spec/fixtures/modules \
113-
action=provision
114-
platform=${{ matrix.image }} \
115-
vars="role: primary"
112+
architecture=${{ matrix.architecture }}-and-spare-replica
116113
echo ::endgroup::
117114
118115
echo ::group::info:request
@@ -136,7 +133,7 @@ jobs:
136133
timeout-minutes: 120
137134
run: |
138135
buildevents cmd $TRACE_ID $STEP_ID 'bolt plan run peadm_spec::install_test_cluster' -- \
139-
bundle exec bolt plan run peadm_spec::install_test_cluster \
136+
bundle exec bolt plan run peadm_spec::install_test_cluster --log_level ${{ github.event.inputs.log_level }} \
140137
--inventoryfile spec/fixtures/litmus_inventory.yaml \
141138
--modulepath spec/fixtures/modules \
142139
architecture=${{ matrix.architecture }} \
@@ -154,11 +151,9 @@ jobs:
154151
- name: 'Perform failover'
155152
run: |
156153
buildevents cmd $TRACE_ID $STEP_ID 'bolt plan run peadm_spec::perform_failover' -- \
157-
bundle exec bolt plan run peadm_spec::perform_failover \
154+
bundle exec bolt plan run peadm_spec::perform_failover --log_level ${{ github.event.inputs.log_level }} \
158155
--inventoryfile spec/fixtures/litmus_inventory.yaml \
159-
--modulepath spec/fixtures/modules \
160-
platform=${{ matrix.image }} \
161-
vars="role: primary"
156+
--modulepath spec/fixtures/modules
162157
163158
- name: "Honeycomb: Record falover time"
164159
if: ${{ always() }}
@@ -178,20 +173,36 @@ jobs:
178173
done
179174
echo "${HOME}/pause absent, continuing workflow."
180175
176+
- name: Set up yq
177+
uses: frenck/action-setup-yq@v1
178+
with:
179+
version: v4.30.5
180+
181+
- name: 'Update inventory'
182+
run: |
183+
# Remove failed primary
184+
yq -i 'del(.groups[].targets[] | select(.vars.role == "primary"))' spec/fixtures/litmus_inventory.yaml
185+
# Swap primary and replica nodes
186+
sed -i.sedbak 's/primary/__tmp__/;s/spare-replica/__tmp2__/;s/replica/primary/;s/__tmp__/replica/;s/__tmp2__/replica/' \
187+
spec/fixtures/litmus_inventory.yaml
188+
echo ::group::info:inventory
189+
sed -e 's/password: .*/password: "[redacted]"/' < spec/fixtures/litmus_inventory.yaml || true
190+
echo ::endgroup::
191+
181192
- name: 'Upgrade PE on test cluster'
182-
if: ${{ always() && github.event.inputs.version_to_upgrade != '' }}
193+
if: ${{ success() && github.event.inputs.version_to_upgrade != '' }}
183194
timeout-minutes: 120
184195
run: |
185196
buildevents cmd $TRACE_ID $STEP_ID 'bolt plan run peadm_spec::upgrade_test_cluster' -- \
186-
bundle exec bolt plan run peadm_spec::upgrade_test_cluster \
197+
bundle exec bolt plan run peadm_spec::upgrade_test_cluster --log_level ${{ github.event.inputs.log_level }} \
187198
--inventoryfile spec/fixtures/litmus_inventory.yaml \
188199
--modulepath spec/fixtures/modules \
189-
architecture='extra-large-with-dr' \
200+
architecture=${{ matrix.architecture }} \
190201
download_mode='direct' \
191-
version=${{ matrix.version_to_upgrade }}
202+
version=${{ github.event.inputs.version_to_upgrade }}
192203
193204
- name: "Honeycomb: Record upgrade time"
194-
if: ${{ always() && github.event.inputs.version_to_upgrade != '' }}
205+
if: ${{ success() && github.event.inputs.version_to_upgrade != '' }}
195206
run: |
196207
echo ::group::honeycomb
197208
buildevents step $TRACE_ID $STEP_ID $STEP_START 'Upgrade PE on test cluster'

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,3 +36,4 @@ spec/docker/**/*.tar.gz
3636
spec/docker/**/*.asc
3737
spec/docker/**/files/puppet-enterprise*
3838
spec/docker/.task_cache.json
39+
.vscode/settings.json

functions/assert_supported_architecture.pp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ function peadm::assert_supported_architecture (
3333
# lint:ignore:strict_indent
3434
default: { # Invalid
3535
out::message(inline_epp(@(HEREDOC)))
36-
Invalid architecture! Recieved:
36+
Invalid architecture! Received:
3737
- primary
3838
<% if $replica_host { -%>
3939
- primary-replica

functions/assert_supported_pe_version.pp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ function peadm::assert_supported_pe_version (
1212
if $permit_unsafe_versions {
1313
# lint:ignore:strict_indent
1414
warning(@("WARN"/L))
15-
WARNING: Permitting unsafe PE versions. This is not supported or tested.
15+
WARNING: Permitting unsafe PE versions. This is not supported or tested.
1616
Proceeding with this action could result in a broken PE Infrastructure.
1717
| WARN
1818
# lint:endignore
@@ -21,7 +21,7 @@ function peadm::assert_supported_pe_version (
2121
if (!$supported and $permit_unsafe_versions) {
2222
# lint:ignore:strict_indent
2323
warning(@("WARN"/L))
24-
WARNING: PE version ${version} is NOT SUPPORTED!
24+
WARNING: PE version ${version} is NOT SUPPORTED!
2525
| WARN
2626
# lint:endignore
2727
}

plans/add_replica.pp

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,10 +5,11 @@
55
# @summary Replace a replica host for a Standard or Large architecture.
66
# Supported use cases:
77
# 1: The existing replica is broken, we have a fresh new VM we want to provision the replica to.
8-
# The new replica should have the same certname as the broken one.
98
# @param primary_host - The hostname and certname of the primary Puppet server
109
# @param replica_host - The hostname and certname of the replica VM
11-
# @param replica_postgresql_host - The hostname and certname of the host with the replica PE-PosgreSQL database.
10+
# @param replica_postgresql_host - The hostname and certname of the host with the replica PE-PostgreSQL database.
11+
# @param token_file - (optional) Path to the token file, if it is in a location other than the default.
12+
#
1213
# Can be a separate host in an XL architecture, or undef in Standard or Large.
1314
plan peadm::add_replica(
1415
# Standard or Large
@@ -119,7 +120,8 @@
119120
# Race condition, where the provision command checks PuppetDB status and
120121
# probably gets "starting", but fails out because that's not "running".
121122
# Can remove flag when that issue is fixed.
122-
legacy => true,
123+
legacy => false,
124+
# _catch_errors => true, # testing
123125
)
124126

125127
# start puppet service

plans/modify_certificate.pp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
# TODO: convert $add_extensions and $remov_extensions to OIDs, if friendly
2020
# names have been given
2121

22+
out::message("peadm::modify_certificate: primary host: ${primary_target} - ${primary_target.name} - ${primary_target.uri}")
2223
$primary_certname = run_task('peadm::cert_data', $primary_target).first['certname']
2324

2425
# Do the primary first, if it's in the list

plans/subplans/install.pp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -138,7 +138,7 @@
138138
# lint:ignore:strict_indent
139139
warning(@("HEREDOC"))
140140
WARNING: Target name / hostname mismatch: target ${name} reports ${result['hostname']}
141-
Certificate name will be set to target name. Please ensure target name is correct and resolvable
141+
Certificate name will be set to target name. Please ensure target name is correct and resolvable
142142
|-HEREDOC
143143
# lint:endignore
144144
}

plans/subplans/modify_certificate.pp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -106,7 +106,7 @@
106106
# The docs are broken, and the process is unclean. Sadface.
107107
run_task('service', $target, { action => 'stop', name => 'pe-puppetserver' })
108108
run_command(@("HEREDOC"/L), $target)
109-
rm -f \
109+
rm -f \
110110
/etc/puppetlabs/puppet/ssl/certs/${certname}.pem \
111111
/etc/puppetlabs/puppet/ssl/private_keys/${certname}.pem \
112112
/etc/puppetlabs/puppet/ssl/public_keys/${certname}.pem \

plans/subplans/prepare_agent.pp

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,9 @@
88
$agent_target = peadm::get_targets($targets, 1)
99
$primary_target = peadm::get_targets($primary_host, 1)
1010

11+
out::message("Preparing agent ${agent_target} to connect to ${primary_target}")
12+
out::message("agent target ${agent_target} to connect to ${primary_target}")
13+
1114
$dns_alt_names_flag = $dns_alt_names? {
1215
undef => [],
1316
default => ["main:dns_alt_names=${dns_alt_names.join(',')}"],
@@ -80,8 +83,9 @@
8083
8184
# If agent certificate is good but lacks appropriate extensions, plan will still
8285
# regenerate certificate
86+
out::message("primary target: ${primary_target}, certname: ${primary_target.peadm::certname()}, uri: ${primary_target[0].uri}")
8387
run_plan('peadm::modify_certificate', $agent_target,
84-
primary_host => $primary_target.peadm::certname(),
88+
primary_host => $primary_target,
8589
add_extensions => $certificate_extensions,
8690
force_regenerate => $force_regenerate
8791
)

plans/upgrade.pp

Lines changed: 29 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -255,20 +255,45 @@
255255
},
256256
)
257257

258+
# Log the peadm configuration before node manager setup
259+
run_task('peadm::get_peadm_config', $primary_target)
260+
258261
# Update classification. This needs to be done now because if we don't, and
259262
# the PE Compiler node groups are wrong, then the compilers won't be able to
260263
# successfully classify and update
264+
265+
# First, determine the correct hosts for the A and B availability groups
266+
$server_a_host = $cert_extensions.dig($primary_target.peadm::certname(), peadm::oid('peadm_availability_group')) ? {
267+
'A' => $primary_target.peadm::certname(),
268+
default => $replica_target.peadm::certname(),
269+
}
270+
271+
$server_b_host = $server_a_host ? {
272+
$primary_target.peadm::certname() => $replica_target.peadm::certname(),
273+
default => $primary_target.peadm::certname(),
274+
}
275+
276+
$postgresql_a_host = $cert_extensions.dig($primary_postgresql_target.peadm::certname(), peadm::oid('peadm_availability_group')) ? {
277+
'A' => $primary_postgresql_target.peadm::certname(),
278+
default => $replica_postgresql_target.peadm::certname(),
279+
}
280+
281+
$postgresql_b_host = $postgresql_a_host ? {
282+
$primary_postgresql_target.peadm::certname() => $replica_postgresql_target.peadm::certname(),
283+
default => $primary_postgresql_target.peadm::certname(),
284+
}
285+
261286
apply($primary_target) {
262287
class { 'peadm::setup::node_manager_yaml':
263288
primary_host => $primary_target.peadm::certname(),
264289
}
265290

266291
class { 'peadm::setup::node_manager':
267292
primary_host => $primary_target.peadm::certname(),
268-
server_a_host => $primary_target.peadm::certname(),
269-
server_b_host => $replica_target.peadm::certname(),
270-
postgresql_a_host => $primary_postgresql_target.peadm::certname(),
271-
postgresql_b_host => $replica_postgresql_target.peadm::certname(),
293+
server_a_host => $server_a_host,
294+
server_b_host => $server_b_host,
295+
postgresql_a_host => $postgresql_a_host,
296+
postgresql_b_host => $postgresql_b_host,
272297
compiler_pool_address => $compiler_pool_address,
273298
internal_compiler_a_pool_address => $internal_compiler_a_pool_address,
274299
internal_compiler_b_pool_address => $internal_compiler_b_pool_address,

0 commit comments

Comments
 (0)