-
Notifications
You must be signed in to change notification settings - Fork 283
/
Copy pathsles4sap.pm
1466 lines (1097 loc) · 50.4 KB
/
sles4sap.pm
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
# SUSE's openQA tests
#
# Copyright 2017-2024 SUSE LLC
# SPDX-License-Identifier: FSFAP
#
# Summary: Functions for SAP tests
# Maintainer: QE-SAP <[email protected]>
## no critic (RequireFilenameMatchesPackage);
package sles4sap;
use Mojo::Base 'opensusebasetest';
use strict;
use warnings;
use testapi;
use serial_terminal qw(select_serial_terminal);
use utils;
use hacluster qw(get_hostname ha_export_logs pre_run_hook save_state wait_until_resources_started script_output_retry_check);
use isotovideo;
use ipmi_backend_utils;
use x11utils qw(ensure_unlocked_desktop);
use power_action_utils qw(power_action);
use Utils::Backends;
use registration qw(add_suseconnect_product);
use version_utils qw(is_sle);
use utils qw(zypper_call);
use Digest::MD5 qw(md5_hex);
use Utils::Systemd qw(systemctl);
use Utils::Logging qw(save_and_upload_log);
use Carp qw(croak);
# Names placed in @EXPORT: the shared package variables first, then the
# attribute accessors, then the helper subs of this package.
our @EXPORT = qw(
$instance_password
$systemd_cgls_cmd
$resource_alias
$resource_role
SAPINIT_RE
SYSTEMD_RE
SYSTEMCTL_UNITS_RE
ASE_RESPONSE_FILE
download_hana_assets_from_server
ensure_serialdev_permissions_for_sap
fix_path
set_ps_cmd
set_sap_info
user_change
reset_user_change
get_total_mem
prepare_profile
copy_media
mount_media
add_hostname_to_hosts
test_pids_max
test_forkbomb
test_version_info
test_instance_properties
test_stop
test_start
reboot
check_replication_state
check_hanasr_attr
check_landscape
do_hana_sr_register
do_hana_takeover
install_libopenssl_legacy
startup_type
prepare_sapinst_profile
netweaver_installation_data
prepare_swpm
get_sidadm
get_instance_profile_path
load_ase_env
upload_ase_logs
);
=head1 SYNOPSIS
Package with common methods and default values for tests on SLES for
SAP Applications.
This package inherits from B<opensusebasetest> and should be used as
a class.
=cut
# Package-level state shared by the subs of this module.
# $prev_console is reset to undef by prepare_profile() on OFW systems.
our $prev_console;
# $sapadmin, $sid, $instance and $product are assigned by set_sap_info().
our $sapadmin;
our $sid;
our $instance;
our $product;
# $ps_cmd is assigned by set_ps_cmd() and run by test_start().
our $ps_cmd;
# Taken from the INSTANCE_PASSWORD setting, with a fallback default.
our $instance_password = get_var('INSTANCE_PASSWORD', 'Qwerty_123');
our $systemd_cgls_cmd = 'systemd-cgls --no-pager -u SAP.slice';
# Both values depend on whether the product is SLE 15-SP4 or newer.
our $resource_alias = is_sle(">=15-SP4") ? 'cln' : 'msl';
our $resource_role = is_sle(">=15-SP4") ? "Promoted" : "Master";
=head2 SAPINIT_RE & SYSTEMD_RE
$self->SAPINIT_RE();
$self->SAPINIT_RE(qr/some regexp/);
$self->SYSTEMD_RE();
$self->SYSTEMD_RE(qr/some regexp/);
Set or get the regular expressions used to test, on the F</usr/sap/sapservices> file,
whether the SAP workload was started via sapinit or via systemd.
=cut
# Both accessors start out undefined; set_sap_info() assigns the regular
# expressions when it is called on an object.
has SAPINIT_RE => undef;
has SYSTEMD_RE => undef;
=head2 SYSTEMCTL_UNITS_RE
$self->SYSTEMCTL_UNITS_RE();
$self->SYSTEMCTL_UNITS_RE(qr/some regexp/);
Set or get a regular expression to test in the output of C<systemctl --list-unit-files>
whether the SAP workload was started via systemd units.
=cut
# Starts out undefined; set_sap_info() assigns it when called on an object.
has SYSTEMCTL_UNITS_RE => undef;
=head2 ASE_RESPONSE_FILE
$self->ASE_RESPONSE_FILE($filename);
Let the class methods know the name of the ASE response file currently in use. It is set to
undef by default. Test modules testing for SAP ASE should set this property before anything else.
=cut
# Starts out undefined until a caller sets it through the accessor.
has ASE_RESPONSE_FILE => undef;
=head2 download_hana_assets_from_server
$self->download_hana_assets_from_server()
Download and extract HANA installation media to /sapinst directory of the SUT.
The media location must be provided as ASSET_0 in the job settings and be
available as an uncompressed tar in the factory/other directory of the openQA
server
=cut
sub download_hana_assets_from_server {
    # Download the HANA media referenced by ASSET_0, unpack it into the target
    # directory and verify it against a checksum list when the archive ships one.
    #
    # Named arguments:
    #   target  - directory to extract the media into (default: /sapinst)
    #   nettout - timeout in seconds used for the download and for the
    #             checksum verification (default: 2700)
    #
    # Returns 1 when the checksum verification is skipped. Dies if any of the
    # asserted commands fails in SUT.
    my ($self, %params) = @_;
    my $target = $params{target} // '/sapinst';
    # Each HANA asset is about 16GB. The default of 2700 seconds (45 minutes)
    # assumes an average download speed of roughly 6MB/s. Adjust according to
    # expected server conditions.
    my $nettout = $params{nettout} // 2700;
    # ASSET_0 is mandatory: fail early when the job does not define it
    get_required_var('ASSET_0');
    my $hana_location = data_url('ASSET_0');
    script_run "mkdir $target";
    assert_script_run "cd $target";

    # The lock file marks an already downloaded asset. script_run returns 0 when
    # the file exists, so a true value here means "not downloaded yet"
    my $asset_lock = "/tmp/asset_0";
    my $asset_missing = script_run "test -e $asset_lock";
    if ($asset_missing) {
        assert_script_run "wget -O - $hana_location | tar -xf -", timeout => $nettout;
        assert_script_run "touch $asset_lock";

        # Skip checksum check if DISABLE_CHECKSUM is set, or if checksum file is not
        # part of the archive
        my $sap_chksum_file = 'MD5FILE.DAT';
        my $chksum_file = 'checksum.md5sum';
        my $no_checksum_file = script_run "[[ -f $target/$chksum_file || -f $target/$sap_chksum_file ]]";
        return 1 if (get_var('DISABLE_CHECKSUM') || $no_checksum_file);

        # Switch to $target to verify copied contents are OK
        assert_script_run "pushd $target";
        # If SAP provided MD5 sum file is present convert it to the md5sum format.
        # The test is wrapped in if/fi so that an archive shipping only
        # checksum.md5sum does not make the asserted command return non-zero
        assert_script_run "if [[ -f $sap_chksum_file ]]; then awk '{print \$2\" \"\$1}' $target/$sap_chksum_file > $target/$chksum_file; fi";
        assert_script_run "md5sum -c --quiet $chksum_file", $nettout;
        assert_script_run "popd";
    }
}
=head2 ensure_serialdev_permissions_for_sap
Derived from 'ensure_serialdev_permissions' function available in 'utils'.
Grant user permission to access serial port immediately as well as persisting
over reboots. Used to ensure that testapi calls like script_run work for the
test user as well as root.
=cut
sub ensure_serialdev_permissions_for_sap {
    my ($self) = @_;
    my $serial_path = "/dev/$testapi::serialdev";

    # Changing the owner takes effect at once, while the group membership only
    # applies after a reboot. The alternative described in
    # https://superuser.com/a/609141/327890 would require handling an optional
    # sudo password prompt within the exec
    my $tty_group = script_output("stat -c %G $serial_path");

    # Nothing is changed when the SAP admin user is already listed as the last
    # member of the group owning the serial device
    my $already_member = "grep '^${tty_group}:.*:${sapadmin}\$' /etc/group";
    my $grant_access = "chown $sapadmin $serial_path && gpasswd -a $sapadmin $tty_group";
    assert_script_run("$already_member || ($grant_access)");
}
=head2 fix_path
$self->fix_path( $uri );
Given the path to a CIFS or NFS share in B<$uri>, this method will format the path
so it can be used directly by B<mount(8)>. Returns an array with the protocol name (cifs
or nfs) as the first element, and the formatted path as the second element. Croaks if
an unsupported protocol is passed in B<$uri> or if it cannot be parsed.
=cut
sub fix_path {
    # Turn a share URI such as nfs://host/dir or smb://host/share into the
    # arguments expected by mount(8).
    #
    # Arguments: $var - URI of the form <protocol>://<host>/<path>
    # Returns:   list of (protocol, path), where protocol is 'nfs' or 'cifs'
    #            and path is 'host:/dir' for NFS or '//host/share' for CIFS
    # Croaks:    when the URI cannot be parsed or the protocol is unsupported
    my ($self, $var) = @_;
    croak 'Cannot parse share location: no URI given' unless defined $var;

    # Limit the split to 2 fields so that a later '://' stays part of the path
    my ($proto, $path) = split m|://|, $var, 2;
    croak "Cannot parse share location [$var]: expected <protocol>://<host>/<path>"
      unless defined $path;

    # smb and smbfs are aliases for the cifs filesystem type
    $proto = 'cifs' if ($proto eq 'smb' or $proto eq 'smbfs');
    croak 'Currently only supported protocols are nfs and smb/smbfs/cifs'
      unless ($proto eq 'nfs' or $proto eq 'cifs');

    # The first path element is the server name; without it there is nothing to mount
    my @aux = split '/', $path;
    croak "Cannot parse share location [$var]: missing host name"
      unless (@aux and length $aux[0]);

    $aux[0] .= ':' if ($proto eq 'nfs');
    $aux[0] = '//' . $aux[0] if ($proto eq 'cifs');
    $path = join '/', @aux;
    return ($proto, $path);
}
=head2 set_ps_cmd
$self->set_ps_cmd( $procname );
Sets in the class instance the B<ps> command to be used to check for the presence
of SAP processes in the SUT. Returns the value of the internal variable B<$ps_cmd>.
=cut
sub set_ps_cmd {
    # Store the ps pipeline used to look for $procname in the process table.
    # When no process name is given the stored command is left as it is.
    # Returns the command currently stored in $ps_cmd.
    my ($self, $procname) = @_;
    if ($procname) {
        # The last grep drops the "grep" process itself from the listing
        $ps_cmd = join ' | ', 'ps auxw', "grep $procname", 'grep -vw grep';
    }
    return $ps_cmd;
}
=head2 set_sap_info
$self->set_sap_info( $SID, $instance_number );
SAP software relies on 2 identifiers, the system id (SID) which is a 3-character
identifier, and the instance number. This method receives both via positional
arguments, and sets the internal variables for B<$sid>, B<$instance> and B<$sapadmin>
accordingly. It also sets accessors that depend on B<$sid> and B<$instance>
as well as the product type. Returns the value of B<$sapadmin>.
=cut
sub set_sap_info {
    # Record SID, instance number, admin user and product type in the package
    # variables, and refresh the regexp accessors when called on an object.
    # Croaks when either argument is missing. Returns the admin user name.
    my ($self, $sid_env, $instance_env) = @_;
    croak('missing mandatory arg') if (!$sid_env or !$instance_env);

    # The SID is stored in upper case, the admin account is <lowercase sid>adm
    ($sid, $instance, $sapadmin) = (uc($sid_env), $instance_env, lc($sid_env) . 'adm');
    # Default to HDB as INSTANCE_TYPE is only a required setting in NW tests
    $product = get_var('INSTANCE_TYPE', 'HDB');

    # The accessors only exist on an instance, so stop here for plain calls
    return $sapadmin unless ref($self);

    $self->SAPINIT_RE(qr|$sid/$product$instance/exe/sapstartsrv|);
    $self->SYSTEMD_RE(qr|systemctl.+start SAP${sid}_$instance|);
    $self->SYSTEMCTL_UNITS_RE(qr/SAP${sid}_$instance.service/);
    return $sapadmin;
}
=head2 user_change
$self->user_change();
Switch user in SUT to the SAP admin account, and grant serialdev
permissions to the SAP admin user.
=cut
sub user_change {
    # The SAP admin user must be able to report status via $testapi::serialdev.
    # This is done here as well as during product installation to stay
    # compatible with older generated images
    ensure_serialdev_permissions_for_sap();

    # Become the SAP admin user
    enter_cmd("su - $sapadmin");

    # SAP's installer sets the admin user's shell to /bin/csh, and csh has
    # problems with strings that start with ~, which testapi::hashed_string()
    # can generate. That leads to unexpected failures of script_output() or
    # assert_script_run(), so replace the shell with bash
    enter_cmd('exec bash');

    # 'wait_serial' relies on 'serial_term_prompt', so set a known prompt in
    # the new shell and tell the distribution object about it
    my $prompt = "$sapadmin> ";
    enter_cmd("PS1=\"$prompt\"");
    if (testapi::is_serial_terminal()) {
        wait_serial(qr/PS1="$prompt"/);
    }
    $testapi::distri->{serial_term_prompt} = $prompt;
}
=head2 reset_user_change
$self->reset_user_change();
Exit from the SAP admin account in SUT and change serialdev
permissions accordingly.
=cut
sub reset_user_change {
    # Leave the current shell
    enter_cmd('exit');

    # Restore the 'serial_term_prompt' value that 'wait_serial' expects
    $testapi::distri->{serial_term_prompt} = '# ';

    # Rollback changes to $testapi::serialdev
    ensure_serialdev_permissions();
}
=head2 get_total_mem
$self->get_total_mem();
Returns the total memory configured in SUT.
=cut
sub get_total_mem {
    # On QEMU the value comes from the mandatory QEMURAM job setting
    if (is_qemu()) {
        return get_required_var('QEMURAM');
    }

    # Otherwise take the MemTotal figure from /proc/meminfo and divide it by 1024
    my $mem_total = script_output q@grep ^MemTotal /proc/meminfo | awk '{print $2}'@;
    return $mem_total / 1024;
}
=head2 is_saptune_installed
is_saptune_installed();
Checks if the B<saptune> package is installed in SUT. Returns true or false.
=cut
sub is_saptune_installed {
    # True only when the rpm query ran and exited with 0; an undefined
    # return code counts as "not installed"
    my $rc = script_run('rpm -q saptune');
    return (defined($rc) && $rc == 0);
}
=head2 prepare_profile
$self->prepare_profile( $profile );
Configures the SUT with B<saptune> (if available in SUT) or B<sapconf> according to
a profile passed as argument. B<$profile> must be one of B<HANA>, B<NETWEAVER> or
B<SAP-ASE>; with any other value the method returns without changing anything.
Croaks on failure.
=cut
sub prepare_profile {
    # Tune the SUT for the given SAP profile with saptune when it is installed,
    # or with sapconf otherwise, and then verify the saptune solution.
    #
    # Arguments: $profile - one of HANA, NETWEAVER or SAP-ASE; any other value
    #            makes the method return without doing anything.
    # Dies when one of the asserted commands fails in SUT.
    my ($self, $profile) = @_;
    my @valid_profiles = qw(HANA NETWEAVER SAP-ASE);
    return unless (grep /^$profile$/, @valid_profiles);

    # Will prepare system with saptune only if it's available.
    my $has_saptune = $self->is_saptune_installed();
    if ($has_saptune) {
        assert_script_run 'saptune service takeover';
        assert_script_run "saptune solution apply $profile";
    }
    elsif (is_sle('15+')) {
        # On SLE15+ the sapconf command was dropped
        assert_script_run "/usr/lib/sapconf/sapconf start";
    }
    else {
        assert_script_run("sapconf stop && sapconf " . lc($profile));
    }

    if (!$has_saptune) {
        # Restart systemd-logind to ensure that all new connections will have the
        # SAP tuning activated. Since saptune v2, the call to 'saptune solution apply'
        # above can make the SUT change focus to the x11 console, which may not be ready
        # for the systemctl command. If the systemctl command times out, change to
        # root-console and try again. Run the first call to systemctl with
        # ignore_failure => 1 to avoid stopping the test. Second call runs as usual
        my $ret = systemctl('restart systemd-logind.service', ignore_failure => 1);
        die "systemctl restart systemd-logind.service failed with retcode: [$ret]" if $ret;
        if (!defined $ret) {
            select_serial_terminal;
            systemctl 'restart systemd-logind.service';
        }
    }

    # X11 workaround only on ppc64le
    if (get_var('OFW')) {
        # 'systemctl restart systemd-logind' is causing the X11 console to move
        # out of tty2 on SLES4SAP-15, which in turn is causing the change back to
        # the previous console in post_run_hook() to fail when running on systems
        # with DESKTOP=gnome, which is a false positive as the test has already
        # finished by that step. The following prevents post_run_hook from attempting
        # to return to the console that was set before this test started. For more
        # info on why X is running in tty2 on SLES4SAP-15, see bsc#1054782
        $prev_console = undef;

        # If running in DESKTOP=gnome, systemd-logind restart may cause the graphical console to
        # reset and appear in SUT, so need to select 'root-console' again
        assert_screen(
            [
                qw(root-console displaymanager displaymanager-password-prompt generic-desktop
                  text-login linux-login started-x-displaymanager-info)
            ], 120);
        select_serial_terminal unless (match_has_tag 'root-console');
    }
    else {
        # If running in DESKTOP=gnome, systemd-logind restart may cause the graphical
        # console to reset and appear in SUT, so need to select 'root-console' again
        # 'root-console' can be re-selected safely even if DESKTOP=textmode
        select_serial_terminal;
    }

    if ($has_saptune) {
        assert_script_run 'saptune service takeover';
        # '$?' has to reach the shell untouched, so the dollar sign is escaped
        # for Perl. Otherwise no digit is ever printed after 'DONE-' and the
        # wait below can only time out
        enter_cmd "saptune solution verify $profile; echo DONE-\$? > /dev/$serialdev";
        my $ret = wait_serial qr/DONE-\d+/, timeout => 30;
        if (defined $ret) {
            # wait_serial returns the matched serial text and not a return code:
            # extract the exit status of 'saptune solution verify' from it
            ($ret) = $ret =~ /DONE-(\d+)/;
        }
        else {
            # Command timed out. 'saptune service takeover' could have caused the SUT to
            # move out of root-console, so select root-console and try again
            select_serial_terminal;
            $ret = script_run "saptune solution verify $profile";
        }
        record_soft_failure("poo#57464: 'saptune solution verify' returned warnings or errors! Please check!") if ($ret && !is_qemu());

        my $output = script_output 'saptune service status', proceed_on_failure => 1;
        if (!defined $output) {
            # Command timed out or failed. 'saptune solution verify' could have caused
            # the SUT to move out of root-console, so select root-console and try again
            select_serial_terminal;
            $output = script_output 'saptune service status';
        }
        record_info("saptune status", $output);
    }
}
=head2 _do_mount
_do_mount( $proto, $path, $target);
Performs a call to the mount command (used by both C<mount_media> and C<copy_media>) with
appropriate options depending on the protocol. Function internal to the class.
=cut
sub _do_mount {
    # Mount $path on $mnt_path read-only using the filesystem type $proto.
    # For NFS the read/write sizes (and an optional timeout from NFS_TIMEO)
    # are added to the options and a job specific NFSv4 client id is requested.
    my ($proto, $path, $mnt_path) = @_;

    # The client id is derived from the mandatory JOBTOKEN setting
    my $nfs_client_id = md5_hex(get_required_var('JOBTOKEN'));
    my $is_nfs = ($proto eq 'nfs');
    my $options = 'ro';

    if ($is_nfs) {
        my @nfs_options = qw(rsize=16384 wsize=16384 ro);
        my $nfs_timeo = get_var('NFS_TIMEO');
        unshift @nfs_options, "timeo=$nfs_timeo" if $nfs_timeo;
        $options = join ',', @nfs_options;

        # Attempt to force a unique NFSv4 client id
        assert_script_run("modprobe nfs nfs4_unique_id=$nfs_client_id");
        # Wait until the nfs4_unique_id parameter file shows up
        assert_script_run('until ls /sys/module/nfs/parameters/nfs4_unique_id; do sleep 1; done');
    }

    assert_script_run("mount -t $proto -o $options $path $mnt_path", 90);

    # Show the NFS client ID that ended up in use
    assert_script_run('cat /sys/module/nfs/parameters/nfs4_unique_id') if $is_nfs;
}
=head2 copy_media
$self->copy_media( $proto, $path, $timeout, $target);
Copies installation media in SUT from the share identified by B<$proto> and
B<$path> into the target directory B<$target>. B<$timeout> specifies how long
to wait for the copy to complete.
After installation files are copied, this method will also verify the existence
of a F<checksum.md5sum> file in the target directory and use it to check for the
integrity of the copied files. This test can be skipped by setting to a
true value the B<DISABLE_CHECKSUM> setting in the test.
The method will croak if any of the commands sent to SUT fail.
=cut
sub copy_media {
    # Mount the share, rsync its content into $target, unmount it again and,
    # unless disabled or impossible, verify the copy with checksum.md5sum.
    # $nettout is the timeout used for the copy and for the verification.
    my ($self, $proto, $path, $nettout, $target) = @_;
    my $mnt_path = '/mnt';
    my $arch_dir = "$mnt_path/" . get_required_var('ARCH');

    # Make sure the destination exists before mounting the share
    assert_script_run("mkdir -p $target");
    _do_mount($proto, $path, $mnt_path);

    # Copy from the ARCH specific subdirectory when the share has one,
    # and from the top of the share otherwise
    my $media_path = script_run("[[ -d $arch_dir ]]") ? $mnt_path : $arch_dir;

    my $rsync_cmd = 'rsync -azr --info=progress2';
    my $dry_run_stats = script_output("$rsync_cmd --dry-run --stats $media_path/ $target/", proceed_on_failure => 1);
    record_info('rsync stats (dry-run)', $dry_run_stats);
    assert_script_run("$rsync_cmd $media_path/ $target/", $nettout);

    # The share is no longer needed once everything is copied
    assert_script_run("umount $mnt_path");

    # The verification is skipped when DISABLE_CHECKSUM is set or when no
    # checksum.md5sum file was copied to the $target directory
    # NOTE: checksum is generated with this command: "find . -type f -exec md5sum {} \; > checksum.md5sum"
    my $chksum_file = 'checksum.md5sum';
    my $no_checksum_file = script_run("[[ -f $target/$chksum_file ]]");
    return 1 if (get_var('DISABLE_CHECKSUM') || $no_checksum_file);

    # Verify from inside $target
    assert_script_run("pushd $target");
    # Neither the checksum file itself nor the clustered NFS share part can be checked
    assert_script_run("sed -i -e '/$chksum_file\$/d' -e '/\\/nfs_share/d' $chksum_file");
    assert_script_run("md5sum -c --quiet $chksum_file", $nettout);
    # Return to the previous directory
    assert_script_run('popd');
}
=head2 mount_media
$self->mount_media( $proto, $path, $target );
Mount installation media in SUT from the share identified by B<$proto> and
B<$path> into the target directory B<$target>.
=cut
sub mount_media {
    # Mount the share on /mnt and expose it on $target through an overlay,
    # so that writes to $target do not reach the read-only share.
    my ($self, $proto, $path, $target) = @_;
    my $mnt_path = '/mnt';
    my $arch_dir = "$mnt_path/" . get_required_var('ARCH');

    assert_script_run("mkdir $target");
    _do_mount($proto, $path, $mnt_path);

    # Use the ARCH specific subdirectory when the share has one,
    # and the top of the share otherwise
    my $lower_dir = script_run("[[ -d $arch_dir ]]") ? $mnt_path : $arch_dir;

    # The overlay "allows" writes on top of the readonly filesystem
    assert_script_run('mkdir /.workdir /.upperdir');
    assert_script_run("mount -t overlay overlay -o lowerdir=$lower_dir,upperdir=/.upperdir,workdir=/.workdir $target");
}
=head2 add_hostname_to_hosts
$self->add_hostname_to_hosts();
Adds the IP address and the hostname of SUT to F</etc/hosts>. Croaks on failure.
=cut
sub add_hostname_to_hosts {
    # The interface defaults to eth0 unless SUT_NETDEVICE says otherwise
    my $netdevice = get_var('SUT_NETDEVICE', 'eth0');

    # Extracts the IPv4 address(es) configured on the interface
    my $ip_lookup = "ip -4 addr show dev $netdevice | sed -rne '/inet/s/[[:blank:]]*inet ([0-9\\.]*).*/\\1/p'";

    # Append "<address> <hostname>" to /etc/hosts
    assert_script_run("echo \$($ip_lookup) \$(hostname) >> /etc/hosts");
}
=head2 test_pids_max
$self->test_pids_max();
Checks in SUT that the SAP admin user has no limits in the number of processes
and threads that it can create.
=cut
sub test_pids_max {
# Checks two limits for the SAP admin user and records a soft failure
# (bsc#1031355) when either of them is not as expected:
#  1. pids.max of the user's slice must be "max"
#  2. the user's 'ulimit -u' value, times 2 plus 1, must be at least kernel.threads-max
# Dies if one of the asserted helper scripts fails in SUT.
# UserTasksMax should be set to "infinity" in /etc/systemd/logind.conf.d/sap.conf
my $uid = script_output "id -u $sapadmin";
# push the command to SUT by write_sut_file API instead of typing string
# it is not stable to type long string especially when high load on worker
# write_sut_file API is more stable in most case
# NOTE: the lines starting with '#' inside the string below belong to the
# generated shell script, they are not Perl comments
my $test_script = "#!/bin/sh
# Compatibility with cgroup directory changes in old and new versions of systemd
if [[ -d /sys/fs/cgroup/pids/user.slice ]]; then
PREFIX_DIR='/sys/fs/cgroup/pids/user.slice'
elif [[ -d /sys/fs/cgroup/user.slice ]]; then
PREFIX_DIR='/sys/fs/cgroup/user.slice'
else
CMD='echo \"ERROR: no user.slice directory\" && exit 2'
fi
if [[ -n \$PREFIX_DIR ]]; then
CMD=\"cat \$PREFIX_DIR/user-${uid}.slice/pids.max\"
fi
# The systemd-run command generates syslog output that may end up in the console, so save the output to a file
systemd-run --slice user -qt su - $sapadmin -c \"\$CMD\" | tr -d '\\r' | tee /tmp/pids-max
";
write_sut_file '/root/test_script.sh', $test_script;
assert_script_run 'bash -eox pipefail /root/test_script.sh';
# $rc1 is non-zero unless a line of /tmp/pids-max is exactly "max"
my $rc1 = script_run "grep -qx max /tmp/pids-max";
# nproc should be set to "unlimited" in /etc/security/limits.d/99-sapsys.conf
# Check that nproc * 2 + 1 >= threads-max
# The same script path is reused for each of the following helper scripts
$test_script = "#!/bin/sh
systemd-run --slice user -qt su - $sapadmin -c 'ulimit -u' -s /bin/bash | tail -n 1 | tr -d '\\r' > /tmp/nproc
";
write_sut_file '/root/test_script.sh', $test_script;
assert_script_run 'bash -eox pipefail /root/test_script.sh';
# Print both values so that they are visible in the test log
assert_script_run "cat /tmp/nproc && sysctl -n kernel.threads-max";
$test_script = "#!/bin/sh
[[ \$(( \$(< /tmp/nproc) * 2 + 1)) -ge \$(sysctl -n kernel.threads-max) ]]
";
write_sut_file '/root/test_script.sh', $test_script;
# $rc2 is non-zero when the comparison in the script above does not hold
my $rc2 = script_run "bash -eox pipefail /root/test_script.sh";
record_soft_failure "bsc#1031355" if ($rc1 or $rc2);
}
=head2 test_forkbomb
$self->test_forkbomb();
Runs a script in SUT to create as many processes as possible, both as the SAP
administrator and as root, and verifies that the SAP admin can create
as many as 99% of the amount of processes that root can. Croaks if any of the
commands sent to SUT fail, and record a soft failure if the SAP admin
user cannot create as many processes as root.
=cut
sub test_forkbomb {
    # Fetch the fork bomb script from the test data and make it executable
    my $script = 'forkbomb.pl';
    my $script_path = "/tmp/$script";
    assert_script_run("curl -f -v " . autoinst_url() . "/data/sles4sap/$script -o $script_path; chmod +x $script_path");

    # Run it as the SAP admin user first. The output goes to a file because
    # systemd-run generates syslog output that may end up in the console
    my $user_result = '/tmp/user-procs';
    assert_script_run("systemd-run --slice user -qt su - $sapadmin -c $script_path | tr -d '\\r' > $user_result", 600);
    my $user_procs = script_output("cat $user_result");

    # Then run it directly in the current session
    my $root_procs = script_output($script_path, 600);

    # The SIDadm user is expected to create at least 99% of the processes root could create
    my $threshold = $root_procs * 0.99;
    record_soft_failure "bsc#1031355" if ($user_procs < $threshold);
}
=head2 test_version_info
$self->test_version_info();
Runs a B<sapcontrol> command with function B<GetVersionInfo> in SUT. Croaks on failure.
=cut
sub test_version_info {
    # sapcontrol prints the function name followed by OK when the call worked
    my $function = 'GetVersionInfo';
    my $output = script_output("sapcontrol -nr $instance -function $function");
    $output =~ /GetVersionInfo[\r\n]+OK/
      or die "sapcontrol: GetVersionInfo API failed\n\n$output";
}
=head2 test_instance_properties
$self->test_instance_properties();
Runs a B<sapcontrol> command with function B<GetInstanceProperties> and verifies that
the reported properties match with the SID stored in the class instance. Croaks on failure.
=cut
sub test_instance_properties {
    # Query the instance properties with sapcontrol and check that the reported
    # SAPSYSTEMNAME is the SID stored in $sid. Dies when the command output is
    # not the expected one, when no SID can be extracted from it or when the
    # SID differs.
    my $output = script_output "sapcontrol -nr $instance -function GetInstanceProperties | grep ^SAP";
    die "sapcontrol: GetInstanceProperties API failed\n\n$output" unless ($output =~ /SAPSYSTEM.+SAPSYSTEMNAME.+SAPLOCALHOST/s);

    # Capture into a lexical instead of reading $1 after an unchecked match:
    # a SAPSYSTEMNAME line with an unexpected format is then reported as such
    my ($reported_sid) = $output =~ /SAPSYSTEMNAME, Attribute, ([A-Z][A-Z0-9]{2})/m;
    die "sapcontrol: could not find the System SID in GetInstanceProperties output\n\n$output" unless defined $reported_sid;
    die "sapcontrol: SAP administrator [$sapadmin] does not match with System SID [$reported_sid]" if ($reported_sid ne $sid);
}
=head2 test_stop
$self->test_stop();
Tests with B<sapcontrol> and functions B<Stop> and B<StopService> that the instance
and services are successfully stopped. Croaks on failure.
=cut
sub test_stop {
    my ($self) = @_;
    my $sapcontrol = "sapcontrol -nr $instance -function";

    # Stop the instance and wait until it is reported as gray
    my $stop_output = script_output("$sapcontrol Stop");
    if ($stop_output !~ /Stop[\r\n]+OK/) {
        die "sapcontrol: Stop API failed\n\n$stop_output";
    }
    $self->check_instance_state('gray');

    # Stop the service and wait until its process is gone
    my $service_output = script_output("$sapcontrol StopService");
    if ($service_output !~ /StopService[\r\n]+OK/) {
        die "sapcontrol: StopService API failed\n\n$service_output";
    }
    $self->check_service_state('stop');
}
=head2 test_start
$self->test_start();
Tests with B<sapcontrol> and functions B<Start> and B<StartService> that the instance
and services are successfully started. Croaks on failure.
=cut
sub test_start {
    my ($self) = @_;
    my $sapcontrol = "sapcontrol -nr $instance -function";

    # Start the service first and wait for its process to show up
    my $service_output = script_output("$sapcontrol StartService $sid");
    if ($service_output !~ /StartService.+OK/s) {
        die "sapcontrol: StartService API failed\n\n$service_output";
    }
    $self->check_service_state('start');

    # Give the process some time to initialize everything; the instance
    # itself is expected to be gray at this point
    sleep 10;
    $self->check_instance_state('gray');

    # Now start the instance and wait for it to turn green
    my $start_output = script_output("$sapcontrol Start");
    if ($start_output !~ /Start[\r\n]+OK/) {
        die "sapcontrol: Start API failed\n\n$start_output";
    }
    $self->check_instance_state('green');

    # Show list of processes
    script_run($ps_cmd);
}
=head2 check_service_state
$self->check_service_state( $state );
Checks in the process table of SUT for B<sapstartsrv> up to the number of seconds
specified in the B<WAIT_INSTANCE_STOP_TIME> setting (defaults to 300, with a maximum
permitted value of 600). The B<$state> argument can be either B<start> or B<stop>,
and it controls whether this method waits for the process to appear in the process
table after service was started, or disappear from the process table after service was
stopped. Croaks on failure.
=cut
sub check_service_state {
    my ($self, $state) = @_;
    my $wanted = uc $state;

    # Wait by default for 5 minutes, and never longer than 10 minutes
    my $remaining = get_var('WAIT_INSTANCE_STOP_TIME', 300);
    if ($remaining > 600) {
        $remaining = 600;
    }

    while ($remaining > 0) {
        my $output = script_output("pgrep -a sapstartsrv | grep -w $sid", proceed_on_failure => 1);
        my @olines = split(/\n/, $output);
        my $proc_count = scalar(@olines);

        # Service is stopped once no process is left
        last if ($wanted eq 'STOP' && $proc_count == 0);

        if ($wanted eq 'START' && $output =~ /sapstartsrv/) {
            # NOTE(review): the process count is only enforced during the last
            # 10 seconds of the wait, and only the count is appended to the
            # message - confirm whether that is intended
            if ($proc_count != 1 && $remaining <= 10) {
                die "sapcontrol: wrong number of processes running after a StartService\n\n" . $proc_count;
            }
            # Service is started
            last;
        }

        $remaining -= 10;
        sleep 10;
    }

    # The counter only reaches 0 when the loop was never left early
    die "Timed out waiting for SAP service status to turn $state" if ($remaining <= 0);
}
=head2 check_instance_state
$self->check_instance_state( $state );
Uses B<sapcontrol> functions B<GetSystemInstanceList> and B<GetProcessList> to
check for up to the number of seconds defined in the B<WAIT_INSTANCE_STOP_TIME>
setting (defaults to 300, with a maximum permitted value of 600), whether the
instance is in the state specified by the B<$state> argument. This argument can
be either B<green> or B<gray>, and it controls whether this method waits for the
instance to turn to green status after a start or to turn to gray status after a
stop. Croaks on failure.
=cut
sub check_instance_state {
    my ($self, $state) = @_;
    my $wanted = uc $state;
    my $want_gray = ($wanted eq 'GRAY');
    my $want_green = ($wanted eq 'GREEN');

    # Wait by default for 5 minutes, and never longer than 10 minutes
    my $remaining = get_var('WAIT_INSTANCE_STOP_TIME', 300);
    if ($remaining > 600) {
        $remaining = 600;
    }

    while ($remaining > 0) {
        my $instances = script_output("sapcontrol -nr $instance -function GetSystemInstanceList");
        if ($instances !~ /GetSystemInstanceList[\r\n]+OK/) {
            die "sapcontrol: GetSystemInstanceList: command failed";
        }

        # Done when waiting for gray and the instance list reports it
        last if ($want_gray && $instances =~ /GRAY/);

        if ($want_gray || ($want_green && $instances =~ /GREEN/)) {
            my $processes = script_output("sapcontrol -nr $instance -function GetProcessList | grep -E -i ^[a-z]", proceed_on_failure => 1);
            if ($processes !~ /GetProcessList[\r\n]+OK/) {
                die "sapcontrol: GetProcessList: command failed";
            }

            # Leave out the header lines, then keep every process line that
            # does not show the wanted state yet
            my @pending = grep { $_ !~ /GetProcessList|OK|^name/ && $_ !~ /$wanted/ } split(/\n/, $processes);
            last unless @pending;
        }

        $remaining -= 10;
        sleep 10;
    }

    # The counter only reaches 0 when the loop was never left early
    die "Timed out waiting for SAP instance status to turn $wanted" if ($remaining <= 0);
}
=head2 check_replication_state
$self->check_replication_state();
Check the status of HANA System Replication by running the
B<systemReplicationStatus.py> script in SUT. Waits for up to 10 minutes for
HANA System Replication to be in Active state, or croaks on timeout.
Note: can only be run on the active node in the cluster.
B<systemReplicationStatus.py> return codes are:
10: No System Replication
11: Error
12: Unknown
13: Initializing
14: Syncing
15: Active
=cut
sub check_replication_state {
    # Poll systemReplicationStatus.py until it exits with 15 (Active).
    # Returns early, without polling, when the first run of the script does
    # not report this node as PRIMARY. Dies after 600 seconds of waiting.
    my ($self) = @_;
    my $sapadm = $self->set_sap_info(get_required_var('INSTANCE_SID'), get_required_var('INSTANCE_ID'));
    # Wait for up to 10 minutes
    my $time_to_wait = 600;
    my $cmd = "su - $sapadm -c 'python exe/python_support/systemReplicationStatus.py'";

    # Replication check can only be done on PRIMARY node
    my $output = script_output($cmd, proceed_on_failure => 1, timeout => 200);
    return if $output !~ /mode:[\r\n\s]+PRIMARY/;

    # Loop until ACTIVE state or timeout is reached
    while ($time_to_wait > 0) {
        my $status = script_run($cmd);
        # Exit if replication is in state "Active". The return code may be
        # undefined when the command timed out, which simply means "try again"
        last if (defined $status && $status == 15);
        $time_to_wait -= 10;
        sleep 10;
    }
    die 'Timed out waiting for HANA System Replication to turn Active' unless ($time_to_wait > 0);
}
=head2 check_hanasr_attr
$self->check_hanasr_attr();
Runs B<SAPHanaSR-showAttr> and checks in its output, for up to a timeout
specified in the named argument B<timeout> (defaults to 90 seconds), that
the sync_state is B<SOK>. It also checks that no B<SFAIL> sync_state is
present in the output. Finishes by printing the full output of
B<SAPHanaSR-showAttr>. This method will only fail if B<SAPHanaSR-showAttr>
returns a non-zero return value.
=cut
sub check_hanasr_attr {
    my ($self, %args) = @_;
    $args{timeout} //= 90;

    # Remaining wait budget in seconds, after applying the timeout scale
    my $remaining = bmwqemu::scale_timeout($args{timeout});
    my $attrs;

    # Re-run SAPHanaSR-showAttr every 5 seconds until it shows SOK without
    # any SFAIL, or until the budget is spent
    while (1) {
        $attrs = script_output('SAPHanaSR-showAttr');
        # An empty (false) output also ends the polling
        last unless $attrs;
        last if ($attrs =~ /SOK/ && $attrs !~ /SFAIL/);
        sleep 5;
        $remaining -= 5;
        last if ($remaining <= 0);
    }

    # Only report problems when the budget ran out; this never fails the test
    if ($remaining <= 0) {
        record_info('SOK not found', "sync_state is not in SOK after $args{timeout} seconds")
          unless ($attrs =~ /SOK/);
        record_info('SFAIL', "One of the HANA nodes still has SFAIL sync_state after $args{timeout} seconds")
          if ($attrs =~ /SFAIL/);
    }

    # Always log the last output that was collected
    record_info('SAPHanaSR-showAttr', $attrs);
}
=head2 check_landscape
$self->check_landscape();
Runs B<landscapeHostConfiguration.py> and records the information. Croaks if
the overall host status is not OK.
=cut
sub check_landscape {
    # Run landscapeHostConfiguration.py as the SAP admin user, record its
    # output and die unless it reports "overall host status: ok".
    # %args is kept so existing callers passing named arguments still work;
    # no named argument is consumed here (a previously computed, never used
    # timeout local was dropped).
    my ($self, %args) = @_;
    my $sapadm = $self->set_sap_info(get_required_var('INSTANCE_SID'), get_required_var('INSTANCE_ID'));
    my $cmd = "su - $sapadm -c 'python exe/python_support/landscapeHostConfiguration.py'";

    # Use proceed_on_failure => 1 on call as landscapeHostConfiguration.py returns non zero value on success
    my $out = script_output($cmd, proceed_on_failure => 1);
    record_info('landscapeHostConfiguration', $out);
    die 'Overall host status not OK' unless ($out =~ /overall host status: ok/i);
}
=head2 reboot
$self->reboot();
Restart the SUT and reconnect to the console right after.
=cut
sub reboot {
    my ($self) = @_;

    # Boot wait shared by the IPMI and PowerVM/HMC paths: text mode, with the
    # login expectation taken from NOAUTOLOGIN (read at call time)
    my $wait_textmode_boot = sub {
        $self->wait_boot(textmode => 1, nologin => get_var('NOAUTOLOGIN', '0'));
    };

    if (is_ipmi()) {
        power_action('reboot', textmode => 1, keepconsole => 1);
        # wait to not assert linux-login while system goes down
        switch_from_ssh_to_sol_console();
        wait_still_screen(30);
        $wait_textmode_boot->();
    }
    elsif (is_pvm_hmc()) {
        power_action('reboot', textmode => 1);
        reconnect_mgmt_console();
        $wait_textmode_boot->();
    }
    else {
        # Any other backend: plain reboot, no login, longer bootloader wait
        power_action('reboot', textmode => 1);
        $self->wait_boot(nologin => 1, bootloader_time => 300);
    }

    # Whatever the path, finish on the serial terminal
    select_serial_terminal();
}
=head2 do_hana_sr_register
$self->do_hana_sr_register( node => $node );
Register current HANA node to the node specified by the named argument B<node>. With the named
argument B<proceed_on_failure> set to 1, method will use B<script_run> and return the return
value of the B<script_run> call even if sr_register command fails, otherwise B<assert_script_run>
is used and the method croaks on failure.
=cut
sub do_hana_sr_register {
    my ($self, %args) = @_;
    my $current_node = get_hostname();
    my $instance_id = get_required_var('INSTANCE_ID');
    my $sid = get_required_var('INSTANCE_SID');
    my $sapadm = $self->set_sap_info($sid, $instance_id);

    # Node name is mandatory
    die 'Node name should be set' unless defined $args{node};

    # Default is to croak on failure; with proceed_on_failure => 1 the return
    # value of the command is handed back instead, so the test can go on
    my $runner = \&assert_script_run;
    $runner = \&script_run if (defined $args{proceed_on_failure} && $args{proceed_on_failure} == 1);

    # Register this node against the remote one (sync replication, logreplay operation mode)
    my $hdbnsutil = join(' ',
        'hdbnsutil',
        '-sr_register',
        "--name=$current_node",
        "--remoteHost=$args{node}",
        "--remoteInstance=$instance_id",
        '--replicationMode=sync',
        '--operationMode=logreplay');
    return $runner->("su - $sapadm -c '$hdbnsutil'");
}
=head2 do_hana_takeover
$self->do_hana_takeover( node => $node [, manual_takeover => $manual_takeover] [, cluster => $cluster] [, timeout => $timeout] );
Do a takeover/takeback on a HANA cluster.
Set B<$node> to the node where HANA is/should be the primary server.
Set B<$manual_takeover> to true, so the method performs a manual rather than an automatic
takeover. Defaults to false.
Set B<$cluster> to true so the method runs also a C<crm resource cleanup>. Defaults to false.
Set B<$timeout> to the number of seconds the internal calls will wait. Defaults to 300 seconds.
=cut
sub do_hana_takeover {
# No need to do anything if AUTOMATED_REGISTER is set
return if check_var('AUTOMATED_REGISTER', 'true');
my ($self, %args) = @_;
my $instance_id = get_required_var('INSTANCE_ID');
my $sid = get_required_var('INSTANCE_SID');
my $sapadm = $self->set_sap_info($sid, $instance_id);
$args{timeout} //= 300;
# Node name is mandatory
die 'Node name should be set' if !defined $args{node};
# Do the takeover/failback
assert_script_run "su - $sapadm -c 'hdbnsutil -sr_takeover'" if ($args{manual_takeover});
my $res = $self->do_hana_sr_register(node => $args{node}, proceed_on_failure => 1);
if (defined $res && $res != 0) {
record_info "System not ready", "HANA has not finished starting as master/slave in the HA stack";
wait_until_resources_started(timeout => ($args{timeout} * 3));
save_state;
$self->check_replication_state;
$self->check_hanasr_attr;
script_run 'grep -E "expected_votes|two_node" /etc/corosync/corosync.conf';
$self->do_hana_sr_register(node => $args{node});
}
sleep bmwqemu::scale_timeout(10);
if ($args{cluster}) {
my $hana_resource = "rsc_SAPHanaCtl_${sid}_HDB$instance_id";