@@ -338,89 +338,95 @@ def is_healthy(host, instance_ip, ssh_identity_file) -> bool:
338
338
logger .error (f"systemd status: { systemd_status .stdout } " )
339
339
logger .error (f"systemd error: { systemd_status .stderr } " )
340
340
341
- # Get detailed systemd status
342
- logger .error ("Detailed systemd status:" )
343
- host .run ("sudo systemctl status postgresql -l --no-pager" )
341
+ # Check systemd service unit file
342
+ logger .error ("PostgreSQL systemd service unit file:" )
343
+ result = host .run ("sudo systemctl cat postgresql" )
344
+ logger .error (f"service unit file:\n { result .stdout } \n { result .stderr } " )
344
345
345
- # Check init script logs
346
- logger .error ("Init script logs:" )
347
- host .run ("sudo journalctl -u cloud-init --no-pager" )
346
+ # Check systemd service environment
347
+ logger .error ("PostgreSQL systemd service environment:" )
348
+ result = host .run ("sudo systemctl show postgresql" )
349
+ logger .error (f"service environment:\n { result .stdout } \n { result .stderr } " )
348
350
349
- # Check cloud-init logs
350
- logger .error ("Cloud-init logs:" )
351
- host .run ("sudo cat /var/log/cloud-init-output.log" )
351
+ # Check systemd service dependencies
352
+ logger .error ("PostgreSQL systemd service dependencies:" )
353
+ result = host .run ("sudo systemctl list-dependencies postgresql" )
354
+ logger .error (f"service dependencies:\n { result .stdout } \n { result .stderr } " )
352
355
353
- # Check if init script exists and its contents
354
- logger .error ("Init script status:" )
355
- host .run ("ls -la /tmp/init.sh " )
356
- host . run ( "cat /tmp/init.sh " )
356
+ # Check if service is enabled
357
+ logger .error ("PostgreSQL service enabled status:" )
358
+ result = host .run ("sudo systemctl is-enabled postgresql " )
359
+ logger . error ( f"service enabled status: \n { result . stdout } \n { result . stderr } " )
357
360
358
- # Check PostgreSQL configuration
359
- logger .error ("PostgreSQL configuration :" )
360
- host .run ("sudo cat /etc/ postgresql/*/main/postgresql.conf " )
361
- host . run ( "sudo cat /etc/postgresql/*/main/pg_hba.conf " )
361
+ # Check systemd journal for service execution logs
362
+ logger .error ("Systemd journal entries for PostgreSQL service execution :" )
363
+ result = host .run ("sudo journalctl -u postgresql -n 100 --no-pager " )
364
+ logger . error ( f"systemd journal: \n { result . stdout } \n { result . stderr } " )
362
365
363
- # Check PostgreSQL data directory permissions
364
- logger .error ("PostgreSQL data directory permissions:" )
365
- host .run ("sudo ls -la /var/lib/postgresql/*/main/" )
366
+ # Check systemd journal specifically for ExecStartPre and ExecStart
367
+ logger .error ("Systemd journal entries for ExecStartPre and ExecStart:" )
368
+ result = host .run ("sudo journalctl -u postgresql -n 100 --no-pager | grep -E 'ExecStartPre|ExecStart'" )
369
+ logger .error (f"execution logs:\n { result .stdout } \n { result .stderr } " )
366
370
367
- # Check PostgreSQL startup logs
368
- logger .error ("PostgreSQL startup logs:" )
369
- host .run ("sudo cat /var/log/postgresql/postgresql-*.log" )
371
+ # Check systemd journal for any errors
372
+ logger .error ("Systemd journal entries with error level:" )
373
+ result = host .run ("sudo journalctl -u postgresql -n 100 --no-pager -p err" )
374
+ logger .error (f"error logs:\n { result .stdout } \n { result .stderr } " )
370
375
371
- # Check systemd journal for PostgreSQL
372
- logger .error ("Systemd journal for PostgreSQL:" )
373
- host .run ("sudo journalctl -u postgresql -n 100 --no-pager" )
376
+ # Check pre-start script output
377
+ logger .error ("Checking pre-start script output:" )
378
+ result = host .run ("sudo -u postgres /usr/local/bin/postgres_prestart.sh" )
379
+ logger .error (f"pre-start script output:\n { result .stdout } \n { result .stderr } " )
374
380
375
- # Check for any PostgreSQL-related errors in system logs
376
- logger .error ("System logs with PostgreSQL errors:" )
377
- host .run ("sudo journalctl | grep -i postgres | tail -n 100" )
381
+ # Check PostgreSQL logs directory
382
+ logger .error ("Checking PostgreSQL logs directory:" )
383
+ result = host .run ("sudo ls -la /var/log/postgresql/" )
384
+ logger .error (f"log directory contents:\n { result .stdout } \n { result .stderr } " )
378
385
379
- # Check for any disk space issues
380
- logger .error ("Disk space information :" )
381
- host .run ("df -h " )
382
- host . run ( "sudo du -sh /var/lib/ postgresql/* " )
386
+ # Check any existing PostgreSQL logs
387
+ logger .error ("Checking existing PostgreSQL logs :" )
388
+ result = host .run ("sudo cat /var/log/postgresql/*.log " )
389
+ logger . error ( f" postgresql logs: \n { result . stdout } \n { result . stderr } " )
383
390
384
- # Check for any memory issues
385
- logger .error ("Memory information:" )
386
- host .run ("free -h" )
391
+ # Try starting PostgreSQL directly with pg_ctl and capture output
392
+ logger .error ("Attempting to start PostgreSQL directly with pg_ctl:" )
393
+ startup_log = "/tmp/postgres-start.log"
394
+ result = host .run (f"sudo -u postgres /usr/lib/postgresql/bin/pg_ctl -D /var/lib/postgresql/data start -l { startup_log } " )
395
+ logger .error (f"pg_ctl start attempt:\n { result .stdout } \n { result .stderr } " )
387
396
388
- # Check for any process conflicts
389
- logger .error ("Running processes:" )
390
- host .run ("ps aux | grep postgres" )
397
+ # Check the startup log
398
+ logger .error ("PostgreSQL startup log:" )
399
+ result = host .run (f"sudo cat { startup_log } " )
400
+ logger .error (f"startup log contents:\n { result .stdout } \n { result .stderr } " )
391
401
392
- # Check for any port conflicts
393
- logger .error ("Port usage:" )
394
- host .run ("sudo netstat -tulpn | grep 5432" )
395
-
396
- if socket_check .failed :
397
- logger .error ("PostgreSQL socket directory check failed" )
398
- logger .error (f"socket check: { socket_check .stdout } " )
399
- logger .error (f"socket error: { socket_check .stderr } " )
400
-
401
- if pg_isready .failed :
402
- logger .error ("pg_isready check failed" )
403
- logger .error (f"pg_isready output: { pg_isready .stdout } " )
404
- logger .error (f"pg_isready error: { pg_isready .stderr } " )
405
-
406
- # Check PostgreSQL logs for startup issues
407
- logger .error ("PostgreSQL logs:" )
408
- host .run ("sudo cat /var/log/postgresql/postgresql-*.log" )
409
- logger .error ("PostgreSQL systemd status:" )
410
- host .run ("sudo systemctl status postgresql" )
411
- logger .error ("PostgreSQL journal logs:" )
412
- host .run ("sudo journalctl -u postgresql --no-pager" )
413
-
414
- if any (cmd .failed for cmd in [systemd_status , socket_check , pg_isready ]):
415
- return False
416
- else :
417
- cmd = check (host )
418
- if cmd .failed is True :
419
- logger .warning (f"{ service } not ready" )
420
- logger .error (f"{ service } command failed with rc={ cmd .rc } " )
421
- logger .error (f"{ service } stdout: { cmd .stdout } " )
422
- logger .error (f"{ service } stderr: { cmd .stderr } " )
423
- return False
402
+ # Clean up the startup log
403
+ result = host .run (f"sudo rm -f { startup_log } " )
404
+
405
+ # Check PostgreSQL configuration
406
+ logger .error ("PostgreSQL configuration:" )
407
+ result = host .run ("sudo cat /etc/postgresql/postgresql.conf" )
408
+ logger .error (f"postgresql.conf:\n { result .stdout } \n { result .stderr } " )
409
+
410
+ # Check PostgreSQL authentication configuration
411
+ logger .error ("PostgreSQL authentication configuration:" )
412
+ result = host .run ("sudo cat /etc/postgresql/pg_hba.conf" )
413
+ logger .error (f"pg_hba.conf:\n { result .stdout } \n { result .stderr } " )
414
+
415
+ # Check PostgreSQL environment
416
+ logger .error ("PostgreSQL environment:" )
417
+ result = host .run ("sudo -u postgres env | grep POSTGRES" )
418
+ logger .error (f"postgres environment:\n { result .stdout } \n { result .stderr } " )
419
+
420
+ if any (cmd .failed for cmd in [systemd_status , socket_check , pg_isready ]):
421
+ return False
422
+ else :
423
+ cmd = check (host )
424
+ if cmd .failed is True :
425
+ logger .warning (f"{ service } not ready" )
426
+ logger .error (f"{ service } command failed with rc={ cmd .rc } " )
427
+ logger .error (f"{ service } stdout: { cmd .stdout } " )
428
+ logger .error (f"{ service } stderr: { cmd .stderr } " )
429
+ return False
424
430
except Exception as e :
425
431
logger .warning (
426
432
f"Connection failed during { service } check, attempting reconnect..."
0 commit comments