From e506c7ff1815b86ea70308aaeff31d3442bf1e86 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pablo=20Fern=C3=A1ndez=20Rodr=C3=ADguez?= Date: Wed, 30 Jul 2025 19:12:45 +0200 Subject: [PATCH] [cleaner] ignores hostnames in SSL certificates MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Pablo Fernández Rodríguez --- sos/cleaner/__init__.py | 45 +++++++++++++++++++++++++++++--- sos/cleaner/archives/__init__.py | 30 +++++++++++++++++++-- sos/cleaner/archives/sos.py | 3 ++- sos/collector/__init__.py | 10 +++++++ sos/report/__init__.py | 12 ++++++++- sos/utilities.py | 23 ++++++++++++++++ tests/unittests/cleaner_tests.py | 3 ++- 7 files changed, 117 insertions(+), 9 deletions(-) diff --git a/sos/cleaner/__init__.py b/sos/cleaner/__init__.py index 4a1470b54e..3970f46934 100644 --- a/sos/cleaner/__init__.py +++ b/sos/cleaner/__init__.py @@ -34,7 +34,8 @@ SoSCollectorDirectory) from sos.cleaner.archives.generic import DataDirArchive, TarballArchive from sos.cleaner.archives.insights import InsightsArchive -from sos.utilities import (get_human_readable, import_module, ImporterHelper) +from sos.utilities import (get_human_readable, import_module, + ImporterHelper, is_executable) # an auxiliary method to kick off child processes over its instances @@ -96,7 +97,8 @@ class SoSCleaner(SoSComponent): 'no_update': False, 'keep_binary_files': False, 'target': '', - 'usernames': [] + 'usernames': [], + 'treat_certificates': 'obfuscate' } def __init__(self, parser=None, args=None, cmdline=None, in_place=False, @@ -300,6 +302,15 @@ def add_parser_options(cls, parser): clean_grp.add_argument('--usernames', dest='usernames', default=[], action='extend', help='List of usernames to obfuscate') + clean_grp.add_argument('--treat-certificates', default='obfuscate', + choices=['obfuscate', 'keep', 'remove'], + dest='treat_certificates', + help=( + 'How to treat certificate files ' + '[.csr .crt .pem]. Defaults to "obfuscate" ' + 'after convert the file to text. ' + '"Key" certificate files are always ' + 'removed.')) def set_target_path(self, path): """For use by report and collect to set the TARGET option appropriately @@ -320,12 +331,14 @@ def inspect_target_archive(self): for archive in self.archive_types: if archive.type_name == check_type: _arc = archive(self.opts.target, self.tmpdir, - self.opts.keep_binary_files) + self.opts.keep_binary_files, + self.opts.treat_certificates) else: for arc in self.archive_types: if arc.check_is_type(self.opts.target): _arc = arc(self.opts.target, self.tmpdir, - self.opts.keep_binary_files) + self.opts.keep_binary_files, + self.opts.treat_certificates) break if not _arc: return @@ -577,6 +590,30 @@ def obfuscate_report_paths(self): "WARNING: binary files that potentially contain sensitive " "information will NOT be removed from the final archive\n" ) + if (self.opts.treat_certificates == "obfuscate" + and not is_executable("openssl")): + self.opts.treat_certificates = "remove" + self.ui_log.warning( + "WARNING: No `openssl` command available. Replacing " + "`--treat-certificates` from `obfuscate` to `remove`." + ) + if self.opts.treat_certificates == "obfuscate": + self.ui_log.warning( + "WARNING: certificate files that potentially contain " + "sensitive information will be CONVERTED to text and " + "OBFUSCATED in the final archive.\n" + ) + elif self.opts.treat_certificates == "keep": + self.ui_log.warning( + "WARNING: certificate files that potentially contain " + "sensitive information will be KEPT in the final " + "archive as is.\n" + ) + elif self.opts.treat_certificates == "remove": + self.ui_log.warning( + "WARNING: certificate files that potentially contain " + "sensitive information will be REMOVED in the final " + "archive.\n") for report_path in self.report_paths: self.ui_log.info(f"Obfuscating {report_path.archive_path}") self.obfuscate_report(report_path) diff --git a/sos/cleaner/archives/__init__.py b/sos/cleaner/archives/__init__.py index 5f8a8aa082..e918e2e33d 100644 --- a/sos/cleaner/archives/__init__.py +++ b/sos/cleaner/archives/__init__.py @@ -17,7 +17,8 @@ import re from concurrent.futures import ProcessPoolExecutor -from sos.utilities import file_is_binary +from sos.utilities import (file_is_binary, sos_get_command_output, + file_is_certificate) # python older than 3.8 will hit a pickling error when we go to spawn a new @@ -63,7 +64,8 @@ class SoSObfuscationArchive(): is_nested = False prep_files = {} - def __init__(self, archive_path, tmpdir, keep_binary_files): + def __init__(self, archive_path, tmpdir, keep_binary_files, + treat_certificates): self.archive_path = archive_path self.final_archive_path = self.archive_path self.tmpdir = tmpdir @@ -76,6 +78,7 @@ def __init__(self, archive_path, tmpdir, keep_binary_files): self._load_self() self.archive_root = '' self.keep_binary_files = keep_binary_files + self.treat_certificates = treat_certificates self.parsers = () self.log_info( f"Loaded {self.archive_path} as type {self.description}" @@ -173,6 +176,19 @@ def obfuscate_arc_files(self, flist): # don't run the obfuscation on the link, but on the actual # file at some other point. continue + is_certificate = file_is_certificate(filename) + if is_certificate: + if is_certificate == "certificatekey": + # Always remove certificate Key files + self.remove_file(short_name) + continue + if self.treat_certificates == "keep": + continue + if self.treat_certificates == "remove": + self.remove_file(short_name) + continue + if self.treat_certificates == "obfuscate": + self.certificate_to_text(filename) _parsers = [ _p for _p in self.parsers if not any( @@ -289,6 +305,16 @@ def is_tarfile(self): except Exception: return False + def certificate_to_text(self, fname): + """Convert a certificate to text. This is used when cleaner encounters + a certificate file and the option 'treat_certificates' is 'obfuscate'. + """ + self.log_info(f"Converting certificate file '{fname}' to text") + sos_get_command_output( + f"openssl storeutl -noout -text -certs {str(fname)}", + to_file=f"{fname}.text") + os.remove(fname) + def remove_file(self, fname): """Remove a file from the archive. This is used when cleaner encounters a binary file, which we cannot reliably obfuscate. diff --git a/sos/cleaner/archives/sos.py b/sos/cleaner/archives/sos.py index d538e08f4f..bf010ec7aa 100644 --- a/sos/cleaner/archives/sos.py +++ b/sos/cleaner/archives/sos.py @@ -70,7 +70,8 @@ def get_nested_archives(self): arc_name = os.path.join(_path, fname) if 'sosreport-' in fname and tarfile.is_tarfile(arc_name): archives.append(SoSReportArchive(arc_name, self.tmpdir, - self.keep_binary_files)) + self.keep_binary_files, + self.treat_certificates)) return archives diff --git a/sos/collector/__init__.py b/sos/collector/__init__.py index 7a414501fb..23a191eb12 100644 --- a/sos/collector/__init__.py +++ b/sos/collector/__init__.py @@ -130,6 +130,7 @@ class SoSCollector(SoSComponent): 'ssh_user': 'root', 'timeout': 600, 'transport': 'auto', + 'treat_certificates': 'obfuscate', 'verify': False, 'usernames': [], 'upload': False, @@ -516,6 +517,15 @@ def add_parser_options(cls, parser): cleaner_grp.add_argument('--usernames', dest='usernames', default=[], action='extend', help='List of usernames to obfuscate') + cleaner_grp.add_argument('--treat-certificates', default='obfuscate', + choices=['obfuscate', 'keep', 'remove'], + dest='treat_certificates', + help=( + 'How to treat the certificate files ' + '[.csr .crt .pem]. Defaults to "obfuscate"' + ' after convert the file to text. ' + '"Key" certificate files are always ' + 'removed.')) @classmethod def display_help(cls, section): diff --git a/sos/report/__init__.py b/sos/report/__init__.py index 074afcff9a..e33842ec7d 100644 --- a/sos/report/__init__.py +++ b/sos/report/__init__.py @@ -149,7 +149,8 @@ class SoSReport(SoSComponent): 'upload_s3_object_prefix': None, 'upload_target': None, 'add_preset': '', - 'del_preset': '' + 'del_preset': '', + 'treat_certificates': 'obfuscate' } def __init__(self, parser, args, cmdline): @@ -396,6 +397,15 @@ def add_parser_options(cls, parser): cleaner_grp.add_argument('--usernames', dest='usernames', default=[], action='extend', help='List of usernames to obfuscate') + cleaner_grp.add_argument('--treat-certificates', default='obfuscate', + choices=['obfuscate', 'keep', 'remove'], + dest='treat_certificates', + help=( + 'How to treat the certificate files ' + '[.csr .crt .pem]. Defaults to "obfuscate"' + ' after convert the file to text. ' + ' "Key" certificate files are always ' + 'removed.')) @classmethod def display_help(cls, section): diff --git a/sos/utilities.py b/sos/utilities.py index 19c0b923df..8253d42199 100644 --- a/sos/utilities.py +++ b/sos/utilities.py @@ -132,6 +132,29 @@ def convert_bytes(num_bytes): return f"{num_bytes}" +def file_is_certificate(fname): + """Helper to determine if a given file is a certificate or not. + + This is especially helpful for `sos clean`, which cannot obfuscate + certificate files content and instead, by default, will keep them as is. + + :param fname: The full path of the file to check + :type fname: ``str`` + + :returns: The type of the certificate or ``None`` + :rtype: ``string`` or ``None`` + """ + if fname[-4:] in [".csr", ".crt", ".pem"]: + with open(fname, 'r', encoding='utf-8') as f: + content = f.read() + if re.search(r'-----BEGIN CERTIFICATE-----', content): + return "certificate" + if re.search(r'-----BEGIN [A-Z]+ PRIVATE KEY-----', content): + return "certificatekey" + return None + return None + + def file_is_binary(fname): """Helper to determine if a given file contains binary content or not. diff --git a/tests/unittests/cleaner_tests.py b/tests/unittests/cleaner_tests.py index 7551fea033..a661ec292f 100644 --- a/tests/unittests/cleaner_tests.py +++ b/tests/unittests/cleaner_tests.py @@ -339,7 +339,8 @@ def setUp(self): archive_path='tests/test_data/' 'sosreport-cleanertest-2021-08-03-qpkxdid.tar.xz', keep_binary_files=[], - tmpdir='/tmp' + tmpdir='/tmp', + treat_certificates='obfuscate' ) self.host_prepper = HostnamePrepper(SoSOptions(domains=[])) self.ipv4_prepper = IPPrepper(SoSOptions())