Skip to content

Commit ed1387f

Browse files
committed
WIP: Bintar deps validation script
1 parent e84cb1b commit ed1387f

File tree

11 files changed

+4795
-0
lines changed

11 files changed

+4795
-0
lines changed

runtime_config/bintar_deps/deps_10.11.yaml

Lines changed: 627 additions & 0 deletions
Large diffs are not rendered by default.

runtime_config/bintar_deps/deps_10.5.yaml

Lines changed: 412 additions & 0 deletions
Large diffs are not rendered by default.

runtime_config/bintar_deps/deps_10.6.yaml

Lines changed: 602 additions & 0 deletions
Large diffs are not rendered by default.

runtime_config/bintar_deps/deps_11.4.yaml

Lines changed: 628 additions & 0 deletions
Large diffs are not rendered by default.

runtime_config/bintar_deps/deps_11.6.yaml

Lines changed: 651 additions & 0 deletions
Large diffs are not rendered by default.

runtime_config/bintar_deps/deps_11.7.yaml

Lines changed: 651 additions & 0 deletions
Large diffs are not rendered by default.

runtime_config/bintar_deps/deps_11.8.yaml

Lines changed: 654 additions & 0 deletions
Large diffs are not rendered by default.

scripts/bintars/__init__.py

Whitespace-only changes.

scripts/bintars/common.py

Lines changed: 111 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,111 @@
1+
import logging
2+
import tarfile
3+
import subprocess
4+
import shutil
5+
import sys
6+
import re
7+
import os
8+
from typing import Tuple
9+
10+
from pathlib import Path
11+
12+
13+
def setup_logging(level: int):
    """Configure the root logger to emit colored "<LEVEL>: <message>" lines.

    INFO is green, ERROR is red, WARNING is yellow; everything else is
    uncolored.  No logger name or timestamp is included.
    """
    # ANSI escape codes for colors
    RESET = "\033[0m"
    GREEN = "\033[32m"
    RED = "\033[31m"
    YELLOW = "\033[33m"

    level_colors = {
        logging.INFO: GREEN,
        logging.ERROR: RED,
        logging.WARNING: YELLOW,
    }

    # Custom log formatter to include colors
    class ColoredFormatter(logging.Formatter):
        def format(self, record):
            color = level_colors.get(record.levelno, RESET)
            # Format the record first (this also resolves lazy %-style
            # args), then prefix the colored level name.  Unlike mutating
            # record.msg, this leaves the record intact for any other
            # handlers and is safe if the record is formatted twice.
            message = super().format(record)
            return f"{color}{record.levelname}{RESET}: {message}"

    # Basic logging configuration
    logging.basicConfig(
        level=level,
        format="%(message)s",  # No logger name or timestamp
        handlers=[
            logging.StreamHandler()
        ]
    )

    # Apply the custom formatter
    logging.getLogger().handlers[0].setFormatter(ColoredFormatter("%(message)s"))
48+
49+
50+
# Helper functions
51+
def run_command(command):
    """Execute *command* through the shell and return its stripped stdout.

    On a non-zero exit status the error (including stderr) is logged and
    None is returned.
    """
    try:
        completed = subprocess.run(command, shell=True, check=True, text=True,
                                   stdout=subprocess.PIPE,
                                   stderr=subprocess.PIPE)
    except subprocess.CalledProcessError as e:
        logging.error(f"Error running command '{command}': {e} {e.stderr.strip()}")
        return None
    return completed.stdout.strip()
60+
61+
62+
def _unpack_archive(tarball_path: Path, dst_path: Path):
63+
logging.info(f"Extracting archive {tarball_path}")
64+
with tarfile.open(str(tarball_path), 'r:*') as tar:
65+
tar.extractall(path=str(dst_path), filter='fully_trusted')
66+
67+
68+
def _parse_archive_path(archive_path: Path) -> Tuple[str, str]:
69+
archive_name = archive_path.name
70+
71+
# Removes the last extension (e.g., .gz)
72+
base_name = Path(archive_name).stem
73+
# Check and remove the .tar extension
74+
if base_name.endswith(".tar"):
75+
base_name = Path(base_name).stem
76+
77+
# Let's extract the product version from the archive:
78+
match = re.search('([1-9][0-9]+\\.[0-9]+\\.[0-9]+)', base_name)
79+
if not match:
80+
logging.error(f'Archive name {archive_name} must contain product version')
81+
sys.exit(1)
82+
83+
# Only interested in major and minor version numbers, not point.
84+
version = match.group(0).split('.')
85+
major_minor = f'{version[0]}.{version[1]}'
86+
87+
logging.info(f'Product version (major.minor) {major_minor}')
88+
89+
return base_name, major_minor
90+
91+
92+
def prepare_test_directory(archive_path: Path, tests_path: Path) -> Tuple[Path, str]:
    """Unpack *archive_path* under *tests_path* for testing.

    Returns (path to the extracted top-level folder, major.minor product
    version parsed from the archive name).
    """
    base_name, version = _parse_archive_path(archive_path)

    # Make sure the destination exists before extracting into it.
    tests_path.mkdir(parents=True, exist_ok=True)

    # The tarball unpacks into a folder named after the archive itself;
    # that folder is where all the files of interest live.
    extracted_dir = tests_path / base_name

    # Wipe any leftovers from a previous run.
    shutil.rmtree(extracted_dir, ignore_errors=True)

    _unpack_archive(archive_path, tests_path)

    # Sanity check that the archive has maintained its format.
    assert extracted_dir.is_dir()

    return extracted_dir, version

scripts/bintars/deps_test.py

Lines changed: 256 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,256 @@
1+
import argparse
2+
import os
3+
import re
4+
import sys
5+
from concurrent.futures import ProcessPoolExecutor
6+
from typing import Iterable, Tuple, Generator
7+
import logging
8+
9+
import magic
10+
import yaml
11+
12+
from common import run_command, setup_logging, prepare_test_directory
13+
from pathlib import Path
14+
15+
16+
def check_file_is_elf_binary_callback(file_path: str) -> str | None:
    """Worker callback: return *file_path* when libmagic says it is ELF.

    Uses the per-process ``mime`` instance created by ``start_worker``.
    Returns None for non-ELF files or when the check fails.
    """
    global mime
    try:
        detected = mime.from_file(file_path)
        if "elf" in detected.lower():  # Identify ELF files
            return file_path
    except Exception as e:
        logging.error(f"Error checking file {file_path}: {e}")
    return None
25+
26+
27+
def start_worker():
    """Process-pool initializer: create one libmagic instance per worker.

    ``mime`` is stored as a module-level global so that
    ``check_file_is_elf_binary_callback`` can reuse it across calls
    within the same worker process.
    """
    global mime
    mime = magic.Magic()
30+
31+
32+
def get_file_paths(path: str) -> Generator[str, None, None]:
    """Lazily yield the full path of every file below *path*.

    Used as a generator so worker processes can be fed without first
    materializing the whole file list.
    """
    for dirpath, _subdirs, filenames in os.walk(path):
        for filename in filenames:
            yield os.path.join(dirpath, filename)
37+
38+
39+
def get_executables(path: str):
    """
    Recursively searches for ELF executable files and libraries in the given
    path using a multiprocess approach (to speed up).

    Args:
        path (str): Root directory to search.

    Returns:
        list: List of paths to ELF executables and libraries.
    """
    # Fan the per-file libmagic checks out over one worker per CPU.
    # This offers a 10x speed up compared to single threaded.
    with ProcessPoolExecutor(initializer=start_worker,
                             max_workers=os.cpu_count()) as pool:
        checked = pool.map(check_file_is_elf_binary_callback,
                           get_file_paths(path))
        # Workers return None for non-ELF files; keep only the hits.
        executables = [file_path for file_path in checked if file_path]

    return executables
63+
64+
65+
def get_file_dependencies_callback(file: str) -> Tuple[str, set[str] | bool]:
    """Worker callback: list the shared libraries *file* is linked against.

    Runs ``readelf -d`` on the file and collects every NEEDED entry.

    Returns:
        (file, set of library sonames) on success,
        (file, False) when readelf could not be run on the file.
    """
    # run_command goes through a shell, so quote the path to survive
    # spaces and shell metacharacters in file names.
    import shlex
    output = run_command(f'readelf -d {shlex.quote(file)}')
    if output is None:
        logging.error(f"Failed to check libraries for {file}.")
        return file, False

    # Here is an example line we match:
    # 0x0000000000000001 (NEEDED)  Shared library: [libsystemd.so.0]
    regex_shared_library = re.compile(r"Shared library: \[(\S*)\]")

    result = set()
    for line in output.splitlines():
        match = regex_shared_library.search(line)
        if not match:
            continue
        result.add(match.group(1))

    return file, result
86+
87+
88+
def get_dependencies_for_files(files: Iterable[str]) -> dict[str, list[str]]:
    """Map each file path to the shared libraries it links against.

    Files whose dependencies could not be read are skipped.
    """
    with ProcessPoolExecutor(initializer=start_worker,
                             max_workers=os.cpu_count()) as pool:
        per_file_results = pool.map(get_file_dependencies_callback, files)

        deps = {}
        for path, libraries in per_file_results:
            # TODO(cvicentiu) Perhaps this should be marked as a failure.
            # Unable to read file dependencies, skip the file.
            if libraries is False:
                continue
            deps[path] = libraries

    return deps
103+
104+
105+
def remove_base_path_from_files(dependencies: dict[str, list[str]],
                                base_path: str) -> dict[str, list[str]]:
    """
    For all keys in dependencies, remove the base_path prefix.
    "./tests/mariadb-11.6.2-linux-systemd-x86_64/lib/libgalera_smm.so"
    becomes
    "lib/libgalera_smm.so"
    """
    prefix_len = len(base_path) + 1  # +1 also drops the path separator
    trimmed = {}
    for path, libs in dependencies.items():
        # If this assert fails, there is a bug in the testing script.
        assert path.startswith(base_path)
        trimmed[path[prefix_len:]] = libs
    return trimmed
120+
121+
122+
def dependencies_to_canonical_repr(
        dependencies: dict[str, set[str]],
        version: str,
        base_path: Path
) -> dict[str, dict[str, list[str]]]:
    """Build the canonical (YAML-serializable) dependency structure.

    File paths are made relative to *base_path* and each file's library
    set is turned into a sorted list for a stable representation.
    """
    relative = remove_base_path_from_files(dependencies, base_path.as_posix())

    return {
        'version': version,
        'files': {file: sorted(libs) for file, libs in relative.items()},
    }
138+
139+
140+
def get_standard_dependencies(path: str):
    """Load the reference dependency spec from the YAML file at *path*."""
    with open(path) as spec_file:
        return yaml.safe_load(spec_file)
143+
144+
145+
def get_executable_files_dependencies(path: str):
    """Find all ELF binaries under *path* and collect their dependencies."""
    return get_dependencies_for_files(get_executables(path))
148+
149+
150+
def compare_versions(archive_deps, standard_deps,
                     allow_cross_version: bool):
    """Compare the 'version' fields of the archive and reference specs.

    Returns True when a mismatch should count as an error.  With
    allow_cross_version set, a mismatch is only logged as a warning.
    """
    a_version = archive_deps['version']
    s_version = standard_deps['version']

    if a_version != s_version:
        if allow_cross_version:
            # logging.warn() is a deprecated alias; use warning().
            logging.warning(f'WARNING: version mismatch {a_version} {s_version}')
        else:
            logging.error(f'version mismatch {a_version} {s_version}')
            return True
    return False
162+
163+
164+
def compare_dependencies(archive_deps, standard_deps):
    """Diff the per-file dependency maps of the archive vs. the reference.

    Logs every discrepancy (extra/missing files, extra/missing libraries
    per common file) and returns True when any was found.
    """
    files = archive_deps['files']
    control = standard_deps['files']

    files_set = set(files)
    control_set = set(control)

    error = False

    files_extra = files_set - control_set
    if files_extra:
        logging.error(f'We have extra files! {files_extra}')
        error = True

    files_missing = control_set - files_set
    if files_missing:
        logging.error(f'We have missing files from the archive! {files_missing}')
        error = True

    # For files present in both specs, diff the library lists both ways.
    for file in files_set & control_set:
        deps_extra = set(files[file]) - set(control[file])
        deps_missing = set(control[file]) - set(files[file])

        if deps_extra:
            logging.error(f'We have extra deps for {file}! {deps_extra}')
            error = True
        if deps_missing:
            logging.error(f'We have missing deps for {file}! {deps_missing}')
            error = True

    return error
196+
197+
198+
def main(archive_path: Path,
         tests_path: Path,
         deps_file: Path,
         record: bool,
         allow_cross_version: bool):
    """Validate (or, with record=True, regenerate) a bintar deps spec.

    Exits the process with status 1 when validation fails.
    """
    files_path, major_minor = prepare_test_directory(archive_path, tests_path)

    logging.info("Fetching archive dependencies")
    dependencies = get_executable_files_dependencies(files_path)
    canonical_deps = dependencies_to_canonical_repr(dependencies,
                                                    version=major_minor,
                                                    base_path=files_path)

    if record:
        # Record mode: overwrite the spec file instead of validating.
        logging.info(f"Recording new result to {deps_file}")
        with open(deps_file, 'w') as f:
            yaml.dump(canonical_deps, f)
        return

    # Validate dependencies against the recorded spec.  Both checks run
    # unconditionally so every problem gets logged.
    standard = get_standard_dependencies(deps_file)

    error = compare_versions(canonical_deps, standard, allow_cross_version)
    error = compare_dependencies(canonical_deps, standard) or error

    if error:
        logging.error("Some tests failed")
        sys.exit(1)

    logging.info("All OK")
231+
232+
233+
if __name__ == "__main__":
    # Command-line entry point: two positional args (archive, deps_file)
    # plus flags controlling record mode, extraction dir and version
    # strictness.
    parser = argparse.ArgumentParser(
        prog='bintar_deps.py',
        description='Checks/Records bintar files and dependencies')
    parser.add_argument('archive',
                        help='Path to the binary tarball archive')
    parser.add_argument('deps_file',
                        help='Path to YAML file with a list of dependencies')
    parser.add_argument('--record', action='store_true',
                        help='Use the bintar archive to generate a deps file')
    parser.add_argument('--test_directory', type=str, default='./tests/',
                        help='Where to extract the archive and run tests.')
    parser.add_argument('--allow_cross_version', action='store_true',
                        help='Tests pass even if there is a '
                             'version mismatch between the archive and '
                             'the deps_file version')
    args = parser.parse_args()

    # Configure colored logging before any work happens.
    setup_logging(logging.INFO)
    main(archive_path=Path(args.archive),
         tests_path=Path(args.test_directory),
         deps_file=Path(args.deps_file),
         record=args.record,
         allow_cross_version=args.allow_cross_version)

0 commit comments

Comments
 (0)