From 617480fe5de239be16e66194e77471eeb122d8f4 Mon Sep 17 00:00:00 2001 From: Disyer Date: Sun, 24 Sep 2023 13:14:02 +0300 Subject: [PATCH] kernelcollector: Collect source packages as well for stable and mainline versions --- .github/workflows/autobuild-workflow.yml | 6 +- entrypoint.sh | 2 +- kernelcollector/Package | 0 kernelcollector/PackageCollector.py | 439 --------------- kernelcollector/PackageDistribution.py | 130 ----- kernelcollector/PackageList.py | 197 ------- kernelcollector/{Main.py => main.py} | 30 +- kernelcollector/package_collector.py | 507 ++++++++++++++++++ kernelcollector/package_distribution.py | 113 ++++ kernelcollector/package_list.py | 179 +++++++ kernelcollector/{Utils.py => utils.py} | 63 ++- .../{WebhookEmitter.py => webhook.py} | 2 +- run.sh | 2 +- 13 files changed, 878 insertions(+), 792 deletions(-) delete mode 100644 kernelcollector/Package delete mode 100644 kernelcollector/PackageCollector.py delete mode 100644 kernelcollector/PackageDistribution.py delete mode 100644 kernelcollector/PackageList.py rename kernelcollector/{Main.py => main.py} (52%) create mode 100644 kernelcollector/package_collector.py create mode 100644 kernelcollector/package_distribution.py create mode 100644 kernelcollector/package_list.py rename kernelcollector/{Utils.py => utils.py} (52%) rename kernelcollector/{WebhookEmitter.py => webhook.py} (100%) diff --git a/.github/workflows/autobuild-workflow.yml b/.github/workflows/autobuild-workflow.yml index 06cd334..015552b 100644 --- a/.github/workflows/autobuild-workflow.yml +++ b/.github/workflows/autobuild-workflow.yml @@ -7,14 +7,14 @@ jobs: runs-on: ubuntu-latest steps: - name: Checkout code - uses: actions/checkout@v2 + uses: actions/checkout@v4 - name: Set up QEMU - uses: docker/setup-qemu-action@v1 + uses: docker/setup-qemu-action@v3 with: platforms: arm,arm64 - name: Install builder plugin id: buildx - uses: docker/setup-buildx-action@v1 + uses: docker/setup-buildx-action@v3 with: version: latest - name: Login to Docker diff --git a/entrypoint.sh b/entrypoint.sh index 40403d4..7ddd2da 100644 --- a/entrypoint.sh +++ b/entrypoint.sh @@ -4,4 +4,4 @@ gpg --import gpg.key # Run the actual program -python -m kernelcollector.Main +python -m kernelcollector.main diff --git a/kernelcollector/Package b/kernelcollector/Package deleted file mode 100644 index e69de29..0000000 diff --git a/kernelcollector/PackageCollector.py b/kernelcollector/PackageCollector.py deleted file mode 100644 index aedd9e8..0000000 --- a/kernelcollector/PackageCollector.py +++ /dev/null @@ -1,439 +0,0 @@ -from bs4 import BeautifulSoup -from . 
import Utils -import json, logging, tempfile, re, shutil, os, time, uuid, multiprocessing -import requests - -FIND_IMAGE_RM = 'rm -f /lib/modules/$version/.fresh-install' -NEW_FIND_IMAGE_RM = 'rm -rf /lib/modules/$version' -INITRD_IMAGE_RMS = ['rm -f /boot/initrd.img-$version', 'rm -f /var/lib/initramfs-tools/$version'] -DEB_CONTENT_TYPE = 'application/x-debian-package' -DAILY_RELEASE_REGEX = re.compile(r'\d{4}-\d{2}-\d{2}') - -class PackageCollector(object): - - def __init__(self, logger, architectures, pkgList): - self.logger = logger - self.architectures = architectures - self.pkgList = pkgList - self.tmpDir = os.path.join(tempfile.gettempdir(), uuid.uuid4().hex) - self.currentDir = os.getcwd() - self.reloadCache() - - def runAllBuilds(self): - # Get all releases and prereleases - logging.info(f'Current directory is {self.currentDir}') - logging.info('Checking latest versions of the kernel...') - releases, prereleases = self.getAllReleases() - - # At the end of every release candidate cycle, a new kernel version is released. - # Upgrade the prerelease branch if there is no newer prerelease than the current release. - if Utils.releaseToTuple(releases[-1][1:])[0:2] >= Utils.releaseToTuple(prereleases[-1][1:])[0:2]: - prereleases.append(releases[-1]) - - dailyReleases = self.getDailyReleases() - downloaded = False - - # Delete the temporary folder - if os.path.exists(self.tmpDir): - shutil.rmtree(self.tmpDir) - - # Redownload stable build if necessary - release, downloadable_release = self.findDownloadableFiles(releases, 'linux-current') - prerelease, downloadable_prerelease = self.findDownloadableFiles(prereleases, 'linux-beta') - dailyRelease, downloadable_daily_release = self.findDownloadableFiles(dailyReleases, 'linux-devel') - - downloadable = downloadable_release + downloadable_prerelease + downloadable_daily_release - - logging.info(f'Current release: {release}') - logging.info(f'Current release candidate: {prerelease}') - logging.info(f'Current daily build: {dailyRelease}') - self.logger.send_all() - - # Update cache and publish repository - if not downloadable: - return - - # Create the temporary folder - os.makedirs(self.tmpDir) - - # Schedule pool - downloadable_queue = self.splitList(downloadable, multiprocessing.cpu_count()) - downloadable_queue = [q for q in downloadable_queue if q] - worker_count = len(downloadable_queue) - - # Create and run the pool - pool = multiprocessing.Pool(processes=worker_count) - file_caches = pool.map(self.downloadFilesWorker, list(enumerate(downloadable_queue))) - downloaded = any(file_caches) - - # Update the global file cache from the multiprocessing pool - for file_cache in file_caches: - self.fileCache.update(file_cache) - - # Update the cache if necessary - if downloaded: - self.updateCache() - self.publishRepository() - - # Remove temporary folder - if os.path.exists(self.tmpDir): - shutil.rmtree(self.tmpDir) - - def splitList(self, a, n): - k, m = divmod(len(a), n) - return (a[i*k+min(i, m):(i+1)*k+min(i+1, m)] for i in range(n)) - - def getAllReleases(self): - # We use the Ubuntu kernel mainline as the build source. - # This method will return a list of releases and prereleases, sorted in ascending order. 
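# Quick sketch (not in the diff): how the release sort below behaves. The
# helper mirrors Utils.releaseToTuple / utils.release_to_tuple from this
# patch; the release names are made up.
import re

def release_to_tuple(name):
    # '6.6-rc3' -> (6, 6, 3), '6.5.7' -> (6, 5, 7)
    return tuple(int(x) for x in re.split(r'-rc|\.', name))

releases = ['v6.4.16', 'v6.5.7', 'v6.5.2']
prereleases = ['v6.6-rc2', 'v6.6-rc3']

# reverse=True puts the newest version first, as the sorts in getAllReleases() do
releases.sort(key=lambda x: release_to_tuple(x[1:]), reverse=True)
prereleases.sort(key=lambda x: release_to_tuple(x[1:]), reverse=True)

print(releases)      # ['v6.5.7', 'v6.5.2', 'v6.4.16']
print(prereleases)   # ['v6.6-rc3', 'v6.6-rc2']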
- with requests.get('https://kernel.ubuntu.com/~kernel-ppa/mainline') as site: - data = site.content - - soup = BeautifulSoup(data, 'html.parser') - prereleases = [] - releases = [] - - for row in soup.findAll('tr'): - tds = row.findAll('td') - - if len(tds) != 5: - continue - - a = tds[1].find('a') - - if not a: - continue - - name = a.text - prerelease = '-rc' in name - - # Some Ubuntu specific kernel versions will have to be skipped, for example 2.6.32-xenial - if len(name) < 2 or name[0] != 'v' or (not name[1].isdigit()) or ('-' in name and not prerelease) or (name.count('-') > 1): - continue - - # Since we're reading links, they might have trailing slashes - name = name.rstrip('/') - - if prerelease: - prereleases.append(name) - else: - releases.append(name) - - # Sort the releases in descending order - prereleases.sort(key=lambda x: Utils.releaseToTuple(x[1:]), reverse=True) - releases.sort(key=lambda x: Utils.releaseToTuple(x[1:]), reverse=True) - - return releases, prereleases - - def getDailyReleases(self): - # We have to find the newest daily release version - with requests.get('https://kernel.ubuntu.com/~kernel-ppa/mainline/daily') as site: - data = site.content - - soup = BeautifulSoup(data, 'html.parser') - versions = [] - - for row in soup.findAll('tr'): - tds = row.findAll('td') - - if len(tds) != 5: - continue - - a = tds[1].find('a') - - # The link encapsulated inside the tag and the text of the tag will match for daily releases - if a and a['href'] == a.text: - version = a.text.rstrip('/') - - if version != 'current': - versions.append(version) - - return sorted(versions, reverse=True) - - def getFiles(self, releaseLink, releaseType): - with requests.get(f'https://kernel.ubuntu.com/~kernel-ppa/mainline/{releaseLink}') as site: - data = site.content - - files = {} - soup = BeautifulSoup(data, 'html.parser') - arch = None - - for a in soup.findAll('a'): - text = a.text - - # We have multiple options. - # If we've reached a build log, that means that we've switched to a new architecture. - # If we've reached MainlineBuilds, then we're done with all architectures. 
- # If we have a chosen architecture and the file is a .deb package, then it has to be in - # our list of architectures and it must not be an lpae-based build (we don't package those) - if text.endswith('/log'): - arch = text[:text.find('/log')] - continue - elif text == 'Name': - break - elif not text.endswith('.deb') or not arch: - continue - elif arch not in self.architectures: - continue - elif '-lpae' in text: - continue - - foundCurrent = False - - # There are three kinds of packages: images, modules and headers; - # and they can be either generic, low latency or snapdragon (the processor) - # The only package that doesn't have a sub type is headers-all, which is archless - for type in ('image', 'modules', 'headers'): - if f'-{type}-' not in text: - continue - - for subType in ('lpae', 'lowlatency', 'snapdragon', 'generic'): - if not f'-{subType}' in text: - continue - - fileType = f'{releaseType}-{type}-{subType}-{arch}' - - if fileType in files: - files[fileType].append(text) - else: - files[fileType] = [text] - - foundCurrent = True - break - - if (not foundCurrent) and '-headers-' in text: - files[f'{releaseType}-headers-all'] = [text] - - return files - - def downloadAndRepack(self, releaseLink, releaseName, releaseType, pkgName, filenames): - debFilename = os.path.join(self.tmpDir, pkgName + '.deb') - extractFolder = os.path.join(self.tmpDir, uuid.uuid4().hex) - controlFilename = os.path.join(extractFolder, 'DEBIAN', 'control') - postrmFilename = os.path.join(extractFolder, 'DEBIAN', 'postrm') - - # Create a temporary folder for the repackaging - if os.path.exists(extractFolder): - shutil.rmtree(extractFolder) - - os.makedirs(extractFolder) - - # Kernel versions such as 5.0 have to be adjusted to 5.0.0 - if releaseType != 'linux-devel': - names = releaseName.split('-') - release = list(Utils.releaseToTuple(names[0])) - - while len(release) < 3: - release.append(0) - - names[0] = '.'.join([str(num) for num in release]) - releaseName = '-'.join(names) - - for i, filename in enumerate(filenames): - primaryFile = i == 0 - link = f'https://kernel.ubuntu.com/~kernel-ppa/mainline/{releaseLink}/{filename}' - - # Download the .deb - logging.info(f'Downloading package {pkgName} (release v{releaseName}) from {link}') - - try: - Utils.downloadFile(link, debFilename, DEB_CONTENT_TYPE) - except: - self.logger.add(f'Could not download {os.path.basename(debFilename)} from {link}!', alert=True) - self.logger.add(traceback.print_exc(), pre=True) - self.logger.send_all() - return - - # Extract the .deb file - extractFlag = '-R' if primaryFile else '-x' - result = Utils.run_process(['dpkg-deb', extractFlag, debFilename, extractFolder]) - - if result.failed: - self.logger.add(f'Could not extract {os.path.basename(debFilename)} (error code {result.exit_code})!', alert=True) - self.logger.add(result.get_output(), pre=True) - self.logger.send_all() - return - - if not primaryFile: - # Auxiliary packages: unpack metadata into a secondary folder - secondaryExtractFolder = os.path.join(self.tmpDir, uuid.uuid4().hex) - - if os.path.exists(secondaryExtractFolder): - shutil.rmtree(secondaryExtractFolder) - - os.makedirs(secondaryExtractFolder) - result = Utils.run_process(['dpkg-deb', '-e', debFilename, secondaryExtractFolder]) - - if result.failed: - self.logger.add(f'Could not extract metadata {os.path.basename(debFilename)} (error code {result.exit_code})!', alert=True) - self.logger.add(result.get_output(), pre=True) - self.logger.send_all() - return - - # Merge md5sum metadata - with 
open(os.path.join(extractFolder, 'DEBIAN', 'md5sums'), 'a+') as targetHashFile: - with open(os.path.join(secondaryExtractFolder, 'md5sums'), 'r') as sourceHashFile: - targetHashFile.write(sourceHashFile.read()) - - # Remove secondary folder - if os.path.exists(secondaryExtractFolder): - shutil.rmtree(secondaryExtractFolder) - - os.remove(debFilename) - - if not os.path.exists(controlFilename): - self.logger.add(f'No control file for {pkgName}...', alert=True) - self.logger.send_all() - return - - # Rewrite the control file - with open(controlFilename, 'r') as f: - controlLines = f.read().replace('\r', '').split('\n') - - # We have to rewrite the package name, the version - # We will also remove all linux based dependencies - # In addition to this, we will replace conflicts with our own conflicts - # For example, generic packages will conflict with lowlatency and snapdragon packages - for i, line in enumerate(controlLines): - if line.startswith('Package:'): - controlLines[i] = f'Package: {pkgName}' - elif line.startswith('Version:'): - controlLines[i] = f'Version: {releaseName}' - elif line.startswith('Depends: '): - dependencies = [dep for dep in line[len('Depends: '):].split(', ') if not dep.startswith('linux-')] - - # libssl3 and newer libc6 is not available on Debian. - dependencies = [dep for dep in dependencies if not dep.startswith('libc6') and not dep.startswith('libssl3')] - - # initramfs depends on the logsave script, which is not installed by default. - # Without the logsave script, the system will not boot. - if 'image' in pkgName: - if 'logsave' not in dependencies: - dependencies.append('logsave') - - controlLines[i] = 'Depends: {0}'.format(', '.join(dependencies)) - elif line.startswith('Conflicts'): - origConflicts = ['generic', 'lowlatency', 'snapdragon'] - conflicts = [conflict for conflict in origConflicts if conflict not in pkgName] - - for conflict in conflicts: - origConflicts.remove(conflict) - - myType = origConflicts[0] - conflicts = [pkgName.replace(myType, conflict) for conflict in conflicts] - controlLines[i] = 'Conflicts: {0}'.format(', '.join(conflicts)) - - with open(controlFilename, 'w') as f: - f.write('\n'.join(controlLines)) - - # The Ubuntu kernel images do not remove initrd.img in the postrm script. - # Remove the initrd.img right before the fresh-install file is removed. 
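# Quick sketch (not in the diff): the postrm rewrite described above, using
# the FIND_IMAGE_RM / NEW_FIND_IMAGE_RM / INITRD_IMAGE_RMS constants defined
# at the top of this module. The sample maintainer script is made up.
FIND_IMAGE_RM = 'rm -f /lib/modules/$version/.fresh-install'
NEW_FIND_IMAGE_RM = 'rm -rf /lib/modules/$version'
INITRD_IMAGE_RMS = ['rm -f /boot/initrd.img-$version', 'rm -f /var/lib/initramfs-tools/$version']

postrm_lines = ['#!/bin/sh', 'version=6.5.7', FIND_IMAGE_RM, 'exit 0']

if FIND_IMAGE_RM in postrm_lines:
    index = postrm_lines.index(FIND_IMAGE_RM)
    # Replace the .fresh-install cleanup with a full module directory removal...
    postrm_lines[index] = NEW_FIND_IMAGE_RM
    # ...and insert the initrd.img / initramfs-tools removals in front of it.
    for rm_line in INITRD_IMAGE_RMS:
        postrm_lines.insert(index, rm_line)

# Result: the initramfs-tools state file and initrd.img are removed first,
# followed by 'rm -rf /lib/modules/$version'.
print('\n'.join(postrm_lines))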
- if os.path.exists(postrmFilename): - with open(postrmFilename, 'r') as f: - postrmLines = f.read().replace('\r', '').split('\n') - - if FIND_IMAGE_RM in postrmLines: - index = postrmLines.index(FIND_IMAGE_RM) - postrmLines[index] = NEW_FIND_IMAGE_RM - - for rmLine in INITRD_IMAGE_RMS: - postrmLines.insert(index, rmLine) - - with open(postrmFilename, 'w') as f: - f.write('\n'.join(postrmLines)) - - # Repack the .deb file - result = Utils.run_process(['dpkg-deb', '-Zgzip', '-b', extractFolder, debFilename]) - - if result.failed: - self.logger.add(f'Could not pack {os.path.basename(debFilename)} (error code {result.exit_code})!', alert=True) - self.logger.add(result.get_output(), pre=True) - self.logger.send_all() - return - - self.pkgList.addDebToPool(debFilename) - - # Remove the temporary extract folder - if os.path.exists(extractFolder): - shutil.rmtree(extractFolder) - - def downloadFilesWorker(self, worker_args): - i, files = worker_args - - logging.info(f'Starting worker number {i + 1} with {len(files)} packages to download...') - file_cache = {} - - # Go through all files - for releaseLink, releaseName, releaseType, pkgName, filenames in files: - # Download and repack - self.downloadAndRepack(releaseLink, releaseName, releaseType, pkgName, filenames) - file_cache[pkgName] = filenames - - logging.info(f'Worker number {i + 1} has finished.') - return file_cache - - def findDownloadableFiles(self, releases, releaseType): - # Download the file list for this release - requiredTypes = ['image', 'modules', 'headers'] - - for release in releases: - if DAILY_RELEASE_REGEX.match(release): - releaseLink = f'daily/{release}' - releaseName = release - else: - releaseLink = release - releaseName = release[:1] - - files = self.getFiles(releaseLink, releaseType) - currentTypes = [] - - for pkgName in files.keys(): - type = pkgName.split('-') - - if len(type) < 3: - continue - - type = type[2] - - if type in requiredTypes and type not in currentTypes: - currentTypes.append(type) - - if len(currentTypes) == len(requiredTypes): - # Found all files necessary - break - - self.logger.add(f'Release is not yet ready: {releaseType}') - - filtered_files = [] - - for pkgName, filenames in files.items(): - # Check our cache - if self.fileCache.get(pkgName, None) == filenames: - continue - - filtered_files.append([releaseLink, releaseName, releaseType, pkgName, filenames]) - - return release, filtered_files - - def reloadCache(self): - # Reload the cache. - # We use the cache to avoid redownloading and repackaging files that we've already processed - try: - with open('cache.json', 'r') as file: - self.cache = json.load(file) - except: - self.cache = {} - - self.fileCache = self.cache.get('files', {}) - - def updateCache(self): - # Save the cache to disk. - self.cache['files'] = self.fileCache - - with open('cache.json', 'w') as file: - json.dump(self.cache, file, sort_keys=True, indent=4, separators=(',', ': ')) - - def publishRepository(self): - # If temporary directory doesn't exist, nothing matters - self.pkgList.saveAllDistributions(['l', 'custom']) - self.pkgList.sendEmbeddedReport() diff --git a/kernelcollector/PackageDistribution.py b/kernelcollector/PackageDistribution.py deleted file mode 100644 index af41f7f..0000000 --- a/kernelcollector/PackageDistribution.py +++ /dev/null @@ -1,130 +0,0 @@ -from deb_pkg_tools.control import unparse_control_fields -from datetime import datetime -from . 
import Utils -import traceback, logging, gzip, os -import gnupg - -gpg = gnupg.GPG() -gpg.encoding = 'utf-8' - -class PackageDistribution(object): - - def __init__(self, logger, name, architectures, description): - self.logger = logger - self.name = name - self.architectures = architectures - self.description = description - - def getName(self): - return self.name - - def setName(self, name): - self.name = name - - def getArchitectures(self): - return self.architectures - - def setArchitectures(self, architectures): - self.architectures = architectures - - def getDescription(self): - return self.description - - def setDescription(self, description): - self.description = description - - def getPackageList(self): - return self.pkgList - - def setPackageList(self, pkgList): - self.pkgList = pkgList - - if not self.pkgList: - return - - self.folder = os.path.join(self.pkgList.distFolder, self.name) - - if not os.path.exists(self.folder): - os.makedirs(self.folder) - - def getArchDir(self, arch): - return os.path.join(self.folder, 'main', f'binary-{arch}') - - def signFile(self, filename, content, detach=False): - with open(filename, 'w') as file: - try: - file.write(str(gpg.sign(content, detach=detach, keyid=self.pkgList.gpgKey, passphrase=self.pkgList.gpgPassword))) - except: - self.logger.add(f'Could not sign {filename}! Please check your GPG keys!', alert=True) - self.logger.add(traceback.format_exc(), pre=True) - self.logger.send_all() - - def save(self, releases): - mainDir = os.path.join(self.folder, 'main') - archToPackages = {arch: [] for arch in self.architectures} - - logging.info('Writing package list to disk...') - - # Associate our packages with architectures. - for release in releases: - fullPath, data = release - arch = data['Architecture'].lower() - data = unparse_control_fields(data).dump() - - if arch == 'all': - for arch in self.architectures: - archToPackages[arch].append(data) - elif arch in self.architectures: - archToPackages[arch].append(data) - - # Write our package lists for all architectures. - for arch in self.architectures: - archDir = self.getArchDir(arch) - - if not os.path.exists(archDir): - os.makedirs(archDir) - - with open(os.path.join(archDir, 'Release'), 'w') as file: - file.write('\n'.join([ - 'Component: main', 'Origin: linux-kernel', 'Label: linux-kernel', - f'Architecture: {arch}', f'Description: {self.description}' - ])) - - packages = '\n'.join(archToPackages[arch]) - - with open(os.path.join(archDir, 'Packages'), 'w') as file: - file.write(packages) - - with gzip.open(os.path.join(archDir, 'Packages.gz'), 'wt') as file: - file.write(packages) - - # Gather hashes for the architecture package lists. 
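# Quick sketch (not in the diff): the triple digest that getAllHashes /
# get_all_hashes computes for every indexed file, reading it once in chunks.
# The chunk size and the example path are made up.
import hashlib

def get_all_hashes(filename, chunk_size=65536):
    md5, sha1, sha256 = hashlib.md5(), hashlib.sha1(), hashlib.sha256()

    with open(filename, 'rb') as f:
        for data in iter(lambda: f.read(chunk_size), b''):
            md5.update(data)
            sha1.update(data)
            sha256.update(data)

    return md5.hexdigest(), sha1.hexdigest(), sha256.hexdigest()

# md5, sha1, sha256 = get_all_hashes('dists/sid/main/binary-amd64/Packages')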
- md5s = [] - sha1s = [] - sha256s = [] - - date = datetime.utcnow().strftime('%a, %d %b %Y %H:%M:%S UTC') - - for root, _, files in os.walk(mainDir): - for file in files: - fullPath = os.path.join(root, file) - displayPath = fullPath[len(self.folder):].lstrip('/') - - md5, sha1, sha256 = Utils.getAllHashes(fullPath) - size = str(os.path.getsize(fullPath)) - md5s.append(f' {md5} {size} {displayPath}') - sha1s.append(f' {sha1} {size} {displayPath}') - sha256s.append(f' {sha256} {size} {displayPath}') - - # Save the final package list, signing - release = '\n'.join([ - 'Origin: linux-kernel', 'Label: linux-kernel', f'Suite: {self.name}', f'Codename: {self.name}', f'Date: {date}', - 'Architectures: {0}'.format(' '.join(self.architectures)), 'Components: main', f'Description: {self.description}', - 'MD5Sum:\n{0}'.format('\n'.join(md5s)), 'SHA1:\n{0}'.format('\n'.join(sha1s)), 'SHA256:\n{0}'.format('\n'.join(sha256s)) - ]) - - with open(os.path.join(self.folder, 'Release'), 'w') as file: - file.write(release) - - self.signFile(os.path.join(self.folder, 'InRelease'), release, detach=False) - self.signFile(os.path.join(self.folder, 'Release.gpg'), release, detach=True) diff --git a/kernelcollector/PackageList.py b/kernelcollector/PackageList.py deleted file mode 100644 index c3449a4..0000000 --- a/kernelcollector/PackageList.py +++ /dev/null @@ -1,197 +0,0 @@ -from deb_pkg_tools.package import inspect_package_fields -from distutils.version import LooseVersion -from datetime import datetime -from . import Utils -import shutil, logging, time, os - -class PackageList(object): - - def __init__(self, logger, repoPath, gpgKey, gpgPassword): - self.logger = logger - self.gpgKey = gpgKey - self.gpgPassword = gpgPassword - self.distributions = {} - self.recentlyAdded = {} - self.setRepoPath(repoPath) - - def getRepoPath(self): - return self.repoPath - - def setRepoPath(self, repoPath): - self.repoPath = repoPath - self.poolFolder = os.path.join(self.repoPath, 'pool', 'main') - self.distFolder = os.path.join(self.repoPath, 'dists') - - def getGpgKey(self): - return self.gpgKey - - def setGpgKey(self, gpgKey): - self.gpgKey = gpgKey - - def getGpgPassword(self): - return self.gpgPassword - - def setGpgPassword(self, gpgPassword): - self.gpgPassword = gpgPassword - - def addDistribution(self, distribution): - distribution.setPackageList(self) - self.distributions[distribution.getName()] = distribution - - def getDistribution(self, name): - return self.distributions[name] - - def addDebToPool(self, filename): - basename = os.path.basename(filename) - logging.info(f'Adding {basename} to pool...') - - # Create the pool folder if necessary - poolFolder = os.path.join(self.poolFolder, basename[0]) - - if not os.path.exists(poolFolder): - os.makedirs(poolFolder) - - # Remove any old deb package, and move from original location to pool - noext, ext = os.path.splitext(basename) - poolFilename = os.path.join(poolFolder, f'{noext}_tmp{ext}') - - if os.path.exists(poolFilename): - os.remove(poolFilename) - - shutil.copyfile(filename, poolFilename) - os.remove(filename) - self.recentlyAdded[basename] = None # Version to be filled out in getAllReleasesInPool - - def saveAllDistributions(self, letters): - # Save all distributions - logging.info('Saving package list...') - releases = [] - - for letter in letters: - releases.extend(self.getAllReleasesInPool(letter)) - - for distribution in self.distributions.values(): - distribution.save(releases) - - def sendEmbeddedReport(self): - description = [f'**{filename}** has been 
updated to **v{version}**!' for filename, version in self.recentlyAdded.items() if version is not None] - - if not description: - return - - description = '\n'.join(description) - current_date = time.strftime('%Y-%m-%d %H:%M:%S') - content = { - 'embeds': [{ - 'title': 'Your package list has been updated!', - 'description': description, - 'color': 7526106, - 'author': { - 'name': 'Kernel Collector', - 'url': 'https://github.com/darktohka/kernelcollector', - 'icon_url': 'https://i.imgur.com/y6g563D.png' - }, - 'footer': { - 'text': f'This report has been generated on {current_date}.' - } - }] - } - - self.logger.add_embed(content) - self.logger.send_all() - - def getAllReleasesInPool(self, letter): - poolFolder = os.path.join(self.poolFolder, letter) - - # If we have no pool folder, there are no artifacts. - if not os.path.exists(poolFolder): - return [] - - # Rename all _tmp files - for file in os.listdir(poolFolder): - if not file.endswith('_tmp.deb'): - continue - - fullPath = os.path.join(poolFolder, file) - newFile = fullPath[:-len('_tmp.deb')] + '.deb' - - if os.path.exists(newFile): - os.remove(newFile) - - shutil.move(fullPath, newFile) - - # We have to gather all packages - pkgToVersions = {} - - for file in sorted(os.listdir(poolFolder)): - fullPath = os.path.join(poolFolder, file) - - if not fullPath.endswith('.deb'): - os.remove(fullPath) - continue - - basename = os.path.basename(fullPath) - logging.info(f'Inspecting {basename}...') - - try: - data = inspect_package_fields(fullPath) - except: - os.remove(fullPath) - continue - - pkgName = data['Package'] - version = data['Version'] - pkg = pkgToVersions.get(pkgName, {}) - - if version in pkg: - self.logger.add(f'Removing duplicate version {version} from package {pkgName}...') - self.logger.send_all() - os.remove(fullPath) - continue - - if basename in self.recentlyAdded: - self.recentlyAdded[basename] = version - - poolFilename = os.path.join(poolFolder, basename)[len(self.repoPath):].lstrip('/') - md5, sha1, sha256 = Utils.getAllHashes(fullPath) - data['Filename'] = poolFilename - data['Size'] = str(os.path.getsize(fullPath)) - data['MD5sum'] = md5 - data['SHA1'] = sha1 - data['SHA256'] = sha256 - pkg[version] = [fullPath, data] - pkgToVersions[pkgName] = pkg - - - releases = [] - - # We need to gather the current releases now - for pkgName, versions in pkgToVersions.items(): - if len(versions) == 1: - # There is only one version, which is always the newest. 
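# Quick sketch (not in the diff): why the newest-version scan below uses
# LooseVersion instead of comparing strings. The version numbers are made up,
# and distutils.version is deprecated on newer Pythons (as in the patch).
from distutils.version import LooseVersion

versions = ['6.5.7', '6.5.13', '6.5.2']

newest_version = None
newest_version_name = None

for version in versions:
    if newest_version is None or LooseVersion(version) > newest_version:
        newest_version = LooseVersion(version)
        newest_version_name = version

print(newest_version_name)   # 6.5.13
print(max(versions))         # 6.5.7 -- plain string comparison picks the wrong one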
- fullPath, data = list(versions.values())[0] - else: - # Look for the newest version - newestVersion = None - newestVersionName = None - - for version in versions.keys(): - if newestVersion is None or LooseVersion(version) > newestVersion: - newestVersion = LooseVersion(version) - newestVersionName = version - - fullPath, data = versions[newestVersionName] - - # Delete all previous versions from the pool - for version, pkgList in versions.items(): - if version == newestVersionName: - continue - - filename = pkgList[0] - self.logger.add(f'Removing old file {os.path.basename(filename)}...') - self.logger.send_all() - os.remove(filename) - - releases.append([fullPath, data]) - - return releases diff --git a/kernelcollector/Main.py b/kernelcollector/main.py similarity index 52% rename from kernelcollector/Main.py rename to kernelcollector/main.py index f1189a8..408326b 100644 --- a/kernelcollector/Main.py +++ b/kernelcollector/main.py @@ -1,7 +1,7 @@ -from .PackageCollector import PackageCollector -from .PackageList import PackageList -from .PackageDistribution import PackageDistribution -from .WebhookEmitter import WebhookEmitter +from .package_collector import PackageCollector +from .package_list import PackageList +from .package_distribution import PackageDistribution +from .webhook import WebhookEmitter import traceback, json, logging, os, sys class Main(object): @@ -11,39 +11,39 @@ def __init__(self): with open('settings.json', 'r') as file: self.settings = json.load(file) - defaultValues = {'repoPath': '/srv/packages', 'gpgKey': 'ABCDEF', 'gpgPassword': 'none', 'distribution': 'sid', 'description': 'Package repository for newest Linux kernels', 'architectures': ['amd64'], 'webhook': None} + default_values = {'repoPath': '/srv/packages', 'gpgKey': 'ABCDEF', 'gpgPassword': 'none', 'distribution': 'sid', 'description': 'Package repository for newest Linux kernels', 'architectures': ['amd64'], 'webhook': None} edited = False - for key, value in defaultValues.items(): + for key, value in default_values.items(): if key not in self.settings: self.settings[key] = value edited = True if edited: print('Please edit the settings.json file before running the package collector!') - self.saveSettings() + self.save_settings() sys.exit() self.logger = WebhookEmitter(self.settings['webhook']) - self.packageList = PackageList(self.logger, self.settings['repoPath'].rstrip('/'), self.settings['gpgKey'], self.settings['gpgPassword']) - self.packageDist = PackageDistribution(self.logger, self.settings['distribution'], self.settings['architectures'], self.settings['description']) - self.packageList.addDistribution(self.packageDist) + self.package_list = PackageList(self.logger, self.settings['repoPath'].rstrip('/'), self.settings['gpgKey'], self.settings['gpgPassword']) + self.package_dist = PackageDistribution(self.logger, self.settings['distribution'], self.settings['architectures'], self.settings['description']) + self.package_list.add_distribution(self.package_dist) - self.packageCollector = PackageCollector(self.logger, self.settings['architectures'], self.packageList) + self.package_collector = PackageCollector(self.logger, self.settings['architectures'], self.package_list) - def runAllBuilds(self): + def run_all_builds(self): # Attempt to run all builds. # If something goes wrong, a webhook message will be sent. 
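# Quick sketch (not in the diff): a starter settings.json built from the
# defaults main.py falls back to. Every value here is a placeholder and must
# be edited (GPG key, repository path, webhook URL) before a real run.
import json

default_values = {
    'repoPath': '/srv/packages',
    'gpgKey': 'ABCDEF',
    'gpgPassword': 'none',
    'distribution': 'sid',
    'description': 'Package repository for newest Linux kernels',
    'architectures': ['amd64'],
    'webhook': None
}

with open('settings.json', 'w') as file:
    json.dump(default_values, file, sort_keys=True, indent=4, separators=(',', ': '))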
try: - self.packageCollector.runAllBuilds() + self.package_collector.run_all_builds() except: self.logger.add('Something went wrong while building packages!', alert=True) self.logger.add(traceback.format_exc(), pre=True) self.logger.send_all() - def saveSettings(self): + def save_settings(self): with open('settings.json', 'w') as file: json.dump(self.settings, file, sort_keys=True, indent=4, separators=(',', ': ')) @@ -52,4 +52,4 @@ def saveSettings(self): logging.root.setLevel(logging.INFO) main = Main() - main.runAllBuilds() + main.run_all_builds() diff --git a/kernelcollector/package_collector.py b/kernelcollector/package_collector.py new file mode 100644 index 0000000..e296945 --- /dev/null +++ b/kernelcollector/package_collector.py @@ -0,0 +1,507 @@ +from bs4 import BeautifulSoup +from . import utils +import json, logging, tempfile, re, shutil, os, uuid, multiprocessing, traceback +import requests + +FIND_IMAGE_RM = 'rm -f /lib/modules/$version/.fresh-install' +NEW_FIND_IMAGE_RM = 'rm -rf /lib/modules/$version' +INITRD_IMAGE_RMS = ['rm -f /boot/initrd.img-$version', 'rm -f /var/lib/initramfs-tools/$version'] +DEB_CONTENT_TYPE = 'application/x-debian-package' +DAILY_RELEASE_REGEX = re.compile(r'\d{4}-\d{2}-\d{2}') + +class PackageCollector(object): + + def __init__(self, logger, architectures, pkg_list): + self.logger = logger + self.architectures = architectures + self.pkg_list = pkg_list + self.tmp_dir = os.path.join(tempfile.gettempdir(), uuid.uuid4().hex) + self.current_dir = os.getcwd() + self.reload_cache() + + def run_all_builds(self): + # Get all releases and prereleases + logging.info(f'Current directory is {self.current_dir}') + logging.info('Checking latest source versions of the kernel...') + + stable_name, stable_link, mainline_name, mainline_link = self.get_kernel_releases() + logging.info(f'Current source release: v{stable_name}') + logging.info(f'Current source release candidate: v{mainline_name}') + + logging.info('Checking latest binary versions of the kernel...') + + releases, prereleases = self.get_ubuntu_releases() + daily_releases = self.get_daily_releases() + downloaded = False + + # Delete the temporary folder + if os.path.exists(self.tmp_dir): + shutil.rmtree(self.tmp_dir) + + # Redownload stable build if necessary + release, downloadable_release = self.find_downloadable_files(releases, 'linux-current') + prerelease, downloadable_prerelease = self.find_downloadable_files(prereleases, 'linux-beta') + daily_release, downloadable_daily_release = self.find_downloadable_files(daily_releases, 'linux-devel') + downloadable_stable = self.find_downloadable_sources('linux-stable', stable_name, stable_link) + downloadable_mainline = self.find_downloadable_sources('linux-mainline', mainline_name, mainline_link) + + downloadable = downloadable_release + downloadable_prerelease + downloadable_daily_release + downloadable_stable + downloadable_mainline + + logging.info(f'Current binary release: {release}') + logging.info(f'Current binary release candidate: {prerelease}') + logging.info(f'Current binary daily build: {daily_release}') + + self.logger.send_all() + + # Update cache and publish repository + if not downloadable: + return + + # Create the temporary folder + os.makedirs(self.tmp_dir) + + # Schedule pool + downloadable_queue = utils.split_list(downloadable, multiprocessing.cpu_count()) + downloadable_queue = [q for q in downloadable_queue if q] + worker_count = len(downloadable_queue) + + # Create and run the pool + pool = multiprocessing.Pool(processes=worker_count) 
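# Quick sketch (not in the diff): how utils.split_list spreads the pending
# downloads across the worker pool created above. The function body matches
# the helper added in utils.py; the item list is made up.
def split_list(a, n):
    k, m = divmod(len(a), n)
    return (a[i*k+min(i, m):(i+1)*k+min(i+1, m)] for i in range(n))

downloadable = ['pkg1', 'pkg2', 'pkg3', 'pkg4', 'pkg5', 'pkg6', 'pkg7']
downloadable_queue = [q for q in split_list(downloadable, 3) if q]

print(downloadable_queue)   # [['pkg1', 'pkg2', 'pkg3'], ['pkg4', 'pkg5'], ['pkg6', 'pkg7']]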
+ file_caches = pool.map(self.download_files_worker, list(enumerate(downloadable_queue))) + downloaded = any(file_caches) + + # Update the global file cache from the multiprocessing pool + for cache in file_caches: + self.file_cache.update(cache) + + # Update the cache if necessary + if downloaded: + self.update_cache() + self.publish_repository() + + # Remove temporary folder + if os.path.exists(self.tmp_dir): + shutil.rmtree(self.tmp_dir) + + def get_kernel_releases(self): + with requests.get('https://kernel.org') as site: + data = site.content + + soup = BeautifulSoup(data, 'html.parser') + table_rows = soup.find_all('tr') + mainline_entry = next((row for row in table_rows if 'mainline' in row.text), None) + stable_entry = next((row for row in table_rows if 'stable' in row.text), None) + + if mainline_entry is None: + mainline_entry = stable_entry + if stable_entry is None: + stable_entry = mainline_entry + if mainline_entry is None or stable_entry is None: + raise Exception('No mainline or stable entries found.') + + # Extract the version and download link for mainline + mainline_version = mainline_entry.find('strong').text + mainline_download_link = mainline_entry.find('a', {'title': 'Download complete tarball'})['href'] + + # Extract the version and download link for stable + stable_version = stable_entry.find('strong').text + stable_download_link = stable_entry.find('a', {'title': 'Download complete tarball'})['href'] + + return stable_version, stable_download_link, mainline_version, mainline_download_link + + def get_ubuntu_releases(self): + # We use the Ubuntu kernel mainline as the build source. + # This method will return a list of releases and prereleases, sorted in ascending order. + with requests.get('https://kernel.ubuntu.com/~kernel-ppa/mainline') as site: + data = site.content + + soup = BeautifulSoup(data, 'html.parser') + prereleases = [] + releases = [] + + for row in soup.findAll('tr'): + tds = row.findAll('td') + + if len(tds) != 5: + continue + + a = tds[1].find('a') + + if not a: + continue + + name = a.text + prerelease = '-rc' in name + + # Some Ubuntu specific kernel versions will have to be skipped, for example 2.6.32-xenial + if len(name) < 2 or name[0] != 'v' or (not name[1].isdigit()) or ('-' in name and not prerelease) or (name.count('-') > 1): + continue + + # Since we're reading links, they might have trailing slashes + name = name.rstrip('/') + + if prerelease: + prereleases.append(name) + else: + releases.append(name) + + # Sort the releases in descending order + prereleases.sort(key=lambda x: utils.release_to_tuple(x), reverse=True) + releases.sort(key=lambda x: utils.release_to_tuple(x), reverse=True) + + # At the end of every release candidate cycle, a new kernel version is released. + # Upgrade the prerelease branch if there is no newer prerelease than the current release. 
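# Quick sketch (not in the diff): the major.minor comparison used right below.
# release_to_tuple mirrors the helper added in utils.py; the version strings
# are made up.
import re

def release_to_tuple(name):
    name = re.sub(r'[^0-9\.\-rc]', '', name)   # strip the leading 'v'
    return tuple(int(x) for x in re.split(r'-rc|\.', name))

# v6.6 closes the v6.6-rc cycle, so the stable build is published on the
# prerelease (linux-beta) branch as well:
print(release_to_tuple('v6.6')[0:2] >= release_to_tuple('v6.6-rc7')[0:2])   # True

# A v6.7-rc build is newer than v6.6, so the prerelease branch keeps it:
print(release_to_tuple('v6.6')[0:2] >= release_to_tuple('v6.7-rc1')[0:2])   # False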
+ if utils.release_to_tuple(releases[-1])[0:2] >= utils.release_to_tuple(prereleases[-1])[0:2]: + prereleases.append(releases[-1]) + + return releases, prereleases + + def get_daily_releases(self): + # We have to find the newest daily release version + with requests.get('https://kernel.ubuntu.com/~kernel-ppa/mainline/daily') as site: + data = site.content + + soup = BeautifulSoup(data, 'html.parser') + versions = [] + + for row in soup.findAll('tr'): + tds = row.findAll('td') + + if len(tds) != 5: + continue + + a = tds[1].find('a') + + # The link encapsulated inside the tag and the text of the tag will match for daily releases + if a and a['href'] == a.text: + version = a.text.rstrip('/') + + if version != 'current': + versions.append(version) + + return sorted(versions, reverse=True) + + def get_files(self, release_link, release_type): + with requests.get(f'https://kernel.ubuntu.com/~kernel-ppa/mainline/{release_link}') as site: + data = site.content + + files = {} + soup = BeautifulSoup(data, 'html.parser') + arch = None + + for a in soup.findAll('a'): + text = a.text + + # We have multiple options. + # If we've reached a build log, that means that we've switched to a new architecture. + # If we've reached MainlineBuilds, then we're done with all architectures. + # If we have a chosen architecture and the file is a .deb package, then it has to be in + # our list of architectures and it must not be an lpae-based build (we don't package those) + if text.endswith('/log'): + arch = text[:text.find('/log')] + continue + elif text == 'Name': + break + elif not text.endswith('.deb') or not arch: + continue + elif arch not in self.architectures: + continue + elif '-lpae' in text: + continue + + found_current = False + + # There are three kinds of packages: images, modules and headers; + # and they can be either generic, low latency or snapdragon (the processor) + # The only package that doesn't have a sub type is headers-all, which is archless + for type in ('image', 'modules', 'headers'): + if f'-{type}-' not in text: + continue + + for sub_type in ('lpae', 'lowlatency', 'snapdragon', 'generic'): + if not f'-{sub_type}' in text: + continue + + file_type = f'{release_type}-{type}-{sub_type}-{arch}' + + if file_type in files: + files[file_type].append(text) + else: + files[file_type] = [text] + + found_current = True + break + + if (not found_current) and '-headers-' in text: + files[f'{release_type}-headers-all'] = [text] + + return files + + def download_and_repack_source(self, release_link, release_name, release_type): + archive_name = f'{release_type}.tar.xz' + temp_filename = os.path.join(self.tmp_dir, archive_name) + archive_filename = os.path.join(self.pkg_list.src_folder, archive_name) + + logging.info(f'Downloading source for release {release_name} from {release_link}') + + try: + utils.download_file_to_xz(release_link, temp_filename) + except: + self.logger.add(f'Could not download {archive_name} from {release_link}!', alert=True) + self.logger.add(traceback.format_exc(), pre=True) + self.logger.send_all() + return + + if not os.path.exists(self.pkg_list.src_folder): + os.makedirs(self.pkg_list.src_folder) + + if os.path.exists(archive_filename): + os.remove(archive_filename) + + shutil.copyfile(temp_filename, archive_filename) + os.remove(temp_filename) + + def download_and_repack(self, release_link, release_name, release_type, pkg_name, filenames): + if release_type in ('linux-stable', 'linux-mainline'): + return self.download_and_repack_source(release_link, release_name, 
release_type) + + deb_filename = os.path.join(self.tmp_dir, pkg_name + '.deb') + extract_folder = os.path.join(self.tmp_dir, uuid.uuid4().hex) + control_filename = os.path.join(extract_folder, 'DEBIAN', 'control') + postrm_filename = os.path.join(extract_folder, 'DEBIAN', 'postrm') + + # Create a temporary folder for the repackaging + if os.path.exists(extract_folder): + shutil.rmtree(extract_folder) + + os.makedirs(extract_folder) + + # Kernel versions such as 5.0 have to be adjusted to 5.0.0 + if release_type != 'linux-devel': + names = release_name.split('-') + release = list(utils.release_to_tuple(names[0])) + + while len(release) < 3: + release.append(0) + + names[0] = '.'.join([str(num) for num in release]) + release_name = '-'.join(names) + + for i, filename in enumerate(filenames): + primary_file = i == 0 + link = f'https://kernel.ubuntu.com/~kernel-ppa/mainline/{release_link}/{filename}' + + # Download the .deb + logging.info(f'Downloading package {pkg_name} (release v{release_name}) from {link}') + + try: + utils.download_file(link, deb_filename, DEB_CONTENT_TYPE) + except: + self.logger.add(f'Could not download {os.path.basename(deb_filename)} from {link}!', alert=True) + self.logger.add(traceback.format_exc(), pre=True) + self.logger.send_all() + return + + # Extract the .deb file + extract_flag = '-R' if primary_file else '-x' + result = utils.run_process(['dpkg-deb', extract_flag, deb_filename, extract_folder]) + + if result.failed: + self.logger.add(f'Could not extract {os.path.basename(deb_filename)} (error code {result.exit_code})!', alert=True) + self.logger.add(result.get_output(), pre=True) + self.logger.send_all() + return + + if not primary_file: + # Auxiliary packages: unpack metadata into a secondary folder + aux_extract_folder = os.path.join(self.tmp_dir, uuid.uuid4().hex) + + if os.path.exists(aux_extract_folder): + shutil.rmtree(aux_extract_folder) + + os.makedirs(aux_extract_folder) + result = utils.run_process(['dpkg-deb', '-e', deb_filename, aux_extract_folder]) + + if result.failed: + self.logger.add(f'Could not extract metadata {os.path.basename(deb_filename)} (error code {result.exit_code})!', alert=True) + self.logger.add(result.get_output(), pre=True) + self.logger.send_all() + return + + # Merge md5sum metadata + with open(os.path.join(extract_folder, 'DEBIAN', 'md5sums'), 'a+') as target_hash_file: + with open(os.path.join(aux_extract_folder, 'md5sums'), 'r') as source_hash_file: + target_hash_file.write(source_hash_file.read()) + + # Remove secondary folder + if os.path.exists(aux_extract_folder): + shutil.rmtree(aux_extract_folder) + + os.remove(deb_filename) + + if not os.path.exists(control_filename): + self.logger.add(f'No control file for {pkg_name}...', alert=True) + self.logger.send_all() + return + + # Rewrite the control file + with open(control_filename, 'r') as f: + control_lines = f.read().replace('\r', '').split('\n') + + # We have to rewrite the package name, the version + # We will also remove all linux based dependencies + # In addition to this, we will replace conflicts with our own conflicts + # For example, generic packages will conflict with lowlatency and snapdragon packages + for i, line in enumerate(control_lines): + if line.startswith('Package:'): + control_lines[i] = f'Package: {pkg_name}' + elif line.startswith('Version:'): + control_lines[i] = f'Version: {release_name}' + elif line.startswith('Depends: '): + dependencies = [dep for dep in line[len('Depends: '):].split(', ') if not dep.startswith('linux-')] + + # libssl3 and 
newer libc6 is not available on Debian. + dependencies = [dep for dep in dependencies if not dep.startswith('libc6') and not dep.startswith('libssl3')] + + # initramfs depends on the logsave script, which is not installed by default. + # Without the logsave script, the system will not boot. + if 'image' in pkg_name: + if 'logsave' not in dependencies: + dependencies.append('logsave') + + depends = ', '.join(dependencies) + control_lines[i] = f'Depends: {depends}' + elif line.startswith('Conflicts'): + orig_conflicts = ['generic', 'lowlatency', 'snapdragon'] + conflicts = [conflict for conflict in orig_conflicts if conflict not in pkg_name] + + for conflict in conflicts: + orig_conflicts.remove(conflict) + + my_type = orig_conflicts[0] + conflicts = [pkg_name.replace(my_type, conflict) for conflict in conflicts] + conflicts = ', '.join(conflicts) + control_lines[i] = f'Conflicts: {conflicts}' + + with open(control_filename, 'w') as f: + f.write('\n'.join(control_lines)) + + # The Ubuntu kernel images do not remove initrd.img in the postrm script. + # Remove the initrd.img right before the fresh-install file is removed. + if os.path.exists(postrm_filename): + with open(postrm_filename, 'r') as f: + postrm_lines = f.read().replace('\r', '').split('\n') + + if FIND_IMAGE_RM in postrm_lines: + index = postrm_lines.index(FIND_IMAGE_RM) + postrm_lines[index] = NEW_FIND_IMAGE_RM + + for rm_line in INITRD_IMAGE_RMS: + postrm_lines.insert(index, rm_line) + + with open(postrm_filename, 'w') as f: + f.write('\n'.join(postrm_lines)) + + # Repack the .deb file + result = utils.run_process(['dpkg-deb', '-Zgzip', '-b', extract_folder, deb_filename]) + + if result.failed: + self.logger.add(f'Could not pack {os.path.basename(deb_filename)} (error code {result.exit_code})!', alert=True) + self.logger.add(result.get_output(), pre=True) + self.logger.send_all() + return + + self.pkg_list.add_deb_to_pool(deb_filename) + + # Remove the temporary extract folder + if os.path.exists(extract_folder): + shutil.rmtree(extract_folder) + + def download_files_worker(self, worker_args): + i, files = worker_args + + logging.info(f'Starting worker number {i + 1} with {len(files)} packages to download...') + file_cache = {} + + # Go through all files + for release_link, release_name, release_type, pkg_name, filenames in files: + # Download and repack + self.download_and_repack(release_link, release_name, release_type, pkg_name, filenames) + file_cache[pkg_name] = filenames + + logging.info(f'Worker number {i + 1} has finished.') + return file_cache + + def find_downloadable_sources(self, release_type, release_version, release_link): + filenames = [release_link] + + if self.file_cache.get(release_type, None) == filenames: + return [] + + return [[release_link, f'v{release_version}', release_type, release_type, filenames]] + + def find_downloadable_files(self, releases, release_type): + # Download the file list for this release + required_types = ['image', 'modules', 'headers'] + + for release in releases: + if DAILY_RELEASE_REGEX.match(release): + release_link = f'daily/{release}' + release_name = release + else: + release_link = release + release_name = release[1:] + + files = self.get_files(release_link, release_type) + current_types = [] + + for pkg_name in files.keys(): + type = pkg_name.split('-') + + if len(type) < 3: + continue + + type = type[2] + + if type in required_types and type not in current_types: + current_types.append(type) + + if len(current_types) == len(required_types): + # Found all files necessary + 
break + + self.logger.add(f'Release is not yet ready: {release_type}') + + filtered_files = [] + + for pkg_name, filenames in files.items(): + # Check our cache + if self.file_cache.get(pkg_name, None) == filenames: + continue + + filtered_files.append([release_link, release_name, release_type, pkg_name, filenames]) + + return release, filtered_files + + def reload_cache(self): + # Reload the cache. + # We use the cache to avoid redownloading and repackaging files that we've already processed + try: + with open('cache.json', 'r') as file: + self.cache = json.load(file) + except: + self.cache = {} + + self.file_cache = self.cache.get('files', {}) + + def update_cache(self): + # Save the cache to disk. + self.cache['files'] = self.file_cache + + with open('cache.json', 'w') as file: + json.dump(self.cache, file, sort_keys=True, indent=4, separators=(',', ': ')) + + def publish_repository(self): + # If temporary directory doesn't exist, nothing matters + self.pkg_list.save_all_distributions(['l', 'custom']) + self.pkg_list.send_embedded_report() diff --git a/kernelcollector/package_distribution.py b/kernelcollector/package_distribution.py new file mode 100644 index 0000000..6d165c0 --- /dev/null +++ b/kernelcollector/package_distribution.py @@ -0,0 +1,113 @@ +from deb_pkg_tools.control import unparse_control_fields +from datetime import datetime +from . import utils +import traceback, logging, gzip, os +import gnupg + +gpg = gnupg.GPG() +gpg.encoding = 'utf-8' + +class PackageDistribution(object): + + def __init__(self, logger, name, architectures, description): + self.logger = logger + self.name = name + self.architectures = architectures + self.description = description + + def set_package_list(self, pkg_list): + self.pkg_list = pkg_list + + if not self.pkg_list: + return + + self.folder = os.path.join(self.pkg_list.dist_folder, self.name) + + if not os.path.exists(self.folder): + os.makedirs(self.folder) + + def get_arch_dir(self, arch): + return os.path.join(self.folder, 'main', f'binary-{arch}') + + def sign_file(self, filename, content, detach=False): + with open(filename, 'w') as file: + try: + file.write(str(gpg.sign(content, detach=detach, keyid=self.pkg_list.gpg_key, passphrase=self.pkg_list.gpg_password))) + except: + self.logger.add(f'Could not sign {filename}! Please check your GPG keys!', alert=True) + self.logger.add(traceback.format_exc(), pre=True) + self.logger.send_all() + + def save(self, releases): + main_dir = os.path.join(self.folder, 'main') + arch_to_packages = {arch: [] for arch in self.architectures} + + logging.info('Writing package list to disk...') + + # Associate our packages with architectures. + for release in releases: + full_path, data = release + arch = data['Architecture'].lower() + data = unparse_control_fields(data).dump() + + if arch == 'all': + for arch in self.architectures: + arch_to_packages[arch].append(data) + elif arch in self.architectures: + arch_to_packages[arch].append(data) + + # Write our package lists for all architectures. 
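# Quick sketch (not in the diff): the on-disk layout the loop below produces
# for a distribution named 'sid', assuming the default repoPath and two
# architectures.
import os

dist_folder = '/srv/packages/dists/sid'

for arch in ('amd64', 'arm64'):
    arch_dir = os.path.join(dist_folder, 'main', f'binary-{arch}')
    for name in ('Release', 'Packages', 'Packages.gz'):
        print(os.path.join(arch_dir, name))

# The signed top-level indexes are then written next to 'main':
for name in ('Release', 'InRelease', 'Release.gpg'):
    print(os.path.join(dist_folder, name))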
+ for arch in self.architectures: + arch_dir = self.get_arch_dir(arch) + + if not os.path.exists(arch_dir): + os.makedirs(arch_dir) + + with open(os.path.join(arch_dir, 'Release'), 'w') as file: + file.write('\n'.join([ + 'Component: main', 'Origin: linux-kernel', 'Label: linux-kernel', + f'Architecture: {arch}', f'Description: {self.description}' + ])) + + packages = '\n'.join(arch_to_packages[arch]) + + with open(os.path.join(arch_dir, 'Packages'), 'w') as file: + file.write(packages) + + with gzip.open(os.path.join(arch_dir, 'Packages.gz'), 'wt') as file: + file.write(packages) + + # Gather hashes for the architecture package lists. + md5s = [] + sha1s = [] + sha256s = [] + + date = datetime.utcnow().strftime('%a, %d %b %Y %H:%M:%S UTC') + + for root, _, files in os.walk(main_dir): + for file in files: + full_path = os.path.join(root, file) + display_path = full_path[len(self.folder):].lstrip('/') + + md5, sha1, sha256 = utils.get_all_hashes(full_path) + size = str(os.path.getsize(full_path)) + md5s.append(f' {md5} {size} {display_path}') + sha1s.append(f' {sha1} {size} {display_path}') + sha256s.append(f' {sha256} {size} {display_path}') + + # Save the final package list, signing + archs = ' '.join(self.architectures) + md5s = '\n'.join(md5s) + sha1s = '\n'.join(sha1s) + sha256s = '\n'.join(sha256s) + release = '\n'.join([ + 'Origin: linux-kernel', 'Label: linux-kernel', f'Suite: {self.name}', f'Codename: {self.name}', f'Date: {date}', + f'Architectures: {archs}', 'Components: main', f'Description: {self.description}', + f'MD5Sum:\n{md5s}', f'SHA1:\n{sha1s}', f'SHA256:\n{sha256s}' + ]) + + with open(os.path.join(self.folder, 'Release'), 'w') as file: + file.write(release) + + self.sign_file(os.path.join(self.folder, 'InRelease'), release, detach=False) + self.sign_file(os.path.join(self.folder, 'Release.gpg'), release, detach=True) diff --git a/kernelcollector/package_list.py b/kernelcollector/package_list.py new file mode 100644 index 0000000..f1e6533 --- /dev/null +++ b/kernelcollector/package_list.py @@ -0,0 +1,179 @@ +from deb_pkg_tools.package import inspect_package_fields +from distutils.version import LooseVersion +from . 
import utils +import shutil, logging, time, os + +class PackageList(object): + + def __init__(self, logger, repo_path, gpg_key, gpg_password): + self.logger = logger + self.gpg_key = gpg_key + self.gpg_password = gpg_password + self.distributions = {} + self.recently_added = {} + self.set_repo_path(repo_path) + + def set_repo_path(self, repo_path): + self.repo_path = repo_path + self.src_folder = os.path.join(self.repo_path, 'source') + self.pool_folder = os.path.join(self.repo_path, 'pool', 'main') + self.dist_folder = os.path.join(self.repo_path, 'dists') + + def add_distribution(self, distribution): + distribution.set_package_list(self) + self.distributions[distribution.name] = distribution + + def add_deb_to_pool(self, filename): + basename = os.path.basename(filename) + logging.info(f'Adding {basename} to pool...') + + # Create the pool folder if necessary + pool_folder = os.path.join(self.pool_folder, basename[0]) + + if not os.path.exists(pool_folder): + os.makedirs(pool_folder) + + # Remove any old deb package, and move from original location to pool + no_ext, ext = os.path.splitext(basename) + pool_filename = os.path.join(pool_folder, f'{no_ext}_tmp{ext}') + + if os.path.exists(pool_filename): + os.remove(pool_filename) + + shutil.copyfile(filename, pool_filename) + os.remove(filename) + self.recently_added[basename] = None # Version to be filled out in get_all_releases_in_pool + + def save_all_distributions(self, letters): + # Save all distributions + logging.info('Saving package list...') + releases = [] + + for letter in letters: + releases.extend(self.get_all_releases_in_pool(letter)) + + for distribution in self.distributions.values(): + distribution.save(releases) + + def send_embedded_report(self): + description = [f'**{filename}** has been updated to **v{version}**!' for filename, version in self.recently_added.items() if version is not None] + + if not description: + return + + description = '\n'.join(description) + current_date = time.strftime('%Y-%m-%d %H:%M:%S') + content = { + 'embeds': [{ + 'title': 'Your package list has been updated!', + 'description': description, + 'color': 7526106, + 'author': { + 'name': 'Kernel Collector', + 'url': 'https://github.com/darktohka/kernelcollector', + 'icon_url': 'https://i.imgur.com/y6g563D.png' + }, + 'footer': { + 'text': f'This report has been generated on {current_date}.' + } + }] + } + + self.logger.add_embed(content) + self.logger.send_all() + + def get_all_releases_in_pool(self, letter): + pool_folder = os.path.join(self.pool_folder, letter) + + # If we have no pool folder, there are no artifacts. 
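# Quick sketch (not in the diff): the two-step pool update performed by
# add_deb_to_pool and the rename loop below. New debs are staged under a
# '_tmp' suffix and only replace the previous build when the pool is
# re-scanned. The paths are made up.
import os

pool_folder = '/srv/packages/pool/main/l'
basename = 'linux-current-image-generic-amd64.deb'
no_ext, ext = os.path.splitext(basename)

staged = os.path.join(pool_folder, f'{no_ext}_tmp{ext}')   # written by add_deb_to_pool
final = os.path.join(pool_folder, basename)                # produced by the rename below

print(staged)   # /srv/packages/pool/main/l/linux-current-image-generic-amd64_tmp.deb
print(final)    # /srv/packages/pool/main/l/linux-current-image-generic-amd64.deb
# get_all_releases_in_pool() then removes the old .deb (if any) and moves the
# staged file over it with shutil.move().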
+ if not os.path.exists(pool_folder): + return [] + + # Rename all _tmp files + for file in os.listdir(pool_folder): + if not file.endswith('_tmp.deb'): + continue + + full_path = os.path.join(pool_folder, file) + new_file = full_path[:-len('_tmp.deb')] + '.deb' + + if os.path.exists(new_file): + os.remove(new_file) + + shutil.move(full_path, new_file) + + # We have to gather all packages + pkg_to_versions = {} + + for file in sorted(os.listdir(pool_folder)): + full_path = os.path.join(pool_folder, file) + + if not full_path.endswith('.deb'): + os.remove(full_path) + continue + + basename = os.path.basename(full_path) + logging.info(f'Inspecting {basename}...') + + try: + data = inspect_package_fields(full_path) + except: + os.remove(full_path) + continue + + pkg_name = data['Package'] + version = data['Version'] + pkg = pkg_to_versions.get(pkg_name, {}) + + if version in pkg: + self.logger.add(f'Removing duplicate version {version} from package {pkg_name}...') + self.logger.send_all() + os.remove(full_path) + continue + + if basename in self.recently_added: + self.recently_added[basename] = version + + pool_filename = os.path.join(pool_folder, basename)[len(self.repo_path):].lstrip('/') + md5, sha1, sha256 = utils.get_all_hashes(full_path) + data['Filename'] = pool_filename + data['Size'] = str(os.path.getsize(full_path)) + data['MD5sum'] = md5 + data['SHA1'] = sha1 + data['SHA256'] = sha256 + pkg[version] = [full_path, data] + pkg_to_versions[pkg_name] = pkg + + + releases = [] + + # We need to gather the current releases now + for pkg_name, versions in pkg_to_versions.items(): + if len(versions) == 1: + # There is only one version, which is always the newest. + full_path, data = list(versions.values())[0] + else: + # Look for the newest version + newest_version = None + newest_version_name = None + + for version in versions.keys(): + if newest_version is None or LooseVersion(version) > newest_version: + newest_version = LooseVersion(version) + newest_version_name = version + + full_path, data = versions[newest_version_name] + + # Delete all previous versions from the pool + for version, pkg_list in versions.items(): + if version == newest_version_name: + continue + + filename = pkg_list[0] + self.logger.add(f'Removing old file {os.path.basename(filename)}...') + self.logger.send_all() + os.remove(filename) + + releases.append([full_path, data]) + + return releases diff --git a/kernelcollector/Utils.py b/kernelcollector/utils.py similarity index 52% rename from kernelcollector/Utils.py rename to kernelcollector/utils.py index 928d504..0872b7a 100644 --- a/kernelcollector/Utils.py +++ b/kernelcollector/utils.py @@ -1,4 +1,4 @@ -import hashlib, subprocess, re +import hashlib, subprocess, re, zlib, lzma import requests class ContentTypeException(Exception): @@ -43,10 +43,37 @@ def run_process(process): process.wait() return ProcessOutput(lines, process.returncode) -def releaseToTuple(name): +def remove_version_prefix(version): + return re.sub(r'[^0-9\.\-rc]', '', version) + +def release_to_tuple(name): + name = remove_version_prefix(name) return tuple(int(x) for x in re.split('\\-rc|\\.', name, 0)) -def downloadFile(link, destination, expected_content_type): +def stream_gzip_decompress(stream): + dec = zlib.decompressobj(32 + zlib.MAX_WBITS) # offset 32 to skip the header + + for chunk in stream: + rv = dec.decompress(chunk) + + if rv: + yield rv + + if dec.unused_data: + yield dec.flush() + +def stream_xz_compress(stream): + enc = lzma.LZMACompressor(lzma.FORMAT_XZ) + + for chunk in 
stream: + rv = enc.compress(chunk) + + if rv: + yield rv + + yield enc.flush() + +def download_file(link, destination, expected_content_type): with requests.get(link, stream=True) as r: r.raise_for_status() @@ -56,13 +83,35 @@ def downloadFile(link, destination, expected_content_type): raise ContentTypeException(f'Expected content type {expected_content_type} but received {content_type}.') with open(destination, 'wb') as f: - for chunk in r.iter_content(chunk_size=8192): + for chunk in r.iter_content(chunk_size=1048576): if chunk: f.write(chunk) f.flush() -def getAllHashes(filename): +def download_file_to_xz(link, destination): + with requests.get(link, stream=True) as r: + r.raise_for_status() + + content_type = r.headers.get('content-type', 'unset') + + with open(destination, 'wb') as f: + iterator = r.iter_content(chunk_size=1048576) + + if 'application/x-gzip' in content_type: + iterator = stream_gzip_decompress(iterator) + + if 'application/x-xz' not in content_type: + iterator = stream_xz_compress(iterator) + + with open(destination, 'wb') as f: + for chunk in iterator: + if chunk: + f.write(chunk) + + f.flush() + +def get_all_hashes(filename): md5 = hashlib.md5() sha1 = hashlib.sha1() sha256 = hashlib.sha256() @@ -79,3 +128,7 @@ def getAllHashes(filename): sha256.update(data) return md5.hexdigest(), sha1.hexdigest(), sha256.hexdigest() + +def split_list(a, n): + k, m = divmod(len(a), n) + return (a[i*k+min(i, m):(i+1)*k+min(i+1, m)] for i in range(n)) diff --git a/kernelcollector/WebhookEmitter.py b/kernelcollector/webhook.py similarity index 100% rename from kernelcollector/WebhookEmitter.py rename to kernelcollector/webhook.py index 0d9f086..a41fa62 100644 --- a/kernelcollector/WebhookEmitter.py +++ b/kernelcollector/webhook.py @@ -1,5 +1,5 @@ -import requests import logging, time +import requests HEADERS = {'User-Agent': 'KernelCollector'} diff --git a/run.sh b/run.sh index 5bffc45..e3e60c6 100644 --- a/run.sh +++ b/run.sh @@ -2,4 +2,4 @@ parent_path=$( cd "$(dirname "${BASH_SOURCE}")" ; pwd -P ) cd "$parent_path" -python3 -m kernelcollector.Main +python3 -m kernelcollector.main
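The new download_file_to_xz pipes the upstream tarball through a pair of streaming generators so the repository always stores an .xz source archive, whatever compression kernel.org serves. Below is a minimal, self-contained sketch of that round trip; the two generators are adapted from the helpers added in utils.py above, and the payload is a fake in-memory stand-in for a downloaded tarball.

import gzip, lzma, zlib

def stream_gzip_decompress(stream):
    # wbits offset 32 lets zlib detect and skip the gzip header
    dec = zlib.decompressobj(32 + zlib.MAX_WBITS)
    for chunk in stream:
        rv = dec.decompress(chunk)
        if rv:
            yield rv
    yield dec.flush()

def stream_xz_compress(stream):
    # re-compress the plain byte stream as xz
    enc = lzma.LZMACompressor(lzma.FORMAT_XZ)
    for chunk in stream:
        rv = enc.compress(chunk)
        if rv:
            yield rv
    yield enc.flush()

payload = b'pretend this is a kernel source tarball ' * 1000
gz_chunks = [gzip.compress(payload)]   # what a gzip'd download would yield

xz_bytes = b''.join(stream_xz_compress(stream_gzip_decompress(gz_chunks)))
assert lzma.decompress(xz_bytes) == payload
print(f'{len(payload)} plain bytes -> {len(xz_bytes)} xz bytes')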