From e99d2945e17682b51503e64730ab13deac6d4097 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tiago=20Magalh=C3=A3es?=
Date: Mon, 13 Feb 2023 13:17:17 +0000
Subject: [PATCH 1/2] add delete batches

---
 dbcleanup/management/commands/dbcleanup.py | 56 +++++++++++++++++-----
 1 file changed, 44 insertions(+), 12 deletions(-)

diff --git a/dbcleanup/management/commands/dbcleanup.py b/dbcleanup/management/commands/dbcleanup.py
index cdf7e8c..ff82e3b 100644
--- a/dbcleanup/management/commands/dbcleanup.py
+++ b/dbcleanup/management/commands/dbcleanup.py
@@ -1,7 +1,7 @@
 from django.core.management import CommandError, BaseCommand
 from django.conf import settings
 from django.db import connection, transaction
-from django.db.models import ManyToManyField
+from django.db.models import ManyToManyField, Max, Min, Q
 from django.utils import timezone
 from django.contrib.contenttypes.models import ContentType
 from django.db.migrations.loader import MigrationLoader
@@ -9,6 +9,7 @@
 from dbcleanup import utils, models

 REQUIRED_TABLES = {'django_migrations'}
+BATCH_SIZE = 5000


 class Command(BaseCommand):
@@ -139,21 +140,52 @@ def _clean_history(self, options):
             ct = ContentType.objects.get_by_natural_key(*model_tuple)
             # normalize model name to match against .delete() return labels (and for capitalized printing!)
             model = ct.model_class()._meta.label
-            q = ct.get_all_objects_for_this_type(**{f'{field}__lt': timezone.now() - timezone.timedelta(days=log_size)})
+            q = ct.get_all_objects_for_this_type()
+            filtered = q.filter(
+                **{f"{field}__lt": timezone.now() - timezone.timedelta(days=log_size)}
+            ).aggregate(Min("id"), Max("id"))
+            min_id = filtered["id__min"]
+            max_id = filtered["id__max"]
+            rows_deleted = {}

-            try:
-                deleted, rows_deleted = self._clean_history_intention(model, q, options)
-            except CascadeException as e:
-                _exit = 1
-                self.stderr.write(f'{model} cleanup aborted as it would cascade to:\n')
-                self._clean_history_print(e.args[2].items(), err=True)
-                continue
+            while True:
+                batch = q.filter(
+                    Q(id__lte=min_id + BATCH_SIZE),
+                    Q(id__gte=min_id),
+                    Q(
+                        **{
+                            f"{field}__lt": timezone.now()
+                            - timezone.timedelta(days=log_size)
+                        }
+                    ),
+                )
+                if batch:
+                    try:
+                        deleted, batch_rows_deleted = self._clean_history_intention(
+                            model, batch, options
+                        )
+                        for k, v in batch_rows_deleted.items():
+                            if rows_deleted.get(k):
+                                rows_deleted[k] = rows_deleted[k] + v
+                            else:
+                                rows_deleted.update(batch_rows_deleted)
+                        break
+                    except CascadeException as e:
+                        _exit = 1
+                        self.stderr.write(
+                            f"{model} cleanup aborted as it would cascade to:\n"
+                        )
+                        self._clean_history_print(e.args[2].items(), err=True)
+                        continue
+                min_id += BATCH_SIZE
+                if min_id > max_id:
+                    break

             if deleted:
-                if options['force'] or options['interactive']:
-                    self.stdout.write(f'{model} cleanup deleted:\n')
+                if options["force"] or options["interactive"]:
+                    self.stdout.write(f"{model} cleanup deleted:\n")
                 else:
-                    self.stdout.write(f'{model} cleanup would delete:\n')
+                    self.stdout.write(f"{model} cleanup would delete:\n")
                 self._clean_history_print(rows_deleted.items())

         return _exit

From c83ceb149287b635fa103a5a382e68d06b91428b Mon Sep 17 00:00:00 2001
From: Gustavo Silva
Date: Tue, 21 Feb 2023 00:13:03 +0000
Subject: [PATCH 2/2] misc: run black on files

---
 dbcleanup/management/commands/dbcleanup.py | 21 ++++++---------------
 1 file changed, 6 insertions(+), 15 deletions(-)

diff --git a/dbcleanup/management/commands/dbcleanup.py b/dbcleanup/management/commands/dbcleanup.py
index ff82e3b..c3c9ea8 100644
--- a/dbcleanup/management/commands/dbcleanup.py
+++ b/dbcleanup/management/commands/dbcleanup.py
@@ -141,9 +141,9 @@ def _clean_history(self, options):
             # normalize model name to match against .delete() return labels (and for capitalized printing!)
             model = ct.model_class()._meta.label
             q = ct.get_all_objects_for_this_type()
-            filtered = q.filter(
-                **{f"{field}__lt": timezone.now() - timezone.timedelta(days=log_size)}
-            ).aggregate(Min("id"), Max("id"))
+            filtered = q.filter(**{f"{field}__lt": timezone.now() - timezone.timedelta(days=log_size)}).aggregate(
+                Min("id"), Max("id")
+            )
             min_id = filtered["id__min"]
             max_id = filtered["id__max"]
             rows_deleted = {}
@@ -152,18 +152,11 @@ def _clean_history(self, options):
                 batch = q.filter(
                     Q(id__lte=min_id + BATCH_SIZE),
                     Q(id__gte=min_id),
-                    Q(
-                        **{
-                            f"{field}__lt": timezone.now()
-                            - timezone.timedelta(days=log_size)
-                        }
-                    ),
+                    Q(**{f"{field}__lt": timezone.now() - timezone.timedelta(days=log_size)}),
                 )
                 if batch:
                     try:
-                        deleted, batch_rows_deleted = self._clean_history_intention(
-                            model, batch, options
-                        )
+                        deleted, batch_rows_deleted = self._clean_history_intention(model, batch, options)
                         for k, v in batch_rows_deleted.items():
                             if rows_deleted.get(k):
                                 rows_deleted[k] = rows_deleted[k] + v
@@ -173,9 +166,7 @@ def _clean_history(self, options):
                     except CascadeException as e:
                         _exit = 1
-                        self.stderr.write(
-                            f"{model} cleanup aborted as it would cascade to:\n"
-                        )
+                        self.stderr.write(f"{model} cleanup aborted as it would cascade to:\n")
                         self._clean_history_print(e.args[2].items(), err=True)
                         continue
                 min_id += BATCH_SIZE
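Note (not part of the patches above): a minimal standalone sketch of the id-range batching technique these commits introduce, for readers skimming the diff. BATCH_SIZE mirrors the constant added in PATCH 1/2; the delete_in_batches helper and the queryset passed to it are hypothetical illustrations, not code from the repository.

    from django.db.models import Max, Min

    BATCH_SIZE = 5000  # same value the patch adds to dbcleanup.py

    def delete_in_batches(queryset):
        # Hypothetical helper: walk the matching rows in id windows of
        # BATCH_SIZE instead of issuing one potentially huge DELETE,
        # mirroring the min/max aggregation and while-loop in the patch.
        bounds = queryset.aggregate(Min("id"), Max("id"))
        min_id, max_id = bounds["id__min"], bounds["id__max"]
        if min_id is None:
            return 0  # queryset matched nothing
        total = 0
        while min_id <= max_id:
            batch = queryset.filter(id__gte=min_id, id__lte=min_id + BATCH_SIZE)
            deleted, _ = batch.delete()  # QuerySet.delete() returns (count, per-model dict)
            total += deleted
            min_id += BATCH_SIZE
        return total

Usage would be along the lines of delete_in_batches(SomeModel.objects.filter(created__lt=cutoff)), where the model, field, and cutoff are whatever the caller needs; the patched command instead hands each batch to its own _clean_history_intention helper so cascades can be inspected before anything is removed.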