Report EC2 instance, image and volume counts to InfluxDB from dump_state(),
return early with a warning when INFLUX_TOKEN or the influxdb/url setting is
missing, and bump the pinned dependency versions.

The VOLUMES_QUANTITY measurement name is spelled "volumes_quantity" so that it
matches the sibling *_quantity measurement names; the post-image blob id on the
influx.py index line predates that spelling fix.

diff --git a/ocw/lib/dump_state.py b/ocw/lib/dump_state.py
index 27b3a5bf..f7256626 100644
--- a/ocw/lib/dump_state.py
+++ b/ocw/lib/dump_state.py
@@ -1,7 +1,9 @@
+import os
 import logging
 import traceback
 from webui.PCWConfig import PCWConfig
 from ocw.lib.azure import Azure
+from ocw.lib.ec2 import EC2
 from ocw.enums import ProviderChoice
 from ocw.lib.influx import Influx
 
@@ -9,6 +11,12 @@
 
 
 def dump_state():
+    if os.getenv("INFLUX_TOKEN") is None:
+        logger.warning("INFLUX_TOKEN is not set, dumping state is not possible")
+        return
+    if not PCWConfig.has("influxdb/url"):
+        logger.warning("pcw.ini missing influxdb configuration, dumping state is not possible")
+        return
     for namespace in PCWConfig.get_namespaces_for("influxdb"):
         try:
             providers = PCWConfig.get_providers_for("influxdb", namespace)
@@ -38,6 +46,25 @@ def dump_state():
                     namespace,
                     Azure(namespace).get_img_versions_count,
                 )
+            if ProviderChoice.EC2 in providers:
+                Influx().dump_resource(
+                    ProviderChoice.EC2.value,
+                    Influx.VMS_QUANTITY,
+                    namespace,
+                    EC2(namespace).count_all_instances
+                )
+                Influx().dump_resource(
+                    ProviderChoice.EC2.value,
+                    Influx.IMAGES_QUANTITY,
+                    namespace,
+                    EC2(namespace).count_all_images
+                )
+                Influx().dump_resource(
+                    ProviderChoice.EC2.value,
+                    Influx.VOLUMES_QUANTITY,
+                    namespace,
+                    EC2(namespace).count_all_volumes
+                )
         except Exception:
             logger.exception(
                 "[%s] Dump state failed!: \n %s", namespace, traceback.format_exc()
diff --git a/ocw/lib/ec2.py b/ocw/lib/ec2.py
index dab2b383..7a43a0c7 100644
--- a/ocw/lib/ec2.py
+++ b/ocw/lib/ec2.py
@@ -116,6 +116,13 @@ def volume_protected(self, volume: dict) -> bool:
     def list_instances(self, region: str) -> list:
         return list(self.ec2_resource(region).instances.all())
 
+    def count_all_instances(self) -> int:
+        instance_quantity = 0
+        for region in self.all_regions:
+            instances = self.list_instances(region=region)
+            instance_quantity += len(instances)
+        return instance_quantity
+
     def get_all_regions(self) -> list:
         regions_resp = self.ec2_client(EC2.default_region).describe_regions()
         regions = [region['RegionName'] for region in regions_resp['Regions']]
@@ -324,6 +331,20 @@ def report_cleanup_results(self, vpc_errors: list, vpc_notify: list, vpc_locked:
         if len(vpc_locked) > 0:
             send_mail('VPC deletion locked by running VMs', '\n'.join(vpc_locked))
 
+    def count_all_images(self) -> int:
+        all_images_cnt = 0
+        for region in self.all_regions:
+            response = self.ec2_client(region).describe_images(Owners=['self'])
+            all_images_cnt += len(response['Images'])
+        return all_images_cnt
+
+    def count_all_volumes(self) -> int:
+        all_volumes_cnt = 0
+        for region in self.all_regions:
+            response = self.ec2_client(region).describe_volumes()
+            all_volumes_cnt += len(response['Volumes'])
+        return all_volumes_cnt
+
     def cleanup_images(self, valid_period_days: float) -> None:
         self.log_dbg('Call cleanup_images')
         for region in self.all_regions:
diff --git a/ocw/lib/influx.py b/ocw/lib/influx.py
index 774ebb50..9b921c5f 100644
--- a/ocw/lib/influx.py
+++ b/ocw/lib/influx.py
@@ -17,6 +17,7 @@ class Influx:
     VMS_QUANTITY: str = "vms_quantity"
     IMAGES_QUANTITY: str = "images_quantity"
     DISK_QUANTITY: str = "disk_quantity"
+    VOLUMES_QUANTITY: str = "volumes_quantity"
     IMAGE_VERSION_QUANTITY: str = "img_version_quantity"
 
     NAMESPACE_TAG: str = "namespace"
diff --git a/requirements.txt b/requirements.txt
index db282154..a75bfadc 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,23 +1,23 @@
 boto3
-azure-mgmt-compute==30.4.0
+azure-identity==1.16.0
+azure-mgmt-compute==31.0.0
+azure-mgmt-resource==23.1.1
 azure-mgmt-storage==21.1.0
-azure-mgmt-resource==23.1.0b2
-azure-storage-blob==12.19.0
-azure-identity==1.14.1
+azure-storage-blob==12.20.0
 cachetools
 msrestazure==0.6.4
 uwsgi==2.0.24
-requests==2.32.1
+requests==2.32.2
 Django~=5.0.6
 django-tables2==2.7.0
 django-filter==23.5
 django-bootstrap5==24.2
 texttable
 oauth2client
-google-api-python-client==2.96.0
-google-cloud-storage==2.10.0
+google-api-python-client==2.131.0
+google-cloud-storage==2.16.0
 openqa_client
-openstacksdk~=1.5.0
+openstacksdk~=3.1.0
 python-dateutil
 apscheduler
 kubernetes
diff --git a/requirements_k8s.txt b/requirements_k8s.txt
index 0589fe09..bf206a1d 100644
--- a/requirements_k8s.txt
+++ b/requirements_k8s.txt
@@ -1,8 +1,8 @@
 oauth2client
 kubernetes
-azure-cli==2.56.0
-google-api-python-client==2.96.0
-azure-mgmt-containerservice==28.0.0
-azure-identity==1.14.1
-azure-mgmt-resource==23.1.0b2
+google-api-python-client==2.131.0
+azure-cli==2.61.0
+azure-identity==1.16.0
+azure-mgmt-containerservice==30.0.0
+azure-mgmt-resource==23.1.1
 boto3
diff --git a/tests/test_ec2.py b/tests/test_ec2.py
index 34f4fe41..8db40914 100644
--- a/tests/test_ec2.py
+++ b/tests/test_ec2.py
@@ -47,6 +47,7 @@ def mocked_client():
     mocked_ec2_resource.Vpc = mocked_vpc
     mocked_ec2_resource.meta = mocked_meta
     mocked_ec2_resource.VpcPeeringConnection = lambda id: MockedVpcPeeringConnection()
+    mocked_ec2_resource.instances = MockedCollectionWithAllMethod()
     mocked_meta.client = mocked_client
     # don't mix up this with EC2.delete_vpc . this one is boto3 side of the call
     mocked_client.delete_vpc = mocked_boto3_delete_vpc
@@ -507,3 +508,24 @@ def mocked_get_boolean(config_path, field=None):
     ec2_patch.cleanup_all()
 
     assert called_stack == ['cleanup_images', 'cleanup_snapshots', 'cleanup_volumes', 'cleanup_vpcs']
+
+def test_count_all_instances(ec2_patch):
+    assert ec2_patch.count_all_instances() == 1
+
+def test_count_all_images(ec2_patch):
+    MockedEC2Client.response = {
+        'Images': [
+            {'Name': Faker().uuid4(), 'CreationDate': now_age_str, 'ImageId': 0},
+            {'Name': Faker().uuid4(), 'CreationDate': older_than_max_age_str, 'ImageId': 2},
+        ]
+    }
+    assert ec2_patch.count_all_images() == 2
+
+def test_count_all_volumes(ec2_patch):
+    MockedEC2Client.response = {
+        'Volumes': [{'VolumeId': MockedEC2Client.volumeid_to_delete, 'CreateTime': older_than_max_age_date},
+                    {'VolumeId': 'too_young_to_die', 'CreateTime': now_age_date},
+                    {'VolumeId': MockedEC2Client.volumeid_to_delete, 'CreateTime': older_than_max_age_date,
+                     'Tags': [{'Key': 'pcw_ignore', 'Value': '1'}]}, ]
+    }
+    assert ec2_patch.count_all_volumes() == 3