Skip to content

Commit 7d9e4c7

Browse files
Add current cluster as parent resource for compute nodes
1 parent f64bcd5 commit 7d9e4c7

File tree

2 files changed

+46
-11
lines changed

2 files changed

+46
-11
lines changed

coldfront/core/resource/models.py

+2
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,9 @@
33
from django.db import models
44
from django.conf import settings
55
from django.contrib.auth.models import Group
6+
from django.utils.translation import gettext_lazy as _
67
from django.core.exceptions import ValidationError, ObjectDoesNotExist
8+
from model_utils.fields import AutoLastModifiedField
79
from model_utils.models import TimeStampedModel
810
from simple_history.models import HistoricalRecords
911

coldfront/plugins/slurm/management/commands/slurm_manage_resources.py

+44-11
Original file line numberDiff line numberDiff line change
@@ -3,14 +3,14 @@
33
import re
44
from functools import reduce
55
from cProfile import Profile
6+
from django.utils import timezone
67

7-
from django.core.management.base import BaseCommand, CommandError
8+
from django.core.management.base import BaseCommand
89
from simple_history.utils import bulk_update_with_history, bulk_create_with_history
910

1011
from coldfront.core.resource.models import ResourceType, ResourceAttribute, ResourceAttributeType, AttributeType, Resource
1112
from coldfront.core.project.models import Project
1213
from coldfront.plugins.slurm.utils import slurm_get_nodes_info
13-
from django.utils.datetime_safe import datetime
1414

1515
logger = logging.getLogger(__name__)
1616

@@ -68,13 +68,19 @@ def calculate_owner_value(project_list, row):
6868
return'FASRC'
6969
return owner_name
7070

71+
def get_cluster():
72+
return Resource.objects.get(resource_type__name='Cluster')
73+
74+
7175
env = options['environment'] or 'production'
7276
if 'dev' in env:
73-
output = self.get_output_from_file(os.path.join(os.getcwd(), 'coldfront/plugins/slurm/management/commands/sinfo_output.txt'))
77+
output = self.get_output_from_file(os.path.join(os.getcwd(), 'coldfront/plugins/slurm/management/commands/sinfo.txt'))
7478
else:
7579
output = slurm_get_nodes_info()
7680
logger.debug(f'Running on {env} mode')
81+
modify_history_date = timezone.now()
7782
project_list = Project.objects.all()
83+
current_cluster = get_cluster()
7884
compute_node = ResourceType.objects.get(name='Compute Node')
7985
attribute_type_name_list = ['GPU Count', 'Core Count', 'Features', 'Owner', 'ServiceEnd']
8086
partition_resource_type = ResourceType.objects.get(name='Cluster Partition')
@@ -96,7 +102,14 @@ def calculate_owner_value(project_list, row):
96102
bulk_update_resource = []
97103
processed_resource_attribute = []
98104
for row in output:
99-
new_resource, compute_node_created_created = Resource.objects.get_or_create(name=row['nodelist'], defaults={'is_allocatable':False, 'resource_type':compute_node})
105+
new_resource, compute_node_created_created = Resource.objects.get_or_create(
106+
name=row['nodelist'],
107+
defaults={
108+
'is_allocatable':False,
109+
'resource_type':compute_node,
110+
'parent_resource':current_cluster
111+
}
112+
)
100113
Resource.objects.get_or_create(name=row['partition'], defaults={'resource_type':partition_resource_type})
101114

102115
gpu_count = ResourceAttribute(resource_attribute_type=gpu_count_attribute_type, resource=new_resource, value=calculate_gpu_count(row['gres']))
@@ -143,13 +156,27 @@ def calculate_owner_value(project_list, row):
143156
new_resource.is_available = True
144157
bulk_update_resource.append(new_resource)
145158
service_end_pk = existing_resource_attributes_pk_map[f"{row['nodelist']} {service_end_attribute_type.name}"]
146-
bulk_update_resource_attribute.append(ResourceAttribute(resource=new_resource, value=None, resource_attribute_type=service_end_attribute_type, pk=service_end_pk))
159+
bulk_update_resource_attribute.append(
160+
ResourceAttribute(
161+
resource=new_resource, value=None,
162+
resource_attribute_type=service_end_attribute_type,
163+
pk=service_end_pk,
164+
modified=modify_history_date
165+
)
166+
)
147167
processed_resources.add(new_resource.name)
148168
try:
149169
logger.debug(f'Updating {len(bulk_update_resource_attribute)} ResourceAttribute records')
150-
bulk_update_with_history(bulk_update_resource_attribute, ResourceAttribute, ['value'], batch_size=500, default_change_reason='slurm_manage_resource command')
170+
bulk_update_with_history(
171+
bulk_update_resource_attribute, ResourceAttribute, ['value'],
172+
batch_size=500, default_change_reason='slurm_manage_resource command',
173+
default_date=modify_history_date
174+
)
151175
logger.debug(f'Updating {len(bulk_update_resource)} Resource records')
152-
bulk_update_with_history(bulk_update_resource, Resource, ['is_available'], batch_size=500, default_change_reason='slurm_manage_resource command')
176+
bulk_update_with_history(
177+
bulk_update_resource, Resource, ['is_available'], batch_size=500,
178+
default_change_reason='slurm_manage_resource command', default_date=modify_history_date
179+
)
153180
logger.debug(f'Creating {len(bulk_create_resource_attribute)} ResourceAttribute records')
154181
bulk_create_with_history(bulk_create_resource_attribute, ResourceAttribute, batch_size=500, default_change_reason='slurm_manage_resource command')
155182
except Exception as e:
@@ -161,19 +188,25 @@ def calculate_owner_value(project_list, row):
161188
for resource_to_delete in Resource.objects.exclude(name__in=list(processed_resources)).filter(is_available=True, resource_type=compute_node):
162189
resource_to_delete.is_available = False
163190
bulk_update_resource.append(resource_to_delete)
164-
service_end = ResourceAttribute(resource=resource_to_delete, value=str(datetime.now()), resource_attribute_type=service_end_attribute_type)
191+
service_end = ResourceAttribute(resource=resource_to_delete, value=modify_history_date, resource_attribute_type=service_end_attribute_type)
165192
if f"{resource_to_delete.name} {service_end_attribute_type.name}" in existing_resource_attributes_check:
166193
service_end.pk = existing_resource_attributes_pk_map[f"{resource_to_delete.name} {service_end_attribute_type.name}"]
167194
bulk_update_resource_attribute.append(service_end)
168195
else:
169196
bulk_create_resource_attribute.append(service_end)
170197
try:
171-
logger.debug(f'Decommissioning {len(bulk_update_resource)} Resource records')
172-
bulk_update_with_history(bulk_update_resource, Resource, ['is_available'], batch_size=500, default_change_reason='slurm_manage_resource command')
198+
logger.debug(f'Decommissioning {bulk_update_resource} Resource records')
199+
bulk_update_with_history(
200+
bulk_update_resource, Resource, ['is_available'], batch_size=500,
201+
default_change_reason='slurm_manage_resource command', default_date=modify_history_date
202+
)
173203
logger.debug(f'Creating {len(bulk_create_resource_attribute)} ServiceEnd ResourceAttribute records')
174204
bulk_create_with_history(bulk_create_resource_attribute, ResourceAttribute, batch_size=500, default_change_reason='slurm_manage_resource command')
175205
logger.debug(f'Updating {len(bulk_update_resource_attribute)} ServiceEnd ResourceAttribute records')
176-
bulk_update_with_history(bulk_update_resource_attribute, ResourceAttribute, ['value'], batch_size=500, default_change_reason='slurm_manage_resource command')
206+
bulk_update_with_history(
207+
bulk_update_resource_attribute, ResourceAttribute, ['value'], batch_size=500,
208+
default_change_reason='slurm_manage_resource command', default_date=modify_history_date
209+
)
177210
except Exception as e:
178211
logger.error(f'Error cleaning up resources: {str(e)}')
179212
raise

0 commit comments

Comments
 (0)