Skip to content

Commit 2669efe

Browse files
authored
Merge branch 'main' into rename-nebius-json
2 parents 7ac1fd3 + 371c0c2 commit 2669efe

File tree

15 files changed

+171
-52
lines changed

15 files changed

+171
-52
lines changed

.github/workflows/validation-nebius.yml

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ jobs:
2525
- name: Set up Go
2626
uses: actions/setup-go@v4
2727
with:
28-
go-version-file: 'go.mod'
28+
go-version-file: "go.mod"
2929

3030
- name: Cache Go modules
3131
uses: actions/cache@v4
@@ -47,6 +47,10 @@ jobs:
4747
NEBIUS_SERVICE_ACCOUNT_ID: ${{ secrets.NEBIUS_SERVICE_ACCOUNT_ID }}
4848
NEBIUS_PROJECT_ID: ${{ secrets.NEBIUS_PROJECT_ID }}
4949
TEST_USER_PRIVATE_KEY_PEM_BASE64: ${{ secrets.TEST_USER_PRIVATE_KEY_PEM_BASE64 }}
50+
NEBIUS_SERVICE_ACCOUNT_JSON: ${{ secrets.NEBIUS_SERVICE_ACCOUNT_JSON }}
51+
NEBIUS_TENANT_ID: ${{ secrets.NEBIUS_TENANT_ID }}
52+
TEST_PRIVATE_KEY_BASE64: ${{ secrets.TEST_PRIVATE_KEY_BASE64 }}
53+
TEST_PUBLIC_KEY_BASE64: ${{ secrets.TEST_PUBLIC_KEY_BASE64 }}
5054
VALIDATION_TEST: true
5155
run: |
5256
cd v1/providers/nebius

internal/validation/suite.go

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,11 @@ func RunInstanceLifecycleValidation(t *testing.T, config ProviderConfig) {
6565
capabilities, err := client.GetCapabilities(ctx)
6666
require.NoError(t, err)
6767

68-
types, err := client.GetInstanceTypes(ctx, v1.GetInstanceTypeArgs{})
68+
types, err := client.GetInstanceTypes(ctx, v1.GetInstanceTypeArgs{
69+
ArchitectureFilter: &v1.ArchitectureFilter{
70+
IncludeArchitectures: []v1.Architecture{v1.ArchitectureX86_64},
71+
},
72+
})
6973
require.NoError(t, err)
7074
require.NotEmpty(t, types, "Should have instance types")
7175

v1/image.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -112,7 +112,7 @@ func validateOSVersion(ctx context.Context, sshClient *ssh.Client) (string, erro
112112
}
113113

114114
osVersion := strings.Trim(parts[1], "\"")
115-
ubuntuRegex := regexp.MustCompile(`Ubuntu 20\.04|22\.04`)
115+
ubuntuRegex := regexp.MustCompile(`Ubuntu 20\.04|22\.04|24\.04`)
116116
if !ubuntuRegex.MatchString(osVersion) {
117117
return "", fmt.Errorf("expected Ubuntu 20.04 or 22.04, got: %s", osVersion)
118118
}

v1/providers/aws/validation_kubernetes_test.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,8 @@ import (
1111
)
1212

1313
func TestAWSKubernetesValidation(t *testing.T) {
14+
t.Skip("Skipping AWS Kubernetes validation tests")
15+
1416
if isValidationTest == "" {
1517
t.Skip("VALIDATION_TEST is not set, skipping AWS Kubernetes validation tests")
1618
}

v1/providers/aws/validation_network_test.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,8 @@ var (
1616
)
1717

1818
func TestAWSNetworkValidation(t *testing.T) {
19+
t.Skip("Skipping AWS Network validation tests")
20+
1921
if isValidationTest == "" {
2022
t.Skip("VALIDATION_TEST is not set, skipping AWS Network validation tests")
2123
}

v1/providers/launchpad/instancetype.go

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ import (
55
"fmt"
66
"strconv"
77
"strings"
8+
"time"
89

910
"github.com/alecthomas/units"
1011
"github.com/bojanz/currency"
@@ -16,6 +17,10 @@ import (
1617

1718
const (
1819
brevDGXCWorkshopID = "brev-dgxc"
20+
dmzCloud = "dmz"
21+
22+
defaultEstimatedDeployTime = 15 * time.Minute
23+
dmzEstimatedDeployTime = 1 * time.Minute
1924
)
2025

2126
func (c *LaunchpadClient) GetInstanceTypes(ctx context.Context, args v1.GetInstanceTypeArgs) ([]v1.InstanceType, error) {
@@ -200,6 +205,13 @@ func launchpadInstanceTypeToInstanceType(launchpadInstanceType openapi.InstanceT
200205
}
201206
typeName := makeInstanceTypeName(info)
202207

208+
var estimatedDeployTime time.Duration
209+
if launchpadInstanceType.Cloud == dmzCloud {
210+
estimatedDeployTime = dmzEstimatedDeployTime
211+
} else {
212+
estimatedDeployTime = defaultEstimatedDeployTime
213+
}
214+
203215
it := &v1.InstanceType{
204216
Type: typeName,
205217
VCPU: launchpadInstanceType.Cpu,
@@ -214,6 +226,7 @@ func launchpadInstanceTypeToInstanceType(launchpadInstanceType openapi.InstanceT
214226
Provider: CloudProviderID,
215227
Cloud: launchpadInstanceType.Cloud,
216228
ReservedInstancePoolID: launchpadWorkshopIDToReservedInstancePoolID(info.workshopID),
229+
EstimatedDeployTime: &estimatedDeployTime,
217230
}
218231

219232
// Make the instance type ID

v1/providers/nebius/client.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -130,6 +130,11 @@ func findProjectForRegion(ctx context.Context, sdk *gosdk.SDK, tenantID, region
130130
return "", fmt.Errorf("no projects found in tenant %s", tenantID)
131131
}
132132

133+
// TODO: I don't think the following code is correct, as the use of monikers like "default" or "default-project"
134+
// or even the nebius convention of "default-project-{region}" will not work with the nebius SDK. The SDK expects
135+
// the project *ID* to be used, not the name. If we get to this part of the code, it likely implies that we will
136+
// not be able to proceed.
137+
133138
// Sort projects by ID for deterministic selection
134139
// This ensures CreateInstance and ListInstances always use the same project!
135140
sort.Slice(projects, func(i, j int) bool {

v1/providers/nebius/image.go

Lines changed: 12 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -202,6 +202,13 @@ func getImageDescription(image *compute.Image) string {
202202
return ""
203203
}
204204

205+
const (
206+
ArchitectureX86_64 = "x86_64"
207+
ArchitectureArm64 = "arm64"
208+
ArchitectureAMD64 = "amd64"
209+
ArchitectureAArch64 = "aarch64"
210+
)
211+
205212
// extractArchitecture extracts architecture information from image metadata
206213
func extractArchitecture(image *compute.Image) string {
207214
// Check labels for architecture info
@@ -217,16 +224,15 @@ func extractArchitecture(image *compute.Image) string {
217224
// Infer from image name
218225
if image.Metadata != nil {
219226
name := strings.ToLower(image.Metadata.Name)
220-
if strings.Contains(name, "arm64") || strings.Contains(name, "aarch64") {
221-
return "arm64"
227+
if strings.Contains(name, ArchitectureArm64) || strings.Contains(name, ArchitectureAArch64) {
228+
return ArchitectureArm64
222229
}
223-
if strings.Contains(name, "x86_64") || strings.Contains(name, "amd64") {
224-
//nolint:goconst // Architecture string used in detection and returned as default
225-
return "x86_64"
230+
if strings.Contains(name, ArchitectureX86_64) || strings.Contains(name, ArchitectureAMD64) {
231+
return ArchitectureX86_64
226232
}
227233
}
228234

229-
return "x86_64"
235+
return ArchitectureX86_64
230236
}
231237

232238
// filterImagesByArchitectures filters images by multiple architectures

v1/providers/nebius/instance.go

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -344,6 +344,7 @@ func (c *NebiusClient) convertNebiusInstanceToV1(ctx context.Context, instance *
344344
InstanceType: instanceTypeID, // Full instance type ID (e.g., "gpu-h100-sxm.8gpu-128vcpu-1600gb")
345345
InstanceTypeID: v1.InstanceTypeID(instanceTypeID), // Same as InstanceType - required for dev-plane lookup
346346
ImageID: imageFamily,
347+
DiskSize: units.Base2Bytes(diskSize),
347348
DiskSizeBytes: v1.NewBytes(v1.BytesValue(diskSize), v1.Byte), // diskSize is already in bytes from getBootDiskSize
348349
Tags: tags,
349350
Status: v1.Status{LifecycleStatus: lifecycleStatus},
@@ -1150,6 +1151,10 @@ func (c *NebiusClient) createBootDisk(ctx context.Context, attrs v1.CreateInstan
11501151

11511152
// buildDiskCreateRequest builds a disk creation request, trying image family first, then image ID
11521153
func (c *NebiusClient) buildDiskCreateRequest(ctx context.Context, diskName string, attrs v1.CreateInstanceAttrs) (*compute.CreateDiskRequest, error) {
1154+
if attrs.DiskSize == 0 {
1155+
attrs.DiskSize = 1280 * units.Gibibyte // Defaulted by the Nebius Console
1156+
}
1157+
11531158
baseReq := &compute.CreateDiskRequest{
11541159
Metadata: &common.ResourceMetadata{
11551160
ParentId: c.projectID,
@@ -1553,7 +1558,6 @@ func (c *NebiusClient) resolveImageFamily(ctx context.Context, imageID string) (
15531558
"mk8s-worker-node-v-1-31-ubuntu24.04-cuda12",
15541559
"ubuntu22.04",
15551560
"ubuntu20.04",
1556-
"ubuntu18.04",
15571561
}
15581562

15591563
// Check if ImageID is already a known family name
@@ -1600,9 +1604,6 @@ func (c *NebiusClient) resolveImageFamily(ctx context.Context, imageID string) (
16001604
if strings.Contains(name, "ubuntu20") || strings.Contains(name, "ubuntu-20") {
16011605
return "ubuntu20.04", nil
16021606
}
1603-
if strings.Contains(name, "ubuntu18") || strings.Contains(name, "ubuntu-18") {
1604-
return "ubuntu18.04", nil
1605-
}
16061607
}
16071608

16081609
// Default fallback - use the original ImageID as family

v1/providers/nebius/instancetype.go

Lines changed: 21 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,13 @@ func (c *NebiusClient) GetInstanceTypes(ctx context.Context, args v1.GetInstance
2929
// Default behavior: check ALL regions to show all available quota
3030
var locations []v1.Location
3131

32-
if len(args.Locations) > 0 && !args.Locations.IsAll() {
32+
if args.Locations.IsAll() { //nolint:gocritic // prefer if statement over switch statement
33+
allLocations, err := c.GetLocations(ctx, v1.GetLocationsArgs{})
34+
if err != nil {
35+
return nil, errors.WrapAndTrace(err)
36+
}
37+
locations = allLocations
38+
} else if len(args.Locations) > 0 {
3339
// User requested specific locations - filter to those
3440
allLocations, err := c.GetLocations(ctx, v1.GetLocationsArgs{})
3541
if err == nil {
@@ -48,15 +54,8 @@ func (c *NebiusClient) GetInstanceTypes(ctx context.Context, args v1.GetInstance
4854
locations = []v1.Location{{Name: c.location}}
4955
}
5056
} else {
51-
// Default behavior: enumerate ALL regions for quota-aware discovery
52-
// This shows users all instance types they have quota for, regardless of region
53-
allLocations, err := c.GetLocations(ctx, v1.GetLocationsArgs{})
54-
if err == nil {
55-
locations = allLocations
56-
} else {
57-
// Fallback to client's configured location if we can't get all locations
58-
locations = []v1.Location{{Name: c.location}}
59-
}
57+
// Neither all locations nor specific locations were requested: use the client's configured location
58+
locations = []v1.Location{{Name: c.location}}
6059
}
6160

6261
// Get quota information for all regions
@@ -176,10 +175,10 @@ func (c *NebiusClient) getInstanceTypesForLocation(ctx context.Context, platform
176175

177176
// Convert Nebius platform preset to our InstanceType format
178177
instanceType := v1.InstanceType{
179-
ID: v1.InstanceTypeID(instanceTypeID), // Dot-separated format (e.g., "gpu-h100-sxm.8gpu-128vcpu-1600gb")
180178
Location: location.Name,
181179
Type: instanceTypeID, // Same as ID - both use dot-separated format
182180
VCPU: preset.Resources.VcpuCount,
181+
Memory: units.Base2Bytes(preset.Resources.MemoryGibibytes) * units.Gibibyte,
183182
MemoryBytes: v1.NewBytes(v1.BytesValue(preset.Resources.MemoryGibibytes), v1.Gibibyte), // Memory in GiB
184183
NetworkPerformance: "standard", // Default network performance
185184
IsAvailable: isAvailable,
@@ -191,12 +190,14 @@ func (c *NebiusClient) getInstanceTypesForLocation(ctx context.Context, platform
191190

192191
// Add GPU information if available
193192
if preset.Resources.GpuCount > 0 && !isCPUOnly {
193+
memory := getGPUMemory(gpuType)
194194
gpu := v1.GPU{
195195
Count: preset.Resources.GpuCount,
196196
Type: gpuType,
197197
Name: gpuName,
198198
Manufacturer: v1.ManufacturerNVIDIA, // Nebius currently only supports NVIDIA GPUs
199-
Memory: getGPUMemory(gpuType), // Populate VRAM based on GPU type
199+
Memory: memory, // Populate VRAM based on GPU type
200+
MemoryBytes: v1.NewBytes(v1.BytesValue(int64(memory)/int64(units.Gibibyte)), v1.Gibibyte),
200201
}
201202
instanceType.SupportedGPUs = []v1.GPU{gpu}
202203
}
@@ -207,6 +208,9 @@ func (c *NebiusClient) getInstanceTypesForLocation(ctx context.Context, platform
207208
instanceType.BasePrice = pricing
208209
}
209210

211+
// Make the instance type ID
212+
instanceType.ID = v1.MakeGenericInstanceTypeID(instanceType)
213+
210214
instanceTypes = append(instanceTypes, instanceType)
211215
}
212216
}
@@ -368,7 +372,9 @@ func (c *NebiusClient) buildSupportedStorage() []v1.Storage {
368372
// Nebius supports dynamically allocatable network SSD disks
369373
// Minimum: 50GB, Maximum: 2560GB
370374
minSize := 50 * units.GiB
375+
minSizeBytes := v1.NewBytes(50, v1.Gibibyte)
371376
maxSize := 2560 * units.GiB
377+
maxSizeBytes := v1.NewBytes(2560, v1.Gibibyte)
372378

373379
// Pricing is roughly $0.10 per GB-month, which is ~$0.00014 per GB-hour
374380
pricePerGBHr, _ := currency.NewAmount("0.00014", "USD")
@@ -379,6 +385,8 @@ func (c *NebiusClient) buildSupportedStorage() []v1.Storage {
379385
Count: 1,
380386
MinSize: &minSize,
381387
MaxSize: &maxSize,
388+
MinSizeBytes: &minSizeBytes,
389+
MaxSizeBytes: &maxSizeBytes,
382390
IsElastic: true,
383391
PricePerGBHr: &pricePerGBHr,
384392
},
@@ -396,7 +404,7 @@ func (c *NebiusClient) applyInstanceTypeFilters(instanceTypes []v1.InstanceType,
396404
if len(args.InstanceTypes) > 0 {
397405
found := false
398406
for _, requestedType := range args.InstanceTypes {
399-
if string(instanceType.ID) == requestedType {
407+
if instanceType.Type == requestedType {
400408
found = true
401409
break
402410
}

0 commit comments

Comments
 (0)