Commit c5b48df (parent: 3f0d3a8)

Initial commit - Go plugin for natively launching Spark applications on Kubernetes

24 files changed: +3988 −4 lines

README.md

Lines changed: 69 additions & 4 deletions
This commit replaces the stock Salesforce open-source template README ("A repo containing all the basic file templates and general guidelines for any open source project at Salesforce.", plus its placeholder guidelines) with project documentation:

# Native Submit Plugin for Spark Operator

A high-performance alternative to `spark-submit` for launching Spark applications via the Spark Operator in Kubernetes clusters. This plugin eliminates the JVM spin-up overhead associated with traditional `spark-submit` commands, providing faster application startup times.

## Features

- 🚀 Native implementation bypassing JVM overhead
- ⚡ Faster Spark application startup
- 🔧 Flexible configuration options
- 🔒 Secure execution environment
- 📊 Resource management and optimization
- 🔄 Support for various Spark application types (Java, Scala, Python, R)
## Prerequisites

- Kubernetes cluster
- Spark Operator installed in the cluster
- kubectl configured to access the cluster
## Installation

1. Clone the repository:

   ```bash
   git clone https://github.com/your-org/native-submit-plugin.git
   cd native-submit-plugin
   ```

2. Build the plugin:

   ```bash
   go build -buildmode=plugin -o plugin.so ./main
   ```

3. Deploy the plugin to your cluster:

   ```bash
   kubectl apply -f deploy/
   ```
## Usage

native-submit is a plugin to the Spark Operator: the operator loads the compiled shared object (`plugin.so`) and delegates application launch to it instead of invoking `spark-submit`. A sketch of how such a plugin can be loaded from Go is shown below.
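The following is a minimal, hypothetical sketch of the loading mechanism using Go's standard `plugin` package; the `SubmitSparkApplication` symbol and its signature are stand-ins for the real entry point exported by `main/`:

```go
package main

import (
	"log"
	"plugin"
)

func main() {
	// Open the shared object produced by `go build -buildmode=plugin`.
	p, err := plugin.Open("plugin.so")
	if err != nil {
		log.Fatalf("failed to open plugin: %v", err)
	}

	// NOTE: "SubmitSparkApplication" is a hypothetical symbol name; the
	// actual exported entry point lives in main/ and may differ.
	sym, err := p.Lookup("SubmitSparkApplication")
	if err != nil {
		log.Fatalf("failed to look up entry point: %v", err)
	}

	submit, ok := sym.(func(name, namespace string) error)
	if !ok {
		log.Fatal("plugin entry point has an unexpected type")
	}

	if err := submit("spark-pi", "default"); err != nil {
		log.Fatalf("native submission failed: %v", err)
	}
	log.Println("application submitted")
}
```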
## Architecture

The plugin consists of several components:

- `common/`: Shared utilities and constants
- `driver/`: Driver pod management
- `service/`: Core service implementation
- `configmap/`: Configuration management
- `main/`: Plugin entry point
### Building

```bash
# Build the plugin
go build -buildmode=plugin -o plugin.so ./main

# Run tests
go test -v ./...
```
### Testing

```bash
# Run unit tests
go test -v ./...

# Run tests with coverage
go test -cover ./...

# Run specific package tests
go test -v ./pkg/...
```
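As an illustration of what these unit tests can cover, here is a minimal table-driven sketch for the `GetDriverPodName` helper defined in `common/common-util.go` below; the `v1beta2.DriverSpec` literal follows the field names that file uses, and the test itself is not part of this commit:

```go
package common

import (
	"testing"

	"github.com/kubeflow/spark-operator/api/v1beta2"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

func TestGetDriverPodName(t *testing.T) {
	explicit := "custom-driver"
	tests := []struct {
		name string
		app  *v1beta2.SparkApplication
		want string
	}{
		{
			name: "explicit pod name wins",
			app: &v1beta2.SparkApplication{
				ObjectMeta: metav1.ObjectMeta{Name: "spark-pi"},
				Spec: v1beta2.SparkApplicationSpec{
					Driver: v1beta2.DriverSpec{PodName: &explicit},
				},
			},
			want: "custom-driver",
		},
		{
			name: "falls back to <app-name>-driver",
			app: &v1beta2.SparkApplication{
				ObjectMeta: metav1.ObjectMeta{Name: "spark-pi"},
			},
			want: "spark-pi-driver",
		},
	}
	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			if got := GetDriverPodName(tt.app); got != tt.want {
				t.Errorf("GetDriverPodName() = %q, want %q", got, tt.want)
			}
		})
	}
}
```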

common/common-util.go

Lines changed: 96 additions & 0 deletions
```go
package common

import (
	"fmt"
	"reflect"
	"strconv"

	"github.com/kubeflow/spark-operator/api/v1beta2"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

// Int32Pointer returns a pointer to the given int32 value.
func Int32Pointer(a int32) *int32 {
	return &a
}

// LabelsForSpark returns the base labels applied to Spark resources.
func LabelsForSpark() map[string]string {
	return map[string]string{"version": "3.0.1"}
}

// BoolPointer returns a pointer to the given bool value.
func BoolPointer(a bool) *bool {
	return &a
}

// StringPointer returns a pointer to the given string value.
func StringPointer(a string) *string {
	return &a
}

// GetAppNamespace resolves the namespace of a SparkApplication: the object's
// own namespace if set, otherwise the namespace key in SparkConf, otherwise
// "default".
func GetAppNamespace(app *v1beta2.SparkApplication) string {
	namespace := "default"
	if app.Namespace != "" {
		namespace = app.Namespace
	} else if sparkConfNamespace, ok := app.Spec.SparkConf[SparkAppNamespaceKey]; ok {
		namespace = sparkConfNamespace
	}
	return namespace
}

// GetDriverPodName returns the driver pod name for a SparkApplication CRD
// instance, preferring the explicit driver PodName, then the SparkConf entry,
// then the default "<app-name>-driver".
func GetDriverPodName(app *v1beta2.SparkApplication) string {
	name := app.Spec.Driver.PodName
	if name != nil && len(*name) > 0 {
		return *name
	}
	sparkConf := app.Spec.SparkConf
	if sparkConf[SparkDriverPodNameKey] != "" {
		return sparkConf[SparkDriverPodNameKey]
	}
	return fmt.Sprintf("%s-driver", app.Name)
}

// GetDriverPort returns the driver port supplied in the Spark configuration,
// or the default driver port when none is set. An unparseable value panics,
// failing the spark submit.
func GetDriverPort(sparkConfKeyValuePairs map[string]string) int {
	driverPortToBeUsed := DefaultDriverPort
	if driverPort, ok := sparkConfKeyValuePairs[SparkDriverPort]; ok {
		driverPortSupplied, err := strconv.Atoi(driverPort)
		if err != nil {
			panic(fmt.Sprintf("driver port %q not parseable - hence failing the spark submit: %v", driverPort, err))
		}
		driverPortToBeUsed = driverPortSupplied
	}
	return driverPortToBeUsed
}

// GetOwnerReference builds the owner reference added to resources created for
// a SparkApplication (driver pod, service, configmap) so they are garbage-
// collected together with the application.
func GetOwnerReference(app *v1beta2.SparkApplication) *metav1.OwnerReference {
	controller := false
	return &metav1.OwnerReference{
		APIVersion: v1beta2.SchemeGroupVersion.String(),
		Kind:       reflect.TypeOf(v1beta2.SparkApplication{}).Name(),
		Name:       app.Name,
		UID:        app.UID,
		Controller: &controller,
	}
}

// GetMemoryOverheadFactor returns the memory overhead factor to apply: the
// JVM factor for Java/Scala applications, and the non-JVM factor otherwise.
func GetMemoryOverheadFactor(app *v1beta2.SparkApplication) string {
	if app.Spec.Type == v1beta2.SparkApplicationTypeJava || app.Spec.Type == v1beta2.SparkApplicationTypeScala {
		return JavaScalaMemoryOverheadFactor
	}
	return OtherLanguageMemoryOverheadFactor
}

// CheckSparkConf reports whether configKey is present in the Spark configuration.
func CheckSparkConf(sparkConf map[string]string, configKey string) bool {
	_, valueExists := sparkConf[configKey]
	return valueExists
}

// Int64Pointer returns a pointer to the given int64 value.
func Int64Pointer(a int64) *int64 {
	return &a
}
```
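To illustrate how these helpers compose, here is a minimal sketch; the import path `github.com/your-org/native-submit-plugin/common` mirrors the placeholder clone URL in the README and is an assumption, as is the ConfigMap consumer:

```go
package main

import (
	"fmt"

	"github.com/kubeflow/spark-operator/api/v1beta2"
	corev1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"

	// Assumed module path, mirroring the placeholder clone URL in the README.
	"github.com/your-org/native-submit-plugin/common"
)

func main() {
	app := &v1beta2.SparkApplication{
		ObjectMeta: metav1.ObjectMeta{Name: "spark-pi", Namespace: "spark-jobs"},
		Spec: v1beta2.SparkApplicationSpec{
			Type: v1beta2.SparkApplicationTypeScala,
		},
	}

	fmt.Println(common.GetAppNamespace(app))         // "spark-jobs"
	fmt.Println(common.GetDriverPodName(app))        // "spark-pi-driver" (no explicit pod name)
	fmt.Println(common.GetMemoryOverheadFactor(app)) // JVM overhead factor for a Scala app

	// Attach the owner reference so the ConfigMap is garbage-collected
	// together with its SparkApplication.
	cm := &corev1.ConfigMap{
		ObjectMeta: metav1.ObjectMeta{
			Name:            common.GetDriverPodName(app) + "-conf",
			Namespace:       common.GetAppNamespace(app),
			OwnerReferences: []metav1.OwnerReference{*common.GetOwnerReference(app)},
		},
	}
	_ = cm // handed to the Kubernetes API client in real code
}
```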
