diff --git a/src/images/README.md b/src/images/README.md index 6b91837..4535422 100644 --- a/src/images/README.md +++ b/src/images/README.md @@ -35,3 +35,108 @@ sh run_tests.sh ### nvidia-smi: command not found - download [nvidia container toolkit](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html#installing-the-nvidia-container-toolkit) + +# mist/images Package Documentation + +## Overview + +The `mist/images` package provides a `ContainerMgr` struct and related methods for managing Docker containers and volumes programmatically using the Docker Go SDK. It enforces limits on the number of containers and volumes, and provides safe creation, deletion, and lifecycle management. + +--- + +## Main APIs + +### `type ContainerMgr struct` +Manages Docker containers and volumes, enforces resource limits, and tracks active resources. + +**Fields:** +- `ctx context.Context` — Context for Docker operations. +- `cli *client.Client` — Docker client. +- `containerLimit int` — Maximum allowed containers. +- `volumeLimit int` — Maximum allowed volumes. +- `containers map[string]struct{}` — Tracks active container IDs. +- `volumes map[string]struct{}` — Tracks active volume names. +- `mu sync.Mutex` — Mutex for concurrency protection. + +--- + +### `func NewContainerMgr(client *client.Client, containerLimit, volumeLimit int) *ContainerMgr` +Creates a new `ContainerMgr` with the specified Docker client and resource limits. + +--- + +### `func (mgr *ContainerMgr) createVolume(volumeName string) (volume.Volume, error)` +Creates a Docker volume with the given name, enforcing the volume limit. +Returns the created volume or an error. + +--- + +### `func (mgr *ContainerMgr) removeVolume(volumeName string, force bool) error` +Removes a Docker volume by name. +Returns an error if the volume does not exist or is in use (unless `force` is true). + +--- + +### `func (mgr *ContainerMgr) runContainer(imageName string, runtimeName string, volumeName string) (string, error)` +Creates and starts a container with the specified image, runtime, and volume attached at `/data`. +Enforces the container limit. +Returns the container ID or an error. + +--- + +### `func (mgr *ContainerMgr) stopContainer(containerID string) error` +Stops a running container by ID. +Returns an error if the operation fails. + +--- + +### `func (mgr *ContainerMgr) removeContainer(containerID string) error` +Removes a container by ID and deletes it from the internal tracking map. +Returns an error if the operation fails. + +--- + +## Test Plan + +The test suite (`serve_image_test.go`) covers the following scenarios: + +- Create a volume, check it exists, delete it, check it no longer exists. +- Create a volume with the same name twice (should not fail). +- Remove a volume that doesn't exist (should fail or return error). +- Remove a volume in use (should fail or return error). +- Attach a volume that does not exist (should fail or return error). +- Two containers attach to the same volume (should succeed in Docker, but test for your policy). +- Two containers try to attach to the same volume at the same time (should succeed in Docker). +- Set a limit of 100 volumes (should fail on 101st if you enforce a limit). +- Set a limit of 10 containers (should fail on 11th if you enforce a limit). + +--- + +## Example Usage + +```go +cli, _ := client.NewClientWithOpts(client.FromEnv) +mgr := NewContainerMgr(cli, 10, 100) + +vol, err := mgr.createVolume("myvol") +if err != nil { /* handle error */ } + +cid, err := mgr.runContainerCuda("myvol") +if err != nil { /* handle error */ } + +_ = mgr.stopContainer(cid) +_ = mgr.removeContainer(cid) +_ = mgr.removeVolume("myvol", true) +``` + +--- + +## Notes + +- All resource-creating methods enforce limits and track resources in maps for accurate management. +- All destructive operations (`removeVolume`, `removeContainer`) return errors for non-existent or in-use resources. +- The package is designed for integration with Docker and expects a running Docker daemon. + +--- + +**For more details, see the source code and comments in `serve_image.go \ No newline at end of file diff --git a/src/images/go.mod b/src/images/go.mod new file mode 100644 index 0000000..213ae47 --- /dev/null +++ b/src/images/go.mod @@ -0,0 +1,35 @@ +module mist/images + +go 1.24.6 + +require github.com/docker/docker v28.2.2+incompatible + +require ( + github.com/Microsoft/go-winio v0.4.21 // indirect + github.com/containerd/errdefs v1.0.0 // indirect + github.com/containerd/errdefs/pkg v0.3.0 // indirect + github.com/containerd/log v0.1.0 // indirect + github.com/distribution/reference v0.6.0 // indirect + github.com/docker/go-connections v0.6.0 // indirect + github.com/docker/go-units v0.5.0 // indirect + github.com/felixge/httpsnoop v1.0.4 // indirect + github.com/go-logr/logr v1.4.3 // indirect + github.com/go-logr/stdr v1.2.2 // indirect + github.com/gogo/protobuf v1.3.2 // indirect + github.com/moby/docker-image-spec v1.3.1 // indirect + github.com/moby/sys/atomicwriter v0.1.0 // indirect + github.com/moby/term v0.5.2 // indirect + github.com/morikuni/aec v1.0.0 // indirect + github.com/opencontainers/go-digest v1.0.0 // indirect + github.com/opencontainers/image-spec v1.1.1 // indirect + github.com/pkg/errors v0.9.1 // indirect + go.opentelemetry.io/auto/sdk v1.1.0 // indirect + go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.63.0 // indirect + go.opentelemetry.io/otel v1.38.0 // indirect + go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.38.0 // indirect + go.opentelemetry.io/otel/metric v1.38.0 // indirect + go.opentelemetry.io/otel/trace v1.38.0 // indirect + golang.org/x/sys v0.35.0 // indirect + golang.org/x/time v0.12.0 // indirect + gotest.tools/v3 v3.5.2 // indirect +) diff --git a/src/images/go.sum b/src/images/go.sum new file mode 100644 index 0000000..be7bb0e --- /dev/null +++ b/src/images/go.sum @@ -0,0 +1,132 @@ +github.com/Azure/go-ansiterm v0.0.0-20250102033503-faa5f7b0171c h1:udKWzYgxTojEKWjV8V+WSxDXJ4NFATAsZjh8iIbsQIg= +github.com/Azure/go-ansiterm v0.0.0-20250102033503-faa5f7b0171c/go.mod h1:xomTg63KZ2rFqZQzSB4Vz2SUXa1BpHTVz9L5PTmPC4E= +github.com/Microsoft/go-winio v0.4.21 h1:+6mVbXh4wPzUrl1COX9A+ZCvEpYsOBZ6/+kwDnvLyro= +github.com/Microsoft/go-winio v0.4.21/go.mod h1:JPGBdM1cNvN/6ISo+n8V5iA4v8pBzdOpzfwIujj1a84= +github.com/cenkalti/backoff/v5 v5.0.3 h1:ZN+IMa753KfX5hd8vVaMixjnqRZ3y8CuJKRKj1xcsSM= +github.com/cenkalti/backoff/v5 v5.0.3/go.mod h1:rkhZdG3JZukswDf7f0cwqPNk4K0sa+F97BxZthm/crw= +github.com/containerd/errdefs v1.0.0 h1:tg5yIfIlQIrxYtu9ajqY42W3lpS19XqdxRQeEwYG8PI= +github.com/containerd/errdefs v1.0.0/go.mod h1:+YBYIdtsnF4Iw6nWZhJcqGSg/dwvV7tyJ/kCkyJ2k+M= +github.com/containerd/errdefs/pkg v0.3.0 h1:9IKJ06FvyNlexW690DXuQNx2KA2cUJXx151Xdx3ZPPE= +github.com/containerd/errdefs/pkg v0.3.0/go.mod h1:NJw6s9HwNuRhnjJhM7pylWwMyAkmCQvQ4GpJHEqRLVk= +github.com/containerd/log v0.1.0 h1:TCJt7ioM2cr/tfR8GPbGf9/VRAX8D2B4PjzCpfX540I= +github.com/containerd/log v0.1.0/go.mod h1:VRRf09a7mHDIRezVKTRCrOq78v577GXq3bSa3EhrzVo= +github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= +github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/distribution/reference v0.6.0 h1:0IXCQ5g4/QMHHkarYzh5l+u8T3t73zM5QvfrDyIgxBk= +github.com/distribution/reference v0.6.0/go.mod h1:BbU0aIcezP1/5jX/8MP0YiH4SdvB5Y4f/wlDRiLyi3E= +github.com/docker/docker v28.2.2+incompatible h1:CjwRSksz8Yo4+RmQ339Dp/D2tGO5JxwYeqtMOEe0LDw= +github.com/docker/docker v28.2.2+incompatible/go.mod h1:eEKB0N0r5NX/I1kEveEz05bcu8tLC/8azJZsviup8Sk= +github.com/docker/go-connections v0.6.0 h1:LlMG9azAe1TqfR7sO+NJttz1gy6KO7VJBh+pMmjSD94= +github.com/docker/go-connections v0.6.0/go.mod h1:AahvXYshr6JgfUJGdDCs2b5EZG/vmaMAntpSFH5BFKE= +github.com/docker/go-units v0.5.0 h1:69rxXcBk27SvSaaxTtLh/8llcHD8vYHT7WSdRZ/jvr4= +github.com/docker/go-units v0.5.0/go.mod h1:fgPhTUdO+D/Jk86RDLlptpiXQzgHJF7gydDDbaIK4Dk= +github.com/felixge/httpsnoop v1.0.4 h1:NFTV2Zj1bL4mc9sqWACXbQFVBBg2W3GPvqp8/ESS2Wg= +github.com/felixge/httpsnoop v1.0.4/go.mod h1:m8KPJKqk1gH5J9DgRY2ASl2lWCfGKXixSwevea8zH2U= +github.com/go-logr/logr v1.2.2/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A= +github.com/go-logr/logr v1.4.3 h1:CjnDlHq8ikf6E492q6eKboGOC0T8CDaOvkHCIg8idEI= +github.com/go-logr/logr v1.4.3/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY= +github.com/go-logr/stdr v1.2.2 h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag= +github.com/go-logr/stdr v1.2.2/go.mod h1:mMo/vtBO5dYbehREoey6XUKy/eSumjCCveDpRre4VKE= +github.com/gogo/protobuf v1.3.2 h1:Ov1cvc58UF3b5XjBnZv7+opcTcQFZebYjWzi34vdm4Q= +github.com/gogo/protobuf v1.3.2/go.mod h1:P1XiOD3dCwIKUDQYPy72D8LYyHL2YPYrpS2s69NZV8Q= +github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8= +github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU= +github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= +github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= +github.com/grpc-ecosystem/grpc-gateway/v2 v2.27.2 h1:8Tjv8EJ+pM1xP8mK6egEbD1OgnVTyacbefKhmbLhIhU= +github.com/grpc-ecosystem/grpc-gateway/v2 v2.27.2/go.mod h1:pkJQ2tZHJ0aFOVEEot6oZmaVEZcRme73eIFmhiVuRWs= +github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8= +github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck= +github.com/moby/docker-image-spec v1.3.1 h1:jMKff3w6PgbfSa69GfNg+zN/XLhfXJGnEx3Nl2EsFP0= +github.com/moby/docker-image-spec v1.3.1/go.mod h1:eKmb5VW8vQEh/BAr2yvVNvuiJuY6UIocYsFu/DxxRpo= +github.com/moby/sys/atomicwriter v0.1.0 h1:kw5D/EqkBwsBFi0ss9v1VG3wIkVhzGvLklJ+w3A14Sw= +github.com/moby/sys/atomicwriter v0.1.0/go.mod h1:Ul8oqv2ZMNHOceF643P6FKPXeCmYtlQMvpizfsSoaWs= +github.com/moby/sys/sequential v0.6.0 h1:qrx7XFUd/5DxtqcoH1h438hF5TmOvzC/lspjy7zgvCU= +github.com/moby/sys/sequential v0.6.0/go.mod h1:uyv8EUTrca5PnDsdMGXhZe6CCe8U/UiTWd+lL+7b/Ko= +github.com/moby/term v0.5.2 h1:6qk3FJAFDs6i/q3W/pQ97SX192qKfZgGjCQqfCJkgzQ= +github.com/moby/term v0.5.2/go.mod h1:d3djjFCrjnB+fl8NJux+EJzu0msscUP+f8it8hPkFLc= +github.com/morikuni/aec v1.0.0 h1:nP9CBfwrvYnBRgY6qfDQkygYDmYwOilePFkwzv4dU8A= +github.com/morikuni/aec v1.0.0/go.mod h1:BbKIizmSmc5MMPqRYbxO4ZU0S0+P200+tUnFx7PXmsc= +github.com/opencontainers/go-digest v1.0.0 h1:apOUWs51W5PlhuyGyz9FCeeBIOUDA/6nW8Oi/yOhh5U= +github.com/opencontainers/go-digest v1.0.0/go.mod h1:0JzlMkj0TRzQZfJkVvzbP0HBR3IKzErnv2BNG4W4MAM= +github.com/opencontainers/image-spec v1.1.1 h1:y0fUlFfIZhPF1W537XOLg0/fcx6zcHCJwooC2xJA040= +github.com/opencontainers/image-spec v1.1.1/go.mod h1:qpqAh3Dmcf36wStyyWU+kCeDgrGnAve2nCC8+7h8Q0M= +github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4= +github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= +github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= +github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/sirupsen/logrus v1.7.0/go.mod h1:yWOB1SBYBC5VeMP7gHvWumXLIWorT60ONWic61uBYv0= +github.com/sirupsen/logrus v1.9.3 h1:dueUQJ1C2q9oE3F7wvmSGAaVtTmUizReu6fjN8uqzbQ= +github.com/sirupsen/logrus v1.9.3/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ= +github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs= +github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu7U= +github.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U= +github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= +github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= +go.opentelemetry.io/auto/sdk v1.1.0 h1:cH53jehLUN6UFLY71z+NDOiNJqDdPRaXzTel0sJySYA= +go.opentelemetry.io/auto/sdk v1.1.0/go.mod h1:3wSPjt5PWp2RhlCcmmOial7AvC4DQqZb7a7wCow3W8A= +go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.63.0 h1:RbKq8BG0FI8OiXhBfcRtqqHcZcka+gU3cskNuf05R18= +go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.63.0/go.mod h1:h06DGIukJOevXaj/xrNjhi/2098RZzcLTbc0jDAUbsg= +go.opentelemetry.io/otel v1.38.0 h1:RkfdswUDRimDg0m2Az18RKOsnI8UDzppJAtj01/Ymk8= +go.opentelemetry.io/otel v1.38.0/go.mod h1:zcmtmQ1+YmQM9wrNsTGV/q/uyusom3P8RxwExxkZhjM= +go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.38.0 h1:GqRJVj7UmLjCVyVJ3ZFLdPRmhDUp2zFmQe3RHIOsw24= +go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.38.0/go.mod h1:ri3aaHSmCTVYu2AWv44YMauwAQc0aqI9gHKIcSbI1pU= +go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.38.0 h1:aTL7F04bJHUlztTsNGJ2l+6he8c+y/b//eR0jjjemT4= +go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.38.0/go.mod h1:kldtb7jDTeol0l3ewcmd8SDvx3EmIE7lyvqbasU3QC4= +go.opentelemetry.io/otel/metric v1.38.0 h1:Kl6lzIYGAh5M159u9NgiRkmoMKjvbsKtYRwgfrA6WpA= +go.opentelemetry.io/otel/metric v1.38.0/go.mod h1:kB5n/QoRM8YwmUahxvI3bO34eVtQf2i4utNVLr9gEmI= +go.opentelemetry.io/otel/sdk v1.38.0 h1:l48sr5YbNf2hpCUj/FoGhW9yDkl+Ma+LrVl8qaM5b+E= +go.opentelemetry.io/otel/sdk v1.38.0/go.mod h1:ghmNdGlVemJI3+ZB5iDEuk4bWA3GkTpW+DOoZMYBVVg= +go.opentelemetry.io/otel/sdk/metric v1.38.0 h1:aSH66iL0aZqo//xXzQLYozmWrXxyFkBJ6qT5wthqPoM= +go.opentelemetry.io/otel/sdk/metric v1.38.0/go.mod h1:dg9PBnW9XdQ1Hd6ZnRz689CbtrUp0wMMs9iPcgT9EZA= +go.opentelemetry.io/otel/trace v1.38.0 h1:Fxk5bKrDZJUH+AMyyIXGcFAPah0oRcT+LuNtJrmcNLE= +go.opentelemetry.io/otel/trace v1.38.0/go.mod h1:j1P9ivuFsTceSWe1oY+EeW3sc+Pp42sO++GHkg4wwhs= +go.opentelemetry.io/proto/otlp v1.7.1 h1:gTOMpGDb0WTBOP8JaO72iL3auEZhVmAQg4ipjOVAtj4= +go.opentelemetry.io/proto/otlp v1.7.1/go.mod h1:b2rVh6rfI/s2pHWNlB7ILJcRALpcNDzKhACevjI+ZnE= +golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= +golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= +golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= +golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= +golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= +golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= +golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= +golang.org/x/net v0.43.0 h1:lat02VYK2j4aLzMzecihNvTlJNQUq316m2Mr9rnM6YE= +golang.org/x/net v0.43.0/go.mod h1:vhO1fvI4dGsIjh73sWfUVjj3N7CA9WkKJNQm2svM6Jg= +golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20191026070338-33540a1f6037/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210124154548-22da62e12c0c/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.35.0 h1:vz1N37gP5bs89s7He8XuIYXpyY0+QlsKmzipCbUtyxI= +golang.org/x/sys v0.35.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k= +golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= +golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= +golang.org/x/text v0.28.0 h1:rhazDwis8INMIwQ4tpjLDzUhx6RlXqZNPEM0huQojng= +golang.org/x/text v0.28.0/go.mod h1:U8nCwOR8jO/marOQ0QbDiOngZVEBB7MAiitBuMjXiNU= +golang.org/x/time v0.12.0 h1:ScB/8o8olJvc+CQPWrK3fPZNfh7qgwCrY0zJmoEQLSE= +golang.org/x/time v0.12.0/go.mod h1:CDIdPxbZBQxdj6cxyCIdrNogrJKMJ7pr37NYpMcMDSg= +golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= +golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= +golang.org/x/tools v0.0.0-20200619180055-7c47624df98f/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE= +golang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= +golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +google.golang.org/genproto/googleapis/api v0.0.0-20250825161204-c5933d9347a5 h1:BIRfGDEjiHRrk0QKZe3Xv2ieMhtgRGeLcZQ0mIVn4EY= +google.golang.org/genproto/googleapis/api v0.0.0-20250825161204-c5933d9347a5/go.mod h1:j3QtIyytwqGr1JUDtYXwtMXWPKsEa5LtzIFN1Wn5WvE= +google.golang.org/genproto/googleapis/rpc v0.0.0-20250825161204-c5933d9347a5 h1:eaY8u2EuxbRv7c3NiGK0/NedzVsCcV6hDuU5qPX5EGE= +google.golang.org/genproto/googleapis/rpc v0.0.0-20250825161204-c5933d9347a5/go.mod h1:M4/wBTSeyLxupu3W3tJtOgB14jILAS/XWPSSa3TAlJc= +google.golang.org/grpc v1.75.0 h1:+TW+dqTd2Biwe6KKfhE5JpiYIBWq865PhKGSXiivqt4= +google.golang.org/grpc v1.75.0/go.mod h1:JtPAzKiq4v1xcAB2hydNlWI2RnF85XXcV0mhKXr2ecQ= +google.golang.org/protobuf v1.36.8 h1:xHScyCOEuuwZEc6UtSOvPbAT4zRh0xcNRYekJwfqyMc= +google.golang.org/protobuf v1.36.8/go.mod h1:fuxRtAxBytpl4zzqUh6/eyUujkJdNiuEkXntxiD/uRU= +gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= +gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= +gotest.tools/v3 v3.5.2 h1:7koQfIKdy+I8UTetycgUqXWSDwpgv193Ka+qRsmBY8Q= +gotest.tools/v3 v3.5.2/go.mod h1:LtdLGcnqToBH83WByAAi/wiwSFCArdFIUV/xxN4pcjA= diff --git a/src/images/serve_image.go b/src/images/serve_image.go new file mode 100644 index 0000000..b264582 --- /dev/null +++ b/src/images/serve_image.go @@ -0,0 +1,187 @@ +package main + +import ( + "context" + "fmt" + "io" + "log/slog" + "os" + "sync" + + "github.com/docker/docker/api/types/container" + "github.com/docker/docker/api/types/mount" + "github.com/docker/docker/api/types/volume" + "github.com/docker/docker/client" +) + +// ContainerMgr manages Docker containers and volumes, enforces resource limits, and tracks active resources. +type ContainerMgr struct { + ctx context.Context + cli *client.Client + containerLimit int + volumeLimit int + containers map[string]struct{} + volumes map[string]struct{} + mu sync.Mutex +} + +// NewContainerMgr creates a new ContainerMgr with the specified Docker client and resource limits. +func NewContainerMgr(client *client.Client, containerLimit, volumeLimit int) *ContainerMgr { + return &ContainerMgr{ + ctx: context.Background(), + cli: client, + containerLimit: containerLimit, + volumeLimit: volumeLimit, + containers: make(map[string]struct{}), + volumes: make(map[string]struct{}), + } +} + +// stopContainer stops a running container by its ID. +// Returns an error if the operation fails. +func (mgr *ContainerMgr) stopContainer(containerID string) error { + ctx := mgr.ctx + cli := mgr.cli + + err := cli.ContainerStop(ctx, containerID, container.StopOptions{}) + if err != nil { + slog.Error("Failed to stop container", "containerID", containerID, "error", err) + return err + } + return nil +} + +// removeContainer removes a container by its ID and deletes it from the internal tracking map. +// Returns an error if the operation fails. +func (mgr *ContainerMgr) removeContainer(containerID string) error { + mgr.mu.Lock() + defer mgr.mu.Unlock() + ctx := mgr.ctx + cli := mgr.cli + err := cli.ContainerRemove(ctx, containerID, container.RemoveOptions{RemoveVolumes: true}) + if err != nil { + slog.Error("Failed to remove container", "containerID", containerID, "error", err) + return err + } + delete(mgr.containers, containerID) + return nil +} + +// createVolume creates a Docker volume with the given name, enforcing the volume limit. +// Returns the created volume or an error. +func (mgr *ContainerMgr) createVolume(volumeName string) (volume.Volume, error) { + mgr.mu.Lock() + defer mgr.mu.Unlock() + if len(mgr.volumes) >= mgr.volumeLimit { + slog.Warn("Volume limit reached", "limit", mgr.volumeLimit) + return volume.Volume{}, fmt.Errorf("volume limit reached") + } + ctx := mgr.ctx + cli := mgr.cli + + vol, err := cli.VolumeCreate(ctx, volume.CreateOptions{Name: volumeName}) + if err != nil { + slog.Error("Failed to create volume", "volumeName", volumeName, "error", err) + return volume.Volume{}, err + } + mgr.volumes[vol.Name] = struct{}{} + return vol, nil +} + +// removeVolume removes a Docker volume by name. +// Returns an error if the volume does not exist or is in use (unless force is true). +func (mgr *ContainerMgr) removeVolume(volumeName string, force bool) error { + mgr.mu.Lock() + defer mgr.mu.Unlock() + ctx := mgr.ctx + cli := mgr.cli + + vols, _ := cli.VolumeList(ctx, volume.ListOptions{}) + found := false + for _, v := range vols.Volumes { + if v.Name == volumeName { + found = true + break + } + } + if !found { + slog.Warn("Volume does not exist", "volumeName", volumeName) + return fmt.Errorf("volume %s does not exist", volumeName) + } + + err := cli.VolumeRemove(ctx, volumeName, force) + if err != nil { + slog.Error("Failed to remove volume", "volumeName", volumeName, "error", err) + return err + } + delete(mgr.volumes, volumeName) + return nil +} + +// runContainer creates and starts a container with the specified image, runtime, and volume attached at /data. +// Enforces the container limit and checks that the volume exists. +// Returns the container ID or an error. +func (mgr *ContainerMgr) runContainer(imageName string, runtimeName string, volumeName string) (string, error) { + mgr.mu.Lock() + defer mgr.mu.Unlock() + if len(mgr.containers) >= mgr.containerLimit { + slog.Warn("Container limit reached", "limit", mgr.containerLimit) + return "", fmt.Errorf("container limit reached") + } + ctx := mgr.ctx + cli := mgr.cli + + vols, _ := cli.VolumeList(ctx, volume.ListOptions{}) + found := false + for _, v := range vols.Volumes { + if v.Name == volumeName { + found = true + break + } + } + if !found { + slog.Error("Volume does not exist for container", "volumeName", volumeName) + return "", fmt.Errorf("volume %s does not exist", volumeName) + } + + resp, err := cli.ContainerCreate( + ctx, + &container.Config{ + Image: imageName, + Cmd: []string{"sleep", "1000"}, + }, + &container.HostConfig{ + Runtime: runtimeName, + Mounts: []mount.Mount{ + { + Type: mount.TypeVolume, + Source: volumeName, + Target: "/data", + }, + }, + }, + nil, + nil, + "", + ) + + if err != nil { + slog.Error("Failed to create container", "imageName", imageName, "runtimeName", runtimeName, "volumeName", volumeName, "error", err) + return "", err + } + + mgr.containers[resp.ID] = struct{}{} + if err := cli.ContainerStart(ctx, resp.ID, container.StartOptions{}); err != nil { + slog.Error("Failed to start container", "containerID", resp.ID, "error", err) + return "", err + } + + out, err := cli.ContainerLogs(ctx, resp.ID, container.LogsOptions{ShowStdout: true}) + if err != nil { + slog.Error("Failed to get container logs", "containerID", resp.ID, "error", err) + return resp.ID, err + } + + io.Copy(os.Stdout, out) + return resp.ID, nil +} diff --git a/src/images/serve_image_test.go b/src/images/serve_image_test.go new file mode 100644 index 0000000..9baa0f0 --- /dev/null +++ b/src/images/serve_image_test.go @@ -0,0 +1,274 @@ +package main + +import ( + "fmt" + "testing" + + "github.com/docker/docker/api/types/volume" + "github.com/docker/docker/client" +) + +func setupMgr(t *testing.T) *ContainerMgr { + cli, err := client.NewClientWithOpts(client.FromEnv) + if err != nil { + t.Fatalf("Failed to create Docker client: %v", err) + } + return NewContainerMgr(cli, 10, 100) +} + +// Create a volume, check exists, delete, check not exists +func TestCreateDeleteVolume(t *testing.T) { + mgr := setupMgr(t) + volName := "test_volume_t1" + _, err := mgr.createVolume(volName) + if err != nil { + t.Errorf("Failed to create volume %s: %v", volName, err) + } + vols, _ := mgr.cli.VolumeList(mgr.ctx, volume.ListOptions{}) + found := false + for _, v := range vols.Volumes { + if v.Name == volName { + found = true + break + } + } + if !found { + t.Errorf("Volume %s not found after creation", volName) + } + err = mgr.removeVolume(volName, true) + if err != nil { + t.Errorf("Failed to remove volume %s: %v", volName, err) + } + vols, _ = mgr.cli.VolumeList(mgr.ctx, volume.ListOptions{}) + for _, v := range vols.Volumes { + if v.Name == volName { + t.Errorf("Volume %s still exists after deletion", volName) + } + } +} + +// Create a volume with same name twice (should not fail) +func TestCreateVolumeTwice(t *testing.T) { + mgr := setupMgr(t) + volName := "test_volume_t3" + _, err := mgr.createVolume(volName) + if err != nil { + t.Errorf("Failed to create volume %s: %v", volName, err) + } + defer mgr.removeVolume(volName, true) + _, err = mgr.createVolume(volName) + if err != nil { + t.Errorf("Failed to create volume %s a second time: %v", volName, err) + } +} + +// Remove volume that doesn't exist (should fail or panic) +func TestRemoveNonexistentVolume(t *testing.T) { + mgr := setupMgr(t) + err := mgr.removeVolume("nonexistent_volume_t4", true) + if err == nil { + t.Errorf("Expected error when removing nonexistent volume, but no error") + } else { + t.Logf("Correctly got error when removing nonexistent volume: %v", err) + } +} + +// Remove volume in use (should fail or panic) +func TestRemoveVolumeInUse(t *testing.T) { + mgr := setupMgr(t) + imageName := "pytorch-cuda" + runtimeName := "nvidia" + volName := "test_volume_t5" + _, err := mgr.createVolume(volName) + if err != nil { + t.Fatalf("Failed to create volume %s: %v", volName, err) + } + containerID, err := mgr.runContainer(imageName, runtimeName, volName) + if err != nil { + t.Fatalf("Failed to start container: %v", err) + } + defer func() { + // Cleanup: stop and remove container, then remove volume + if err := mgr.stopContainer(containerID); err != nil { + t.Logf("Cleanup: failed to stop container %s: %v", containerID, err) + } else { + t.Logf("Cleanup: stopped container %s successfully", containerID) + } + if err := mgr.removeContainer(containerID); err != nil { + t.Logf("Cleanup: failed to remove container %s: %v", containerID, err) + } else { + t.Logf("Cleanup: removed container %s successfully", containerID) + } + if err := mgr.removeVolume(volName, true); err != nil { + t.Logf("Cleanup: failed to remove volume %s: %v", volName, err) + } else { + t.Logf("Cleanup: removed volume %s successfully", volName) + } + }() + err = mgr.removeVolume(volName, true) // Should error: volume is in use by a running container + if err == nil { + t.Errorf("Expected error when removing volume in use, but no error") + } else { + t.Logf("Correctly got error when removing volume in use: %v", err) + } +} + +// Attach a volume that does not exist (should fail or panic) +func TestAttachNonexistentVolume(t *testing.T) { + mgr := setupMgr(t) + imageName := "pytorch-cuda" + runtimeName := "nvidia" + volName := "nonexistent_volume_t6" + id, err := mgr.runContainer(imageName, runtimeName, volName) + // If Docker auto-creates the volume, this may not error; check your policy + if id != "" && err != nil { + t.Errorf("Expected error when attaching nonexistent volume, but got id=%v, err=%v", id, err) + } else if err != nil { + t.Logf("Correctly got error when attaching nonexistent volume: %v", err) + } +} + +// Two containers attach to the same volume (should succeed in Docker, but test for your policy) +func TestTwoContainersSameVolume(t *testing.T) { + mgr := setupMgr(t) + imageName := "pytorch-cuda" + runtimeName := "nvidia" + volName := "test_volume_t7" + _, err := mgr.createVolume(volName) + if err != nil { + t.Fatalf("Failed to create volume %s: %v", volName, err) + } + id1, err := mgr.runContainer(imageName, runtimeName, volName) + if err != nil { + t.Fatalf("Failed to start first container: %v", err) + } + id2, err := mgr.runContainer(imageName, runtimeName, volName) + if err != nil { + t.Fatalf("Failed to start second container: %v", err) + } + // Both containers should be able to use the same volume + if err := mgr.stopContainer(id1); err != nil { + t.Logf("Failed to stop first container: %v", err) + } + if err := mgr.removeContainer(id1); err != nil { + t.Logf("Failed to remove first container: %v", err) + } + if err := mgr.stopContainer(id2); err != nil { + t.Logf("Failed to stop second container: %v", err) + } + if err := mgr.removeContainer(id2); err != nil { + t.Logf("Failed to remove second container: %v", err) + } + if err := mgr.removeVolume(volName, true); err != nil { + t.Logf("Failed to remove volume %s: %v", volName, err) + } +} + +// Two containers try to attach to the same volume at the same time (should succeed in Docker) +func TestTwoContainersSameVolumeConcurrent(t *testing.T) { + mgr := setupMgr(t) + imageName := "pytorch-cuda" + runtimeName := "nvidia" + volName := "test_volume_t8" + _, err := mgr.createVolume(volName) + if err != nil { + t.Fatalf("Failed to create volume %s: %v", volName, err) + } + id1, err := mgr.runContainer(imageName, runtimeName, volName) + if err != nil { + t.Fatalf("Failed to start first container: %v", err) + } + id2, err2 := mgr.runContainer(imageName, runtimeName, volName) + if err2 != nil { + t.Fatalf("Failed to start second container: %v", err2) + } + // This test does not actually run containers concurrently, but checks Docker's shared volume support + if err := mgr.stopContainer(id1); err != nil { + t.Logf("Failed to stop first container: %v", err) + } + if err := mgr.removeContainer(id1); err != nil { + t.Logf("Failed to remove first container: %v", err) + } + if err := mgr.stopContainer(id2); err != nil { + t.Logf("Failed to stop second container: %v", err) + } + if err := mgr.removeContainer(id2); err != nil { + t.Logf("Failed to remove second container: %v", err) + } + if err := mgr.removeVolume(volName, true); err != nil { + t.Logf("Failed to remove volume %s: %v", volName, err) + } +} + +// Set a limit of 100 volumes (should fail on 101st if you enforce a limit) +func TestVolumeLimit(t *testing.T) { + mgr := setupMgr(t) + limit := 100 + created := []string{} + for i := 0; i < limit; i++ { + name := "test_volume_t9_" + fmt.Sprint(i) + _, err := mgr.createVolume(name) + if err != nil { + t.Fatalf("Failed to create volume %s: %v", name, err) + } + created = append(created, name) + } + name := "test_volume_fail" + _, err := mgr.createVolume(name) + if err == nil { + t.Errorf("Volume limit not enforced") + } else { + t.Logf("Correctly got error when exceeding volume limit: %v", err) + } + + defer func() { + // Cleanup: remove all created volumes + for _, name := range created { + if err := mgr.removeVolume(name, true); err != nil { + t.Logf("Cleanup: failed to remove volume %s: %v", name, err) + } + } + }() + // If your implementation doesn't enforce a limit, this test will fail +} + +// Set a limit of 10 containers (should fail on 11th if you enforce a limit) +func TestContainerLimit(t *testing.T) { + mgr := setupMgr(t) + imageName := "pytorch-cuda" + runtimeName := "nvidia" + volName := "test_volume_t10" + _, err := mgr.createVolume(volName) + if err != nil { + t.Fatalf("Failed to create volume %s: %v", volName, err) + } + ids := []string{} + limit := 10 + for i := 0; i < limit; i++ { + id, err := mgr.runContainer(imageName, runtimeName, volName) + if err != nil { + t.Fatalf("Failed to start container %d: %v", i, err) + } + ids = append(ids, id) + } + _, err = mgr.runContainer(imageName, runtimeName, volName) + if err == nil { + t.Errorf("Container limit not enforced") + } else { + t.Logf("Correctly got error when exceeding container limit: %v", err) + } + defer func() { + // Cleanup: stop and remove all containers, then remove the volume + for _, id := range ids { + if err := mgr.stopContainer(id); err != nil { + t.Logf("Cleanup: failed to stop container %s: %v", id, err) + } + if err := mgr.removeContainer(id); err != nil { + t.Logf("Cleanup: failed to remove container %s: %v", id, err) + } + } + if err := mgr.removeVolume(volName, true); err != nil { + t.Logf("Cleanup: failed to remove volume %s: %v", volName, err) + } + }() +}