Skip to content
This repository was archived by the owner on Sep 30, 2024. It is now read-only.

Commit cd1721b

Browse files
authored
Batch indexing: syntactic codeintel worker scaffolding (#59747)
Currently the worker itself does nothing; it only exposes a health endpoint and loads basic environment configuration. This commit adds: a Bazel build for the Docker container; wiring of scip-treesitter-cli so that it is available in the container; a dev setup for scip-treesitter-cli (copied from the scip-ctags setup for local development); a run configuration for the worker (`sg run codeintel-syntactic-worker` to test it); and a start configuration (`sg start codeintel-syntactic`) that contains only the minimal dependencies required to run the worker. We will expand the configuration gradually as we add more features.
1 parent 014cad1 commit cd1721b

File tree

18 files changed

+398
-10
lines changed

18 files changed

+398
-10
lines changed
Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,77 @@
1+
load("@io_bazel_rules_go//go:def.bzl", "go_binary", "go_library")
2+
load("@rules_oci//oci:defs.bzl", "oci_image", "oci_push", "oci_tarball")
3+
load("@rules_pkg//:pkg.bzl", "pkg_tar")
4+
load("@container_structure_test//:defs.bzl", "container_structure_test")
5+
load("//dev:oci_defs.bzl", "image_repository")
6+
7+
go_library(
8+
name = "syntactic-code-intel-worker_lib",
9+
srcs = ["main.go"],
10+
importpath = "github.com/sourcegraph/sourcegraph/cmd/syntactic-code-intel-worker",
11+
visibility = ["//visibility:private"],
12+
deps = [
13+
"//cmd/syntactic-code-intel-worker/shared",
14+
"//internal/sanitycheck",
15+
"//internal/service/svcmain",
16+
],
17+
)
18+
19+
go_binary(
20+
name = "syntactic-code-intel-worker",
21+
embed = [":syntactic-code-intel-worker_lib"],
22+
visibility = ["//visibility:public"],
23+
x_defs = {
24+
"github.com/sourcegraph/sourcegraph/internal/version.version": "{STABLE_VERSION}",
25+
"github.com/sourcegraph/sourcegraph/internal/version.timestamp": "{VERSION_TIMESTAMP}",
26+
},
27+
)
28+
29+
pkg_tar(
30+
name = "tar_syntactic-code-intel-worker",
31+
srcs = [":syntactic-code-intel-worker"],
32+
)
33+
34+
pkg_tar(
35+
name = "tar_scip-treesitter",
36+
srcs = ["//docker-images/syntax-highlighter/crates/scip-treesitter-cli:scip-treesitter"],
37+
package_dir = "/usr/local/bin",
38+
)
39+
40+
# Container image for the worker. The entrypoint runs the worker binary under
# tini; the two tar layers add the worker binary and the scip-treesitter CLI.
oci_image(
    name = "image",
    base = "@wolfi_base",
    entrypoint = [
        "/sbin/tini",
        "--",
        "/syntactic-code-intel-worker",
    ],
    tars = [
        # Both layers are targets of this package; use the explicit ":name"
        # label form for each so the references are written consistently.
        ":tar_syntactic-code-intel-worker",
        ":tar_scip-treesitter",
    ],
    user = "sourcegraph",
)
54+
55+
oci_tarball(
56+
name = "image_tarball",
57+
image = ":image",
58+
repo_tags = ["syntactic-code-intel-worker:candidate"],
59+
)
60+
61+
container_structure_test(
62+
name = "image_test",
63+
timeout = "short",
64+
configs = ["image_test.yaml"],
65+
driver = "docker",
66+
image = ":image",
67+
tags = [
68+
"exclusive",
69+
"requires-network",
70+
],
71+
)
72+
73+
oci_push(
74+
name = "candidate_push",
75+
image = ":image",
76+
repository = image_repository("syntactic-code-intel-worker"),
77+
)
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
# See https://github.com/sourcegraph/codenotify for documentation.
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
# Syntactic code intel worker
2+
3+
🚧 WORK IN PROGRESS 🚧
4+
5+
Stateless service that handles generating SCIP data for codebases
6+
using Tree-sitter for powering syntax-based code navigation.
7+
8+
[Design docs](https://docs.google.com/document/d/14MHauv52o4zTFiV6gC6NOJZxcJpglK-ElWa64gqeKDo/edit) (Sourcegraph internal)
Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
schemaVersion: "2.0.0"
2+
3+
commandTests:
4+
- name: "worker binary is runnable"
5+
command: "/syntactic-code-intel-worker"
6+
envVars:
7+
- key: "SANITY_CHECK"
8+
value: "true"
9+
10+
- name: "scip treesitter binary is runnable"
11+
command: "/usr/local/bin/scip-treesitter"
12+
envVars:
13+
- key: "SANITY_CHECK"
14+
value: "true"
15+
16+
- name: "not running as root"
17+
command: "/usr/bin/id"
18+
args:
19+
- -u
20+
excludedOutput: ["^0"]
21+
exitCode: 0
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
package main
2+
3+
import (
4+
"github.com/sourcegraph/sourcegraph/cmd/syntactic-code-intel-worker/shared"
5+
"github.com/sourcegraph/sourcegraph/internal/sanitycheck"
6+
"github.com/sourcegraph/sourcegraph/internal/service/svcmain"
7+
)
8+
9+
func main() {
10+
sanitycheck.Pass()
11+
svcmain.SingleServiceMain(shared.Service)
12+
}
Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
load("@io_bazel_rules_go//go:def.bzl", "go_library")
2+
3+
go_library(
4+
name = "shared",
5+
srcs = [
6+
"config.go",
7+
"service.go",
8+
"shared.go",
9+
],
10+
importpath = "github.com/sourcegraph/sourcegraph/cmd/syntactic-code-intel-worker/shared",
11+
visibility = ["//visibility:public"],
12+
deps = [
13+
"//internal/codeintel/shared/lsifuploadstore",
14+
"//internal/debugserver",
15+
"//internal/encryption/keyring",
16+
"//internal/env",
17+
"//internal/goroutine",
18+
"//internal/httpserver",
19+
"//internal/observation",
20+
"//internal/service",
21+
"//lib/errors",
22+
"@com_github_sourcegraph_log//:log",
23+
],
24+
)
Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
package shared
2+
3+
import (
4+
"net"
5+
"strconv"
6+
"time"
7+
8+
"github.com/sourcegraph/sourcegraph/internal/codeintel/shared/lsifuploadstore"
9+
"github.com/sourcegraph/sourcegraph/internal/env"
10+
"github.com/sourcegraph/sourcegraph/lib/errors"
11+
)
12+
13+
type Config struct {
14+
env.BaseConfig
15+
16+
WorkerPollInterval time.Duration
17+
WorkerConcurrency int
18+
WorkerBudget int64
19+
MaximumRuntimePerJob time.Duration
20+
SCIPUploadStoreConfig *lsifuploadstore.Config
21+
CliPath string
22+
ListenAddress string
23+
}
24+
25+
const DefaultPort = 3188
26+
27+
func (c *Config) Load() {
28+
c.SCIPUploadStoreConfig = &lsifuploadstore.Config{}
29+
c.SCIPUploadStoreConfig.Load()
30+
31+
c.WorkerPollInterval = c.GetInterval("SYNTACTIC_CODE_INTEL_WORKER_POLL_INTERVAL", "1s", "Interval between queries to the repository queue")
32+
c.WorkerConcurrency = c.GetInt("SYNTACTIC_CODE_INTEL_WORKER_CONCURRENCY", "1", "The maximum number of repositories that can be processed concurrently.")
33+
c.WorkerBudget = int64(c.GetInt("SYNTACTIC_CODE_INTEL_WORKER_BUDGET", "0", "The amount of compressed input data (in bytes) a worker can process concurrently. Zero acts as an infinite budget."))
34+
c.MaximumRuntimePerJob = c.GetInterval("SYNTACTIC_CODE_INTEL_WORKER_MAXIMUM_RUNTIME_PER_JOB", "25m", "The maximum time a single repository indexing job can take")
35+
36+
c.CliPath = c.Get("SCIP_TREESITTER_COMMAND", "scip-treesitter", "TODO: fill in description")
37+
38+
c.ListenAddress = c.GetOptional("SYNTACTIC_CODE_INTEL_WORKER_ADDR", "The address under which the syntactic codeintel worker API listens. Can include a port.")
39+
// Fall back to a reasonable default.
40+
if c.ListenAddress == "" {
41+
port := strconv.Itoa(DefaultPort)
42+
host := ""
43+
if env.InsecureDev {
44+
host = "127.0.0.1"
45+
}
46+
c.ListenAddress = net.JoinHostPort(host, port)
47+
}
48+
}
49+
50+
func (c *Config) Validate() error {
51+
var errs error
52+
errs = errors.Append(errs, c.BaseConfig.Validate())
53+
errs = errors.Append(errs, c.SCIPUploadStoreConfig.Validate())
54+
return errs
55+
}
Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
package shared
2+
3+
import (
4+
"context"
5+
6+
"github.com/sourcegraph/sourcegraph/internal/debugserver"
7+
"github.com/sourcegraph/sourcegraph/internal/env"
8+
"github.com/sourcegraph/sourcegraph/internal/observation"
9+
"github.com/sourcegraph/sourcegraph/internal/service"
10+
)
11+
12+
type svc struct{}
13+
14+
func (svc) Name() string { return "syntactic-code-intel-worker" }
15+
16+
func (svc) Configure() (env.Config, []debugserver.Endpoint) {
17+
var config Config
18+
config.Load()
19+
return &config, nil
20+
}
21+
22+
func (svc) Start(ctx context.Context, observationCtx *observation.Context, ready service.ReadyFunc, config env.Config) error {
23+
return Main(ctx, observationCtx, ready, *config.(*Config))
24+
}
25+
26+
var Service service.Service = svc{}
Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
package shared
2+
3+
import (
4+
"context"
5+
6+
"net/http"
7+
"time"
8+
9+
"github.com/sourcegraph/log"
10+
"github.com/sourcegraph/sourcegraph/internal/encryption/keyring"
11+
"github.com/sourcegraph/sourcegraph/internal/goroutine"
12+
"github.com/sourcegraph/sourcegraph/internal/httpserver"
13+
"github.com/sourcegraph/sourcegraph/internal/observation"
14+
"github.com/sourcegraph/sourcegraph/internal/service"
15+
"github.com/sourcegraph/sourcegraph/lib/errors"
16+
)
17+
18+
func Main(ctx context.Context, observationCtx *observation.Context, ready service.ReadyFunc, config Config) error {
19+
logger := observationCtx.Logger
20+
21+
if err := keyring.Init(ctx); err != nil {
22+
return errors.Wrap(err, "initializing keyring")
23+
}
24+
25+
logger.Info("Syntactic code intel worker running",
26+
log.String("path to scip-treesitter CLI", config.CliPath),
27+
log.String("API address", config.ListenAddress))
28+
29+
// Initialize health server
30+
server := httpserver.NewFromAddr(config.ListenAddress, &http.Server{
31+
ReadTimeout: 75 * time.Second,
32+
WriteTimeout: 10 * time.Minute,
33+
Handler: httpserver.NewHandler(nil),
34+
})
35+
36+
// Go!
37+
goroutine.MonitorBackgroundRoutines(ctx, server)
38+
39+
return nil
40+
}

dev/check/go-dbconn-import.sh

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ allowed_prefix=(
1818
# Transitively depends on updatecheck package which imports but does not use DB
1919
github.com/sourcegraph/sourcegraph/cmd/pings
2020
github.com/sourcegraph/sourcegraph/cmd/precise-code-intel-worker
21+
github.com/sourcegraph/sourcegraph/cmd/syntactic-code-intel-worker
2122
github.com/sourcegraph/sourcegraph/cmd/repo-updater
2223
# Transitively depends on zoekt package which imports but does not use DB
2324
github.com/sourcegraph/sourcegraph/cmd/searcher

dev/linters/dbconn/dbconn.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ var allowedToImport = []string{
3131
// Transitively depends on updatecheck package which imports but does not use DB
3232
"github.com/sourcegraph/sourcegraph/cmd/pings",
3333
"github.com/sourcegraph/sourcegraph/cmd/precise-code-intel-worker",
34+
"github.com/sourcegraph/sourcegraph/cmd/syntactic-code-intel-worker",
3435
"github.com/sourcegraph/sourcegraph/cmd/repo-updater",
3536
// Transitively depends on zoekt package which imports but does not use DB
3637
"github.com/sourcegraph/sourcegraph/cmd/searcher",

dev/scip-treesitter-dev

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
#!/usr/bin/env bash

# Wrapper for `scip-treesitter` similar to `dev/scip-ctags-dev`.
#
# Runs the locally installed scip-treesitter binary, forwarding all arguments.
#
# To use an alternate scip-treesitter binary for development, invoke
# `SCIP_TREESITTER_COMMAND=path/to/scip-treesitter sg start`.

# Repository root, resolved relative to this script's location.
root="$(dirname "${BASH_SOURCE[0]}")/.."
# The install script prints the path it installs the binary to.
TARGET="$("$root/dev/scip-treesitter-install.sh" which)"

if [ ! -f "${TARGET}" ]; then
  # Diagnostics go to stderr so they never mix with the tool's own output.
  echo "scip-treesitter is not installed, please run ./dev/scip-treesitter-install.sh" >&2
  echo "Alternatively you can use SCIP_TREESITTER_COMMAND=path/to/scip-treesitter to use your own binary." >&2
  exit 1
fi

# Quote the path so checkouts in directories containing spaces work, and
# replace this shell with the binary so its exit status is reported directly.
exec "${TARGET}" "$@"

dev/scip-treesitter-install.sh

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
#!/usr/bin/env bash

# Builds the scip-treesitter CLI from source and installs it into .bin/ at the
# repository root. When invoked with the single argument `which`, it only
# prints the install path and exits without building.

set -euf -o pipefail
pushd "$(dirname "${BASH_SOURCE[0]}")/.." >/dev/null
mkdir -p .bin

# TODO: add similar task to zoekt alpine

NAME="scip-treesitter"
TARGET="$PWD/.bin/${NAME}"

# `which` mode: report where the binary is (or would be) installed and stop.
# ${1:-} keeps this safe under `set -u` when no argument is given.
if [ "${1:-}" == "which" ]; then
  echo "$TARGET"
  exit 0
fi

function ctrl_c() {
  printf "[-] Installation cancelled.\n"
  exit 1
}

trap ctrl_c INT

function build_scip_treesitter {
  cd docker-images/syntax-highlighter/crates/scip-treesitter-cli
  # Build with the release profile: the copy below reads from target/release,
  # which a default (debug) build does not populate.
  cargo build --release --bin scip-treesitter --target-dir target
  cp ./target/release/scip-treesitter "$TARGET"
}

build_scip_treesitter

popd >/dev/null

docker-images/syntax-highlighter/Cargo.Bazel.lock

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

docker-images/syntax-highlighter/crates/scip-treesitter-cli/BUILD.bazel

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,8 @@ load("@crate_index//:defs.bzl", "aliases", "all_crate_deps")
22
load("@rules_rust//rust:defs.bzl", "rust_binary", "rust_library", "rust_test")
33

44
rust_binary(
5-
name = "scip-treesitter-cli",
6-
srcs = glob(["src/main.rs"]),
5+
name = "scip-treesitter",
6+
srcs = ["src/bin/scip-treesitter.rs"],
77
aliases = aliases(),
88
proc_macro_deps = all_crate_deps(
99
proc_macro = True,
@@ -71,20 +71,20 @@ rust_test(
7171
],
7272
allow_empty = False,
7373
),
74-
data = [":scip-treesitter-cli"] +
74+
data = [":scip-treesitter"] +
7575
glob(
7676
["tests/snapshots/**"],
7777
allow_empty = False,
7878
),
7979
env = {
8080
"INSTA_WORKSPACE_ROOT": ".",
8181
"RUST_BACKTRACE": "1",
82-
"SCIP_CLI_LOCATION": "$(rootpath :scip-treesitter-cli)",
82+
"SCIP_CLI_LOCATION": "$(rootpath :scip-treesitter)",
8383
},
8484
deps = all_crate_deps(
8585
normal = True,
8686
) + [
87-
":scip-treesitter-cli",
87+
":scip-treesitter",
8888
":scip-treesitter-cli-lib",
8989
] + WORKSPACE_DEPS,
9090
)

docker-images/syntax-highlighter/crates/scip-treesitter-cli/Cargo.toml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,8 @@ name = "scip-treesitter-cli"
33
version = "0.1.0"
44
edition = "2021"
55

6-
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
6+
[[bin]]
7+
name = "scip-treesitter"
78

89
[dependencies]
910
lazy_static = "1.0"

0 commit comments

Comments
 (0)