Skip to content

Commit 0f3cdb7

Browse files
authored
Add repo index validation (#339)
* add repo index validation * add entry to changelog
1 parent 27fb5ad commit 0f3cdb7

File tree

4 files changed

+62
-4
lines changed

4 files changed

+62
-4
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
1111
- Added separate page for signup. [#331](https://github.com/sourcebot-dev/sourcebot/pull/331)
1212
- Fix repo images in authed instance case and add manifest json. [#332](https://github.com/sourcebot-dev/sourcebot/pull/332)
1313
- Added encryption logic for license keys. [#335](https://github.com/sourcebot-dev/sourcebot/pull/335)
14+
- Added repo shard validation on startup. [#339](https://github.com/sourcebot-dev/sourcebot/pull/339)
1415
- Added support for a file explorer when browsing files. [#336](https://github.com/sourcebot-dev/sourcebot/pull/336)
1516

1617
## [4.1.1] - 2025-06-03

packages/backend/src/main.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,5 +68,6 @@ export const main = async (db: PrismaClient, context: AppContext) => {
6868
connectionManager.registerPollingCallback();
6969

7070
const repoManager = new RepoManager(db, settings, redis, promClient, context);
71+
await repoManager.validateIndexedReposHaveShards();
7172
await repoManager.blockingPollLoop();
7273
}

packages/backend/src/repoManager.ts

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ import { PromClient } from './promClient.js';
1212
import * as Sentry from "@sentry/node";
1313

1414
// Contract that the RepoManager class declares it implements.
interface IRepoManager {
15+
// Checks that repos marked INDEXED in the DB have shard files on disk;
// repos without any are reset so they get indexed again.
validateIndexedReposHaveShards: () => Promise<void>;
1516
// NOTE(review): main.ts awaits this call, so the implementation presumably
// returns a promise — confirm whether this should be `() => Promise<void>`.
blockingPollLoop: () => void;
1617
dispose: () => void;
1718
}
@@ -526,6 +527,61 @@ export class RepoManager implements IRepoManager {
526527
}
527528
}
528529

530+
///////////////////////////
531+
// Repo index validation
532+
///////////////////////////
533+
534+
/**
 * Startup consistency check between the database and the on-disk index.
 *
 * Loads every repo whose `repoIndexingStatus` is INDEXED and verifies that at
 * least one file in `this.ctx.indexPath` starts with that repo's shard prefix.
 * Repos with no matching file are set back to `RepoIndexingStatus.NEW` in a
 * single `updateMany` call so they are picked up for indexing again.
 *
 * If the index directory cannot be read, the error is logged once and the
 * check is abandoned without modifying any repo.
 *
 * @returns A promise that resolves once validation (and any status reset) is done.
 */
public async validateIndexedReposHaveShards(): Promise<void> {
    logger.info('Validating indexed repos have shards...');

    const indexedRepos = await this.db.repo.findMany({
        where: {
            repoIndexingStatus: RepoIndexingStatus.INDEXED
        }
    });
    logger.info(`Found ${indexedRepos.length} repos in the DB marked as INDEXED`);

    if (indexedRepos.length === 0) {
        return;
    }

    // The directory listing is the same for every repo, so read it once up
    // front instead of once per repo. This also means an unreadable index
    // directory is reported a single time rather than once for each repo.
    let indexFiles: string[];
    try {
        indexFiles = readdirSync(this.ctx.indexPath);
    } catch (error) {
        logger.error(`Failed to read index directory ${this.ctx.indexPath}: ${error}`);
        return;
    }

    const reposToReindex: number[] = [];

    for (const repo of indexedRepos) {
        const shardPrefix = getShardPrefix(repo.orgId, repo.id);

        // TODO: this doesn't take into account if a repo has multiple shards and only some of them are missing. To support that, this logic
        // would need to know how many total shards are expected for this repo
        const hasShards = indexFiles.some(file => file.startsWith(shardPrefix));

        if (!hasShards) {
            logger.info(`Repo ${repo.displayName} (id: ${repo.id}) is marked as INDEXED but has no shards on disk. Marking for reindexing.`);
            reposToReindex.push(repo.id);
        }
    }

    if (reposToReindex.length > 0) {
        await this.db.repo.updateMany({
            where: {
                id: { in: reposToReindex }
            },
            data: {
                repoIndexingStatus: RepoIndexingStatus.NEW
            }
        });
        logger.info(`Marked ${reposToReindex.length} repos for reindexing due to missing shards`);
    }

    logger.info('Done validating indexed repos have shards');
}
584+
529585
private async fetchAndScheduleRepoTimeouts() {
530586
const repos = await this.db.repo.findMany({
531587
where: {

packages/web/src/initialize.ts

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -214,10 +214,8 @@ const initSingleTenancy = async () => {
214214
// Load any connections defined declaratively in the config file.
215215
const configPath = env.CONFIG_PATH;
216216
if (configPath) {
217-
await syncDeclarativeConfig(configPath);
218-
219217
// If we're given a config file, mark the org as onboarded so we don't go through
220-
// the UI conneciton onboarding flow
218+
// the UI connection onboarding flow
221219
await prisma.org.update({
222220
where: {
223221
id: SINGLE_TENANT_ORG_ID,
@@ -226,7 +224,9 @@ const initSingleTenancy = async () => {
226224
isOnboarded: true,
227225
}
228226
});
229-
227+
228+
await syncDeclarativeConfig(configPath);
229+
230230
// watch for changes assuming it is a local file
231231
if (!isRemotePath(configPath)) {
232232
watch(configPath, () => {

0 commit comments

Comments
 (0)