Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
29 commits
Select commit Hold shift + click to select a range
ecee3fc
feat(sitemap): add chunking strategy for sitemaps
Slackluky Oct 1, 2025
8bd654e
feat(sitemap): add chunks option to sitemap config
Slackluky Oct 1, 2025
bba98f6
feat(sitemap): add sitemap chunk writing functionality
Slackluky Oct 1, 2025
63dfacd
fix(sitemap): fix empty callback in writeSitemap
Slackluky Oct 1, 2025
bcb9f59
feat(sitemap): add test fixture for sitemap chunking
Slackluky Oct 1, 2025
4d40768
test(sitemap): add test for sitemap chunking with files
Slackluky Oct 1, 2025
e2d5198
feat(sitemap): add changeset for sitemap chunking
Slackluky Oct 1, 2025
9687b55
Merge pull request #1 from Slackluky/feature/sitemap-chunking-strategy
Slackluky Oct 1, 2025
74fc698
Merge branch 'withastro:main' into main
Slackluky Oct 1, 2025
a4edd79
build: update dependencies and add astro
Slackluky Oct 1, 2025
4ce7284
chore: remove unused astro dependency
Slackluky Oct 1, 2025
216983b
chore: remove unused entries from lockfile
Slackluky Oct 1, 2025
f5486f0
Merge remote-tracking branch 'origin/main' into feature/sitemap-chunk…
Slackluky Oct 1, 2025
0d7eb59
Merge pull request #2 from Slackluky/feature/sitemap-chunking-strategy
Slackluky Oct 1, 2025
30cde1b
refactor(sitemap): improve import ordering and formatting
Slackluky Oct 1, 2025
18d8c70
refactor(sitemap): improve import ordering
Slackluky Oct 1, 2025
7d6d225
refactor(sitemap): improve import ordering
Slackluky Oct 1, 2025
8b55a0b
refactor(sitemap): improve import ordering
Slackluky Oct 1, 2025
58e90f2
refactor(sitemap): improve import ordering
Slackluky Oct 2, 2025
e7816a2
refactor(sitemap): improve chunk file test readability
Slackluky Oct 2, 2025
a5c0126
test(sitemap): fix flaky chunk file tests
Slackluky Oct 2, 2025
b73e105
refactor(sitemap): improve import ordering
Slackluky Oct 2, 2025
b688960
Merge branch 'main' of https://github.com/withastro/astro
Slackluky Oct 2, 2025
961c43f
Merge branch 'main' into main
Slackluky Oct 5, 2025
c6ed9f7
Update .changeset/floppy-times-grab.md
Slackluky Oct 13, 2025
30f5ebe
chore(sitemap): update changeset to minor
Slackluky Oct 13, 2025
4d4a5e3
feat(sitemap): add chunking support for sitemap generation
Slackluky Oct 13, 2025
6cdd1ed
Merge remote-tracking branch 'upstream/main'
Slackluky Oct 13, 2025
c510a4c
Merge branch 'main' into main
Slackluky Oct 13, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 36 additions & 0 deletions .changeset/floppy-times-grab.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
---
'@astrojs/sitemap': minor
---

Adds the ability to split sitemap generation into chunks based on customizable logic. This allows for better management of large sitemaps and improved performance. The new `chunks` option in the sitemap configuration allows users to define functions that categorize sitemap items into different chunks. Each chunk is then written to a separate sitemap file.

```
integrations: [
sitemap({
serialize(item) {
return item
},
chunks: { // this property will be treated last on the configuration
'blog': (item) => { // will produce a sitemap file with `blog` name (sitemap-blog-0.xml)
if (/blog/.test(item.url)) { // filter path that will be included in this specific sitemap file
item.changefreq = 'weekly';
item.lastmod = new Date();
item.priority = 0.9; // define specific properties for this filtered path
return item;
}
},
'glossary': (item) => {
if (/glossary/.test(item.url)) {
item.changefreq = 'weekly';
item.lastmod = new Date();
item.priority = 0.7;
return item;
}
}

// the rest of the paths will be stored in `sitemap-pages-0.xml`
},
}),
],

```
6 changes: 3 additions & 3 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -83,10 +83,10 @@
]
},
"onlyBuiltDependencies": [
"esbuild",
"workerd",
"@biomejs/biome",
"sharp"
"esbuild",
"sharp",
"workerd"
]
}
}
108 changes: 76 additions & 32 deletions packages/integrations/sitemap/src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,10 @@ import { ZodError } from 'zod';
import { generateSitemap } from './generate-sitemap.js';
import { validateOptions } from './validate-options.js';
import { writeSitemap } from './write-sitemap.js';
import { writeSitemapChunk } from './write-sitemap-chunk.js';

export { EnumChangefreq as ChangeFreqEnum } from 'sitemap';

export type ChangeFreq = `${EnumChangefreq}`;
export type SitemapItem = Pick<
SitemapItemLoose,
Expand All @@ -18,36 +20,35 @@ export type LinkItem = LinkItemBase;

export type SitemapOptions =
| {
filenameBase?: string;
filter?(page: string): boolean;
customSitemaps?: string[];
customPages?: string[];

i18n?: {
defaultLocale: string;
locales: Record<string, string>;
};
// number of entries per sitemap file
entryLimit?: number;

// sitemap specific
changefreq?: ChangeFreq;
lastmod?: Date;
priority?: number;

// called for each sitemap item just before to save them on disk, sync or async
serialize?(item: SitemapItem): SitemapItem | Promise<SitemapItem | undefined> | undefined;

xslURL?: string;

// namespace configuration
namespaces?: {
news?: boolean;
xhtml?: boolean;
image?: boolean;
video?: boolean;
};
}
filenameBase?: string;
filter?(page: string): boolean;
customSitemaps?: string[];
customPages?: string[];

i18n?: {
defaultLocale: string;
locales: Record<string, string>;
};
// number of entries per sitemap file
entryLimit?: number;
// sitemap specific
changefreq?: ChangeFreq;
lastmod?: Date;
priority?: number;

// called for each sitemap item just before to save them on disk, sync or async
serialize?(item: SitemapItem): SitemapItem | Promise<SitemapItem | undefined> | undefined;

xslURL?: string;
chunks?: Record<string, (item: SitemapItem) => SitemapItem | Promise<SitemapItem | undefined> | undefined>
// namespace configuration
namespaces?: {
news?: boolean;
xhtml?: boolean;
image?: boolean;
video?: boolean;
};
}
| undefined;

function formatConfigErrorMessage(err: ZodError) {
Expand Down Expand Up @@ -97,8 +98,7 @@ const createPlugin = (options?: SitemapOptions): AstroIntegration => {

const opts = validateOptions(config.site, options);

const { filenameBase, filter, customPages, customSitemaps, serialize, entryLimit } = opts;

const { filenameBase, filter, customPages, customSitemaps, serialize, entryLimit, chunks } = opts;
const outFile = `${filenameBase}-index.xml`;
const finalSiteUrl = new URL(config.base, config.site);
const shouldIgnoreStatus = isStatusCodePage(Object.keys(opts.i18n?.locales ?? {}));
Expand Down Expand Up @@ -174,9 +174,53 @@ const createPlugin = (options?: SitemapOptions): AstroIntegration => {
return;
}
}

const destDir = fileURLToPath(dir);
const lastmod = opts.lastmod?.toISOString();
const xslURL = opts.xslURL ? new URL(opts.xslURL, finalSiteUrl).href : undefined;

if (chunks) {
try {
let groupedUrlCollection: SitemapItem['url'][] = []
const chunksItem: Record<string, SitemapItem[]> = {};
for (const [key, cb] of Object.entries(chunks)) {
// Create a new, separate collection for each key
const collection: SitemapItem[] = [];

for (const item of urlData) {
// Await the asynchronous operation
const collect = await Promise.resolve(cb(item));
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why does this need the Promise.resolve()?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The Promise.resolve() call is indeed necessary in this case. The callback function cb provided in the chunks option can return either a direct SitemapItem object or a Promise that resolves to a SitemapItem object. Promise.resolve() ensures that the result is always a Promise, allowing for consistent handling of both synchronous and asynchronous results.

if (collect) {
collection.push(collect);
}
}

// Assign the specific collection to its key
chunksItem[key] = collection;
groupedUrlCollection = [...groupedUrlCollection, ...collection.map((coll) => coll.url)]
}
chunksItem['pages'] = urlData.filter((urlDataItem) => !(groupedUrlCollection.includes(urlDataItem.url)))
// Process each chunk here
await writeSitemapChunk({
filenameBase,
hostname: finalSiteUrl.href,
sitemapHostname: finalSiteUrl.href,
sourceData: chunksItem,
destinationDir: destDir,
publicBasePath: config.base,
customSitemaps,
limit: entryLimit,
xslURL,
lastmod,
namespaces: opts.namespaces,
}, config);
logger.info(`\`${outFile}\` created at \`${path.relative(process.cwd(), destDir)}\``);
return
} catch (err) {
logger.error(`Error chunking sitemaps\n${(err as any).toString()}`);
return;
}
}
await writeSitemap(
{
filenameBase: filenameBase,
Expand Down
3 changes: 2 additions & 1 deletion packages/integrations/sitemap/src/schema.ts
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ export const SitemapOptionsSchema = z
})
.optional()
.default(SITEMAP_CONFIG_DEFAULTS.namespaces),
})
chunks: z.record(z.function().args(z.any()).returns(z.any())).optional(),
})
.strict()
.default(SITEMAP_CONFIG_DEFAULTS);
128 changes: 128 additions & 0 deletions packages/integrations/sitemap/src/write-sitemap-chunk.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,128 @@
import { createWriteStream, type WriteStream } from 'node:fs';
import { mkdir } from 'node:fs/promises';
import { normalize, resolve } from 'node:path';
import { pipeline, Readable } from 'node:stream';
import { promisify } from 'node:util';
import type { AstroConfig } from 'astro';
import { SitemapAndIndexStream, SitemapIndexStream, SitemapStream } from 'sitemap';
import replace from 'stream-replace-string';
import type { SitemapItem } from './index.js';


/** Options accepted by `writeSitemapChunk`. */
type WriteSitemapChunkConfig = {
/** Prefix of every generated file: `<filenameBase>-<chunk>-<n>.xml` and `<filenameBase>-index.xml`. */
filenameBase: string;
/** Passed as `hostname` to each `SitemapStream`. */
hostname: string;
/** Base used to build the URL of each sitemap file listed in the index; defaults to `hostname`. */
sitemapHostname?: string;
/** Items to write, keyed by chunk name; each key yields its own numbered sitemap file(s). */
sourceData: Record<string, SitemapItem[]>;
/** Directory the files are written to; created recursively if missing. */
destinationDir: string;
/** Extra sitemap URLs written to the index before the generated ones. */
customSitemaps?: string[];
/** Path prepended to each sitemap file name when building its public URL; defaults to `'./'`. */
publicBasePath?: string;
/** Passed as `limit` to `SitemapAndIndexStream`; defaults to 50000. */
limit?: number;
/** Passed as `xslUrl` to the sitemap and index streams. */
xslURL?: string;
/** Attached as `lastmod` to every entry written to the index. */
lastmod?: string;
/** XML namespace toggles; a namespace is emitted unless its flag is explicitly `false`. */
namespaces?: {
news?: boolean;
xhtml?: boolean;
image?: boolean;
video?: boolean;
};
};

// adapted from sitemap.js/sitemap-simple
/**
 * Writes each chunk in `sourceData` to its own numbered sitemap file(s)
 * (`<filenameBase>-<chunk>-<n>.xml`) inside `destinationDir`, then writes a
 * single `<filenameBase>-index.xml` listing the custom sitemaps followed by
 * every generated file.
 *
 * `astroConfig` is read for `trailingSlash` and `build.format`; when either
 * is `'never'` / `'file'`, the trailing slash on the root `<loc>` entry is
 * stripped from the written XML.
 *
 * @returns A promise that settles when the index file pipeline completes.
 */
export async function writeSitemapChunk(
	{
		filenameBase,
		hostname,
		sitemapHostname = hostname,
		sourceData,
		destinationDir,
		limit = 50000,
		customSitemaps = [],
		publicBasePath = './',
		xslURL: xslUrl,
		lastmod,
		namespaces = { news: true, xhtml: true, image: true, video: true },
	}: WriteSitemapChunkConfig,
	astroConfig: AstroConfig,
) {
	await mkdir(destinationDir, { recursive: true });

	const runPipeline = promisify(pipeline);

	// Public paths are built by string concatenation, so the base must end with a slash.
	const basePath = publicBasePath.endsWith('/') ? publicBasePath : `${publicBasePath}/`;

	// Index entries for every sitemap file produced across all chunks.
	const indexEntries: Array<{ url: string; lastmod?: string }> = [];

	for (const [chunkName, items] of Object.entries(sourceData)) {
		const chunkStream = new SitemapAndIndexStream({
			limit,
			xslUrl,
			// Called with the running file number `i` whenever a new sitemap file is needed.
			getSitemapStream: (i) => {
				const sitemapStream = new SitemapStream({
					hostname,
					xslUrl,
					// A namespace stays enabled unless it was explicitly switched off.
					xmlns: {
						news: namespaces?.news !== false,
						xhtml: namespaces?.xhtml !== false,
						image: namespaces?.image !== false,
						video: namespaces?.video !== false,
					},
				});

				const relativeFile = `./${filenameBase}-${chunkName}-${i}.xml`;
				const writePath = resolve(destinationDir, relativeFile);
				const publicPath = normalize(basePath + relativeFile);

				const stripRootSlash =
					astroConfig.trailingSlash === 'never' || astroConfig.build.format === 'file';

				let fileStream: WriteStream;
				if (!stripRootSlash) {
					fileStream = sitemapStream.pipe(createWriteStream(writePath));
				} else {
					// workaround for trailing slash issue in sitemap.js
					const host = hostname.endsWith('/') ? hostname.slice(0, -1) : hostname;
					fileStream = sitemapStream
						.pipe(replace(`<loc>${host}/</loc>`, `<loc>${host}</loc>`))
						.pipe(createWriteStream(writePath));
				}

				const url = new URL(publicPath, sitemapHostname).toString();

				// Remember this file so it can be listed in the combined index later.
				indexEntries.push({ url, lastmod });

				return [{ url, lastmod }, sitemapStream, fileStream];
			},
		});

		// Feed this chunk's items through its own sitemap stream and wait for completion.
		await runPipeline(Readable.from(items), chunkStream);
	}

	// Build one index for all chunks: custom sitemaps first, generated files after.
	const indexStream = new SitemapIndexStream({ xslUrl });
	const indexPath = resolve(destinationDir, `./${filenameBase}-index.xml`);
	const indexWriteStream = createWriteStream(indexPath);

	const customEntries = customSitemaps.map((url) => ({ url, lastmod }));
	for (const entry of [...customEntries, ...indexEntries]) {
		indexStream.write(entry);
	}

	indexStream.end();

	return await runPipeline(indexStream, indexWriteStream);
}
2 changes: 1 addition & 1 deletion packages/integrations/sitemap/src/write-sitemap.ts
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ export async function writeSitemap(
sitemapAndIndexStream,
{ url, lastmod },
'utf8',
() => {},
() => { },
);
}
return promisify(pipeline)(src, sitemapAndIndexStream, createWriteStream(indexPath));
Expand Down
Loading
Loading