Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
51 changes: 50 additions & 1 deletion scripts/update-sitemap-loc.js
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,53 @@ const sitemapXMLs = [
],
];

/**
 * URL patterns to exclude from the sitemap.
 *
 * Why:
 *   - Versioned doc URLs (e.g. /docs/apisix/3.14/) duplicate the latest
 *     unversioned paths (e.g. /docs/apisix/) and bloat the sitemap.
 *     Only the unversioned (latest) URLs should be indexed.
 *   - /docs/.../next/ pages are for unreleased development docs.
 *   - /search pages are blocked by robots.txt — keeping them in
 *     the sitemap sends contradictory signals to crawlers.
 *   - /blog/tags/ and /blog/page/ are low-value aggregation/pagination
 *     pages, also blocked by robots.txt.
 *
 * None of the patterns is anchored at the start, so they match full URLs
 * and localized paths (e.g. /zh/docs/...) as well as bare paths.
 */
const excludePatterns = [
  // Versioned docs: /docs/<project>/<version>/ where version is two or more
  // dot-separated numbers, so both 3.14 and 1.8.0 style versions are caught.
  /\/docs\/[\w-]+\/\d+(?:\.\d+)+\//,
  // Development "next" docs
  /\/docs\/[\w-]+\/next\//,
  // Search pages (blocked by robots.txt); only matches at the end of the URL
  /\/search\/?$/,
  // Blog tag and pagination pages (blocked by robots.txt)
  /\/blog\/tags\//,
  /\/blog\/page\//,
];
Comment thread
Yilialinn marked this conversation as resolved.

/**
 * Checks a URL against the sitemap exclusion list.
 *
 * @param {string} url - URL (or path) to test.
 * @returns {boolean} true as soon as one entry of `excludePatterns` matches,
 *   false when none does.
 */
function shouldExclude(url) {
  for (const pattern of excludePatterns) {
    if (pattern.test(url)) {
      return true;
    }
  }
  return false;
}

/**
 * Filter out excluded URLs from a sitemap object and return removal count.
 *
 * The sitemap is modified in place: `sitemap.urlset.url` is replaced by an
 * array holding only the entries that were kept.
 *
 * @param {object} sitemap - Parsed sitemap with a `urlset` property. Its
 *   `url` member may be an array of entries, a single entry object, or
 *   absent (empty sitemap).
 * @returns {number} How many entries were removed.
 */
function filterSitemapUrls(sitemap) {
  const current = sitemap.urlset.url;

  // Normalize to an array. A missing `url` must become an empty list rather
  // than `[undefined]`, which would throw when reading `entry.loc` below.
  let urls;
  if (current == null) {
    urls = [];
  } else if (Array.isArray(current)) {
    urls = current;
  } else {
    urls = [current];
  }

  const before = urls.length;
  sitemap.urlset.url = urls.filter((entry) => {
    const loc = entry.loc && entry.loc._text;
    // Entries whose location cannot be read from `loc._text` are kept as-is:
    // they are never matched against the exclusion patterns, so this filter
    // only removes URLs it has positively identified as excluded.
    return !loc || !shouldExclude(loc);
  });
  return before - sitemap.urlset.url.length;
}

const tasks = new Listr([
{
title: `Check sitemap.xml files exist`,
Expand All @@ -27,7 +74,7 @@ const tasks = new Listr([
),
},
{
title: `Merge sitemap.xml files`,
title: `Merge and filter sitemap.xml files`,
task: () => new Listr(
sitemapXMLs.map((group) => ({
title: `Merge ${group[0]}`,
Expand All @@ -42,6 +89,8 @@ const tasks = new Listr([
...sitemaps[i].urlset.url,
];
}
const removed = filterSitemapUrls(res);
console.log(` Filtered out ${removed} URLs from ${group[0]}`);
Comment thread
Yilialinn marked this conversation as resolved.
return res;
})
.then((sitemap) => writeFile(group[0], js2xml(sitemap, { compact: true }, 'utf-8'))),
Expand Down
113 changes: 113 additions & 0 deletions website/static/robots.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,119 @@

User-agent: *

# Blog aggregation and pagination pages (low-value for indexing)
Disallow: /blog/tags/
Disallow: /zh/blog/tags/
Disallow: /blog/page/
Disallow: /zh/blog/page/

# Search pages
Disallow: /search
Disallow: /zh/search

# Versioned docs — only the unversioned (latest) paths should be indexed.
# e.g. /docs/apisix/ is the latest; /docs/apisix/3.14/ is a duplicate.
Disallow: /docs/apisix/3.10/
Disallow: /docs/apisix/3.11/
Disallow: /docs/apisix/3.12/
Disallow: /docs/apisix/3.13/
Disallow: /docs/apisix/3.14/
Disallow: /docs/apisix/3.15/
Disallow: /docs/apisix/next/
Disallow: /docs/ingress-controller/3.10/
Disallow: /docs/ingress-controller/3.11/
Disallow: /docs/ingress-controller/3.12/
Disallow: /docs/ingress-controller/3.13/
Disallow: /docs/ingress-controller/3.14/
Disallow: /docs/ingress-controller/3.15/
Disallow: /docs/ingress-controller/next/
Disallow: /docs/helm-chart/3.10/
Disallow: /docs/helm-chart/3.11/
Disallow: /docs/helm-chart/3.12/
Disallow: /docs/helm-chart/3.13/
Disallow: /docs/helm-chart/3.14/
Disallow: /docs/helm-chart/3.15/
Disallow: /docs/helm-chart/next/
Disallow: /docs/docker/3.10/
Disallow: /docs/docker/3.11/
Disallow: /docs/docker/3.12/
Disallow: /docs/docker/3.13/
Disallow: /docs/docker/3.14/
Disallow: /docs/docker/3.15/
Disallow: /docs/docker/next/
Disallow: /docs/java-plugin-runner/3.10/
Disallow: /docs/java-plugin-runner/3.11/
Disallow: /docs/java-plugin-runner/3.12/
Disallow: /docs/java-plugin-runner/3.13/
Disallow: /docs/java-plugin-runner/3.14/
Disallow: /docs/java-plugin-runner/3.15/
Comment thread
Yilialinn marked this conversation as resolved.
Disallow: /docs/java-plugin-runner/next/
Disallow: /docs/go-plugin-runner/3.10/
Disallow: /docs/go-plugin-runner/3.11/
Disallow: /docs/go-plugin-runner/3.12/
Disallow: /docs/go-plugin-runner/3.13/
Disallow: /docs/go-plugin-runner/3.14/
Disallow: /docs/go-plugin-runner/3.15/
Disallow: /docs/go-plugin-runner/next/
Disallow: /docs/python-plugin-runner/3.10/
Disallow: /docs/python-plugin-runner/3.11/
Disallow: /docs/python-plugin-runner/3.12/
Disallow: /docs/python-plugin-runner/3.13/
Disallow: /docs/python-plugin-runner/3.14/
Disallow: /docs/python-plugin-runner/3.15/
Disallow: /docs/python-plugin-runner/next/

# Chinese equivalents
Disallow: /zh/docs/apisix/3.10/
Disallow: /zh/docs/apisix/3.11/
Disallow: /zh/docs/apisix/3.12/
Disallow: /zh/docs/apisix/3.13/
Disallow: /zh/docs/apisix/3.14/
Disallow: /zh/docs/apisix/3.15/
Disallow: /zh/docs/apisix/next/
Disallow: /zh/docs/ingress-controller/3.10/
Disallow: /zh/docs/ingress-controller/3.11/
Disallow: /zh/docs/ingress-controller/3.12/
Disallow: /zh/docs/ingress-controller/3.13/
Disallow: /zh/docs/ingress-controller/3.14/
Disallow: /zh/docs/ingress-controller/3.15/
Disallow: /zh/docs/ingress-controller/next/
Disallow: /zh/docs/helm-chart/3.10/
Disallow: /zh/docs/helm-chart/3.11/
Disallow: /zh/docs/helm-chart/3.12/
Disallow: /zh/docs/helm-chart/3.13/
Disallow: /zh/docs/helm-chart/3.14/
Disallow: /zh/docs/helm-chart/3.15/
Disallow: /zh/docs/helm-chart/next/
Disallow: /zh/docs/docker/3.10/
Disallow: /zh/docs/docker/3.11/
Disallow: /zh/docs/docker/3.12/
Disallow: /zh/docs/docker/3.13/
Disallow: /zh/docs/docker/3.14/
Disallow: /zh/docs/docker/3.15/
Disallow: /zh/docs/docker/next/
Disallow: /zh/docs/java-plugin-runner/3.10/
Disallow: /zh/docs/java-plugin-runner/3.11/
Disallow: /zh/docs/java-plugin-runner/3.12/
Disallow: /zh/docs/java-plugin-runner/3.13/
Disallow: /zh/docs/java-plugin-runner/3.14/
Disallow: /zh/docs/java-plugin-runner/3.15/
Disallow: /zh/docs/java-plugin-runner/next/
Disallow: /zh/docs/go-plugin-runner/3.10/
Disallow: /zh/docs/go-plugin-runner/3.11/
Disallow: /zh/docs/go-plugin-runner/3.12/
Disallow: /zh/docs/go-plugin-runner/3.13/
Disallow: /zh/docs/go-plugin-runner/3.14/
Disallow: /zh/docs/go-plugin-runner/3.15/
Disallow: /zh/docs/go-plugin-runner/next/
Disallow: /zh/docs/python-plugin-runner/3.10/
Disallow: /zh/docs/python-plugin-runner/3.11/
Disallow: /zh/docs/python-plugin-runner/3.12/
Disallow: /zh/docs/python-plugin-runner/3.13/
Disallow: /zh/docs/python-plugin-runner/3.14/
Disallow: /zh/docs/python-plugin-runner/3.15/
Disallow: /zh/docs/python-plugin-runner/next/

Sitemap: https://apisix.apache.org/sitemap.xml

Sitemap: https://apisix.apache.org/zh/sitemap.xml
Loading