diff --git a/backend/BigSet_Data_Collection_Agent/src/agents/extract.ts b/backend/BigSet_Data_Collection_Agent/src/agents/extract.ts
index 2055102..bab859d 100644
--- a/backend/BigSet_Data_Collection_Agent/src/agents/extract.ts
+++ b/backend/BigSet_Data_Collection_Agent/src/agents/extract.ts
@@ -13,6 +13,7 @@ import {
   type ExtractedRecord,
   type FetchedPage,
 } from "../models/schemas.js";
+import { deriveRecordSourceUrls } from "../records/source-urls.js";
 
 /**
  * Extraction is always one source per LLM call in process-pages.ts:
@@ -169,19 +170,6 @@ function provenanceUrlColumns(spec: DatasetSpec): ColumnDef[] {
   return spec.columns.filter(isProvenanceUrlColumn);
 }
 
-function collectSourceUrls(
-  pageUrl: string,
-  evidence: Array<{ url?: string }>,
-): string[] {
-  const urls = new Set<string>([pageUrl]);
-  for (const item of evidence) {
-    if (item.url?.startsWith("http")) {
-      urls.add(item.url);
-    }
-  }
-  return [...urls];
-}
-
 /** Attach evidence URLs and source_urls; keep LLM row and provenance values. */
 export function finalizeExtractedRecord(
   record: LlmExtractionRecord,
@@ -203,7 +191,12 @@ export function finalizeExtractedRecord(
     }
   }
 
-  const source_urls = collectSourceUrls(pageUrl, evidence);
+  const source_urls = deriveRecordSourceUrls({
+    spec,
+    row,
+    evidence,
+    fallbackUrls: [pageUrl],
+  });
 
   return extractedRecordSchema.parse({
     row,
diff --git a/backend/BigSet_Data_Collection_Agent/src/agents/source-policy.ts b/backend/BigSet_Data_Collection_Agent/src/agents/source-policy.ts
index 703109f..1ea3b54 100644
--- a/backend/BigSet_Data_Collection_Agent/src/agents/source-policy.ts
+++ b/backend/BigSet_Data_Collection_Agent/src/agents/source-policy.ts
@@ -1,4 +1,10 @@
-import type { DatasetSpec, SourceCandidate, SourceTriageResult } from "../models/schemas.js";
+import type {
+  DatasetSpec,
+  ExtractedRecord,
+  SourceCandidate,
+  SourceTriageResult,
+} from "../models/schemas.js";
+import { scoreDocsUrlForOfficialSource } from "../records/source-urls.js";
 import { getDomain } from "../utils/url.js";
 
 export interface PromptSourceEntity {
@@ -121,6 +127,32 @@ function searchPhrasesForPrompt(prompt: string): string[] {
   return uniqueStrings(phrases);
 }
 
+function wantsDocsSource(policy: PromptSourcePolicy): boolean {
+  // True once any search phrase names docs/documentation/MCP as a whole word.
+  const docsWord = /\b(?:docs|documentation|mcp|model context protocol)\b/i;
+  return policy.searchPhrases.some((phrase) => docsWord.test(phrase));
+}
+
+function isWeakDocsSurface(url: string): boolean { // blog/news/course/directory style pages are not reference docs
+  return url.search(/\b(?:blog|news|course|academy|directory|skilljar)\b/i) !== -1;
+}
+
+function preferredDocsHost(entity: PromptSourceEntity): string {
+  const token = entity.primaryToken.toLowerCase(); // table below is lower-case
+  return token === "openai" ? "developers.openai.com"
+    : token === "cloudflare" ? "developers.cloudflare.com"
+    : token === "anthropic" ? "platform.claude.com"
+    : `docs.${token}.com`; // unknown vendor: guess the conventional docs host
+}
+
+function officialDomainAliasesForEntity(entity: PromptSourceEntity): string[] {
+  // Hosts an entity owns even though the host name may omit the entity token
+  // (e.g. platform.claude.com for Anthropic); no aliases for anyone else.
+  const isAnthropic = entity.primaryToken.toLowerCase() === "anthropic";
+  const aliases = ["docs.anthropic.com", "platform.claude.com"];
+  return isAnthropic ? aliases : [];
+}
+
 export function derivePromptSourcePolicy(prompt: string): PromptSourcePolicy {
   const taskText = taskTextFromPrompt(prompt);
   const entities = extractExplicitEntities(taskText);
@@ -161,11 +193,21 @@ export function promptSourceSearchQueries(policy: PromptSourcePolicy): string[]
   const phrases = policy.searchPhrases.length
     ? policy.searchPhrases
     : ["official source"];
+  const primaryPhrase = phrases[0] ?? "official source";
+  const siteQualifiedDocsQueries = wantsDocsSource(policy)
+    ? policy.entities.map(
+        (entity) =>
+          `${entity.name} ${primaryPhrase} site:${preferredDocsHost(entity)}`,
+      )
+    : [];
 
   return uniqueStrings(
-    policy.entities.flatMap((entity) =>
-      phrases.map((phrase) => `${entity.name} ${phrase}`),
-    ),
+    [
+      ...siteQualifiedDocsQueries,
+      ...policy.entities.flatMap((entity) =>
+        phrases.map((phrase) => `${entity.name} ${phrase}`),
+      ),
+    ],
   );
 }
 
@@ -199,7 +241,32 @@ export function urlMatchesPromptSourcePolicy(
   if (GENERIC_HOSTED_DOMAIN.test(domain)) {
     return false;
   }
-  return policy.entities.some((entity) => domain.includes(entity.primaryToken));
+  return policy.entities.some(
+    (entity) => urlMatchesEntitySourcePolicy(url, entity, policy),
+  );
+}
+
+function urlMatchesEntitySourcePolicy(
+  url: string,
+  entity: PromptSourceEntity,
+  policy: PromptSourcePolicy,
+): boolean {
+  // Decides whether `url` is an acceptable official source for `entity`.
+  const domain = getDomain(url).toLowerCase();
+  // Domains matching GENERIC_HOSTED_DOMAIN never count as entity-owned.
+  if (GENERIC_HOSTED_DOMAIN.test(domain)) return false;
+  const entityOwnedDomain =
+    domain.includes(entity.primaryToken) ||
+    // Aliases must match on a label boundary: a bare endsWith() would also
+    // accept look-alikes such as "notplatform.claude.com".
+    officialDomainAliasesForEntity(entity).some(
+      (alias) => domain === alias || domain.endsWith(`.${alias}`),
+    );
+  if (!entityOwnedDomain) return false;
+  // Docs-seeking prompts reject blog/course/directory style URLs even when
+  // the entity owns the host.
+  if (wantsDocsSource(policy) && isWeakDocsSurface(url)) return false;
+  return true;
 }
 
 export function sourceCandidatePolicyBoost(
@@ -224,9 +291,20 @@ export function sourceCandidatePolicyBoost(
     /\b(official|pricing|docs|documentation|investor relations|earnings|blog)\b/.test(
       searchableText,
     );
+  const docsSurface =
+    wantsDocsSource(policy) &&
+    /(?:^|\/\/)(?:docs|developers)\.|\/(?:docs|documentation|guides|api\/docs|agents)(?:\/|$)/.test(
+      searchableText,
+    );
+  const weakDocsSurface =
+    wantsDocsSource(policy) &&
+    /\b(?:blog|news|course|academy|directory|skilljar)\b/.test(searchableText);
 
-  if (matchedDomain && matchedEntity && officialLanguage) return 5;
-  if (matchedDomain && matchedEntity) return 4;
+  if (matchedDomain && matchedEntity && docsSurface) return 7;
+  if (matchedDomain && matchedEntity && officialLanguage) {
+    return weakDocsSurface ? 2 : 5;
+  }
+  if (matchedDomain && matchedEntity) return weakDocsSurface ? 1 : 4;
   if (matchedDomain) return 3;
   if (matchedEntity && officialLanguage) return 1;
   return -2;
@@ -264,3 +342,79 @@ export function applyPromptSourcePolicyToTriageResult(
       "Search/fetch the named entity's official domain instead of extracting this third-party page.",
   };
 }
+
+/**
+ * Post-extraction gate for prompts that demand official sources.
+ *
+ * Passes every record when the policy does not require official sources, and
+ * every record that cannot be tied to a prompt entity. A record that is tied
+ * to an entity passes only if at least one of its URLs satisfies that
+ * entity's source policy; a record with no URLs at all is rejected.
+ */
+export function recordMatchesPromptSourcePolicy(
+  record: ExtractedRecord,
+  spec: DatasetSpec,
+  policy: PromptSourcePolicy,
+): boolean {
+  if (!policy.requiresOfficialSource) return true;
+  const entity = matchingPromptEntityForRecord(record, spec, policy);
+  if (!entity) return true;
+  // `some` on an empty list is false, so URL-less records are rejected here.
+  return urlsForRecordSourcePolicy(record, spec).some((candidate) =>
+    urlMatchesEntitySourcePolicy(candidate, entity, policy),
+  );
+}
+
+function matchingPromptEntityForRecord(
+  record: ExtractedRecord,
+  spec: DatasetSpec,
+  policy: PromptSourcePolicy,
+): PromptSourceEntity | null {
+  // Identity column: first dedupe key, else the first name-like column.
+  const nameLike = /(name|title|company|organization|entity)/i;
+  const identityColumn =
+    spec.dedupe_keys[0] ??
+    spec.columns.find((column) => nameLike.test(column.name))?.name;
+  const identityCell = identityColumn ? record.row[identityColumn] ?? "" : "";
+  const identityText = String(identityCell).toLowerCase();
+  const wholeRowText = Object.values(record.row).join(" ").toLowerCase();
+
+  // First prompt entity wins: its name may appear anywhere in the row, while
+  // its bare token only counts inside the identity column.
+  for (const candidate of policy.entities) {
+    const entityName = candidate.name.toLowerCase();
+    const mentioned =
+      identityText.includes(entityName) ||
+      identityText.includes(candidate.primaryToken) ||
+      wholeRowText.includes(entityName);
+    if (mentioned) return candidate;
+  }
+  return null;
+}
+
+function urlsForRecordSourcePolicy(
+  record: ExtractedRecord,
+  spec: DatasetSpec,
+): string[] {
+  // Candidates: the record's source_urls first, then URL-like row cells.
+  const urlCells = spec.columns
+    .filter((column) => isUrlLikeColumnName(column.name))
+    .map((column) => record.row[column.name]);
+  const unique = new Set<string>();
+  for (const candidate of [...record.source_urls, ...urlCells]) {
+    if (isHttpUrl(candidate)) unique.add(candidate.trim());
+  }
+  // Strongest docs URLs first (descending score).
+  const ranked = Array.from(unique);
+  ranked.sort((left, right) => scoreDocsUrlForOfficialSource(right) - scoreDocsUrlForOfficialSource(left));
+  return ranked;
+}
+
+function isHttpUrl(value: unknown): value is string { // guard: http(s):// string, any case, padding ignored
+  return typeof value === "string" && value.trim().search(/^https?:\/\//i) === 0;
+}
+
+function isUrlLikeColumnName(name: string): boolean {
+  const lower = name.toLowerCase(); // case-insensitive; any name containing "url" qualifies
+  if (lower === "url" || lower.endsWith("_url")) return true;
+  return lower.includes("url");
+}
diff --git a/backend/BigSet_Data_Collection_Agent/src/merge/records.ts b/backend/BigSet_Data_Collection_Agent/src/merge/records.ts
index 995af2d..5773ce3 100644
--- a/backend/BigSet_Data_Collection_Agent/src/merge/records.ts
+++ b/backend/BigSet_Data_Collection_Agent/src/merge/records.ts
@@ -1,10 +1,30 @@
 import type { DatasetSpec, ExtractedRecord } from "../models/schemas.js";
+import {
+  deriveRecordSourceUrls,
+  scoreDocsUrlForOfficialSource,
+} from "../records/source-urls.js";
 
 function normalizeValue(value: unknown): string {
   if (value === null || value === undefined) return "";
   return String(value).trim().toLowerCase();
 }
 
+function isEmpty(value: unknown): boolean { // 0 and false are real values; only null/undefined/"" are empty
+  return value == null || value === "";
+}
+
+function normalizeComparableValue(value: unknown): string {
+  // Loose comparison key: drop URL scheme/www, trailing "/", "#", "?" and
+  // collapse whitespace runs on top of normalizeValue's output.
+  const withoutScheme = normalizeValue(value).replace(/https?:\/\/(?:www\.)?/g, "");
+  return withoutScheme.replace(/[/#?]+$/g, "").replace(/\s+/g, " ");
+}
+
+function valuesMatch(a: unknown, b: unknown): boolean {
+  // An empty side never matches: there is no value for evidence to support.
+  const bothPresent = !isEmpty(a) && !isEmpty(b);
+  return bothPresent && normalizeComparableValue(a) === normalizeComparableValue(b);
+}
+
 /** Normalize entity names for stable primary-key matching. */
 export function normalizePrimaryKey(value: unknown): string {
   return normalizeValue(value)
@@ -115,27 +135,58 @@ export function mergePair(
   spec: DatasetSpec,
 ): ExtractedRecord {
   const row: Record<string, string | number | boolean | null> = { ...a.row };
+  const fieldsFilledFromIncoming = new Set<string>();
+  let replacedDocsUrlFromIncoming = false;
 
   for (const col of spec.columns) {
     const current = row[col.name];
     const incoming = b.row[col.name];
-    const currentEmpty =
-      current === null || current === undefined || current === "";
-    const incomingFilled =
-      incoming !== null && incoming !== undefined && incoming !== "";
+    const currentEmpty = isEmpty(current);
+    const incomingFilled = !isEmpty(incoming);
 
     if (currentEmpty && incomingFilled) {
       row[col.name] = incoming ?? null;
+      fieldsFilledFromIncoming.add(col.name);
+    } else if (incomingFilled && shouldReplaceCell(col.name, current, incoming)) {
+      row[col.name] = incoming ?? null;
+      fieldsFilledFromIncoming.add(col.name);
+      replacedDocsUrlFromIncoming ||= isDocsUrlColumn(col.name);
+    }
+  }
+
+  if (replacedDocsUrlFromIncoming) {
+    for (const col of spec.columns) {
+      const incoming = b.row[col.name];
+      if (
+        isDocsCompanionColumn(col.name) &&
+        !isEmpty(incoming) &&
+        !spec.dedupe_keys.includes(col.name)
+      ) {
+        row[col.name] = incoming ?? null;
+        fieldsFilledFromIncoming.add(col.name);
+      }
     }
   }
 
-  const evidence = [...a.evidence];
+  const evidence = a.evidence.filter((item) =>
+    valuesMatch(row[item.field], a.row[item.field]),
+  );
   const evidenceFields = new Set(evidence.map((e) => e.field));
   for (const item of b.evidence) {
-    if (!evidenceFields.has(item.field)) {
+    if (
+      !evidenceFields.has(item.field) &&
+      shouldMergeIncomingEvidence({
+        field: item.field,
+        mergedRow: row,
+        incomingRow: b.row,
+        fieldsFilledFromIncoming,
+      })
+    ) {
       evidence.push(item);
+      evidenceFields.add(item.field);
     }
   }
+  const coherentEvidence = filterEvidenceForRetainedDocsUrl(spec, row, evidence);
 
   const extractionConfidence = Math.max(
     a.extraction_confidence ?? 0,
@@ -144,10 +195,141 @@ export function mergePair(
 
   return {
     row,
-    evidence,
-    source_urls: [...new Set([...a.source_urls, ...b.source_urls])],
+    evidence: coherentEvidence,
+    source_urls: deriveRecordSourceUrls({
+      spec,
+      row,
+      evidence: coherentEvidence,
+      fallbackUrls: coherentEvidence.length > 0 ? [] : a.source_urls,
+    }),
     ...(extractionConfidence > 0
       ? { extraction_confidence: extractionConfidence }
       : {}),
   };
 }
+
+function shouldMergeIncomingEvidence(input: {
+  field: string;
+  mergedRow: Record<string, string | number | boolean | null>;
+  incomingRow: Record<string, string | number | boolean | null>;
+  fieldsFilledFromIncoming: Set<string>;
+}): boolean {
+  // Incoming evidence is kept only when it still backs the merged cell.
+  const { field, mergedRow, incomingRow, fieldsFilledFromIncoming } = input;
+  const mergedCell = mergedRow[field];
+  const incomingCell = incomingRow[field];
+
+  // Docs URL evidence must describe the docs URL that survived the merge.
+  if (isDocsUrlColumn(field) && !urlsReferenceSamePage(incomingCell, mergedCell)) {
+    return false;
+  }
+  // Cells copied from the incoming row are backed by incoming evidence;
+  // otherwise both rows must agree on the value.
+  const takenFromIncoming = fieldsFilledFromIncoming.has(field);
+  return takenFromIncoming || valuesMatch(mergedCell, incomingCell);
+}
+
+function shouldReplaceCell(
+  columnName: string,
+  current: string | number | boolean | null | undefined,
+  incoming: string | number | boolean | null | undefined,
+): boolean {
+  // Only a strictly better-scoring docs URL displaces the current cell.
+  if (isDocsUrlColumn(columnName)) {
+    const incomingScore = scoreDocsUrlForOfficialSource(incoming);
+    const currentScore = scoreDocsUrlForOfficialSource(current);
+    return incomingScore > currentScore;
+  }
+  return false;
+}
+
+function isDocsUrlColumn(columnName: string): boolean {
+  // Case-insensitive: the canonical "docs_url" / "*_docs_url" spellings, or
+  // any name that mentions both "docs" and "url".
+  const lower = columnName.toLowerCase();
+  if (lower === "docs_url") return true;
+  if (lower.endsWith("_docs_url")) return true;
+  return lower.includes("docs") && lower.includes("url");
+}
+
+function isDocsCompanionColumn(columnName: string): boolean {
+  // Companion columns: "summary", "description", "docs_title", or any name
+  // that mentions both "docs" and "title" (case-insensitive). These are the
+  // columns that travel with a docs URL cell.
+  const lower = columnName.toLowerCase();
+  const exactNames = ["summary", "description", "docs_title"];
+  if (exactNames.includes(lower)) return true;
+  return lower.includes("docs") && lower.includes("title");
+}
+
+function filterEvidenceForRetainedDocsUrl(
+  spec: DatasetSpec,
+  row: Record<string, string | number | boolean | null>,
+  evidence: ExtractedRecord["evidence"],
+): ExtractedRecord["evidence"] {
+  // Without a strong retained docs URL there is nothing to align against,
+  // so the evidence list is returned untouched.
+  const anchorUrl = bestRetainedDocsUrl(spec, row);
+  if (!anchorUrl) return evidence;
+
+  const kept: ExtractedRecord["evidence"] = [];
+  for (const entry of evidence) {
+    const field = entry.field;
+    // Docs URL evidence must come from the page stored in that very cell;
+    // companion and dedupe-key evidence must be backed by the anchor URL;
+    // evidence for any other field is always kept.
+    const keep = isDocsUrlColumn(field)
+      ? urlsReferenceSamePage(entry.url, row[field])
+      : isDocsCompanionColumn(field) || spec.dedupe_keys.includes(field)
+        ? sourceUrlSupportsRetainedDocsUrl(entry.url, anchorUrl)
+        : true;
+    if (keep) kept.push(entry);
+  }
+  return kept;
+}
+
+function bestRetainedDocsUrl(
+  spec: DatasetSpec,
+  row: Record<string, string | number | boolean | null>,
+): string | null {
+  // Highest-scoring docs URL cell in the row; the earliest column wins ties.
+  // Anything scoring below 4 is too weak to act as an anchor, so null.
+  const minimumAnchorScore = 4;
+  let winner: { url: string | null; score: number } = { url: null, score: 0 };
+  const docsColumns = spec.columns.filter((col) => isDocsUrlColumn(col.name));
+  for (const { name } of docsColumns) {
+    const cell = row[name];
+    if (typeof cell !== "string") continue;
+    const score = scoreDocsUrlForOfficialSource(cell);
+    if (score > winner.score) winner = { url: cell, score };
+  }
+  return winner.score >= minimumAnchorScore ? winner.url : null;
+}
+
+function sourceUrlSupportsRetainedDocsUrl(
+  evidenceUrl: unknown,
+  retainedDocsUrl: string,
+): boolean {
+  // Supported when the evidence comes from the retained page itself, or
+  // from another page on the same host that also scores as a docs surface
+  // (score of at least 4).
+  const samePage = urlsReferenceSamePage(evidenceUrl, retainedDocsUrl);
+  if (samePage) return true;
+  const sameHost = sameHostname(evidenceUrl, retainedDocsUrl);
+  return sameHost && scoreDocsUrlForOfficialSource(evidenceUrl) >= 4;
+}
+
+function urlsReferenceSamePage(a: unknown, b: unknown): boolean {
+  // A missing URL on either side is never "the same page".
+  const eitherMissing = isEmpty(a) || isEmpty(b);
+  return eitherMissing ? false : normalizeComparableValue(a) === normalizeComparableValue(b);
+}
+
+function sameHostname(a: unknown, b: unknown): boolean {
+  // Compares hostnames with a leading "www." ignored; unparsable input
+  // (new URL throws) means "not the same host".
+  const bareHost = (raw: unknown): string =>
+    new URL(String(raw)).hostname.replace(/^www\./, "");
+  try {
+    return bareHost(a) === bareHost(b);
+  } catch { return false; }
+}
diff --git a/backend/BigSet_Data_Collection_Agent/src/orchestrator/acquisition.ts b/backend/BigSet_Data_Collection_Agent/src/orchestrator/acquisition.ts
index aa24bfb..a879312 100644
--- a/backend/BigSet_Data_Collection_Agent/src/orchestrator/acquisition.ts
+++ b/backend/BigSet_Data_Collection_Agent/src/orchestrator/acquisition.ts
@@ -7,6 +7,7 @@ import { getPrimaryKeyValue } from "../merge/records.js";
 import { createFetchQueue, createSearchQueue } from "../queue/pools.js";
 import {
   derivePromptSourcePolicy,
+  recordMatchesPromptSourcePolicy,
   sourceCandidatePolicyBoost,
   type PromptSourcePolicy,
 } from "../agents/source-policy.js";
@@ -237,6 +238,18 @@ export async function runAcquisitionPhase(options: {
     memory: options.memory,
     log: options.log,
   });
+  const records = sourcePolicy.requiresOfficialSource
+    ? processed.records.filter((record) =>
+        recordMatchesPromptSourcePolicy(record, options.spec, sourcePolicy),
+      )
+    : processed.records;
+  const droppedRecords = processed.records.length - records.length;
+  if (droppedRecords > 0) {
+    options.log(
+      options.label,
+      `Dropped ${droppedRecords} record(s) that lacked entity-owned source URLs`,
+    );
+  }
 
   const allFetchedUrls = [
     ...new Set([
@@ -250,7 +263,7 @@ export async function runAcquisitionPhase(options: {
     fetchedUrls: allFetchedUrls,
     failedUrls,
     fetchedPages,
-    records: processed.records,
+    records,
     pagesFetched: fetchedPages.length,
     triage: processed.summary,
     triageResults: processed.triageResults,
diff --git a/backend/BigSet_Data_Collection_Agent/src/records/source-urls.ts b/backend/BigSet_Data_Collection_Agent/src/records/source-urls.ts
new file mode 100644
index 0000000..f193ffc
--- /dev/null
+++ b/backend/BigSet_Data_Collection_Agent/src/records/source-urls.ts
@@ -0,0 +1,54 @@
+import type { DatasetSpec, ExtractedRecord } from "../models/schemas.js";
+
+function isHttpUrl(value: unknown): value is string { // guard: http(s):// string, any case, padding ignored
+  return typeof value === "string" && value.trim().search(/^https?:\/\//i) === 0;
+}
+
+function isUrlLikeColumnName(name: string): boolean {
+  const lower = name.toLowerCase(); // case-insensitive; any name containing "url" qualifies
+  if (lower === "url" || lower.endsWith("_url")) return true;
+  return lower.includes("url");
+}
+
+/**
+ * Builds a record's `source_urls` from the pages the record actually cites.
+ * URLs are collected in priority order (evidence URLs, then URL-like row
+ * cells, then `fallbackUrls`), trimmed, and de-duplicated in first-seen
+ * order. Values that are not http(s) strings are ignored.
+ *
+ * @param input.spec - Dataset spec; its column names select URL-like cells.
+ * @param input.row - Record row whose URL-like cells are harvested.
+ * @param input.evidence - Evidence items whose `url` values are harvested.
+ * @param input.fallbackUrls - Optional URLs appended after everything else.
+ * @returns Unique, trimmed http(s) URLs in first-seen order.
+ */
+export function deriveRecordSourceUrls(input: {
+  spec: DatasetSpec;
+  row: ExtractedRecord["row"];
+  evidence: ExtractedRecord["evidence"];
+  fallbackUrls?: string[];
+}): string[] {
+  const { spec, row, evidence } = input;
+  const urlCells = spec.columns
+    .filter((column) => isUrlLikeColumnName(column.name))
+    .map((column) => row[column.name]);
+  const candidates: unknown[] = [
+    ...evidence.map((item) => item.url),
+    ...urlCells,
+    ...(input.fallbackUrls ?? []),
+  ];
+  return [...new Set(candidates.filter(isHttpUrl).map((url) => url.trim()))];
+}
+
+export function scoreDocsUrlForOfficialSource(value: unknown): number {
+  // Heuristic rank of a URL as official docs: 0 for non-URLs, otherwise 1,
+  // +4 for a docs./developers. host, +3 for a docs-like path, -4 for blogs etc.
+  if (!isHttpUrl(value)) return 0;
+  // isHttpUrl tolerates surrounding whitespace, the ^-anchored host test does not.
+  const normalized = value.trim().toLowerCase();
+  let score = 1;
+  if (/^https:\/\/(?:docs|developers)\./.test(normalized)) score += 4;
+  // A docs segment may be closed by "/", "?", "#" or the end of the URL.
+  if (/\/(?:docs|documentation|guides|api\/docs|agents|model-context-protocol|mcp)(?:[/?#]|$)/.test(normalized)) score += 3;
+  if (/\b(?:blog|news|course|academy|directory|skilljar)\b/.test(normalized)) score -= 4;
+  return score;
+}
diff --git a/backend/test/collection-record-merge.test.ts b/backend/test/collection-record-merge.test.ts
new file mode 100644
index 0000000..c2bfd50
--- /dev/null
+++ b/backend/test/collection-record-merge.test.ts
@@ -0,0 +1,476 @@
+import assert from "node:assert/strict";
+import { test } from "node:test";
+
+import {
+  mergePair,
+  mergeRecords,
+} from "../BigSet_Data_Collection_Agent/src/merge/records.js";
+import type {
+  DatasetSpec,
+  ExtractedRecord,
+} from "../BigSet_Data_Collection_Agent/src/models/schemas.js";
+
+const docsSpec: DatasetSpec = { // shared fixture spec used by every merge test in this file
+  intent_summary: "Official MCP docs pages.",
+  target_row_count: 3,
+  row_grain: "one row per vendor",
+  columns: [
+    {
+      name: "entity_name",
+      type: "string",
+      description: "Vendor name.",
+      required: true,
+    },
+    {
+      name: "docs_title",
+      type: "string",
+      description: "Docs page title.",
+      required: true,
+    },
+    {
+      name: "docs_url", // the only docs-URL column in the fixture
+      type: "string",
+      description: "Official docs page URL.",
+      required: true,
+    },
+    {
+      name: "summary",
+      type: "string",
+      description: "What the page covers.",
+      required: true,
+    },
+  ],
+  dedupe_keys: ["entity_name"], // single dedupe key: the vendor name
+  search_queries: ["MCP docs"],
+  extraction_hints: "Prefer official docs pages.",
+};
+
+test("collection record merge does not attach evidence from conflicting duplicate rows", () => { // blog row must not leak into the official row
+  const officialRecord = record({ // first argument to mergePair: the official docs row
+    row: {
+      entity_name: "Cloudflare",
+      docs_title: "Connect to an MCP server",
+      docs_url: "https://developers.cloudflare.com/agents/guides/connect-mcp-client/",
+      summary: "Official docs for connecting an MCP client.",
+    },
+    evidence: [
+      evidence(
+        "summary",
+        "https://developers.cloudflare.com/agents/guides/connect-mcp-client/",
+        "Connect to an MCP server."
+      ),
+    ],
+    sourceUrls: [
+      "https://developers.cloudflare.com/agents/guides/connect-mcp-client/",
+    ],
+  });
+  const blogRecord = record({ // same entity, but every other cell conflicts with the official row
+    row: {
+      entity_name: "Cloudflare",
+      docs_title: "Code Mode: the better way to use MCP",
+      docs_url: "https://blog.cloudflare.com/code-mode/",
+      summary: "Blog post about code mode.",
+    },
+    evidence: [
+      evidence(
+        "docs_title",
+        "https://blog.cloudflare.com/code-mode/",
+        "Code Mode: the better way to use MCP"
+      ),
+      evidence(
+        "docs_url",
+        "https://blog.cloudflare.com/code-mode/",
+        "https://blog.cloudflare.com/code-mode/"
+      ),
+    ],
+    sourceUrls: ["https://blog.cloudflare.com/code-mode/"],
+  });
+
+  const merged = mergePair(officialRecord, blogRecord, docsSpec);
+
+  assert.equal( // the official docs URL survives the merge
+    merged.row.docs_url,
+    "https://developers.cloudflare.com/agents/guides/connect-mcp-client/"
+  );
+  assert.deepEqual( // only the official row's evidence remains
+    merged.evidence.map((item) => item.url),
+    ["https://developers.cloudflare.com/agents/guides/connect-mcp-client/"]
+  );
+  assert.deepEqual(merged.source_urls, [ // the blog URL is absent from source_urls
+    "https://developers.cloudflare.com/agents/guides/connect-mcp-client/",
+  ]);
+});
+
+test("collection record merge keeps incoming evidence when it fills a missing field", () => { // incoming row supplies the missing docs_url
+  const partialRecord = record({ // has a summary but no docs_url
+    row: {
+      entity_name: "OpenAI",
+      docs_title: "MCP and Connectors",
+      docs_url: null,
+      summary: "OpenAI MCP docs.",
+    },
+    evidence: [
+      evidence(
+        "summary",
+        "https://developers.openai.com/api/docs/guides/tools-connectors-mcp",
+        "remote MCP servers and connectors"
+      ),
+    ],
+    sourceUrls: [
+      "https://developers.openai.com/api/docs/guides/tools-connectors-mcp",
+    ],
+  });
+  const urlRecord = record({ // has the docs_url but no summary
+    row: {
+      entity_name: "OpenAI",
+      docs_title: "MCP and Connectors",
+      docs_url: "https://developers.openai.com/api/docs/guides/tools-connectors-mcp",
+      summary: null,
+    },
+    evidence: [
+      evidence(
+        "docs_url",
+        "https://developers.openai.com/api/docs/guides/tools-connectors-mcp",
+        "https://developers.openai.com/api/docs/guides/tools-connectors-mcp"
+      ),
+    ],
+    sourceUrls: [
+      "https://developers.openai.com/api/docs/guides/tools-connectors-mcp",
+    ],
+  });
+
+  const merged = mergePair(partialRecord, urlRecord, docsSpec);
+
+  assert.equal( // the empty docs_url cell is filled from the incoming row
+    merged.row.docs_url,
+    "https://developers.openai.com/api/docs/guides/tools-connectors-mcp"
+  );
+  assert.deepEqual( // existing evidence first, then the incoming docs_url evidence
+    merged.evidence.map((item) => item.field),
+    ["summary", "docs_url"]
+  );
+  assert.deepEqual(merged.source_urls, [ // both rows cite the same page, so one URL remains
+    "https://developers.openai.com/api/docs/guides/tools-connectors-mcp",
+  ]);
+});
+
+test("collection record merge keeps same-value supplemental evidence", () => { // identical rows, evidence for different fields
+  const merged = mergeRecords(docsSpec, [
+    record({ // carries evidence for "summary" only
+      row: {
+        entity_name: "Anthropic",
+        docs_title: "Model Context Protocol connector",
+        docs_url: "https://docs.anthropic.com/en/docs/agents-and-tools/mcp-connector",
+        summary: "Connector docs.",
+      },
+      evidence: [
+        evidence(
+          "summary",
+          "https://docs.anthropic.com/en/docs/agents-and-tools/mcp-connector",
+          "MCP connector"
+        ),
+      ],
+      sourceUrls: [
+        "https://docs.anthropic.com/en/docs/agents-and-tools/mcp-connector",
+      ],
+    }),
+    record({ // same row values, evidence for "docs_title" only
+      row: {
+        entity_name: "Anthropic",
+        docs_title: "Model Context Protocol connector",
+        docs_url: "https://docs.anthropic.com/en/docs/agents-and-tools/mcp-connector",
+        summary: "Connector docs.",
+      },
+      evidence: [
+        evidence(
+          "docs_title",
+          "https://docs.anthropic.com/en/docs/agents-and-tools/mcp-connector",
+          "Model Context Protocol connector"
+        ),
+      ],
+      sourceUrls: [
+        "https://docs.anthropic.com/en/docs/agents-and-tools/mcp-connector",
+      ],
+    }),
+  ]).records;
+
+  assert.equal(merged.length, 1); // the two rows collapse into one record
+  assert.deepEqual( // evidence from both inputs is retained, in input order
+    merged[0]?.evidence.map((item) => item.field),
+    ["summary", "docs_title"]
+  );
+});
+
+test("collection record merge replaces weak docs URLs with stronger docs surfaces", () => { // blog row first, developers.* row second
+  const merged = mergePair(
+    record({ // current row: blog URL in the docs_url cell
+      row: {
+        entity_name: "Cloudflare",
+        docs_title: "Code Mode: the better way to use MCP",
+        docs_url: "https://blog.cloudflare.com/code-mode/",
+        summary: "Blog post about MCP code mode.",
+      },
+      evidence: [
+        evidence(
+          "docs_url",
+          "https://blog.cloudflare.com/code-mode/",
+          "https://blog.cloudflare.com/code-mode/"
+        ),
+      ],
+      sourceUrls: ["https://blog.cloudflare.com/code-mode/"],
+    }),
+    record({ // incoming row: developers.cloudflare.com docs page with full evidence
+      row: {
+        entity_name: "Cloudflare",
+        docs_title: "Model Context Protocol",
+        docs_url: "https://developers.cloudflare.com/agents/model-context-protocol/",
+        summary: "Official docs for Cloudflare MCP servers.",
+      },
+      evidence: [
+        evidence(
+          "docs_title",
+          "https://developers.cloudflare.com/agents/model-context-protocol/",
+          "Model Context Protocol"
+        ),
+        evidence(
+          "docs_url",
+          "https://developers.cloudflare.com/agents/model-context-protocol/",
+          "https://developers.cloudflare.com/agents/model-context-protocol/"
+        ),
+        evidence(
+          "summary",
+          "https://developers.cloudflare.com/agents/model-context-protocol/",
+          "MCP servers"
+        ),
+      ],
+      sourceUrls: [
+        "https://developers.cloudflare.com/agents/model-context-protocol/",
+      ],
+    }),
+    docsSpec,
+  );
+
+  assert.equal( // the incoming docs URL replaces the blog URL
+    merged.row.docs_url,
+    "https://developers.cloudflare.com/agents/model-context-protocol/"
+  );
+  assert.equal(merged.row.docs_title, "Model Context Protocol"); // title follows the replaced URL
+  assert.equal(merged.row.summary, "Official docs for Cloudflare MCP servers."); // summary follows too
+  assert.deepEqual( // the blog row's docs_url evidence is gone; incoming evidence is kept in order
+    merged.evidence.map((item) => item.field),
+    ["docs_title", "docs_url", "summary"]
+  );
+  assert.deepEqual(
+    merged.evidence.map((item) => item.url),
+    [
+      "https://developers.cloudflare.com/agents/model-context-protocol/",
+      "https://developers.cloudflare.com/agents/model-context-protocol/",
+      "https://developers.cloudflare.com/agents/model-context-protocol/",
+    ]
+  );
+  assert.deepEqual(merged.source_urls, [ // the blog URL no longer appears as a source
+    "https://developers.cloudflare.com/agents/model-context-protocol/",
+  ]);
+});
+
+test("collection record merge drops docs URL evidence from unrelated source pages", () => { // docs_url evidence cites a different page than the cell
+  const merged = mergePair(
+    record({ // current row: no docs_url, no summary, no evidence
+      row: {
+        entity_name: "Cloudflare",
+        docs_title: "Docs for agents",
+        docs_url: null,
+        summary: null,
+      },
+      evidence: [],
+      sourceUrls: [],
+    }),
+    record({
+      row: {
+        entity_name: "Cloudflare",
+        docs_title: "Model Context Protocol",
+        docs_url: "https://developers.cloudflare.com/agents/model-context-protocol/",
+        summary: "Official docs for Cloudflare MCP servers.",
+      },
+      evidence: [
+        evidence(
+          "docs_url",
+          "https://developers.openai.com/api/docs", // evidence page differs from the docs_url cell value
+          "https://developers.cloudflare.com/agents/model-context-protocol/"
+        ),
+        evidence(
+          "summary",
+          "https://developers.cloudflare.com/agents/model-context-protocol/",
+          "MCP servers"
+        ),
+      ],
+      sourceUrls: [
+        "https://developers.openai.com/api/docs",
+        "https://developers.cloudflare.com/agents/model-context-protocol/",
+      ],
+    }),
+    docsSpec,
+  );
+
+  assert.equal( // the cell itself is still filled from the incoming row
+    merged.row.docs_url,
+    "https://developers.cloudflare.com/agents/model-context-protocol/"
+  );
+  assert.deepEqual( // the mismatched docs_url evidence is dropped, the summary evidence stays
+    merged.evidence.map((item) => item.field),
+    ["summary"]
+  );
+  assert.deepEqual(merged.source_urls, [ // the unrelated openai.com page is not listed as a source
+    "https://developers.cloudflare.com/agents/model-context-protocol/",
+  ]);
+});
+
+test("collection record merge fixture reaches benchmark-equivalent domain coverage", () => { // five input rows, three vendors
+  const merged = mergeRecords(docsSpec, [
+    record({ // OpenAI: single row, already on developers.openai.com
+      row: {
+        entity_name: "OpenAI",
+        docs_title: "MCP and Connectors",
+        docs_url: "https://developers.openai.com/api/docs/guides/tools-connectors-mcp",
+        summary: "OpenAI MCP docs.",
+      },
+      evidence: [
+        evidence(
+          "summary",
+          "https://developers.openai.com/api/docs/guides/tools-connectors-mcp",
+          "remote MCP servers and connectors"
+        ),
+      ],
+      sourceUrls: [
+        "https://developers.openai.com/api/docs/guides/tools-connectors-mcp",
+      ],
+    }),
+    record({ // Anthropic, row 1: skilljar course page
+      row: {
+        entity_name: "Anthropic",
+        docs_title: "Introduction to Model Context Protocol",
+        docs_url: "https://anthropic.skilljar.com/introduction-to-model-context-protocol",
+        summary: "Anthropic MCP course.",
+      },
+      evidence: [
+        evidence(
+          "summary",
+          "https://anthropic.skilljar.com/introduction-to-model-context-protocol",
+          "course provides comprehensive coverage"
+        ),
+      ],
+      sourceUrls: [
+        "https://anthropic.skilljar.com/introduction-to-model-context-protocol",
+      ],
+    }),
+    record({ // Anthropic, row 2: docs.anthropic.com page
+      row: {
+        entity_name: "Anthropic",
+        docs_title: "MCP connector",
+        docs_url: "https://docs.anthropic.com/en/docs/agents-and-tools/mcp-connector",
+        summary: "Anthropic MCP connector docs.",
+      },
+      evidence: [
+        evidence(
+          "docs_url",
+          "https://docs.anthropic.com/en/docs/agents-and-tools/mcp-connector",
+          "https://docs.anthropic.com/en/docs/agents-and-tools/mcp-connector"
+        ),
+      ],
+      sourceUrls: [
+        "https://docs.anthropic.com/en/docs/agents-and-tools/mcp-connector",
+      ],
+    }),
+    record({ // Cloudflare, row 1: blog post
+      row: {
+        entity_name: "Cloudflare",
+        docs_title: "Code Mode",
+        docs_url: "https://blog.cloudflare.com/code-mode/",
+        summary: "Cloudflare MCP blog post.",
+      },
+      evidence: [
+        evidence(
+          "summary",
+          "https://blog.cloudflare.com/code-mode/",
+          "Cloudflare Agents SDK"
+        ),
+      ],
+      sourceUrls: ["https://blog.cloudflare.com/code-mode/"],
+    }),
+    record({ // Cloudflare, row 2: developers.cloudflare.com page
+      row: {
+        entity_name: "Cloudflare",
+        docs_title: "Model Context Protocol",
+        docs_url: "https://developers.cloudflare.com/agents/model-context-protocol/",
+        summary: "Cloudflare MCP docs.",
+      },
+      evidence: [
+        evidence(
+          "docs_url",
+          "https://developers.cloudflare.com/agents/model-context-protocol/",
+          "https://developers.cloudflare.com/agents/model-context-protocol/"
+        ),
+      ],
+      sourceUrls: [
+        "https://developers.cloudflare.com/agents/model-context-protocol/",
+      ],
+    }),
+  ]).records;
+
+  assert.equal(merged.length, 3); // one merged record per vendor
+  assert.equal( // the docs host wins over the skilljar course
+    merged.find((item) => item.row.entity_name === "Anthropic")?.row.docs_url,
+    "https://docs.anthropic.com/en/docs/agents-and-tools/mcp-connector"
+  );
+  assert.equal( // the developers host wins over the blog
+    merged.find((item) => item.row.entity_name === "Cloudflare")?.row.docs_url,
+    "https://developers.cloudflare.com/agents/model-context-protocol/"
+  );
+  assert.equal( // every merged record cites at least one allowed host for its vendor
+    domainCoverage(merged, {
+      OpenAI: ["developers.openai.com", "platform.openai.com", "openai.com"],
+      Anthropic: ["docs.anthropic.com"],
+      Cloudflare: ["developers.cloudflare.com"],
+    }),
+    1,
+  );
+});
+
+function evidence(field: string, url: string, quote: string) {
+  return { field: field, url: url, quote: quote }; // fixture: one evidence item for `field`
+}
+
+function record(input: {
+  row: ExtractedRecord["row"];
+  evidence: ExtractedRecord["evidence"];
+  sourceUrls: string[];
+}): ExtractedRecord {
+  // Fixture builder: sourceUrls becomes source_urls; confidence is fixed.
+  const { row, evidence, sourceUrls } = input;
+  const fixedConfidence = 0.9;
+  return {
+    row, evidence, source_urls: sourceUrls, extraction_confidence: fixedConfidence,
+  };
+}
+
+function domainCoverage(
+  records: ExtractedRecord[],
+  allowedDomainsByEntity: Record<string, string[]>,
+): number {
+  // Share of records citing a URL whose hostname ends with an allowed domain.
+  let covered = 0;
+  for (const candidate of records) {
+    const allowed = allowedDomainsByEntity[String(candidate.row.entity_name ?? "")] ?? [];
+    const hosts = candidate.source_urls.map((url) => hostname(url));
+    if (hosts.some((host) => allowed.some((domain) => host.endsWith(domain)))) covered += 1;
+  }
+  return covered / records.length;
+}
+
+function hostname(url: string): string {
+  // Host minus a leading "www."; "" when the URL cannot be parsed.
+  try {
+    const { hostname: host } = new URL(url);
+    return host.replace(/^www\./, "");
+  } catch { return ""; }
+}
diff --git a/backend/test/collection-source-policy.test.ts b/backend/test/collection-source-policy.test.ts
index c2079a0..48b6ac2 100644
--- a/backend/test/collection-source-policy.test.ts
+++ b/backend/test/collection-source-policy.test.ts
@@ -6,11 +6,13 @@ import {
   applyPromptSourcePolicyToTriageResult,
   derivePromptSourcePolicy,
   promptSourceSearchQueries,
+  recordMatchesPromptSourcePolicy,
   sourceCandidatePolicyBoost,
   urlMatchesPromptSourcePolicy,
 } from "../BigSet_Data_Collection_Agent/src/agents/source-policy.js";
 import type {
   DatasetSpec,
+  ExtractedRecord,
   SourceCandidate,
   SourceTriageResult,
 } from "../BigSet_Data_Collection_Agent/src/models/schemas.js";
@@ -157,10 +159,10 @@ test("prompt source policy boosts official candidates", () => {
     ["Anthropic", "OpenAI", "Cloudflare"],
   );
   assert.deepEqual(promptSourceSearchQueries(policy).slice(0, 4), [
+    "Anthropic MCP connector docs site:platform.claude.com",
+    "OpenAI MCP connector docs site:developers.openai.com",
+    "Cloudflare MCP connector docs site:developers.cloudflare.com",
     "Anthropic MCP connector docs",
-    "Anthropic model context protocol docs",
-    "OpenAI MCP connector docs",
-    "OpenAI model context protocol docs",
   ]);
   const official: SourceCandidate = {
     url: "https://developers.cloudflare.com/agents/model-context-protocol/",
@@ -180,3 +182,131 @@ test("prompt source policy boosts official candidates", () => {
       sourceCandidatePolicyBoost(thirdParty, policy),
   );
 });
+
+test("prompt source policy prefers docs surfaces over blogs, courses, and directories", () => {
+  const policy = derivePromptSourcePolicy(
+    "I need official docs pages for setting up MCP servers from Anthropic, OpenAI, and Cloudflare.",
+  );
+  const docs: SourceCandidate = { // Anthropic docs page hosted on platform.claude.com
+    url: "https://platform.claude.com/docs/en/agents-and-tools/mcp-connector",
+    title: "Model Context Protocol connector",
+    snippet: "Official Anthropic documentation for MCP connector setup.",
+    query: "Anthropic MCP connector docs",
+  };
+  const course: SourceCandidate = { // course page on an anthropic.skilljar.com host
+    url: "https://anthropic.skilljar.com/introduction-to-model-context-protocol",
+    title: "Introduction to Model Context Protocol",
+    snippet: "Anthropic course for learning MCP.",
+    query: "Anthropic MCP connector docs",
+  };
+  const blog: SourceCandidate = { // Cloudflare blog post on blog.cloudflare.com
+    url: "https://blog.cloudflare.com/code-mode/",
+    title: "Code Mode: the better way to use MCP",
+    snippet: "Cloudflare blog post about MCP.",
+    query: "Cloudflare MCP connector docs",
+  };
+  const cloudflareDocs: SourceCandidate = { // Cloudflare docs page on developers.cloudflare.com
+    url: "https://developers.cloudflare.com/agents/model-context-protocol/",
+    title: "Model Context Protocol",
+    snippet: "Official Cloudflare docs for MCP servers.",
+    query: "Cloudflare MCP connector docs",
+  };
+
+  assert.ok( // the docs candidate must receive a strictly higher boost than the course candidate
+    sourceCandidatePolicyBoost(docs, policy) >
+      sourceCandidatePolicyBoost(course, policy),
+  );
+  assert.equal( // the platform.claude.com docs URL must match the derived policy
+    urlMatchesPromptSourcePolicy(
+      "https://platform.claude.com/docs/en/agents-and-tools/mcp-connector",
+      policy,
+    ),
+    true,
+  );
+  assert.ok( // the Cloudflare docs candidate must receive a strictly higher boost than the blog candidate
+    sourceCandidatePolicyBoost(cloudflareDocs, policy) >
+      sourceCandidatePolicyBoost(blog, policy),
+  );
+});
+
+test("prompt source policy rejects records sourced from another entity's docs", () => {
+  const policy = derivePromptSourcePolicy(
+    "I need official docs pages for setting up MCP servers from Anthropic, OpenAI, and Cloudflare.",
+  );
+  const spec: DatasetSpec = { // minimal spec with two required string columns: entity_name and docs_url
+    intent_summary: "Official MCP docs pages.",
+    target_row_count: 3,
+    row_grain: "one row per vendor",
+    columns: [
+      {
+        name: "entity_name",
+        type: "string",
+        description: "Vendor name.",
+        required: true,
+      },
+      {
+        name: "docs_url",
+        type: "string",
+        description: "Official docs page URL.",
+        required: true,
+      },
+    ],
+    dedupe_keys: ["entity_name"],
+    search_queries: [],
+    extraction_hints: "",
+  };
+
+  assert.equal( // an Anthropic row sourced from modelcontextprotocol.io is expected to be rejected
+    recordMatchesPromptSourcePolicy(
+      record("Anthropic", "https://modelcontextprotocol.io/docs/develop/build-server"),
+      spec,
+      policy,
+    ),
+    false,
+  );
+  assert.equal( // an Anthropic row sourced from a platform.claude.com docs page is expected to be accepted
+    recordMatchesPromptSourcePolicy(
+      record(
+        "Anthropic",
+        "https://platform.claude.com/docs/en/agents-and-tools/remote-mcp-servers",
+      ),
+      spec,
+      policy,
+    ),
+    true,
+  );
+  assert.equal( // an OpenAI row sourced from the developers.openai.com/blog path is expected to be rejected
+    recordMatchesPromptSourcePolicy(
+      record("OpenAI", "https://developers.openai.com/blog"),
+      spec,
+      policy,
+    ),
+    false,
+  );
+  assert.equal( // an OpenAI row sourced from a developers.openai.com docs guide is expected to be accepted
+    recordMatchesPromptSourcePolicy(
+      record("OpenAI", "https://developers.openai.com/api/docs/guides/tools-connectors-mcp"),
+      spec,
+      policy,
+    ),
+    true,
+  );
+});
+
+function record(entityName: string, docsUrl: string): ExtractedRecord { // Test helper: one-row record whose only source URL is docsUrl.
+  return {
+    row: {
+      entity_name: entityName,
+      docs_url: docsUrl,
+    },
+    evidence: [ // single evidence item for docs_url; the URL is used as both url and quote
+      {
+        field: "docs_url",
+        url: docsUrl,
+        quote: docsUrl,
+      },
+    ],
+    source_urls: [docsUrl],
+    extraction_confidence: 0.8, // fixed confidence for every fixture built by this helper
+  };
+}
diff --git a/benchmarks/dataset-agent/run-benchmark.mjs b/benchmarks/dataset-agent/run-benchmark.mjs
index 4dfc58b..d3fd0f5 100755
--- a/benchmarks/dataset-agent/run-benchmark.mjs
+++ b/benchmarks/dataset-agent/run-benchmark.mjs
@@ -195,7 +195,7 @@ const answerKeysByPromptId = {
     verifiedAt,
     sourceUrls: [
       "https://developers.openai.com/api/docs/mcp",
-      "https://docs.anthropic.com/en/docs/agents-and-tools/mcp-connector",
+      "https://platform.claude.com/docs/en/agents-and-tools/mcp-connector",
       "https://developers.cloudflare.com/agents/model-context-protocol/",
     ],
     scoringNotes:
@@ -214,7 +214,7 @@ const answerKeysByPromptId = {
         id: "anthropic",
         label: "Anthropic",
         aliases: ["anthropic"],
-        allowedSourceDomains: ["docs.anthropic.com"],
+        allowedSourceDomains: ["docs.anthropic.com", "platform.claude.com"],
         requiredText: ["mcp"],
       },
       {
@@ -231,6 +231,7 @@ const answerKeysByPromptId = {
       "platform.openai.com",
       "openai.com",
       "docs.anthropic.com",
+      "platform.claude.com",
       "developers.cloudflare.com",
     ],
   },