Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 22 additions & 9 deletions core/audits/seo/canonical.js
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
*/

import {Audit} from '../audit.js';
import UrlUtils from '../../lib/url-utils.js';
import {MainResource} from '../../computed/main-resource.js';
import * as i18n from '../../lib/i18n/i18n.js';

Expand Down Expand Up @@ -90,12 +89,26 @@ class Canonical extends Audit {
// Links that don't have an href aren't canonical references for SEO, skip them
if (!link.hrefRaw) continue;

// Links that had an hrefRaw but didn't have a valid href were invalid, flag them
if (!link.href) invalidCanonicalLink = link;
// Links that had a valid href but didn't have a valid hrefRaw must have been relatively resolved, flag them
else if (!UrlUtils.isValid(link.hrefRaw)) relativeCanonicallink = link;
// Otherwise, it was a valid canonical URL
else uniqueCanonicalURLs.add(link.href);
// Links that don't have an href are invalid, flag them
if (!link.href) {
invalidCanonicalLink = link;
continue;
}

if (URL.parse(link.hrefRaw, 'https://example.com') === null) {
// Links that are syntactically INVALID with a base, flag them
invalidCanonicalLink = link;
} else {
// Links that are syntactically VALID with a base:
if (URL.parse(link.hrefRaw) === null) {
// Links that are INVALID without a base must be relative, flag them
relativeCanonicallink = link;
} else {
// Links that are valid without a base are absolute
uniqueCanonicalURLs.add(link.href);
}
}

} else if (link.rel === 'alternate') {
if (link.href && link.hreflang) hreflangURLs.add(link.href);
}
Expand Down Expand Up @@ -154,7 +167,7 @@ class Canonical extends Audit {
* @return {LH.Audit.Product|undefined}
*/
static findCommonCanonicalURLMistakes(canonicalURLData, canonicalURL, baseURL) {
const {hreflangURLs} = canonicalURLData;
const { hreflangURLs } = canonicalURLData;

// cross-language or cross-country canonicals are a common issue
if (
Expand Down Expand Up @@ -189,7 +202,7 @@ class Canonical extends Audit {
static async audit(artifacts, context) {
const devtoolsLog = artifacts.DevtoolsLog;

const mainResource = await MainResource.request({devtoolsLog, URL: artifacts.URL}, context);
const mainResource = await MainResource.request({ devtoolsLog, URL: artifacts.URL }, context);
const baseURL = new URL(mainResource.url);
const canonicalURLData = Canonical.collectCanonicalURLs(artifacts.LinkElements);

Expand Down
55 changes: 38 additions & 17 deletions core/test/audits/seo/canonical-test.js
Original file line number Diff line number Diff line change
Expand Up @@ -63,25 +63,46 @@ describe('SEO: Document has valid canonical link', () => {
});
});

it('fails when canonical url is invalid', () => {
const mainDocumentUrl = 'http://www.example.com';
const mainResource = {url: mainDocumentUrl};
const devtoolsLog = networkRecordsToDevtoolsLog([mainResource]);
const artifacts = {
DevtoolsLog: devtoolsLog,
URL: {mainDocumentUrl},
LinkElements: [
link({rel: 'canonical', source: 'head', href: null, hrefRaw: 'https:// example.com'}),
],
};
it('fails for a URL with a space in the host (simulating a permissive parser)', () => {
const mainDocumentUrl = 'http://www.example.com';
const mainResource = {url: mainDocumentUrl};
const devtoolsLog = networkRecordsToDevtoolsLog([mainResource]);
const artifacts = {
DevtoolsLog: devtoolsLog,
URL: {mainDocumentUrl},
LinkElements: [
link({rel: 'canonical', source: 'head', href: 'https://%20example.com', hrefRaw: 'https:// example.com'}),
],
};

const context = {computedCache: new Map()};
return CanonicalAudit.audit(artifacts, context).then(auditResult => {
const {score, explanation} = auditResult;
assert.equal(score, 0);
expect(explanation).toBeDisplayString('Invalid URL (https:// example.com)');
});
const context = {computedCache: new Map()};
return CanonicalAudit.audit(artifacts, context).then(auditResult => {
const {score, explanation} = auditResult;
assert.equal(score, 0);
expect(explanation).toBeDisplayString('Invalid URL (https:// example.com)');
});
});

it('fails for a URL invalid, the parser returns null', () => {
const mainDocumentUrl = 'http://www.example.com';
const mainResource = {url: mainDocumentUrl};
const devtoolsLog = networkRecordsToDevtoolsLog([mainResource]);
const artifacts = {
DevtoolsLog: devtoolsLog,
URL: {mainDocumentUrl},
LinkElements: [
link({rel: 'canonical', source: 'head', href: null, hrefRaw: 'I am not a URL'}),
],
};

const context = {computedCache: new Map()};
return CanonicalAudit.audit(artifacts, context).then(auditResult => {
const {score, explanation} = auditResult;
assert.equal(score, 0);
expect(explanation).toBeDisplayString('Invalid URL (I am not a URL)');
});
});


it('fails when canonical url is relative', () => {
const mainDocumentUrl = 'https://example.com/de/';
Expand Down