Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Reviewing CWV report V2 #38

Draft
wants to merge 10 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion definitions/declarations/httparchive.js
Original file line number Diff line number Diff line change
Expand Up @@ -8,5 +8,5 @@ for (const table of stagingTables) {

declare({
schema: 'wappalyzer',
name: 'apps'
name: 'technologies'
})
41 changes: 17 additions & 24 deletions definitions/output/core_web_vitals/technologies.js
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,16 @@ CREATE TEMP FUNCTION IS_NON_ZERO(
good + needs_improvement + poor > 0
);
`).query(ctx => `
WITH geo_summary AS (
WITH pages AS (
SELECT
client,
page,
technologies
FROM ${ctx.ref('crawl', 'pages')}
WHERE
date = '${pastMonth}'
${constants.devRankFilter}
), geo_summary AS (
SELECT
CAST(REGEXP_REPLACE(CAST(yyyymm AS STRING), r'(\\d{4})(\\d{2})', r'\\1-\\2-01') AS DATE) AS date,
* EXCEPT (country_code),
Expand Down Expand Up @@ -94,49 +103,36 @@ crux AS (

technologies AS (
SELECT
technology.technology,
tech.technology,
client,
page
FROM ${ctx.ref('crawl', 'pages')},
UNNEST(technologies) AS technology
WHERE
date = '${pastMonth}'
${constants.devRankFilter} AND
technology.technology IS NOT NULL AND
technology.technology != ''
FROM pages,
UNNEST(technologies) AS tech
UNION ALL
SELECT
'ALL' AS technology,
client,
page
FROM ${ctx.ref('crawl', 'pages')}
WHERE
date = '${pastMonth}'
${constants.devRankFilter}
FROM pages
),

categories AS (
SELECT
technology.technology,
ARRAY_TO_STRING(ARRAY_AGG(DISTINCT category IGNORE NULLS ORDER BY category), ', ') AS category
FROM ${ctx.ref('crawl', 'pages')},
FROM pages,
UNNEST(technologies) AS technology,
UNNEST(technology.categories) AS category
WHERE
date = '${pastMonth}'
${constants.devRankFilter}
GROUP BY technology
UNION ALL
SELECT
'ALL' AS technology,
ARRAY_TO_STRING(ARRAY_AGG(DISTINCT category IGNORE NULLS ORDER BY category), ', ') AS category
FROM ${ctx.ref('crawl', 'pages')},
FROM pages,
UNNEST(technologies) AS technology,
UNNEST(technology.categories) AS category
WHERE
date = '${pastMonth}' AND
client = 'mobile'
${constants.devRankFilter}
),

summary_stats AS (
Expand All @@ -152,10 +148,7 @@ summary_stats AS (
SAFE.FLOAT64(lighthouse.categories.performance.score) AS performance,
SAFE.FLOAT64(lighthouse.categories.pwa.score) AS pwa,
SAFE.FLOAT64(lighthouse.categories.seo.score) AS seo
FROM ${ctx.ref('crawl', 'pages')}
WHERE
date = '${pastMonth}'
${constants.devRankFilter}
FROM pages
),

lab_data AS (
Expand Down
4 changes: 2 additions & 2 deletions definitions/output/reports/cwv_tech_adoption.js
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,8 @@ SELECT
rank,
geo,
STRUCT(
COALESCE(MAX(IF(client = 'desktop', origins, NULL))) AS desktop,
COALESCE(MAX(IF(client = 'mobile', origins, NULL))) AS mobile
COALESCE(MAX(IF(client = 'desktop', origins, 0))) AS desktop,
COALESCE(MAX(IF(client = 'mobile', origins, 0))) AS mobile
) AS adoption
FROM ${ctx.ref('core_web_vitals', 'technologies')}
WHERE date = '${pastMonth}'
Expand Down
2 changes: 1 addition & 1 deletion definitions/output/reports/cwv_tech_categories.js
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ SELECT
categories.origins,
ARRAY_AGG(technology IGNORE NULLS ORDER BY technologies.origins DESC) AS technologies
FROM categories
JOIN technologies
INNER JOIN technologies
USING (category)
GROUP BY
category,
Expand Down
68 changes: 59 additions & 9 deletions definitions/output/reports/cwv_tech_technologies.js
Original file line number Diff line number Diff line change
Expand Up @@ -6,19 +6,69 @@ publish('cwv_tech_technologies', {
tags: ['crux_ready']
}).query(ctx => `
/* {"dataform_trigger": "report_cwv_tech_complete", "name": "technologies", "type": "dict"} */
WITH pages AS (
SELECT
client,
root_page AS origin,
tech.technology
FROM ${ctx.ref('crawl', 'pages')},
UNNEST(technologies) AS tech
WHERE
date = '${pastMonth}'
${constants.devRankFilter}
), total_pages AS (
SELECT
client,
COUNT(DISTINCT origin) AS origins
FROM pages
GROUP BY client
), technologies AS (
SELECT
technology,
description,
ARRAY_TO_STRING(categories, ', ') AS category,
categories AS category_obj,
NULL AS similar_technologies
FROM ${ctx.ref('wappalyzer', 'technologies')}
), tech_origins AS (
SELECT
client,
technology,
COUNT(DISTINCT origin) AS origins
FROM pages
GROUP BY
client,
technology
)

SELECT
client,
app AS technology,
technology,
description,
category,
SPLIT(category, ",") AS category_obj,
category_obj,
similar_technologies,
COALESCE(origins, 0) AS origins
FROM tech_origins
INNER JOIN technologies
USING(technology)
ORDER BY origins DESC

UNION ALL

SELECT
client,
'ALL' AS technology,
NULL AS description,
ARRAY_TO_STRING(categories, ', ') AS category,
categories AS category_obj,
NULL AS similar_technologies,
origins
FROM ${ctx.ref('core_web_vitals', 'technologies')}
LEFT JOIN ${ctx.ref('wappalyzer', 'apps')}
ON app = name
WHERE date = '${pastMonth}' AND
geo = 'ALL' AND
rank = 'ALL'
ORDER BY origins DESC
FROM total_pages
CROSS JOIN (
SELECT
ARRAY_AGG(DISTINCT category IGNORE NULLS ORDER BY category) AS categories
FROM technologies,
UNNEST(category_obj) AS category
) AS cat
`)
6 changes: 3 additions & 3 deletions definitions/output/wappalyzer/tech_detections.js
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,7 @@ tech_deprecated_gone_origins AS (
-- aggregation of technology adoption/deprecation metrics
SELECT
DATE('${constants.currentMonth}') AS date,
COALESCE(before_summary.technology, tech_adopted_existing_origins.technology, tech_adopted_new_origins.technology, apps.name) AS technology,
COALESCE(before_summary.technology, tech_adopted_existing_origins.technology, tech_adopted_new_origins.technology, technologies.name) AS technology,

-- origins summary
0-COALESCE(total_origins_deprecated_existing, 0) AS total_origins_deprecated_existing,
Expand All @@ -139,6 +139,6 @@ LEFT JOIN tech_deprecated_existing_origins
ON before_summary.technology = tech_deprecated_existing_origins.technology
LEFT JOIN tech_deprecated_gone_origins
ON before_summary.technology = tech_deprecated_gone_origins.technology
FULL OUTER JOIN wappalyzer.apps
ON before_summary.technology = apps.name
FULL OUTER JOIN ${ctx.ref('wappalyzer', 'technologies')} AS technologies
ON before_summary.technology = technologies.name
`)
Loading