-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* pages and legacy lighthouse * fix * first test * pages insert * date var * js rewrite * dataset * Initial commit * init * core_web_vitals * clean graph, tested * publish core_web_vitals.technologies * Dev (#1) * workspace init * pages and legacy lighthouse * fix * first test * pages insert * date var * js rewrite * dataset * core_web_vitals * clean graph, tested * publish core_web_vitals.technologies * technologies partitioning * past month date for cwv * 8pm * package-lock.json * ignore full-refresh * readme * updated tags and example assert * dependency assertions * current month commented * assert fix * all tables publish * incremental tables * node script * enable legacy * missing package name * table configs * all.requests and all.parsed_css * dev sampling vars * sampling instead of rank * readme upd * dev hints * dev sampling for tech report * tech report workflow * removed sampling * dates flexibility * fix * formatting * other legacy tables * docs and dependencies * comment * Update definitions/output/pages.js Co-authored-by: Barry Pollard <[email protected]> * Update definitions/output/technologies.js Co-authored-by: Barry Pollard <[email protected]> * Update package.json Co-authored-by: Barry Pollard <[email protected]> * Update workflow_settings.yaml Co-authored-by: Barry Pollard <[email protected]> * format * not dependent on all.pages * migrated to function trigger * cloud function * readme update * deployed function * readme updates * readme update * init stable copies * requests ready * adjusted requests pipeline * use release configs in prod * readme update * tags update * dev sampling * prune summary * sorted * false when target exists * dev sampling * newline * trigger cleanup * formatting * forEach iteration * create table with operate * new test tables script * tested * merge * JSON columns * job per client * native object pruning * Update definitions/output/all/reprocess_requests.js Co-authored-by: Barry Pollard <[email protected]> --------- 
Co-authored-by: Barry Pollard <[email protected]>
- Loading branch information
1 parent
6640ffe
commit 94718f9
Showing
6 changed files
with
169 additions
and
49 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,26 +1,28 @@ | ||
// Month whose rows are copied into the dev tables: the month before
// `constants.current_month`.
const date = constants.fn_past_month(constants.current_month);

// Source tables to copy into `<datasetId>_dev.dev_<tableId>`.
// Commented-out entries are skipped.
const resources_list = [
  //{datasetId: "all", tableId: "pages"},
  {datasetId: "all", tableId: "requests"},
  //{datasetId: "all", tableId: "parsed_css"},
  //{datasetId: "core_web_vitals", tableId: "technologies"},
];

// Register one "test_table <dataset>_<table>" operation per listed resource.
// Each operation recreates the dev table with the schema of the source table
// (CREATE TABLE ... LIKE) and fills it with the rows for `date`.
resources_list.forEach((resource) => {
  operate(`test_table ${resource.datasetId}_${resource.tableId}`, {
    disabled: !constants.is_dev_env // enabled when workflow variable env_name = "dev"
  }).tags([
    "test_tables"
  ]).queries(() => `
CREATE SCHEMA IF NOT EXISTS ${resource.datasetId}_dev;
DROP TABLE IF EXISTS ${resource.datasetId}_dev.dev_${resource.tableId};
CREATE TABLE ${resource.datasetId}_dev.dev_${resource.tableId}
LIKE httparchive.${resource.datasetId}.${resource.tableId};
INSERT INTO ${resource.datasetId}_dev.dev_${resource.tableId}
SELECT *
FROM httparchive.${resource.datasetId}.${resource.tableId} ${constants.dev_TABLESAMPLE}
WHERE date = '${date}'
`);
});
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,120 @@ | ||
// Preparation step for the "all_requests_stable" tag: ensures the `all_dev`
// schema exists, then drops and recreates an empty `all_dev.requests_stable`
// table. The table is partitioned by `date`, clustered by
// client / is_root_page / type / rank, and requires a partition filter.
operate(`all_requests_stable_pre`)
  .tags(["all_requests_stable"])
  .queries(`
CREATE SCHEMA IF NOT EXISTS all_dev;
DROP TABLE IF EXISTS \`all_dev.requests_stable\`;
CREATE TABLE \`all_dev.requests_stable\`
(
date DATE NOT NULL OPTIONS(description="YYYY-MM-DD format of the HTTP Archive monthly crawl"),
client STRING NOT NULL OPTIONS(description="Test environment: desktop or mobile"),
page STRING NOT NULL OPTIONS(description="The URL of the page being tested"),
is_root_page BOOL OPTIONS(description="Whether the page is the root of the origin."),
root_page STRING NOT NULL OPTIONS(description="The URL of the root page being tested"),
rank INT64 OPTIONS(description="Site popularity rank, from CrUX"),
url STRING NOT NULL OPTIONS(description="The URL of the request"),
is_main_document BOOL NOT NULL OPTIONS(description="Whether this request corresponds with the main HTML document of the page, which is the first HTML request after redirects"),
type STRING OPTIONS(description="Simplified description of the type of resource (script, html, css, text, other, etc)"),
index INT64 OPTIONS(description="The sequential 0-based index of the request"),
payload JSON OPTIONS(description="JSON-encoded WebPageTest result data for this request"),
summary JSON OPTIONS(description="JSON-encoded summarization of request data"),
request_headers ARRAY<STRUCT<
name STRING OPTIONS(description="Request header name"),
value STRING OPTIONS(description="Request header value")
>> OPTIONS(description="Request headers"),
response_headers ARRAY<STRUCT<
name STRING OPTIONS(description="Response header name"),
value STRING OPTIONS(description="Response header value")
>> OPTIONS(description="Response headers"),
response_body STRING OPTIONS(description="Text-based response body")
)
PARTITION BY date
CLUSTER BY client, is_root_page, type, rank
OPTIONS(
require_partition_filter=true
);
`);
|
||
// Work list of { month, client } pairs, newest month first. Within a month
// the clients keep the order of `constants.clients`.
const iterations = [];
const clients = constants.clients;

{
  // Walk back one month at a time from the current month down to the cutoff
  // (inclusive). The comparison is a plain string comparison.
  // NOTE(review): the cutoff carried a `2022-07-01` note — presumably the
  // intended start of the full backfill; confirm before widening the range.
  let month = constants.current_month;
  while (month >= '2024-09-01') {
    const currentMonth = month;
    clients.forEach((client) => {
      iterations.push({ month: currentMonth, client });
    });
    month = constants.fn_past_month(month);
  }
}
|
||
// Register one insert operation per { month, client } entry of `iterations`.
// The operations are chained through their dependencies: the first one waits
// for "all_requests_stable_pre", every later one waits for its predecessor in
// `iterations`, so they cannot run concurrently.
iterations.forEach((iteration, i) => {
  const previous = iterations[i - 1];
  const dependency = i === 0
    ? "all_requests_stable_pre"
    : `all_requests_stable ${previous.month} ${previous.client}`;

  // The CrUX table is filtered on the month *before* the crawl month, written
  // as an unquoted YYYYMM value (e.g. '2024-09-01' -> 202408).
  const cruxYyyymm = constants.fn_past_month(iteration.month).substring(0, 7).replace('-', '');

  operate(`all_requests_stable ${iteration.month} ${iteration.client}`).tags(
    ["all_requests_stable"]
  ).dependencies([
    dependency
  ]).queries(ctx => `
INSERT INTO \`all_dev.requests_stable\`
SELECT
requests.date,
requests.client,
requests.page,
requests.is_root_page,
requests.root_page,
crux.rank,
requests.url,
requests.is_main_document,
requests.type,
requests.index,
JSON_REMOVE(
SAFE.PARSE_JSON(requests.payload, wide_number_mode => 'round'),
'$._headers'
) AS payload,
JSON_REMOVE(
SAFE.PARSE_JSON(requests.summary, wide_number_mode => 'round'),
'$.firstHtml',
'$.firstReq',
'$.req_accept_encoding',
'$.req_accept_language',
'$.req_accept',
'$.req_if_modified_since',
'$.req_if_none_match',
'$.req_referer',
'$.req_user_agent',
'$.reqOtherHeaders',
'$.requestid',
'$.resp_age',
'$.resp_cache_control',
'$.resp_content_length',
'$.resp_content_type',
'$.resp_date',
'$.resp_etag',
'$.resp_last_modified',
'$.resp_server',
'$.resp_vary',
'$.respOtherHeaders',
'$.startedDateTime',
'$.url',
'$.urlShort'
) as summary,
requests.request_headers,
requests.response_headers,
requests.response_body
FROM (
SELECT *
FROM \`all.requests\` ${constants.dev_TABLESAMPLE}
WHERE date = '${iteration.month}'
AND client = '${iteration.client}') AS requests
LEFT JOIN (
SELECT DISTINCT
CONCAT(origin, '/') AS page,
experimental.popularity.rank AS rank
FROM ${ctx.resolve("chrome-ux-report", "experimental", "global")}
WHERE yyyymm = ${cruxYyyymm}
) AS crux
ON requests.root_page = crux.page;
`);
});
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters