Adjustments for new organisation names #26

Open · wants to merge 1 commit into main
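This pull request renames organisation identifiers across the service-report notebooks from the old "local-authority-eng:" prefix to the new "local-authority:" prefix, and reworks the master-report utilities so that organisation names come from a dict passed into produce_output_csv instead of a SQL join. As a minimal sketch of the rename itself (illustrative only: the PR edits the literals in place, and the helper below is hypothetical, not code from this diff):

    # Hypothetical helper, not part of this PR: rewrites an old-style organisation
    # CURIE such as "local-authority-eng:NET" to the new "local-authority:NET" form.
    OLD_PREFIX = "local-authority-eng"
    NEW_PREFIX = "local-authority"

    def migrate_organisation_id(organisation: str) -> str:
        prefix, sep, code = organisation.partition(":")
        if sep and prefix == OLD_PREFIX:
            return f"{NEW_PREFIX}:{code}"
        return organisation  # other prefixes pass through unchanged

    assert migrate_organisation_id("local-authority-eng:NET") == "local-authority:NET"
    assert migrate_organisation_id("local-authority:MDW") == "local-authority:MDW"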
44 changes: 14 additions & 30 deletions service_report/LPA-endpoints-state.ipynb
@@ -32,7 +32,7 @@
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "ee2d101773054bfe81845d7b6288d8fb",
"model_id": "3dd8dd42f1fd4bc8b0631bfaf324bea2",
"version_major": 2,
"version_minor": 0
},
@@ -94,9 +94,9 @@
"\n",
"global organisation_options \n",
"organisation_options = {\n",
" \"All LPA's\":None,\"Newcastle\": \"local-authority-eng:NET\",\"Medway\": \"local-authority-eng:MDW\",\"Lambeth\": \"local-authority-eng:LBH\",\n",
" \"Gloucester\": \"local-authority-eng:GLO\",\"Doncaster\": \"local-authority-eng:DNC\",\"Buckinghamshire\": \"local-authority-eng:BUC\",\"Epsom and Ewell\": \"local-authority-eng:EPS\",\n",
" \"Canterbury\": \"local-authority-eng:CAT\"\n",
" \"All LPA's\":None,\"Newcastle\": \"local-authority:NET\",\"Medway\": \"local-authority:MDW\",\"Lambeth\": \"local-authority:LBH\",\n",
" \"Gloucester\": \"local-authority:GLO\",\"Doncaster\": \"local-authority:DNC\",\"Buckinghamshire\": \"local-authority:BUC\",\"Epsom and Ewell\": \"local-authority:EPS\",\n",
" \"Canterbury\": \"local-authority:CAT\"\n",
" \n",
"}\n",
"global organisation_dropdown\n",
@@ -113,15 +113,7 @@
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"name": "stdin",
"output_type": "stream",
"text": [
"Do you want to download the table with all endpoints? (yes/no): no\n"
]
}
],
"outputs": [],
"source": [
"download = input(\"Do you want to download the table with all endpoints? (yes/no): \")\n",
"\n",
@@ -145,7 +137,7 @@
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "11ca8559ea6c4cb5928e01e70464f0af",
"model_id": "c0af82bcb9ce4db8a15cbb08b0708e08",
"version_major": 2,
"version_minor": 0
},
@@ -193,15 +185,7 @@
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"name": "stdin",
"output_type": "stream",
"text": [
"Do you want to download the table with latest endpoints? (yes/no): no\n"
]
}
],
"outputs": [],
"source": [
"download = input(\"Do you want to download the table with latest endpoints? (yes/no): \")\n",
"\n",
@@ -225,7 +209,7 @@
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "727d8865e9d54f729656e1eff1beca9f",
"model_id": "14cc4f6a68f54949b732220f18b8a952",
"version_major": 2,
"version_minor": 0
},
@@ -257,10 +241,10 @@
"metadata": {},
"outputs": [
{
"name": "stdin",
"name": "stdout",
"output_type": "stream",
"text": [
"Do you want to download the table with erroring endpoints being collected till date? (yes/no): no\n"
"Query result downloaded as 'endpoints_not_200.csv'\n"
]
}
],
@@ -287,7 +271,7 @@
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "f10e7d6716e443069d909ab7ecde7015",
"model_id": "fca39301393542a9b17041a5bd5f01ff",
"version_major": 2,
"version_minor": 0
},
@@ -393,10 +377,10 @@
"metadata": {},
"outputs": [
{
"name": "stdin",
"name": "stdout",
"output_type": "stream",
"text": [
"Do you want to download the table with erroring endpoints being collected till date? (yes/no): no\n"
"Query result downloaded as 'endpoints_not_200_first_4_datasets.csv'\n"
]
}
],
@@ -432,7 +416,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.6"
"version": "3.10.12"
}
},
"nbformat": 4,
2 changes: 1 addition & 1 deletion service_report/brownfield-site-within-lpa-validator.ipynb
@@ -186,7 +186,7 @@
"for lpa in df_lpa.itertuples():\n",
" df_brownfield_sites = get_brownfield_sites_for_organisation(lpa.organisation_entity)\n",
" df_brownfield_sites = df_brownfield_sites.merge(df_lpa, left_on=\"organisation_entity\", right_on=\"organisation_entity\")\n",
" if (\"local-authority-eng\" in lpa.organisation):\n",
" if (\"local-authority\" in lpa.organisation):\n",
" multipol = get_LPA_multipolygon(lpa.statistical_geography)\n",
" if multipol is not None:\n",
" area, issue = parse_wkt(multipol)\n",
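One observation on the change above (a reviewer note, not part of the PR): the membership test is a substring check, and the SQL filters in the notebooks below use like "local-authority%", so both conditions still match any un-migrated old-style identifiers, because the new prefix is a prefix of the old one:

    # Illustrative check only: both identifier styles satisfy the new conditions.
    for org in ("local-authority:NET", "local-authority-eng:NET"):
        assert "local-authority" in org           # mirrors the Python membership test
        assert org.startswith("local-authority")  # mirrors SQL LIKE "local-authority%"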
10 changes: 5 additions & 5 deletions service_report/endpoints_stale.ipynb
@@ -310,7 +310,7 @@
"sql = F\"\"\"\n",
"select organisation, entry_date, source, start_date \n",
"from source \n",
"where collection = \"{collection}\" and documentation_url = \"\" and end_date = \"\" and organisation like \"local-authority-eng%\" \n",
"where collection = \"{collection}\" and documentation_url = \"\" and end_date = \"\" and organisation like \"local-authority%\" \n",
"order by 2 \n",
"\"\"\"\n",
"live_endpoints_no_docs = query_datasette(sql)\n",
@@ -393,7 +393,7 @@
}
],
"source": [
"organisation= \"local-authority-eng:COR\"\n",
"organisation= \"local-authority:COR\"\n",
"\n",
"\n",
"sql = F\"\"\"\n",
@@ -675,7 +675,7 @@
"\tfrom source src \n",
"\tinner join endpoint ep on ep.endpoint = src.endpoint\n",
"\twhere src.collection = \"{collection}\"\n",
" and organisation like \"local-authority-eng:%\"\n",
" and organisation like \"local-authority:%\"\n",
" order by 1, 2\n",
" \"\"\"\n",
"\n",
@@ -1370,7 +1370,7 @@
" inner join endpoint ep on src.endpoint = ep.endpoint\n",
" where src.collection = \"{collection}\"\n",
" and src.organisation not in (SELECT organisation FROM {viewName})\n",
" and src.organisation like \"local-authority-eng%\"\n",
" and src.organisation like \"local-authority%\"\n",
" and ep.end_date != \"\"\n",
" order by src.organisation, ep.start_date DESC NULLS LAST\n",
"\"\"\"\n",
@@ -1495,7 +1495,7 @@
" select distinct src.organisation from source src\n",
" where src.collection = \"{collection}\"\n",
" and src.organisation in (SELECT organisation FROM {viewName})\n",
" and src.organisation like \"local-authority-eng%\"\n",
" and src.organisation like \"local-authority%\"\n",
" order by src.organisation\n",
"\"\"\"\n",
"df = query_datasette(http_sql)\n",
20 changes: 10 additions & 10 deletions service_report/master_report/compliance-to-standard.ipynb
@@ -68,15 +68,15 @@
" print('Input file found. Using', len(organisation_list), 'organisations from input file.')\n",
"else:\n",
" organisation_list = [\n",
" 'local-authority-eng:BUC', \n",
" 'local-authority-eng:DAC', 'local-authority-eng:DNC',\n",
" 'local-authority-eng:GLO', 'local-authority-eng:CMD', 'local-authority-eng:LBH', 'local-authority-eng:SWK',\n",
" 'local-authority-eng:MDW', 'local-authority-eng:NET', 'local-authority-eng:BIR', 'local-authority-eng:CAT',\n",
" 'local-authority-eng:EPS', 'local-authority-eng:BNE', 'local-authority-eng:GAT', 'local-authority-eng:GRY',\n",
" 'local-authority-eng:KTT', 'local-authority-eng:SAL', 'local-authority-eng:TEW', 'local-authority-eng:WBK',\n",
" 'local-authority-eng:DST', 'local-authority-eng:DOV', 'local-authority-eng:LIV', 'local-authority-eng:RDB',\n",
" 'local-authority-eng:WFT', 'local-authority-eng:NLN', 'local-authority-eng:NSM', 'local-authority-eng:SLF',\n",
" 'local-authority-eng:WRL' ]\n",
" 'local-authority:BUC', \n",
" 'local-authority:DAC', 'local-authority:DNC',\n",
" 'local-authority:GLO', 'local-authority:CMD', 'local-authority:LBH', 'local-authority:SWK',\n",
" 'local-authority:MDW', 'local-authority:NET', 'local-authority:BIR', 'local-authority:CAT',\n",
" 'local-authority:EPS', 'local-authority:BNE', 'local-authority:GAT', 'local-authority:GRY',\n",
" 'local-authority:KTT', 'local-authority:SAL', 'local-authority:TEW', 'local-authority:WBK',\n",
" 'local-authority:DST', 'local-authority:DOV', 'local-authority:LIV', 'local-authority:RDB',\n",
" 'local-authority:WFT', 'local-authority:NLN', 'local-authority:NSM', 'local-authority:SLF',\n",
" 'local-authority:WRL' ]\n",
" print('Input file not found. Using default list of organisations.')"
]
},
@@ -266,7 +266,7 @@
" skip_dataset = handle_skip_dataset(same_datasets_df, dataset, row)\n",
" else:\n",
" skip_dataset = False\n",
" # print(organisation, dataset, resource)\n",
" print(organisation, dataset, resource)\n",
" \n",
"\n",
" dataset_field_df = pd.read_csv('https://raw.githubusercontent.com/digital-land/specification/main/specification/dataset-field.csv')\n",
20 changes: 10 additions & 10 deletions service_report/master_report/endpoint_issue_master_report.ipynb
@@ -64,14 +64,14 @@
" organisation_list = input_df['organisation'].tolist()\n",
" print('Input file found. Using', len(organisation_list), 'organisations from input file.')\n",
"else:\n",
" organisation_list = ['local-authority-eng:BUC', 'local-authority-eng:DAC', 'local-authority-eng:DNC',\n",
" 'local-authority-eng:GLO', 'local-authority-eng:CMD', 'local-authority-eng:LBH', 'local-authority-eng:SWK',\n",
" 'local-authority-eng:MDW', 'local-authority-eng:NET', 'local-authority-eng:BIR', 'local-authority-eng:CAT',\n",
" 'local-authority-eng:EPS', 'local-authority-eng:BNE', 'local-authority-eng:GAT', 'local-authority-eng:GRY',\n",
" 'local-authority-eng:KTT', 'local-authority-eng:SAL', 'local-authority-eng:TEW', 'local-authority-eng:WBK',\n",
" 'local-authority-eng:DST', 'local-authority-eng:DOV', 'local-authority-eng:LIV', 'local-authority-eng:RDB',\n",
" 'local-authority-eng:WFT', 'local-authority-eng:NLN', 'local-authority-eng:NSM', 'local-authority-eng:SLF',\n",
" 'local-authority-eng:WRL' ]\n",
" organisation_list = ['local-authority:BUC', 'local-authority:DAC', 'local-authority:DNC',\n",
" 'local-authority:GLO', 'local-authority:CMD', 'local-authority:LBH', 'local-authority:SWK',\n",
" 'local-authority:MDW', 'local-authority:NET', 'local-authority:BIR', 'local-authority:CAT',\n",
" 'local-authority:EPS', 'local-authority:BNE', 'local-authority:GAT', 'local-authority:GRY',\n",
" 'local-authority:KTT', 'local-authority:SAL', 'local-authority:TEW', 'local-authority:WBK',\n",
" 'local-authority:DST', 'local-authority:DOV', 'local-authority:LIV', 'local-authority:RDB',\n",
" 'local-authority:WFT', 'local-authority:NLN', 'local-authority:NSM', 'local-authority:SLF',\n",
" 'local-authority:WRL' ]\n",
" print('Input file not found. Using default list of organisations.')"
]
},
@@ -225,9 +225,9 @@
"outputs": [],
"source": [
"# Create output csv containing endpoints with issues\n",
"has_issues_output_columns = ['name', 'pipelines', 'endpoint_url', 'organisation', 'collection', 'maxentrydate', 'entrydate', 'end_date', 'last_status', 'last_updated_date']\n",
"has_issues_output_columns = ['pipelines', 'endpoint_url', 'organisation', 'collection', 'maxentrydate', 'entrydate', 'end_date', 'last_status', 'last_updated_date']\n",
"\n",
"has_issues_output_df = produce_output_csv(all_orgs_latest_endpoints, organisation_dataset_issues_dict, \"issues\", \"No issues\", has_issues_output_columns)\n",
"has_issues_output_df = produce_output_csv(all_orgs_latest_endpoints, organisation_dataset_issues_dict, \"issues\", \"No issues\", has_issues_output_columns, organisation_name_dict)\n",
"has_issues_output_df.to_csv('endpoint_issues_has_issues.csv', index=False)"
]
}
26 changes: 13 additions & 13 deletions service_report/master_report/endpoint_status_master_report.ipynb
@@ -66,14 +66,14 @@
" organisation_list = input_df['organisation'].tolist()\n",
" print('Input file found. Using', len(organisation_list), 'organisations from input file.')\n",
"else:\n",
" organisation_list = ['local-authority-eng:BUC', 'local-authority-eng:DAC', 'local-authority-eng:DNC',\n",
" 'local-authority-eng:GLO', 'local-authority-eng:CMD', 'local-authority-eng:LBH', 'local-authority-eng:SWK',\n",
" 'local-authority-eng:MDW', 'local-authority-eng:NET', 'local-authority-eng:BIR', 'local-authority-eng:CAT',\n",
" 'local-authority-eng:EPS', 'local-authority-eng:BNE', 'local-authority-eng:GAT', 'local-authority-eng:GRY',\n",
" 'local-authority-eng:KTT', 'local-authority-eng:SAL', 'local-authority-eng:TEW', 'local-authority-eng:WBK',\n",
" 'local-authority-eng:DST', 'local-authority-eng:DOV', 'local-authority-eng:LIV', 'local-authority-eng:RDB',\n",
" 'local-authority-eng:WFT', 'local-authority-eng:NLN', 'local-authority-eng:NSM', 'local-authority-eng:SLF',\n",
" 'local-authority-eng:WRL' ]\n",
" organisation_list = ['local-authority:BUC', 'local-authority:DAC', 'local-authority:DNC',\n",
" 'local-authority:GLO', 'local-authority:CMD', 'local-authority:LBH', 'local-authority:SWK',\n",
" 'local-authority:MDW', 'local-authority:NET', 'local-authority:BIR', 'local-authority:CAT',\n",
" 'local-authority:EPS', 'local-authority:BNE', 'local-authority:GAT', 'local-authority:GRY',\n",
" 'local-authority:KTT', 'local-authority:SAL', 'local-authority:TEW', 'local-authority:WBK',\n",
" 'local-authority:DST', 'local-authority:DOV', 'local-authority:LIV', 'local-authority:RDB',\n",
" 'local-authority:WFT', 'local-authority:NLN', 'local-authority:NSM', 'local-authority:SLF',\n",
" 'local-authority:WRL' ]\n",
" print('Input file not found. Using default list of organisations.')"
]
},
@@ -220,9 +220,9 @@
"outputs": [],
"source": [
"# Create output csv containing endpoints with a status other than 200\n",
"not_200_output_columns = ['name', 'pipelines', 'endpoint_url', 'organisation', 'collection', 'maxentrydate', 'entrydate', 'end_date', 'last_status', 'last_updated_date']\n",
"not_200_output_columns = ['pipelines', 'endpoint_url', 'organisation', 'collection', 'maxentrydate', 'entrydate', 'end_date', 'last_status', 'last_updated_date']\n",
"\n",
"not_200_output_df = produce_output_csv(all_orgs_latest_endpoints, organisation_dataset_statuses_dict, \"status\", 200, not_200_output_columns)\n",
"not_200_output_df = produce_output_csv(all_orgs_latest_endpoints, organisation_dataset_statuses_dict, \"status\", 200, not_200_output_columns, organisation_name_dict)\n",
"not_200_output_df.to_csv('endpoint_status_not_200.csv', index=False)"
]
},
@@ -233,9 +233,9 @@
"outputs": [],
"source": [
"# Create output csv containing endpoints with any status\n",
"all_status_output_columns = ['name', 'pipelines', 'endpoint_url', 'organisation', 'collection', 'maxentrydate', 'entrydate', 'end_date', 'last_status', 'last_updated_date']\n",
"all_status_output_columns = ['pipelines', 'endpoint_url', 'organisation', 'collection', 'maxentrydate', 'entrydate', 'end_date', 'last_status', 'last_updated_date']\n",
"\n",
"all_status_output_df = produce_output_csv(all_orgs_latest_endpoints, organisation_dataset_statuses_dict, \"status\", \"\", all_status_output_columns)\n",
"all_status_output_df = produce_output_csv(all_orgs_latest_endpoints, organisation_dataset_statuses_dict, \"status\", \"\", all_status_output_columns, organisation_name_dict)\n",
"all_status_output_df.to_csv('all_status_output.csv', index=False)"
]
}
@@ -256,7 +256,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.6"
"version": "3.10.12"
}
},
"nbformat": 4,
5 changes: 2 additions & 3 deletions service_report/master_report/master_report_endpoint_utils.py
@@ -18,7 +18,6 @@ def get_endpoints(organisation):
s.collection,
group_concat(DISTINCT sp.pipeline) as pipelines,
s.organisation,
- o.name,
re.resource,
max(l.entry_date) maxentrydate,
max(e.entry_date) entrydate,
@@ -27,7 +26,6 @@
log l
inner join source s on l.endpoint = s.endpoint
inner join resource_endpoint re on l.endpoint = re.endpoint
- inner join organisation o on s.organisation=o.organisation
inner join endpoint e on l.endpoint = e.endpoint
inner join source_pipeline sp on s.source = sp.source
where
@@ -131,14 +129,15 @@ def get_issues_for_resource(resource, dataset):
issues_df = pd.read_csv(url)
return issues_df

- def produce_output_csv(all_orgs_recent_endpoints, organisation_dataset_property_dict, property_name, ignore_property_value, output_columns):
+ def produce_output_csv(all_orgs_recent_endpoints, organisation_dataset_property_dict, property_name, ignore_property_value, output_columns, organisation_name_dict):
rows_list = []
for organisation, dataset_property in organisation_dataset_property_dict.items():
for dataset, property in dataset_property.items():
if property != ignore_property_value:
row = all_orgs_recent_endpoints[organisation][all_orgs_recent_endpoints[organisation]['pipelines'].str.contains(dataset)]
row = row[output_columns]
row['pipelines']=dataset
+ row.insert(0, 'name', organisation_name_dict[organisation])
row.insert(2, property_name, property)
row=row.drop_duplicates(subset='pipelines')
rows_list.append(row)
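The new organisation_name_dict parameter replaces the organisation-table join removed above: the name column is now attached in Python, via row.insert(0, 'name', ...), rather than selected in SQL. A minimal sketch of the caller side, assuming the notebooks build the dict from an organisation lookup table (the lookup source is an assumption and is not shown in this diff):

    # Hypothetical caller-side setup; the real notebooks populate this elsewhere.
    import pandas as pd

    # Assumed organisation lookup with "organisation" and "name" columns,
    # for example the result of a Datasette query against the organisation table.
    org_df = pd.DataFrame({
        "organisation": ["local-authority:NET", "local-authority:MDW"],
        "name": ["Newcastle", "Medway"],
    })
    organisation_name_dict = dict(zip(org_df["organisation"], org_df["name"]))

    # produce_output_csv now takes the dict as its final argument and inserts
    # the organisation name as the first column of each output row itself.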