Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Entity organisation #235

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions digital_land/commands.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@
from digital_land.phase.organisation import OrganisationPhase
from digital_land.phase.parse import ParsePhase
from digital_land.phase.patch import PatchPhase
from digital_land.phase.priority import PriorityPhase
from digital_land.phase.pivot import PivotPhase
from digital_land.phase.prefix import EntityPrefixPhase
from digital_land.phase.prune import FieldPrunePhase, EntityPrunePhase, FactPrunePhase
Expand Down Expand Up @@ -262,6 +263,7 @@ def pipeline_run(
enabled=save_harmonised,
),
EntityPrunePhase(dataset_resource_log=dataset_resource_log),
PriorityPhase(),
PivotPhase(),
FactCombinePhase(issue_log=issue_log, fields=combine_fields),
FactorPhase(),
Expand Down
2 changes: 1 addition & 1 deletion digital_land/package/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -155,7 +155,7 @@ def load_entities(self):
self.execute(
"select entity, field, value from fact"
" where value != '' or field == 'end-date'"
" order by entity, field, entry_date"
" order by entity, field, priority desc, entry_date"
)
results = self.cursor.fetchall()

Expand Down
1 change: 1 addition & 0 deletions digital_land/phase/pivot.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ def process(self, stream):
"field": field,
"value": value,
# entry
"priority": block["priority"],
"resource": block["resource"],
"line-number": block["line-number"],
"entry-number": block["entry-number"],
Expand Down
29 changes: 29 additions & 0 deletions digital_land/phase/priority.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
import sqlite3
from .phase import Phase


class PriorityPhase(Phase):
    """
    Deduce priority of the entry when assembling facts.

    Every block passing through the phase gains a ``priority`` key: 1 when the
    organisation on the block's row equals the organisation recorded for the
    row's entity in the ``entity_organisation`` table, otherwise 2.
    """

    def __init__(self, connection=None):
        """
        Prepare a cursor for the organisation lookups.

        connection -- an open sqlite3 connection to a database containing the
            ``entity_organisation`` table. When omitted, a connection is
            opened on ``var/cache/pipeline.sqlite3`` (relative to the current
            working directory).
        """
        if connection is None:
            connection = sqlite3.connect("var/cache/pipeline.sqlite3")
        self.cursor = connection.cursor()

    def entity_organisation(self, entity):
        """
        Return the organisation whose entity range contains ``entity``.

        Returns None when no range in ``entity_organisation`` contains the
        entity, or when ``entity`` is empty or not a whole number (such a
        value cannot fall inside any numeric range).
        """
        # Coerce to int so the bound value is compared numerically, exactly
        # as the number was when it was written into the SQL text directly.
        try:
            entity = int(entity)
        except (TypeError, ValueError):
            return None

        # Bound parameters instead of string interpolation: malformed or
        # hostile entity values can no longer alter the statement.
        self.cursor.execute(
            "select organisation from entity_organisation"
            " where entity_minimum <= ? and entity_maximum >= ?",
            (entity, entity),
        )
        row = self.cursor.fetchone()
        return row[0] if row else None

    def priority(self, entity, organisation):
        """
        Return 1 when ``organisation`` is the one recorded for ``entity``,
        otherwise 2.
        """
        # NOTE(review): when no range matches and ``organisation`` is None the
        # two sides compare equal and the result is 1 -- confirm that callers
        # never pass None here, or that this outcome is intended.
        return 1 if self.entity_organisation(entity) == organisation else 2

    def process(self, stream):
        """
        Yield each block from ``stream`` with its ``priority`` key set from
        the ``entity`` and ``organisation`` values of the block's row.
        """
        for block in stream:
            row = block["row"]
            block["priority"] = self.priority(row["entity"], row["organisation"])
            yield block
1 change: 1 addition & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ def get_long_description():
"canonicaljson",
"click",
"cchardet",
"dask[dataframe]",
"esridump",
"pandas",
"pyproj",
Expand Down
Loading