-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy path03_create_acquisition_subgraph.py
138 lines (123 loc) · 3.38 KB
/
03_create_acquisition_subgraph.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
"""
Create a subgraph of companies that have participated in acquisition events.
"""
import kuzu
import polars as pl
from yfiles_jupyter_graphs_for_kuzu import KuzuGraphWidget
DB_PATH = "ex_kuzu_db"
db = kuzu.Database(DB_PATH)
conn = kuzu.Connection(db)
# yfiles_jupyter_graphs_for_kuzu widget for interactive graph visualization
g = KuzuGraphWidget(conn)
acquisitions_df = pl.read_json("data/acquisitions.json")
# NODE: Article
conn.execute(
"""
LOAD FROM acquisitions_df
WITH DISTINCT id, url, title, CAST(date AS DATE) AS date
MERGE (a:Article {id: id})
SET a.url = url, a.title = title, a.date = date
"""
)
# NODE: TimePeriod
conn.execute(
"""
LOAD FROM acquisitions_df
WITH DISTINCT time_period
MERGE (:TimePeriod {month_year: time_period})
"""
)
# NODE: Commodity
conn.execute(
"""
LOAD FROM acquisitions_df
UNWIND commodities AS commodity
WITH DISTINCT commodity
MERGE (:Commodity {name: commodity})
"""
)
# NODE: Company
res = conn.execute(
"""
LOAD FROM acquisitions_df
UNWIND [parent_company, child_company] AS company
WITH DISTINCT company
WHERE company IS NOT NULL
MERGE (:Company {name: company})
"""
)
# Node: Country
conn.execute(
"""
LOAD FROM acquisitions_df
UNWIND [parent_company_country, child_company_country] AS country
WITH DISTINCT country
WHERE country <> "Unknown"
MERGE (:Country {name: country})
"""
)
# Relationships
conn.execute(
"""
LOAD FROM acquisitions_df
UNWIND commodities AS commodity
MATCH (a:Article {id: id}),
(ct:Commodity {name: commodity}),
(c1:Company {name: parent_company}),
(c2:Company {name: child_company}),
(tp:TimePeriod {month_year: time_period})
MERGE (a)-[:MENTIONS_COMMODITY]->(ct)
MERGE (a)-[:MENTIONS_COMPANY]->(c1)
MERGE (a)-[:MENTIONS_COMPANY]->(c2)
MERGE (c1)-[:PRODUCES]->(ct)
MERGE (c2)-[:PRODUCES]->(ct)
MERGE (tp)-[:CONTAINS]->(a)
"""
)
# Country parent company-country relationships
conn.execute(
"""
LOAD FROM acquisitions_df
MATCH (c1:Company {name: parent_company}),
(parent_country:Country {name: parent_company_country})
WHERE parent_company_country <> "Unknown"
MERGE (c1)-[:IS_FROM_COUNTRY]->(parent_country)
"""
)
# Country child company-country relationships
conn.execute(
"""
LOAD FROM acquisitions_df
MATCH (c2:Company {name: child_company}),
(child_country:Country {name: child_company_country})
WHERE child_company_country <> "Unknown"
MERGE (c2)-[:IS_FROM_COUNTRY]->(child_country)
"""
)
# Acquisition relationships
res = conn.execute(
"""
LOAD FROM acquisitions_df
MATCH (c1:Company {name: parent_company}),
(c2:Company {name: child_company})
MERGE (c2)-[r1:ACQUIRED_BY]->(c1)
SET r1.amount = deal_amount, r1.currency = deal_currency
"""
)
# --- Query the graph ---
# Query: Which companies produce Gold and where are they located?
res = conn.execute(
"""
MATCH (c:Company)-[:PRODUCES]->(ct:Commodity {name: "Gold"}),
(c)-[:IS_FROM_COUNTRY]->(country:Country)
RETURN c.name, country.name
"""
)
print(f"Companies that produce Gold:\n{res.get_as_pl()}") # type: ignore
# Optional: Visualize the graph using the yFiles Jupyter graphs for Kuzu widget
# g.show_cypher(
# """
# MATCH (a)-[b]->(c)
# RETURN *
# """
# )