-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathgraph_protection.yaml
More file actions
343 lines (277 loc) · 11.6 KB
/
graph_protection.yaml
File metadata and controls
343 lines (277 loc) · 11.6 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
# Graph Protection Configuration Example
#
# This configuration file demonstrates recommended settings for protecting
# knowledge graphs and vector embeddings from unauthorized extraction.
#
# Based on research: "Making Stolen Data Unusable for AI Training" (2026)
# https://www.golem.de/news/schutz-fuer-wissensgraphen-forscher-machen-gestohlene-daten-fuer-ki-unbrauchbar-2601-203870.html
# ============================================================================
# GRAPH PROTECTION SETTINGS
# ============================================================================
graph_protection:
  # Enable graph-specific security features
  enabled: true

  # -------------------------------------------------------------------------
  # ACCESS MONITORING
  # Track and analyze access patterns to detect potential data exfiltration
  # -------------------------------------------------------------------------
  access_monitoring:
    enabled: true

    # Log all graph traversal operations (BFS, DFS, shortest path, etc.)
    track_traversals: true

    # Log all bulk export operations
    track_exports: true

    # Log vector embedding queries
    track_embedding_access: true

    # Enable real-time anomaly detection
    anomaly_detection: true

    # Anomaly detection thresholds
    anomaly_thresholds:
      # Alert if user traverses more than N nodes per minute
      max_nodes_per_minute: 5000

      # Alert if user performs more than N deep traversals per hour
      max_deep_traversals_per_hour: 50

      # Alert if user exports more than N MB per day
      max_export_mb_per_day: 500

      # Alert if user accesses embeddings more than N times per minute
      # NOTE(review): this equals rate_limits.vector_queries_per_minute (100),
      # so if that limit is a hard cap the alert may never fire - confirm.
      max_embedding_queries_per_minute: 100

    # Suspicious pattern detection
    patterns:
      # Detect systematic node enumeration (e.g., sequential PK access)
      detect_enumeration: true

      # Detect exhaustive graph crawling
      detect_crawling: true

      # Detect time-of-day anomalies (e.g., bulk access at 3 AM)
      detect_temporal_anomalies: true

      # Detect geographic anomalies (e.g., access from unusual locations)
      detect_geographic_anomalies: true

  # -------------------------------------------------------------------------
  # RATE LIMITING
  # Restrict query volume and complexity to prevent bulk extraction
  # -------------------------------------------------------------------------
  rate_limits:
    # Graph traversal limits
    max_traversal_depth: 5  # Maximum BFS/DFS depth
    max_nodes_per_query: 1000  # Maximum nodes returned per query
    max_edges_per_query: 10000  # Maximum edges returned per query

    # Vector search limits
    max_embeddings_per_query: 500  # Maximum embeddings per KNN search
    max_vector_dimensions: 1536  # Maximum embedding dimensions

    # Query frequency limits (per user)
    # NOTE(review): vector_queries_per_minute (100) is higher than the general
    # queries_per_minute (50); confirm whether the general limit caps all
    # query types or only applies to queries without a specific limit.
    queries_per_minute: 50  # General query limit
    graph_queries_per_minute: 30  # Graph-specific queries
    vector_queries_per_minute: 100  # Vector search queries
    traversals_per_hour: 200  # Deep traversal operations

    # Bulk operation limits
    bulk_operations_per_day: 10  # Maximum bulk exports per day

    # Concurrent query limits
    max_concurrent_queries: 5  # Per user

    # Time-window based limits
    hourly_node_access_limit: 50000  # Nodes per hour
    daily_data_transfer_limit_mb: 1000  # MB per day

  # -------------------------------------------------------------------------
  # EXPORT CONTROLS
  # Restrict and monitor bulk data exports
  # -------------------------------------------------------------------------
  export_controls:
    # Disable bulk export by default (enable only for approved users)
    bulk_export_enabled: false

    # Require manual approval for large exports
    require_approval: true
    approval_threshold_mb: 100  # Exports > 100 MB require approval

    # Maximum export size limits
    max_export_size_mb: 500  # Hard limit per export
    max_export_nodes: 100000  # Maximum nodes per export
    max_export_edges: 500000  # Maximum edges per export

    # Export audit trail
    audit_all_exports: true
    include_export_hash: true  # Hash exported data for tracking

    # Watermarking for exports (Phase 2 - optional)
    watermark_exports: false  # Enable after watermarking implemented

    # Export scheduling (restrict to business hours)
    restrict_to_business_hours: false
    # Times are quoted so they stay strings instead of being read as
    # sexagesimal integers by YAML 1.1 parsers.
    business_hours:
      start: "08:00"
      end: "18:00"
      timezone: "UTC"

  # -------------------------------------------------------------------------
  # GRAPH WATERMARKING (Phase 2 - Future Feature)
  # Embed imperceptible marks in graph structure for theft detection
  # -------------------------------------------------------------------------
  watermarking:
    # Enable graph watermarking (requires implementation)
    enabled: false

    # Watermark strength (low, medium, high)
    # Higher strength = more robust but potentially more detectable
    strength: medium

    # Watermark method
    # Options: edge_perturbation, dummy_nodes, weight_modification
    method: edge_perturbation

    # Watermark key rotation
    key_rotation_days: 90

    # Watermark verification
    verify_on_export: true
    verify_on_query: false  # May impact performance

  # -------------------------------------------------------------------------
  # EMBEDDING PROTECTION (Phase 2 - Future Feature)
  # Add imperceptible noise to embeddings for fingerprinting
  # -------------------------------------------------------------------------
  embedding_protection:
    # Enable embedding fingerprinting (requires implementation)
    enabled: false

    # Noise magnitude (0.0 - 1.0)
    # Lower values = less impact on search quality
    noise_magnitude: 0.01

    # Noise method
    # Options: deterministic_gaussian, laplace, uniform
    method: deterministic_gaussian

    # Fingerprint verification
    verify_on_access: false  # May impact performance

    # Secret key management
    key_rotation_days: 90
    use_hsm: true  # Use Hardware Security Module if available

  # -------------------------------------------------------------------------
  # DIFFERENTIAL PRIVACY (Phase 3 - Future Feature)
  # Add noise to aggregations for privacy preservation
  # -------------------------------------------------------------------------
  differential_privacy:
    # Enable differential privacy (requires implementation)
    enabled: false

    # Privacy parameters
    epsilon: 1.0  # Privacy budget (lower = more private)
    # Written with a decimal point: YAML 1.1 resolvers (e.g. PyYAML) only
    # recognise floats that contain a ".", so a bare 1e-5 loads as a string.
    delta: 1.0e-5  # Failure probability

    # Apply to specific query types
    apply_to_aggregations: true
    apply_to_counts: true
    apply_to_statistics: true

    # Noise mechanism
    mechanism: laplace  # Options: laplace, gaussian

    # Privacy budget management
    budget_per_user_per_day: 10.0
    budget_reset_interval_hours: 24
# ============================================================================
# INTEGRATION WITH EXISTING SECURITY FEATURES
# ============================================================================
security:
  # -------------------------------------------------------------------------
  # RBAC INTEGRATION
  # Define graph-specific permissions
  # -------------------------------------------------------------------------
  rbac:
    enabled: true

    # Example roles with graph permissions
    # NOTE(review): some roles set max_depth above
    # graph_protection.rate_limits.max_traversal_depth (5); confirm whether a
    # role's max_depth overrides the global limit or is clamped by it.
    roles:
      - name: data_viewer
        graph_permissions:
          read: true
          traverse: true
          max_depth: 3
          export: false

      - name: data_analyst
        graph_permissions:
          read: true
          traverse: true
          max_depth: 5
          export: true
          export_approval_required: true

      - name: data_scientist
        graph_permissions:
          read: true
          traverse: true
          max_depth: 10
          export: true
          bulk_export: true
          export_approval_required: false

      - name: admin
        graph_permissions:
          read: true
          write: true
          traverse: true
          max_depth: -1  # unlimited
          export: true
          bulk_export: true
          configure_protection: true

  # -------------------------------------------------------------------------
  # AUDIT LOGGING
  # Enhanced logging for graph operations
  # -------------------------------------------------------------------------
  audit:
    enabled: true

    # Graph-specific events
    log_events:
      - GRAPH_TRAVERSAL
      - BULK_NODE_ACCESS
      - BULK_EDGE_ACCESS
      - EMBEDDING_EXPORT
      - GRAPH_EXPORT
      - TEMPORAL_QUERY
      - ANOMALY_DETECTED

    # Include detailed metadata
    include_query_details: true
    include_access_patterns: true
    include_data_volume: true

    # Retention
    retention_days: 365

    # SIEM integration
    siem_enabled: true

  # -------------------------------------------------------------------------
  # ENCRYPTION
  # Protect data at rest and in transit
  # -------------------------------------------------------------------------
  encryption:
    # Field-level encryption for sensitive graph attributes
    field_encryption: true

    # Vector embedding encryption
    vector_encryption: true

    # Encrypt audit logs
    audit_log_encryption: true
# ============================================================================
# MONITORING & ALERTING
# ============================================================================
monitoring:
  # Prometheus metrics
  prometheus:
    enabled: true

    # Graph-specific metrics
    metrics:
      - themis_graph_traversal_depth
      - themis_graph_nodes_accessed
      - themis_graph_edges_accessed
      - themis_embeddings_queried
      - themis_graph_exports_total
      - themis_graph_anomalies_detected

  # Alert rules
  # NOTE(review): placed as a sibling of `prometheus`; move under it if the
  # consumer expects `monitoring.prometheus.alerts` - confirm against schema.
  # Conditions are single-quoted because they contain `{`, `"` and `[`; the
  # parsed string values are unchanged.
  alerts:
    - name: SuspiciousGraphTraversal
      condition: 'rate(themis_graph_traversal_depth_bucket{le="10"}[5m]) > 10'
      severity: warning
      description: "Unusual deep graph traversal detected"

    # NOTE(review): themis_graph_nodes_exported is not in the metrics list
    # above (which has themis_graph_exports_total); confirm the metric name.
    - name: BulkGraphExport
      condition: 'rate(themis_graph_nodes_exported[5m]) > 1000'
      severity: critical
      description: "Large-scale graph export detected"

    - name: EmbeddingTheft
      condition: 'rate(themis_embeddings_queried[5m]) > 500'
      severity: warning
      description: "Suspicious embedding access pattern"

    # NOTE(review): `high` is the only severity here that is not
    # warning/critical; confirm the alert router recognises it.
    - name: GraphAnomalyDetected
      condition: 'themis_graph_anomalies_detected > 0'
      severity: high
      description: "Anomalous graph access pattern detected"
# ============================================================================
# DEPLOYMENT RECOMMENDATIONS
# ============================================================================
# For production deployments:
# 1. Start with conservative limits and adjust based on legitimate usage
# 2. Enable access monitoring and review logs regularly
# 3. Configure alerts for suspicious activities
# 4. Conduct regular security audits
# 5. Plan for Phase 2 features (watermarking, fingerprinting) if needed
# 6. Document approved export workflows
# 7. Train users on security best practices
# For high-security environments:
# 1. Enable all monitoring features
# 2. Set strict rate limits
# 3. Require approval for all exports
# 4. Implement watermarking (Phase 2)
# 5. Enable differential privacy (Phase 3) for aggregations
# 6. Use HSM for key management
# 7. Deploy in an air-gapped network if possible