|
| 1 | +import datetime |
| 2 | +import logging |
| 3 | +import time |
| 4 | +from functools import partial |
| 5 | +from uuid import uuid4 |
| 6 | + |
| 7 | +import requests |
| 8 | +from django.conf import settings |
| 9 | +from google.protobuf.struct_pb2 import Struct |
| 10 | +from google.protobuf.timestamp_pb2 import Timestamp |
| 11 | +from requests import Response |
| 12 | +from requests.exceptions import RequestException |
| 13 | +from sentry_protos.conduit.v1alpha.publish_pb2 import Phase, PublishRequest |
| 14 | + |
| 15 | +from sentry.silo.base import SiloMode |
| 16 | +from sentry.tasks.base import instrumented_task |
| 17 | +from sentry.taskworker.namespaces import conduit_tasks |
| 18 | +from sentry.utils import jwt |
| 19 | +from sentry.utils.retries import ConditionalRetryPolicy, exponential_delay |
| 20 | + |
| 21 | +logger = logging.getLogger(__name__) |
| 22 | + |
| 23 | +PUBLISH_REQUEST_TIMEOUT_SECONDS = 5 |
| 24 | +PUBLISH_REQUEST_MAX_RETRIES = 5 |
| 25 | +NUM_DELTAS = 100 |
| 26 | +SEND_INTERVAL_SECONDS = 0.15 |
| 27 | +JWT_EXPIRATION_SECONDS = 300 # 5 minutes |
| 28 | +TASK_PROCESSING_DEADLINE_SECONDS = 60 * 3 # 3 minutes |
| 29 | + |
| 30 | + |
| 31 | +@instrumented_task( |
| 32 | + name="sentry.conduit.tasks.stream_demo_data", |
| 33 | + namespace=conduit_tasks, |
| 34 | + processing_deadline_duration=TASK_PROCESSING_DEADLINE_SECONDS, |
| 35 | + silo_mode=SiloMode.REGION, |
| 36 | +) |
| 37 | +def stream_demo_data(org_id: int, channel_id: str) -> None: |
| 38 | + """Asynchronously stream data to Conduit.""" |
| 39 | + token = generate_jwt(subject="demo") |
| 40 | + logger.info( |
| 41 | + "conduit.stream_demo_data.started", extra={"org_id": org_id, "channel_id": channel_id} |
| 42 | + ) |
| 43 | + sequence = 0 |
| 44 | + start_publish_request = PublishRequest( |
| 45 | + channel_id=channel_id, |
| 46 | + message_id=str(uuid4()), |
| 47 | + sequence=sequence, |
| 48 | + client_timestamp=get_timestamp(), |
| 49 | + phase=Phase.PHASE_START, |
| 50 | + ) |
| 51 | + publish_data(org_id, start_publish_request, token) |
| 52 | + sequence += 1 |
| 53 | + |
| 54 | + for i in range(NUM_DELTAS): |
| 55 | + payload = Struct() |
| 56 | + payload.update({"value": str(i)}) |
| 57 | + delta_publish_request = PublishRequest( |
| 58 | + channel_id=channel_id, |
| 59 | + message_id=str(uuid4()), |
| 60 | + sequence=sequence, |
| 61 | + client_timestamp=get_timestamp(), |
| 62 | + phase=Phase.PHASE_DELTA, |
| 63 | + payload=payload, |
| 64 | + ) |
| 65 | + publish_data(org_id, delta_publish_request, token) |
| 66 | + sequence += 1 |
| 67 | + time.sleep(SEND_INTERVAL_SECONDS) |
| 68 | + |
| 69 | + end_publish_request = PublishRequest( |
| 70 | + channel_id=channel_id, |
| 71 | + message_id=str(uuid4()), |
| 72 | + sequence=sequence, |
| 73 | + client_timestamp=get_timestamp(), |
| 74 | + phase=Phase.PHASE_END, |
| 75 | + ) |
| 76 | + publish_data(org_id, end_publish_request, token) |
| 77 | + logger.info( |
| 78 | + "conduit.stream_demo_data.ended", extra={"org_id": org_id, "channel_id": channel_id} |
| 79 | + ) |
| 80 | + |
| 81 | + |
| 82 | +def generate_jwt( |
| 83 | + subject: str, issuer: str | None = None, audience: str | None = None, secret: str | None = None |
| 84 | +) -> str: |
| 85 | + """ |
| 86 | + Generate a JWT token for the Conduit publish API. |
| 87 | +
|
| 88 | + Uses HS256 algorithm with a 5 minute expiration. |
| 89 | + """ |
| 90 | + if issuer is None: |
| 91 | + issuer = settings.CONDUIT_PUBLISH_JWT_ISSUER |
| 92 | + if audience is None: |
| 93 | + audience = settings.CONDUIT_PUBLISH_JWT_AUDIENCE |
| 94 | + if secret is None: |
| 95 | + secret = settings.CONDUIT_PUBLISH_SECRET |
| 96 | + if secret is None: |
| 97 | + raise ValueError("CONDUIT_PUBLISH_SECRET not configured") |
| 98 | + claims = { |
| 99 | + "sub": subject, |
| 100 | + "iss": issuer, |
| 101 | + "aud": audience, |
| 102 | + "exp": int(time.time()) + JWT_EXPIRATION_SECONDS, |
| 103 | + } |
| 104 | + return jwt.encode(claims, secret, algorithm="HS256") |
| 105 | + |
| 106 | + |
| 107 | +def should_retry_publish(attempt: int, exception: Exception) -> bool: |
| 108 | + return attempt < PUBLISH_REQUEST_MAX_RETRIES and isinstance(exception, RequestException) |
| 109 | + |
| 110 | + |
| 111 | +publish_retry_policy = ConditionalRetryPolicy( |
| 112 | + should_retry_publish, |
| 113 | + exponential_delay(0.5), |
| 114 | +) |
| 115 | + |
| 116 | + |
| 117 | +def publish_data( |
| 118 | + org_id: int, |
| 119 | + publish_request: PublishRequest, |
| 120 | + token: str, |
| 121 | + publish_url: str | None = None, |
| 122 | +) -> Response: |
| 123 | + """ |
| 124 | + Publish a protobuf message to Conduit with retries. |
| 125 | +
|
| 126 | + Retries up to 5 times with exponential backoff. |
| 127 | + """ |
| 128 | + if publish_url is None: |
| 129 | + publish_url = settings.CONDUIT_PUBLISH_URL |
| 130 | + return publish_retry_policy( |
| 131 | + partial( |
| 132 | + _do_publish, |
| 133 | + org_id=org_id, |
| 134 | + publish_request=publish_request, |
| 135 | + token=token, |
| 136 | + publish_url=publish_url, |
| 137 | + ) |
| 138 | + ) |
| 139 | + |
| 140 | + |
| 141 | +def _do_publish( |
| 142 | + org_id: int, |
| 143 | + publish_request: PublishRequest, |
| 144 | + token: str, |
| 145 | + publish_url: str, |
| 146 | +) -> Response: |
| 147 | + response = requests.post( |
| 148 | + url=f"{publish_url}/publish/{org_id}/{publish_request.channel_id}", |
| 149 | + headers={ |
| 150 | + "Authorization": f"Bearer {token}", |
| 151 | + "Content-Type": "application/x-protobuf", |
| 152 | + }, |
| 153 | + data=publish_request.SerializeToString(), |
| 154 | + timeout=PUBLISH_REQUEST_TIMEOUT_SECONDS, |
| 155 | + ) |
| 156 | + response.raise_for_status() |
| 157 | + return response |
| 158 | + |
| 159 | + |
| 160 | +def get_timestamp(dt: datetime.datetime | None = None) -> Timestamp: |
| 161 | + if dt is None: |
| 162 | + dt = datetime.datetime.now(datetime.UTC) |
| 163 | + timestamp = Timestamp() |
| 164 | + timestamp.FromDatetime(dt) |
| 165 | + return timestamp |
0 commit comments