Skip to content

Commit 39fa127

Browse files
authored
Podigee total numbers (#101)
* Improve debug logging for Podigee refresh token usage, truncating token display for security
* Add endpoint for Podigee total numbers of podcasts and episodes (fixes #100)
* Refactor date handling for podcast metadata and analytics; improve date conversion utility
* Fix environment variable name for Podigee access token in .env.sample
1 parent 69c387c commit 39fa127

File tree

3 files changed

+193
-45
lines changed

3 files changed

+193
-45
lines changed

connector_manager/manager/podigee_connector.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -83,7 +83,7 @@ def handle_podigee_refresh(db_connection, account_id, source_name, source_access
8383

8484
# Get refresh token from source_access_keys
8585
refresh_token = source_access_keys.get("PODIGEE_REFRESH_TOKEN")
86-
logger.debug(f"Using refresh token: {refresh_token}")
86+
logger.debug(f"Using refresh token: {refresh_token[:10]}... for {pod_name}")
8787
if not client_id or not client_secret or not refresh_token:
8888
logger.error(f"Missing required OAuth credentials for Podigee: {pod_name}")
8989
return None

podigee/.env.sample

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,4 +5,4 @@ PODIGEE_USERNAME="username"
55
PODIGEE_PASSWORD="password"
66
PODCAST_ID=12345
77

8-
OPENPODCAST_ACCESS_TOKEN=your_access_token_here
8+
PODIGEE_ACCESS_TOKEN=your_access_token_here

podigee/job/__main__.py

Lines changed: 191 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66

77
from queue import Queue
88
from datetime import datetime, timedelta
9+
import calendar
910

1011
from job.fetch_params import FetchParams
1112
from job.worker import worker
@@ -43,8 +44,10 @@
4344

4445
# Start- and end-date for the data we want to fetch
4546
# Load from environment variable if set, otherwise set to defaults
47+
# Podigee default is last 30 days
48+
TODAY_DATE = dt.datetime.now()
4649
START_DATE = load_env(
47-
"START_DATE", (dt.datetime.now() - dt.timedelta(days=30)
50+
"START_DATE", (dt.datetime.now() - dt.timedelta(days=31)
4851
).strftime("%Y-%m-%d")
4952
)
5053
END_DATE = load_env(
@@ -130,6 +133,9 @@
130133

131134
# Extract and validate podcast title
132135
podcast_title = podcast.get("title")
136+
# published at format is "2022-01-25T22:19:42Z"
137+
podcast_published_at = datetime.fromisoformat(podcast.get("published_at").replace("Z", "+00:00"))
138+
133139
if not podcast_title:
134140
logger.error(f"Podcast with ID {PODCAST_ID} has no title")
135141
exit(1)
@@ -149,6 +155,36 @@
149155
exit(1)
150156

151157

158+
def get_date_string(date_obj):
159+
"""
160+
Convert date object to string if needed, or return string as-is.
161+
"""
162+
if isinstance(date_obj, str):
163+
return date_obj
164+
elif isinstance(date_obj, datetime):
165+
return date_obj.strftime("%Y-%m-%d")
166+
elif hasattr(date_obj, 'strftime'): # handles date objects too
167+
return date_obj.strftime("%Y-%m-%d")
168+
else:
169+
return str(date_obj)
170+
171+
172+
def extract_date_str_from_iso(iso_string):
173+
"""
174+
Extract date string (YYYY-MM-DD) from ISO datetime string.
175+
Since Podigee always sends UTC timestamps with 'Z', this preserves the UTC date.
176+
"""
177+
if not iso_string:
178+
return ""
179+
try:
180+
# Python 3.11+ handles 'Z' suffix directly
181+
dt = datetime.fromisoformat(iso_string)
182+
return dt.strftime("%Y-%m-%d")
183+
except (ValueError, AttributeError):
184+
# Fallback to split method if parsing fails
185+
return iso_string.split("T")[0] if "T" in iso_string else iso_string
186+
187+
152188
def get_request_lambda(f, *args, **kwargs):
153189
"""
154190
Capture arguments in the closure so we can use them later in the call
@@ -165,19 +201,89 @@ def get_podcast_metadata():
165201
"name": podcast_title
166202
}
167203

204+
def get_end_date_on_granularity(granularity, start_date):
205+
"""
206+
Get end date based on granularity and start date.
207+
Returns a string in YYYY-MM-DD format.
208+
"""
209+
if granularity == "day":
210+
return get_date_string(start_date)
211+
elif granularity == "month":
212+
# Convert to datetime object if needed
213+
if isinstance(start_date, str):
214+
date_obj = datetime.strptime(start_date, "%Y-%m-%d")
215+
else:
216+
date_obj = start_date
217+
218+
# Get last day of the month
219+
last_day = calendar.monthrange(date_obj.year, date_obj.month)[1]
220+
end_date = date_obj.replace(day=last_day)
221+
return get_date_string(end_date)
222+
return get_date_string(start_date)
223+
224+
def transform_podigee_podcast_overview(overview_data):
225+
"""
226+
Transform Podigee podcast overview data to OpenPodcast format.
227+
Format is {"published_episodes_count":0,
228+
"audio_published_minutes":0.0,
229+
"unique_listeners_number":305,
230+
"unique_subscribers_number":283,
231+
"mean_audio_published_minutes":0,
232+
"mean_episode_download":8.897435897435898,
233+
"total_downloads":694.0,
234+
"meta":{"from":"2025-08-01T00:00:00.000Z","to":"2025-08-31T23:59:59.999Z"}
235+
"""
236+
237+
if not overview_data or "meta" not in overview_data:
238+
logger.error(f"Invalid overview data structure: {overview_data}")
239+
return {"metrics": []}
240+
241+
metrics = []
242+
243+
if "unique_listeners_number" in overview_data:
244+
metrics.append({
245+
"start": extract_date_str_from_iso(overview_data["meta"]["from"]),
246+
"end": extract_date_str_from_iso(overview_data["meta"]["to"]),
247+
"dimension": "listeners",
248+
"subdimension": "unique",
249+
"value": overview_data["unique_listeners_number"]
250+
})
251+
if "unique_subscribers_number" in overview_data:
252+
metrics.append({
253+
"start": extract_date_str_from_iso(overview_data["meta"]["from"]),
254+
"end": extract_date_str_from_iso(overview_data["meta"]["to"]),
255+
"dimension": "subscribers",
256+
"subdimension": "unique",
257+
"value": overview_data["unique_subscribers_number"]
258+
})
259+
if "total_downloads" in overview_data:
260+
metrics.append({
261+
"start": extract_date_str_from_iso(overview_data["meta"]["from"]),
262+
"end": extract_date_str_from_iso(overview_data["meta"]["to"]),
263+
"dimension": "downloads",
264+
"subdimension": "downloads",
265+
"value": overview_data["total_downloads"]
266+
})
267+
268+
if not metrics:
269+
logger.warning(f"No valid metrics found in overview data: {overview_data}")
270+
271+
return {"metrics": metrics}
272+
168273

169-
def transform_podigee_analytics_to_metrics(analytics_data):
274+
def transform_podigee_analytics_to_metrics(analytics_data, store_downloads_only=False):
170275
"""
171276
Transform Podigee analytics data to OpenPodcast metrics format.
172277
Expected format: {"metrics": [{"start": "date", "end": "date", "dimension": "string", "subdimension": "string", "value": number}]}
173278
"""
174279
if not analytics_data or "objects" not in analytics_data:
175280
return {"metrics": []}
176-
281+
282+
aggregation_granularity = analytics_data.get("meta", {}).get("aggregation_granularity", "day")
177283
metrics = []
178284

179285
for day_data in analytics_data["objects"]:
180-
date = day_data.get("downloaded_on", "").split("T")[0] # Extract YYYY-MM-DD
286+
date = extract_date_str_from_iso(day_data.get("downloaded_on", ""))
181287
if not date:
182288
continue
183289

@@ -186,44 +292,45 @@ def transform_podigee_analytics_to_metrics(analytics_data):
186292
for download_type, value in day_data["downloads"].items():
187293
metrics.append({
188294
"start": date,
189-
"end": date,
295+
"end": get_end_date_on_granularity(aggregation_granularity, date),
190296
"dimension": "downloads",
191297
"subdimension": download_type,
192298
"value": value
193299
})
194-
195-
# Process platforms
196-
if "platforms" in day_data:
197-
for platform, value in day_data["platforms"].items():
198-
metrics.append({
199-
"start": date,
200-
"end": date,
201-
"dimension": "platforms",
202-
"subdimension": platform,
203-
"value": value
204-
})
205-
206-
# Process clients
207-
if "clients" in day_data:
208-
for client, value in day_data["clients"].items():
209-
metrics.append({
210-
"start": date,
211-
"end": date,
212-
"dimension": "clients",
213-
"subdimension": client,
214-
"value": value
215-
})
216-
217-
# Process sources
218-
if "sources" in day_data:
219-
for source, value in day_data["sources"].items():
220-
metrics.append({
221-
"start": date,
222-
"end": date,
223-
"dimension": "sources",
224-
"subdimension": source,
225-
"value": value
226-
})
300+
301+
if not store_downloads_only:
302+
# Process platforms
303+
if "platforms" in day_data:
304+
for platform, value in day_data["platforms"].items():
305+
metrics.append({
306+
"start": date,
307+
"end": get_end_date_on_granularity(aggregation_granularity, date),
308+
"dimension": "platforms",
309+
"subdimension": platform,
310+
"value": value
311+
})
312+
313+
# Process clients
314+
if "clients" in day_data:
315+
for client, value in day_data["clients"].items():
316+
metrics.append({
317+
"start": date,
318+
"end": get_end_date_on_granularity(aggregation_granularity, date),
319+
"dimension": "clients",
320+
"subdimension": client,
321+
"value": value
322+
})
323+
324+
# Process sources
325+
if "sources" in day_data:
326+
for source, value in day_data["sources"].items():
327+
metrics.append({
328+
"start": date,
329+
"end": get_end_date_on_granularity(aggregation_granularity, date),
330+
"dimension": "sources",
331+
"subdimension": source,
332+
"value": value
333+
})
227334

228335
return {"metrics": metrics}
229336

@@ -236,11 +343,33 @@ def transform_podigee_analytics_to_metrics(analytics_data):
236343
start_date=date_range.start,
237344
end_date=date_range.end,
238345
),
239-
# Podcast metrics - analytics data for the podcast
346+
# Podcast metrics like apps and platforms and downloads per day of last 30 days
347+
FetchParams(
348+
openpodcast_endpoint="metrics",
349+
podigee_call=lambda: transform_podigee_analytics_to_metrics(
350+
podigee.podcast_analytics(PODCAST_ID, start=date_range.start, end=date_range.end),
351+
# we fetch this just every week on Monday and the first day of the month
352+
# daily downloads are stored every day
353+
not (TODAY_DATE.weekday() == 0 or TODAY_DATE.day == 1)
354+
),
355+
start_date=date_range.start,
356+
end_date=date_range.end,
357+
),
358+
# Fetch total downloads since beginning which is returned in months
240359
FetchParams(
241-
openpodcast_endpoint="metrics",
360+
openpodcast_endpoint="metrics",
242361
podigee_call=lambda: transform_podigee_analytics_to_metrics(
243-
podigee.podcast_analytics(PODCAST_ID, start=date_range.start, end=date_range.end)
362+
podigee.podcast_analytics(PODCAST_ID, start=podcast_published_at, end=date_range.end),
363+
store_downloads_only=True
364+
),
365+
start_date=podcast_published_at,
366+
end_date=date_range.end,
367+
),
368+
# Fetch overview metrics for the podcast, endpoint "overview"
369+
FetchParams(
370+
openpodcast_endpoint="metrics",
371+
podigee_call=lambda: transform_podigee_podcast_overview(
372+
podigee.podcast_overview(PODCAST_ID, start=date_range.start, end=date_range.end),
244373
),
245374
start_date=date_range.start,
246375
end_date=date_range.end,
@@ -251,6 +380,9 @@ def transform_podigee_analytics_to_metrics(analytics_data):
251380

252381
for episode in episodes:
253382
print(episode)
383+
episode_published_at_str = extract_date_str_from_iso(episode.get("published_at", ""))
384+
# Convert to datetime object for API calls
385+
episode_published_at = datetime.strptime(episode_published_at_str, "%Y-%m-%d") if episode_published_at_str else date_range.start
254386
endpoints += [
255387
# Episode metadata - basic episode information
256388
FetchParams(
@@ -269,14 +401,30 @@ def transform_podigee_analytics_to_metrics(analytics_data):
269401
openpodcast_endpoint="metrics",
270402
podigee_call=get_request_lambda(
271403
lambda ep_id: transform_podigee_analytics_to_metrics(
272-
podigee.episode_analytics(ep_id, granularity=None, start=date_range.start, end=date_range.end)
273-
),
404+
podigee.episode_analytics(ep_id, granularity=None, start=date_range.start, end=date_range.end),
405+
# for now we just store the downloads and do not store platforms etc. per episode
406+
store_downloads_only=True
407+
),
274408
str(episode["id"])
275409
),
276410
start_date=date_range.start,
277411
end_date=date_range.end,
278412
meta={"episode": str(episode["id"])},
279413
),
414+
# We store the downloads since publication. The Podigee API returns one data point per month.
415+
FetchParams(
416+
openpodcast_endpoint="metrics",
417+
podigee_call=get_request_lambda(
418+
lambda ep_id: transform_podigee_analytics_to_metrics(
419+
podigee.episode_analytics(ep_id, granularity="monthly", start=episode_published_at, end=date_range.end),
420+
store_downloads_only=True
421+
),
422+
str(episode["id"])
423+
),
424+
start_date=episode_published_at,
425+
end_date=date_range.end,
426+
meta={"episode": str(episode["id"])},
427+
)
280428
]
281429

282430
# Create a queue to hold the FetchParams objects

0 commit comments

Comments (0)