66
77from queue import Queue
88from datetime import datetime , timedelta
9+ import calendar
910
1011from job .fetch_params import FetchParams
1112from job .worker import worker
4344
4445# Start- and end-date for the data we want to fetch
4546# Load from environment variable if set, otherwise set to defaults
47+ # Podigee default is last 30 days
48+ TODAY_DATE = dt .datetime .now ()
4649START_DATE = load_env (
47- "START_DATE" , (dt .datetime .now () - dt .timedelta (days = 30 )
50+ "START_DATE" , (dt .datetime .now () - dt .timedelta (days = 31 )
4851 ).strftime ("%Y-%m-%d" )
4952)
5053END_DATE = load_env (
130133
131134# Extract and validate podcast title
132135podcast_title = podcast .get ("title" )
136+ # published_at format is "2022-01-25T22:19:42Z"
137+ podcast_published_at = datetime .fromisoformat (podcast .get ("published_at" ).replace ("Z" , "+00:00" ))
138+
133139if not podcast_title :
134140 logger .error (f"Podcast with ID { PODCAST_ID } has no title" )
135141 exit (1 )
149155 exit (1 )
150156
151157
def get_date_string(date_obj):
    """
    Return *date_obj* as a date string.

    Strings are returned unchanged; datetime and date objects (anything
    exposing ``strftime``) are formatted as YYYY-MM-DD; any other value is
    stringified with ``str()``.
    """
    if isinstance(date_obj, str):
        return date_obj
    # The previous explicit `isinstance(date_obj, datetime)` branch was
    # redundant: datetime (and date) objects are covered by this hasattr check.
    if hasattr(date_obj, "strftime"):
        return date_obj.strftime("%Y-%m-%d")
    return str(date_obj)
170+
171+
def extract_date_str_from_iso(iso_string):
    """
    Extract the date portion (YYYY-MM-DD) from an ISO-8601 datetime string.

    Podigee always sends UTC timestamps with a 'Z' suffix
    (e.g. "2022-01-25T22:19:42Z"), so the extracted date is the UTC date.

    Returns "" for falsy input. If parsing fails, falls back to splitting
    on 'T'; a string with no 'T' is returned unchanged.
    """
    if not iso_string:
        return ""
    try:
        # datetime.fromisoformat() accepts the 'Z' suffix on Python 3.11+;
        # on older versions it raises ValueError and we use the fallback.
        # Local renamed from `dt` to avoid shadowing the module-level
        # `dt` alias for the datetime module used elsewhere in this file.
        parsed = datetime.fromisoformat(iso_string)
        return parsed.strftime("%Y-%m-%d")
    except (ValueError, AttributeError):
        # Plain string slicing as a last resort.
        return iso_string.split("T")[0] if "T" in iso_string else iso_string
186+
187+
152188def get_request_lambda (f , * args , ** kwargs ):
153189 """
154190 Capture arguments in the closure so we can use them later in the call
@@ -165,19 +201,89 @@ def get_podcast_metadata():
165201 "name" : podcast_title
166202 }
167203
def get_end_date_on_granularity(granularity, start_date):
    """
    Get the end date of the aggregation period that starts at *start_date*.

    granularity -- aggregation granularity reported by Podigee. Accepts both
                   "month" and "monthly": the episode analytics calls in this
                   file request granularity="monthly", so the API may echo
                   that spelling back in meta.aggregation_granularity, which
                   previously fell through to the day behavior silently.
    start_date  -- YYYY-MM-DD string or a datetime/date object.

    Returns a string in YYYY-MM-DD format. For monthly granularity this is
    the last day of start_date's month; for "day" (and any unrecognized
    granularity) the period is a single point, so end == start.
    """
    if granularity in ("month", "monthly"):
        if isinstance(start_date, str):
            date_obj = datetime.strptime(start_date, "%Y-%m-%d")
        else:
            date_obj = start_date
        # Last day of the month containing start_date.
        last_day = calendar.monthrange(date_obj.year, date_obj.month)[1]
        return date_obj.replace(day=last_day).strftime("%Y-%m-%d")
    return get_date_string(start_date)
223+
def transform_podigee_podcast_overview(overview_data):
    """
    Transform Podigee podcast overview data to OpenPodcast format.

    Expected input shape (example):
    {"published_episodes_count": 0,
     "audio_published_minutes": 0.0,
     "unique_listeners_number": 305,
     "unique_subscribers_number": 283,
     "mean_audio_published_minutes": 0,
     "mean_episode_download": 8.897435897435898,
     "total_downloads": 694.0,
     "meta": {"from": "2025-08-01T00:00:00.000Z", "to": "2025-08-31T23:59:59.999Z"}}

    Returns {"metrics": [...]} with one entry per recognized field; an
    invalid structure yields {"metrics": []}.
    """
    if not overview_data or "meta" not in overview_data:
        logger.error(f"Invalid overview data structure: {overview_data}")
        return {"metrics": []}

    # Extract the reporting period once. Use .get() so a meta object that
    # is missing "from"/"to" degrades to empty dates instead of a KeyError
    # (only the presence of "meta" itself is checked above).
    meta = overview_data["meta"]
    start = extract_date_str_from_iso(meta.get("from", ""))
    end = extract_date_str_from_iso(meta.get("to", ""))

    # Podigee field -> (dimension, subdimension) in OpenPodcast terms.
    # Insertion order preserves the original metric order.
    field_mapping = {
        "unique_listeners_number": ("listeners", "unique"),
        "unique_subscribers_number": ("subscribers", "unique"),
        "total_downloads": ("downloads", "downloads"),
    }

    metrics = [
        {
            "start": start,
            "end": end,
            "dimension": dimension,
            "subdimension": subdimension,
            "value": overview_data[field],
        }
        for field, (dimension, subdimension) in field_mapping.items()
        if field in overview_data
    ]

    if not metrics:
        logger.warning(f"No valid metrics found in overview data: {overview_data}")

    return {"metrics": metrics}
272+
168273
169- def transform_podigee_analytics_to_metrics (analytics_data ):
274+ def transform_podigee_analytics_to_metrics (analytics_data , store_downloads_only = False ):
170275 """
171276 Transform Podigee analytics data to OpenPodcast metrics format.
172277 Expected format: {"metrics": [{"start": "date", "end": "date", "dimension": "string", "subdimension": "string", "value": number}]}
173278 """
174279 if not analytics_data or "objects" not in analytics_data :
175280 return {"metrics" : []}
176-
281+
282+ aggregation_granularity = analytics_data .get ("meta" , {}).get ("aggregation_granularity" , "day" )
177283 metrics = []
178284
179285 for day_data in analytics_data ["objects" ]:
180- date = day_data .get ("downloaded_on" , "" ). split ( "T" )[ 0 ] # Extract YYYY-MM-DD
286+ date = extract_date_str_from_iso ( day_data .get ("downloaded_on" , "" ))
181287 if not date :
182288 continue
183289
@@ -186,44 +292,45 @@ def transform_podigee_analytics_to_metrics(analytics_data):
186292 for download_type , value in day_data ["downloads" ].items ():
187293 metrics .append ({
188294 "start" : date ,
189- "end" : date ,
295+ "end" : get_end_date_on_granularity ( aggregation_granularity , date ) ,
190296 "dimension" : "downloads" ,
191297 "subdimension" : download_type ,
192298 "value" : value
193299 })
194-
195- # Process platforms
196- if "platforms" in day_data :
197- for platform , value in day_data ["platforms" ].items ():
198- metrics .append ({
199- "start" : date ,
200- "end" : date ,
201- "dimension" : "platforms" ,
202- "subdimension" : platform ,
203- "value" : value
204- })
205-
206- # Process clients
207- if "clients" in day_data :
208- for client , value in day_data ["clients" ].items ():
209- metrics .append ({
210- "start" : date ,
211- "end" : date ,
212- "dimension" : "clients" ,
213- "subdimension" : client ,
214- "value" : value
215- })
216-
217- # Process sources
218- if "sources" in day_data :
219- for source , value in day_data ["sources" ].items ():
220- metrics .append ({
221- "start" : date ,
222- "end" : date ,
223- "dimension" : "sources" ,
224- "subdimension" : source ,
225- "value" : value
226- })
300+
301+ if not store_downloads_only :
302+ # Process platforms
303+ if "platforms" in day_data :
304+ for platform , value in day_data ["platforms" ].items ():
305+ metrics .append ({
306+ "start" : date ,
307+ "end" : get_end_date_on_granularity (aggregation_granularity , date ),
308+ "dimension" : "platforms" ,
309+ "subdimension" : platform ,
310+ "value" : value
311+ })
312+
313+ # Process clients
314+ if "clients" in day_data :
315+ for client , value in day_data ["clients" ].items ():
316+ metrics .append ({
317+ "start" : date ,
318+ "end" : get_end_date_on_granularity (aggregation_granularity , date ),
319+ "dimension" : "clients" ,
320+ "subdimension" : client ,
321+ "value" : value
322+ })
323+
324+ # Process sources
325+ if "sources" in day_data :
326+ for source , value in day_data ["sources" ].items ():
327+ metrics .append ({
328+ "start" : date ,
329+ "end" : get_end_date_on_granularity (aggregation_granularity , date ),
330+ "dimension" : "sources" ,
331+ "subdimension" : source ,
332+ "value" : value
333+ })
227334
228335 return {"metrics" : metrics }
229336
@@ -236,11 +343,33 @@ def transform_podigee_analytics_to_metrics(analytics_data):
236343 start_date = date_range .start ,
237344 end_date = date_range .end ,
238345 ),
239- # Podcast metrics - analytics data for the podcast
346+ # Podcast metrics like apps and platforms and downloads per day of last 30 days
347+ FetchParams (
348+ openpodcast_endpoint = "metrics" ,
349+ podigee_call = lambda : transform_podigee_analytics_to_metrics (
350+ podigee .podcast_analytics (PODCAST_ID , start = date_range .start , end = date_range .end ),
351+ # we fetch this just every week on Monday and the first day of the month
352+ # daily downloads are stored every day
353+ not (TODAY_DATE .weekday () == 0 or TODAY_DATE .day == 1 )
354+ ),
355+ start_date = date_range .start ,
356+ end_date = date_range .end ,
357+ ),
358+ # Fetch total downloads since beginning which is returned in months
240359 FetchParams (
241- openpodcast_endpoint = "metrics" ,
360+ openpodcast_endpoint = "metrics" ,
242361 podigee_call = lambda : transform_podigee_analytics_to_metrics (
243- podigee .podcast_analytics (PODCAST_ID , start = date_range .start , end = date_range .end )
362+ podigee .podcast_analytics (PODCAST_ID , start = podcast_published_at , end = date_range .end ),
363+ store_downloads_only = True
364+ ),
365+ start_date = podcast_published_at ,
366+ end_date = date_range .end ,
367+ ),
368+ # Fetch overview metrics for the podcast, endpoint "overview"
369+ FetchParams (
370+ openpodcast_endpoint = "metrics" ,
371+ podigee_call = lambda : transform_podigee_podcast_overview (
372+ podigee .podcast_overview (PODCAST_ID , start = date_range .start , end = date_range .end ),
244373 ),
245374 start_date = date_range .start ,
246375 end_date = date_range .end ,
@@ -251,6 +380,9 @@ def transform_podigee_analytics_to_metrics(analytics_data):
251380
252381for episode in episodes :
253382 print (episode )
383+ episode_published_at_str = extract_date_str_from_iso (episode .get ("published_at" , "" ))
384+ # Convert to datetime object for API calls
385+ episode_published_at = datetime .strptime (episode_published_at_str , "%Y-%m-%d" ) if episode_published_at_str else date_range .start
254386 endpoints += [
255387 # Episode metadata - basic episode information
256388 FetchParams (
@@ -269,14 +401,30 @@ def transform_podigee_analytics_to_metrics(analytics_data):
269401 openpodcast_endpoint = "metrics" ,
270402 podigee_call = get_request_lambda (
271403 lambda ep_id : transform_podigee_analytics_to_metrics (
272- podigee .episode_analytics (ep_id , granularity = None , start = date_range .start , end = date_range .end )
273- ),
404+ podigee .episode_analytics (ep_id , granularity = None , start = date_range .start , end = date_range .end ),
405+ # for now we just store the downloads and do not store platforms etc. per episode
406+ store_downloads_only = True
407+ ),
274408 str (episode ["id" ])
275409 ),
276410 start_date = date_range .start ,
277411 end_date = date_range .end ,
278412 meta = {"episode" : str (episode ["id" ])},
279413 ),
414+ # We store the downloads since publication. The Podigee API returns one data point per month.
415+ FetchParams (
416+ openpodcast_endpoint = "metrics" ,
417+ podigee_call = get_request_lambda (
418+ lambda ep_id : transform_podigee_analytics_to_metrics (
419+ podigee .episode_analytics (ep_id , granularity = "monthly" , start = episode_published_at , end = date_range .end ),
420+ store_downloads_only = True
421+ ),
422+ str (episode ["id" ])
423+ ),
424+ start_date = episode_published_at ,
425+ end_date = date_range .end ,
426+ meta = {"episode" : str (episode ["id" ])},
427+ )
280428 ]
281429
282430# Create a queue to hold the FetchParams objects
0 commit comments