1313
1414from plot_app .config_tables import *
1515
# --- Rate limiting configuration ---
# 10 requests per minute => one request every 6 seconds.
DEFAULT_DELAY_SECONDS = 6
# Conservative default so a bare invocation cannot start a bulk download.
DEFAULT_MAX_NUM = 10
# Above this many files the user must explicitly confirm (or pass --yes).
WARN_THRESHOLD = 100
1621
1722def get_arguments ():
1823 """ Get parsed CLI arguments """
1924 parser = argparse .ArgumentParser (description = 'Python script for downloading public logs '
2025 'from the PX4/flight_review database.' ,
2126 formatter_class = argparse .ArgumentDefaultsHelpFormatter )
22- parser .add_argument ('--max-num' , '-n' , type = int , default = - 1 ,
27+ parser .add_argument ('--max-num' , '-n' , type = int , default = DEFAULT_MAX_NUM ,
2328 help = 'Maximum number of files to download that match the search criteria. '
24- 'Default: download all files.' )
29+ 'Set to -1 to download all files (requires confirmation for >100 files) .' )
2530 parser .add_argument ('-d' , '--download-folder' , type = str , default = "data/downloaded/" ,
2631 help = 'The folder to store the downloaded logfiles.' )
2732 parser .add_argument ('--print' , action = 'store_true' , dest = "print_entries" ,
@@ -60,6 +65,10 @@ def get_arguments():
6065 help = 'The source of the log upload. e.g. ["webui", "CI"]' )
6166 parser .add_argument ('--git-hash' , default = None , type = str ,
6267 help = 'The git hash of the PX4 Firmware version.' )
68+ parser .add_argument ('--delay' , type = float , default = DEFAULT_DELAY_SECONDS ,
69+ help = 'Delay in seconds between downloads to respect server rate limits.' )
70+ parser .add_argument ('--yes' , '-y' , action = 'store_true' , default = False ,
71+ help = 'Skip confirmation prompt for large downloads.' )
6372 return parser .parse_args ()
6473
6574
@@ -83,13 +92,95 @@ def error_labels_to_ids(error_labels):
8392 return error_ids
8493
8594
def confirm_large_download(n_files, delay):
    """
    Interactively ask the user to confirm a large bulk download.

    :param n_files: number of log files about to be downloaded
    :param delay: per-file delay in seconds used between downloads
    :return: True if the user answered yes, False otherwise
    """
    # Rough wall-clock estimate: one inter-request delay per file.
    eta = str(datetime.timedelta(seconds=int(n_files * delay)))
    separator = '=' * 60

    print(f"\n{separator}")
    print(f"WARNING: You are about to download {n_files} files.")
    print(f"Estimated time: {eta} (at {delay}s between downloads)")
    print(f"{separator}")
    print("\nThe server has rate limits in place. Bulk downloading without")
    print("appropriate delays may result in your IP being blocked.")
    print("\nNetwork and storage costs for Flight Review are funded by the")
    print("Dronecode Foundation. If you find this service useful, please")
    print("consider supporting the project: https://www.dronecode.org/membership/")
    print(f"\nTo download more files, use: --max-num {n_files} --yes")

    answer = input("\nContinue with download? [y/N]: ")
    return answer.lower() in ('y', 'yes')
116+
def download_with_retry(url, entry_id, max_retries=5):
    """
    Download a single log file with rate-limit-aware retry logic.

    :param url: base download API url (the log id is appended as a query arg)
    :param entry_id: id of the log to download
    :param max_retries: number of attempts before giving up
    :return: the streaming requests.Response on success, or None if the log
             does not exist (404) or all retries were exhausted
    Calls sys.exit(1) if the server reports this IP as blocked (403/444).
    """
    for attempt in range(max_retries):
        try:
            request = requests.get(url=url + "?log=" + entry_id, stream=True,
                                   timeout=10 * 60)

            if request.status_code == 503:
                # Rate limited - back off exponentially (capped at 5 minutes),
                # unless the server tells us exactly how long to wait.
                wait_time = min(30 * (2 ** attempt), 300)
                retry_after = request.headers.get('Retry-After')
                if retry_after:
                    try:
                        wait_time = int(retry_after)
                    except ValueError:
                        # Retry-After may be an HTTP-date instead of seconds
                        # (RFC 7231); keep the exponential backoff in that case.
                        pass
                # Release the streaming connection before sleeping so it
                # doesn't leak across retries.
                request.close()
                print(f' Rate limited (503). Waiting {wait_time}s before retry...')
                time.sleep(wait_time)
                continue

            if request.status_code in [403, 444]:
                # IP has been blocked - no point retrying, tell the user why.
                print(f'\n{"=" * 60}')
                print(f'ERROR: Your IP address has been blocked (HTTP {request.status_code}).')
                print('This may be due to excessive download requests.')
                print('\nIf you believe this is an error, please contact:')
                print('https://github.com/PX4/flight_review/issues')
                print(f'{"=" * 60}\n')
                sys.exit(1)

            if request.status_code == 404:
                print(' Log not found (404). Skipping.')
                return None

            if request.status_code != 200:
                print(f' Unexpected status {request.status_code}. Retrying...')
                request.close()  # don't leak the streaming connection
                time.sleep(10)
                continue

            return request

        except requests.exceptions.ConnectionError:
            # Connection refused or reset - could be IP block (444 closes connection)
            if attempt == 0:
                print(' Connection failed. This may indicate your IP has been blocked.')
                print(f' Retrying ({attempt + 1}/{max_retries})...')
            else:
                print(f' Connection failed. Retrying ({attempt + 1}/{max_retries})...')
            time.sleep(10 * (attempt + 1))
        except requests.exceptions.Timeout:
            print(f' Request timed out. Retrying ({attempt + 1}/{max_retries})...')
            time.sleep(10)
        except requests.exceptions.RequestException as ex:
            print(f' Request failed: {ex}')
            time.sleep(10)

    print(f' Failed after {max_retries} attempts. Skipping.')
    return None
173+
174+
86175def main ():
87176 """ main script entry point """
88177 args = get_arguments ()
89178
90179 try :
91180 # the db_info_api sends a json file with a list of all public database entries
181+ print ("Fetching database info..." )
92182 db_entries_list = requests .get (url = args .db_info_api , timeout = 5 * 60 ).json ()
183+ print (f"Found { len (db_entries_list )} total public logs in database." )
93184 except :
94185 print ("Server request failed." )
95186 raise
@@ -202,45 +293,57 @@ def main():
202293 reverse = True )
203294
204295 # set number of files to download
205- n_en = len (db_entries_list )
296+ n_matched = len (db_entries_list )
297+ print (f"{ n_matched } logs match your filter criteria." )
298+
206299 if args .max_num > 0 :
207- n_en = min (n_en , args .max_num )
300+ n_en = min (n_matched , args .max_num )
301+ if n_matched > args .max_num :
302+ print (f"Limiting to { args .max_num } files (use --max-num to change)." )
303+ else :
304+ n_en = n_matched
305+
306+ # Warn for large downloads
307+ if n_en > WARN_THRESHOLD and not args .yes :
308+ if not confirm_large_download (n_en , args .delay ):
309+ print ("Download cancelled." )
310+ sys .exit (0 )
311+
208312 n_downloaded = 0
209313 n_skipped = 0
314+ n_failed = 0
210315
211316 for i in range (n_en ):
212317 entry_id = db_entries_list [i ]['log_id' ]
213318
214- num_tries = 0
215- for num_tries in range (100 ):
216- try :
217- if args .overwrite or entry_id not in logids :
218-
219- file_path = os .path .join (args .download_folder , entry_id + ".ulg" )
220-
221- print ('downloading {:}/{:} ({:})' .format (i + 1 , n_en , entry_id ))
222- request = requests .get (url = args .download_api +
223- "?log=" + entry_id , stream = True ,
224- timeout = 10 * 60 )
225- with open (file_path , 'wb' ) as log_file :
226- for chunk in request .iter_content (chunk_size = 1024 ):
227- if chunk : # filter out keep-alive new chunks
228- log_file .write (chunk )
229- n_downloaded += 1
230- else :
231- n_skipped += 1
232- break
233- except Exception as ex :
234- print (ex )
235- print ('Waiting for 30 seconds to retry' )
236- time .sleep (30 )
237- if num_tries == 99 :
238- print ('Retried' , str (num_tries + 1 ), 'times without success, exiting.' )
239- sys .exit (1 )
240-
241-
242- print ('{:} logs downloaded to {:}, {:} logs skipped (already downloaded)' .format (
243- n_downloaded , args .download_folder , n_skipped ))
319+ if not args .overwrite and entry_id in logids :
320+ n_skipped += 1
321+ continue
322+
323+ file_path = os .path .join (args .download_folder , entry_id + ".ulg" )
324+ print ('Downloading {}/{} ({})' .format (i + 1 , n_en , entry_id ))
325+
326+ request = download_with_retry (args .download_api , entry_id )
327+
328+ if request is None :
329+ n_failed += 1
330+ continue
331+
332+ with open (file_path , 'wb' ) as log_file :
333+ for chunk in request .iter_content (chunk_size = 1024 ):
334+ if chunk : # filter out keep-alive new chunks
335+ log_file .write (chunk )
336+ n_downloaded += 1
337+
338+ # Rate limit delay between downloads (skip on last file)
339+ if i < n_en - 1 :
340+ time .sleep (args .delay )
341+
342+ print (f'\n Download complete:' )
343+ print (f' { n_downloaded } logs downloaded to { args .download_folder } ' )
344+ print (f' { n_skipped } logs skipped (already downloaded)' )
345+ if n_failed > 0 :
346+ print (f' { n_failed } logs failed' )
244347
245348
246349if __name__ == '__main__' :
0 commit comments