Skip to content

Commit 165c4fe

Browse files
committed
script: adjust download script defaults
* warn users trying to download the whole database
* max number of logs by default is 10
* increase delay between downloads, considering rate limiting server side

If you are using this script to download tons of logs, please consider supporting Dronecode with a membership to offset the costs of the network bandwidth.

Signed-off-by: Ramon Roche <mrpollo@gmail.com>
1 parent 7b9d819 commit 165c4fe

1 file changed

Lines changed: 137 additions & 34 deletions

File tree

app/download_logs.py

Lines changed: 137 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -13,15 +13,20 @@
1313

1414
from plot_app.config_tables import *
1515

16+
# Rate limiting settings
DEFAULT_DELAY_SECONDS = 6 # 10 requests/minute = 6 seconds between requests
DEFAULT_MAX_NUM = 10 # Safe default to prevent accidental bulk downloads
WARN_THRESHOLD = 100 # Warn user if downloading more than this many files
20+
1621

1722
def get_arguments():
1823
""" Get parsed CLI arguments """
1924
parser = argparse.ArgumentParser(description='Python script for downloading public logs '
2025
'from the PX4/flight_review database.',
2126
formatter_class=argparse.ArgumentDefaultsHelpFormatter)
22-
parser.add_argument('--max-num', '-n', type=int, default=-1,
27+
parser.add_argument('--max-num', '-n', type=int, default=DEFAULT_MAX_NUM,
2328
help='Maximum number of files to download that match the search criteria. '
24-
'Default: download all files.')
29+
'Set to -1 to download all files (requires confirmation for >100 files).')
2530
parser.add_argument('-d', '--download-folder', type=str, default="data/downloaded/",
2631
help='The folder to store the downloaded logfiles.')
2732
parser.add_argument('--print', action='store_true', dest="print_entries",
@@ -60,6 +65,10 @@ def get_arguments():
6065
help='The source of the log upload. e.g. ["webui", "CI"]')
6166
parser.add_argument('--git-hash', default=None, type=str,
6267
help='The git hash of the PX4 Firmware version.')
68+
parser.add_argument('--delay', type=float, default=DEFAULT_DELAY_SECONDS,
69+
help='Delay in seconds between downloads to respect server rate limits.')
70+
parser.add_argument('--yes', '-y', action='store_true', default=False,
71+
help='Skip confirmation prompt for large downloads.')
6372
return parser.parse_args()
6473

6574

@@ -83,13 +92,95 @@ def error_labels_to_ids(error_labels):
8392
return error_ids
8493

8594

95+
def confirm_large_download(n_files, delay):
    """
    Ask user to confirm large downloads.

    Prints a banner with the number of files, an estimated total duration
    (n_files * delay), and a note about server-side rate limits, then
    prompts on stdin. Returns True only if the user answers 'y' or 'yes'
    (case-insensitive); anything else (including just Enter) declines.
    """
    banner = '=' * 60
    # Rough ETA: one inter-request delay per file (transfer time not included).
    eta = datetime.timedelta(seconds=int(n_files * delay))

    print(f"\n{banner}")
    print(f"WARNING: You are about to download {n_files} files.")
    print(f"Estimated time: {eta} (at {delay}s between downloads)")
    print(f"{banner}")
    print("\nThe server has rate limits in place. Bulk downloading without")
    print("appropriate delays may result in your IP being blocked.")
    print("\nNetwork and storage costs for Flight Review are funded by the")
    print("Dronecode Foundation. If you find this service useful, please")
    print("consider supporting the project: https://www.dronecode.org/membership/")
    print(f"\nTo download more files, use: --max-num {n_files} --yes")

    answer = input("\nContinue with download? [y/N]: ")
    return answer.lower() in {'y', 'yes'}
115+
116+
117+
def download_with_retry(url, entry_id, max_retries=5):
    """
    Download a file with rate-limit-aware retry logic.

    :param url: base download API URL (the log id is appended as ?log=<id>)
    :param entry_id: id of the log to download
    :param max_retries: number of attempts before giving up
    :return: the streaming requests.Response on success, or None when the log
             does not exist (404) or all retries were exhausted (caller skips
             the file). Exits the process if the server reports an IP block.
    """
    for attempt in range(max_retries):
        try:
            response = requests.get(url=url + "?log=" + entry_id, stream=True, timeout=10*60)

            if response.status_code == 503:
                # Rate limited - back off exponentially, capped at 5 minutes
                wait_time = min(30 * (2 ** attempt), 300)
                retry_after = response.headers.get('Retry-After')
                if retry_after:
                    try:
                        # Honor an explicit delay-seconds value from the server
                        wait_time = int(retry_after)
                    except ValueError:
                        # Retry-After may also be an HTTP-date (RFC 9110);
                        # fall back to the exponential backoff in that case
                        # instead of crashing the retry loop.
                        pass
                print(f' Rate limited (503). Waiting {wait_time}s before retry...')
                time.sleep(wait_time)
                continue

            if response.status_code in (403, 444):
                # IP has been blocked - no point retrying, abort the script
                print(f'\n{"="*60}')
                print(f'ERROR: Your IP address has been blocked (HTTP {response.status_code}).')
                print('This may be due to excessive download requests.')
                print('\nIf you believe this is an error, please contact:')
                print('https://github.com/PX4/flight_review/issues')
                print(f'{"="*60}\n')
                sys.exit(1)

            if response.status_code == 404:
                # Missing log is not retryable
                print(' Log not found (404). Skipping.')
                return None

            if response.status_code != 200:
                print(f' Unexpected status {response.status_code}. Retrying...')
                time.sleep(10)
                continue

            return response

        except requests.exceptions.ConnectionError:
            # Connection refused or reset - could be IP block (444 closes connection)
            if attempt == 0:
                print(' Connection failed. This may indicate your IP has been blocked.')
                print(f' Retrying ({attempt + 1}/{max_retries})...')
            else:
                print(f' Connection failed. Retrying ({attempt + 1}/{max_retries})...')
            # Linear backoff for connection-level failures
            time.sleep(10 * (attempt + 1))
        except requests.exceptions.Timeout:
            print(f' Request timed out. Retrying ({attempt + 1}/{max_retries})...')
            time.sleep(10)
        except requests.exceptions.RequestException as ex:
            # Catch-all for other requests-level failures (DNS, SSL, ...)
            print(f' Request failed: {ex}')
            time.sleep(10)

    print(f' Failed after {max_retries} attempts. Skipping.')
    return None
173+
174+
86175
def main():
87176
""" main script entry point """
88177
args = get_arguments()
89178

90179
try:
91180
# the db_info_api sends a json file with a list of all public database entries
181+
print("Fetching database info...")
92182
db_entries_list = requests.get(url=args.db_info_api, timeout=5*60).json()
183+
print(f"Found {len(db_entries_list)} total public logs in database.")
93184
except:
94185
print("Server request failed.")
95186
raise
@@ -202,45 +293,57 @@ def main():
202293
reverse=True)
203294

204295
# set number of files to download
205-
n_en = len(db_entries_list)
296+
n_matched = len(db_entries_list)
297+
print(f"{n_matched} logs match your filter criteria.")
298+
206299
if args.max_num > 0:
207-
n_en = min(n_en, args.max_num)
300+
n_en = min(n_matched, args.max_num)
301+
if n_matched > args.max_num:
302+
print(f"Limiting to {args.max_num} files (use --max-num to change).")
303+
else:
304+
n_en = n_matched
305+
306+
# Warn for large downloads
307+
if n_en > WARN_THRESHOLD and not args.yes:
308+
if not confirm_large_download(n_en, args.delay):
309+
print("Download cancelled.")
310+
sys.exit(0)
311+
208312
n_downloaded = 0
209313
n_skipped = 0
314+
n_failed = 0
210315

211316
for i in range(n_en):
212317
entry_id = db_entries_list[i]['log_id']
213318

214-
num_tries = 0
215-
for num_tries in range(100):
216-
try:
217-
if args.overwrite or entry_id not in logids:
218-
219-
file_path = os.path.join(args.download_folder, entry_id + ".ulg")
220-
221-
print('downloading {:}/{:} ({:})'.format(i + 1, n_en, entry_id))
222-
request = requests.get(url=args.download_api +
223-
"?log=" + entry_id, stream=True,
224-
timeout=10*60)
225-
with open(file_path, 'wb') as log_file:
226-
for chunk in request.iter_content(chunk_size=1024):
227-
if chunk: # filter out keep-alive new chunks
228-
log_file.write(chunk)
229-
n_downloaded += 1
230-
else:
231-
n_skipped += 1
232-
break
233-
except Exception as ex:
234-
print(ex)
235-
print('Waiting for 30 seconds to retry')
236-
time.sleep(30)
237-
if num_tries == 99:
238-
print('Retried', str(num_tries + 1), 'times without success, exiting.')
239-
sys.exit(1)
240-
241-
242-
print('{:} logs downloaded to {:}, {:} logs skipped (already downloaded)'.format(
243-
n_downloaded, args.download_folder, n_skipped))
319+
if not args.overwrite and entry_id in logids:
320+
n_skipped += 1
321+
continue
322+
323+
file_path = os.path.join(args.download_folder, entry_id + ".ulg")
324+
print('Downloading {}/{} ({})'.format(i + 1, n_en, entry_id))
325+
326+
request = download_with_retry(args.download_api, entry_id)
327+
328+
if request is None:
329+
n_failed += 1
330+
continue
331+
332+
with open(file_path, 'wb') as log_file:
333+
for chunk in request.iter_content(chunk_size=1024):
334+
if chunk: # filter out keep-alive new chunks
335+
log_file.write(chunk)
336+
n_downloaded += 1
337+
338+
# Rate limit delay between downloads (skip on last file)
339+
if i < n_en - 1:
340+
time.sleep(args.delay)
341+
342+
print(f'\nDownload complete:')
343+
print(f' {n_downloaded} logs downloaded to {args.download_folder}')
344+
print(f' {n_skipped} logs skipped (already downloaded)')
345+
if n_failed > 0:
346+
print(f' {n_failed} logs failed')
244347

245348

246349
if __name__ == '__main__':

0 commit comments

Comments
 (0)