-
Notifications
You must be signed in to change notification settings - Fork 2
Expand file tree
/
Copy pathLangFlow-Flask-Wrapper
More file actions
93 lines (81 loc) · 3.01 KB
/
LangFlow-Flask-Wrapper
File metadata and controls
93 lines (81 loc) · 3.01 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
from flask import Flask, request, jsonify
import subprocess
import validators
import logging
import os
import datetime
from urllib.parse import urlparse
from pathlib import Path
# Configure logging
# Root logger at DEBUG so the before/after-request hooks below can emit
# full header/body dumps while developing against LangFlow.
logging.basicConfig(
    level=logging.DEBUG,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)
# Flask application hosting the /crawl wrapper endpoint.
app = Flask(__name__)
# NOTE(review): debug mode enables Flask's interactive debugger/reloader —
# confirm this service is never exposed beyond localhost (see app.run below).
app.debug = True
@app.before_request
def log_request():
    """Dump the incoming request's headers and raw body at DEBUG level."""
    headers = dict(request.headers)
    body = request.get_data(as_text=True)
    app.logger.debug(f"Request Headers: {headers}")
    app.logger.debug(f"Request Body: {body}")
@app.after_request
def log_response(response):
    """Dump each outgoing response's status and payload, then pass it through unchanged."""
    status = response.status
    payload = response.get_data(as_text=True)
    app.logger.debug(f"Response Status: {status}")
    app.logger.debug(f"Response Data: {payload}")
    return response
def validate_url(url: str):
    """Check that *url* is present and syntactically valid.

    Returns a ``(is_valid, error_message)`` tuple; the message is an
    empty string when the URL passes validation.
    """
    if not url:
        return False, "URL is missing."
    # validators.url returns a truthy True on success and a falsy
    # failure object otherwise.
    if validators.url(url):
        return True, ""
    return False, "Invalid URL format."
def create_output_folder(url: str) -> str:
    """Create and return a unique output directory for a crawl of *url*.

    The directory lives under ``~/SF_Local`` and is named from the URL's
    netloc (dots removed) plus a second-resolution timestamp, e.g.
    ``~/SF_Local/examplecom_20240101_120000``. The directory is created
    (including parents) if it does not already exist.

    Fix: the previous version embedded the raw netloc, so a URL with an
    explicit port (``example.com:8080``) produced a ``:`` in the folder
    name — an invalid character in Windows paths. The port separator is
    now mapped to an underscore, and the path is built with pathlib
    joins rather than an embedded ``'/'`` string.
    """
    parsed_url = urlparse(url)
    # Strip dots and neutralise the port separator so the name is a
    # valid directory name on every platform.
    domain = parsed_url.netloc.replace('.', '').replace(':', '_')
    timestamp = datetime.datetime.now().strftime('%Y%m%d_%H%M%S')
    folder_path = Path.home() / "SF_Local" / f"{domain}_{timestamp}"
    folder_path.mkdir(parents=True, exist_ok=True)
    return str(folder_path)
def run_screaming_frog(url: str, output_folder: str):
    """Run a headless Screaming Frog crawl of *url*, exporting the images tab as CSV.

    Returns a ``(success, message)`` tuple. On a non-zero exit the
    message carries the process's stderr; any other failure is reported
    as an unexpected error.
    """
    command = [
        'screamingfrogseospider',
        '--crawl', url,
        '--headless',
        '--export-tabs', 'images:all',
        '--output-folder', output_folder,
        '--export-format', 'csv',
    ]
    try:
        app.logger.info(f"Running command: {' '.join(command)}")
        subprocess.run(command, check=True, capture_output=True, text=True)
    except subprocess.CalledProcessError as e:
        return False, f"Error: {e.stderr}"
    except Exception as e:
        return False, f"Unexpected error: {str(e)}"
    return True, "Crawl completed successfully."
@app.route('/crawl', methods=['POST'])
def crawl():
    """POST /crawl — validate the JSON body's ``url``, crawl it, report results.

    Responds 400 for a missing or malformed URL, 500 when the crawl
    fails or an unexpected error occurs, and 200 with the output folder
    location on success.
    """
    try:
        payload = request.get_json()
        if not payload or 'url' not in payload:
            return jsonify(success=False, error="URL not provided."), 400
        target = payload['url']
        ok, why = validate_url(target)
        if not ok:
            return jsonify(success=False, error=why), 400
        # Crawl into a freshly created, uniquely named output directory.
        destination = create_output_folder(target)
        done, message = run_screaming_frog(target, destination)
        body = {
            'success': done,
            'message': message,
            'file_location': destination if done else None
        }
        return jsonify(body), (200 if done else 500)
    except Exception as e:
        app.logger.error(f"Internal server error: {str(e)}")
        return jsonify(success=False, error="Internal server error."), 500
# Development entry point: bind to localhost only on port 8090 using the
# Flask debug server (not intended for production deployment).
if __name__ == '__main__':
    app.run(host='127.0.0.1', port=8090, debug=True)