# server.py
# Flask service that turns an uploaded PDF into flashcards via g4f.
from flask import Flask, request, jsonify
from flask_cors import CORS
import g4f
# from g4f.api import run_api
import PyPDF2
import re
# Create the Flask application object that the route decorator below attaches to.
app = Flask(__name__)
# Wrap the app with flask_cors; no options are passed, so the extension's
# defaults apply.
CORS(app)
# Module-level g4f configuration. These assignments and the prints below run
# as a side effect of importing this module.
g4f.logging = True # enable logging
g4f.check_version = False # Disable automatic version checking
# Startup diagnostics written to stdout.
# NOTE(review): the second print assumes the installed g4f exposes
# `Provider.Ails`; if that attribute is absent, importing this module would
# fail here -- confirm against the pinned g4f version.
print(g4f.version) # check version
print(g4f.Provider.Ails.params) # supported args
def extract_text_from_pdf(file_stream):
    """Return the concatenated text of every page in a PDF.

    Args:
        file_stream: Binary file-like object holding the PDF. It is passed
            to ``PyPDF2.PdfReader`` unchanged.

    Returns:
        The text of all pages joined in page order with no separator.
        A page that yields no text contributes an empty string.
    """
    reader = PyPDF2.PdfReader(file_stream)
    # `or ""` keeps a falsy extraction result (None or "") from breaking the
    # join, and join avoids re-copying the growing string for every page.
    return "".join(page.extract_text() or "" for page in reader.pages)
def parse_text(response):  # IF THIS RETURNS AN EMPTY ARRAY, REGENERATE THE RESPONSE AND RUN IT AGAIN
    """Extract flashcards from a reply that uses "Front:" / "Back:" markers.

    Each card is expected as ``Front: <text>`` followed by ``Back: <text>``.
    The markers may be followed by any whitespace (including a newline), and
    cards may be numbered ("1.", "2.", ...) either before the "Front:" marker
    or right after it; the numbers are not included in the result.

    Args:
        response: Raw reply text from the model.

    Returns:
        A list of ``[front, back]`` string pairs in order of appearance.
        Entries that do not contain exactly one "Back:" marker are reported
        on stdout and skipped, so the list is empty when nothing parses.
    """
    cards = []
    # The chunk before the first "Front:" is preamble (or the first card's
    # number), never a card, so it is skipped.
    for entry in re.split(r'Front:\s*', response.strip())[1:]:
        parts = re.split(r'Back:\s*', entry)
        if len(parts) != 2:
            print("Error in parsing: ", entry)
            continue

        # Ensure we do not include the number in the front text
        front = re.sub(r'^\d+\.\s*', '', parts[0].strip())

        # When cards are numbered *before* "Front:", the next card's number
        # ("2.") ends up as the last line of this card's back; drop it.
        back = re.sub(r'\n\s*\d+\.$', '', parts[1].strip()).rstrip()

        cards.append([front, back])
    return cards
def generate_response(query):
    """Send *query* to the chat model and return the whole reply as one string.

    The completion is requested from ``gpt-3.5-turbo`` in streaming mode with
    *query* as the single user message. Streamed items that are not ``str``
    are ignored; the remaining pieces are concatenated in arrival order.
    """
    stream = g4f.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=[{"role": "user", "content": query}],
        stream=True,
    )
    text_pieces = (piece for piece in stream if isinstance(piece, str))
    return ''.join(text_pieces)
@app.route('/upload', methods=['POST'])
def upload_file():
    """Turn an uploaded PDF into flashcards.

    Expects a multipart form upload with the PDF in the ``file`` field.

    Returns:
        A JSON array of ``[front, back]`` pairs. The array is empty when no
        parsable reply was produced within the retry budget. When no file is
        supplied (field missing or empty filename) the response is
        ``{"error": "No file provided"}`` with HTTP status 400.
    """
    # .get() lets a missing form field take the same JSON error path as an
    # empty filename instead of raising inside request.files[...].
    uploaded_file = request.files.get('file')
    if uploaded_file is None or uploaded_file.filename == '':
        return jsonify({"error": "No file provided"}), 400

    text = extract_text_from_pdf(uploaded_file.stream)  # Use the stream directly

    # Ask for exactly the "Front: ... Back: ..." markers that parse_text()
    # splits on; a prompt describing any other layout makes every attempt
    # parse to nothing and burns the whole retry budget.
    query = (
        "Parse the following text and output relevant information as "
        "flashcards. Write every flashcard as 'Front: <question>' followed "
        "by 'Back: <answer>'.\n\n" + text
    )

    # The model's output format is not guaranteed, so regenerate until the
    # reply parses or the attempt budget is spent.
    max_attempts = 5
    parsed_data = []
    for _ in range(max_attempts):
        parsed_data = parse_text(generate_response(query))
        if parsed_data:
            break

    if parsed_data:
        print(parsed_data)
    else:
        print("Failed to generate valid data after multiple attempts.")
    return jsonify(parsed_data)
# Start the server only when this file is executed as a script, not when it is
# imported. No host/port/debug arguments are passed, so Flask's defaults apply.
if __name__ == '__main__':
    app.run()