Skip to content

Commit 942a5b1

Browse files
authored
Handle decode errors on github files (#313)
1 parent 0ce59bd commit 942a5b1

File tree

1 file changed

+6
-5
lines changed

1 file changed

+6
-5
lines changed

src/gurubase-backend/backend/core/github/data_source_handler.py

+6 -5
Original file line number | Diff line number | Diff line change
@@ -168,14 +168,15 @@ def clone_repository(repo_url):
168 168
def get_file_content(file_path):
169 169
"""Read and return the content of a file."""
170 170
try:
171-
with open(file_path, 'r', encoding='utf-8') as f:
171+
with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
172 172
return f.read()
173-
except UnicodeDecodeError:
173+
except UnicodeDecodeError as e:
174+
logger.error(f"UnicodeDecodeError reading file {file_path}: {str(e)}")
174 175
# Skip binary files
175-
return None
176+
return ''
176 177
except Exception as e:
177-
logger.warning(f"Error reading file {file_path}: {str(e)}")
178-
return None
178+
logger.error(f"Error reading file {file_path}: {str(e)}")
179+
return ''
179 180

180 181
def read_repository(repo_path, include=True, glob_pattern=None):
181 182
"""Get the directory structure and file contents of the repository.

0 commit comments

Comments
 (0)