Open
Description
Describe the bug
Loading data from feature group using as_dataframe()
doesn't clean temporary .csv files
To reproduce
After creating feature group and ingesting data, load athena query and then use as_dataframe()
method
Expected behavior
This methods loads .csv query file and returns the data inside, but it doesn't clean the .csv file itself.
So after running multiple queries it's really easy to just fill your local disk space up to maximum, because of multiple .csv files with query results
Screenshots or logs
`
def as_dataframe(self, **kwargs) -> DataFrame:
"""Download the result of the current query and load it into a DataFrame.
Args:
**kwargs (object): key arguments used for the method pandas.read_csv to be able to
have a better tuning on data. For more info read:
https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.read_csv.html
Returns:
A pandas DataFrame contains the query result.
"""
query_state = self.get_query_execution().get("QueryExecution").get("Status").get("State")
if query_state != "SUCCEEDED":
if query_state in ("QUEUED", "RUNNING"):
raise RuntimeError(
f"Current query {self._current_query_execution_id} is still being executed."
)
raise RuntimeError(f"Failed to execute query {self._current_query_execution_id}")
output_filename = os.path.join(
tempfile.gettempdir(), f"{self._current_query_execution_id}.csv"
)
self.sagemaker_session.download_athena_query_result(
bucket=self._result_bucket,
prefix=self._result_file_prefix,
query_execution_id=self._current_query_execution_id,
filename=output_filename,
)
kwargs.pop("delimiter", None)
return pd.read_csv(filepath_or_buffer=output_filename, delimiter=",", **kwargs)
`