77import pandas as pd
88
99from openeo .extra .job_management ._interface import JobDatabaseInterface
10- from openeo .extra .job_management ._manager import MultiBackendJobManager
10+ from openeo .extra .job_management ._df_schema import _normalize , _COLUMN_REQUIREMENTS
1111
1212_log = logging .getLogger (__name__ )
1313
@@ -40,7 +40,7 @@ def initialize_from_df(self, df: pd.DataFrame, *, on_exists: str = "error"):
4040 else :
4141 # TODO handle other on_exists modes: e.g. overwrite, merge, ...
4242 raise ValueError (f"Invalid on_exists={ on_exists !r} " )
43- df = MultiBackendJobManager . _normalize_df (df )
43+ df = _normalize (df )
4444 self .persist (df )
4545 # Return self to allow chaining with constructor.
4646 return self
@@ -133,7 +133,7 @@ def read(self) -> pd.DataFrame:
133133 df = pd .read_csv (
134134 self .path ,
135135 # TODO: possible to avoid hidden coupling with MultiBackendJobManager here?
136- dtype = {c : r .dtype for (c , r ) in MultiBackendJobManager . _COLUMN_REQUIREMENTS .items ()},
136+ dtype = {c : r .dtype for (c , r ) in _COLUMN_REQUIREMENTS .items ()},
137137 )
138138 if (
139139 "geometry" in df .columns
@@ -203,3 +203,42 @@ def persist(self, df: pd.DataFrame):
203203 self .df .to_parquet (self .path , index = False )
204204
205205
def get_job_db(path: Union[str, Path]) -> JobDatabaseInterface:
    """
    Factory that builds a job database object for the file at ``path``.

    The concrete database class is selected from the filename extension,
    compared case-insensitively:

    - ``.csv``: :py:class:`CsvJobDatabase`
    - ``.parquet`` or ``.geoparquet``: :py:class:`ParquetJobDatabase`

    :param path: path to job database file.
    :return: job database instance constructed with ``path``.
    :raises ValueError: when the extension is none of the ones listed above.

    .. versionadded:: 0.33.0
    """
    path = Path(path)
    # Normalize once so that e.g. ".CSV" and ".csv" are treated alike.
    suffix = path.suffix.lower()
    if suffix == ".csv":
        return CsvJobDatabase(path=path)
    if suffix in (".parquet", ".geoparquet"):
        return ParquetJobDatabase(path=path)
    raise ValueError(f"Could not guess job database type from {path!r}")
223+
224+
def create_job_db(path: Union[str, Path], df: pd.DataFrame, *, on_exists: str = "error"):
    """
    Factory that creates a job database at ``path`` and fills it
    from the given dataframe.

    The database type is guessed from the filename extension
    (delegated to :py:func:`get_job_db`).

    :param path: Path to the job database file.
    :param df: DataFrame to store in the job database.
    :param on_exists: What to do when the job database already exists:
        - "error": (default) raise an exception
        - "skip": work with existing database, ignore given dataframe and skip any initialization
    :return: the job database object obtained from :py:func:`get_job_db`.
    :raises NotImplementedError: when the guessed database type is not a
        :py:class:`FullDataFrameJobDatabase` and so cannot be initialized
        from a dataframe here.

    .. versionadded:: 0.33.0
    """
    job_db = get_job_db(path)
    # Guard clause: only dataframe-backed databases are initialized from `df`.
    if not isinstance(job_db, FullDataFrameJobDatabase):
        raise NotImplementedError(f"Initialization of {type(job_db)} is not supported.")
    job_db.initialize_from_df(df=df, on_exists=on_exists)
    return job_db
0 commit comments