1515// specific language governing permissions and limitations
1616// under the License.
1717
18- //! [`FileScanConfig`] to configure scanning of possibly partitioned
19- //! file sources.
18+ //! [`FileScanConfig`] for configuring file-based data sources that implement [`DataSource`].
19+ //!
20+ //! [`DataSource`]: crate::source::DataSource
2021
2122use std:: {
2223 borrow:: Cow , collections:: HashMap , fmt:: Debug , fmt:: Formatter ,
@@ -54,10 +55,12 @@ use datafusion_physical_plan::{
5455
5556use log:: { debug, warn} ;
5657
57- /// The base configurations for a [`DataSourceExec`], the a physical plan for
58- /// any given file format.
58+ /// Configuration for file-based data sources.
5959///
60- /// Use [`DataSourceExec::from_data_source`] to create a [`DataSourceExec`] from a ``FileScanConfig`.
60+ /// Owned by file sources that implement [`DataSource`]. Use [`DataSourceExec::from_data_source`]
61+ /// to create a [`DataSourceExec`] from a data source that owns this config.
62+ ///
63+ /// [`DataSource`]: crate::source::DataSource
6164///
6265/// # Example
6366/// ```
@@ -88,7 +91,7 @@ use log::{debug, warn};
8891/// #[derive(Clone)]
8992/// # struct ParquetSource {
9093/// # projected_statistics: Option<Statistics>,
91- /// # schema_adapter_factory: Option<Arc<dyn SchemaAdapterFactory>>
94+ /// # schema_adapter_factory: Option<Arc<dyn SchemaAdapterFactory>>,
9295/// # config: FileScanConfig,
9396/// # };
9497/// # impl Debug for ParquetSource {fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { unimplemented!() }}
@@ -120,8 +123,8 @@ use log::{debug, warn};
120123/// PartitionedFile::new("file2.parquet", 56),
121124/// PartitionedFile::new("file3.parquet", 78),
122125/// ])).build();
123- /// // create an execution plan from the config
124- /// let plan: Arc<dyn ExecutionPlan> =DataSourceExec::from_data_source(ParquetSource::new(config.clone() ));
126+ /// // create an execution plan from the data source that owns the config
127+ /// let plan: Arc<dyn ExecutionPlan> = DataSourceExec::from_data_source(ParquetSource::new(config));
125128/// ```
126129#[ derive( Clone ) ]
127130pub struct FileScanConfig {
@@ -137,12 +140,10 @@ pub struct FileScanConfig {
137140 /// [`RuntimeEnv::register_object_store`]: datafusion_execution::runtime_env::RuntimeEnv::register_object_store
138141 /// [`RuntimeEnv::object_store`]: datafusion_execution::runtime_env::RuntimeEnv::object_store
139142 pub object_store_url : ObjectStoreUrl ,
140- /// Schema before `projection` is applied. It contains the all columns that may
141- /// appear in the files. It does not include table partition columns
142- /// that may be added.
143- /// Note that this is **not** the schema of the physical files.
144- /// This is the schema that the physical file schema will be
145- /// mapped onto, and the schema that the [`DataSourceExec`] will return.
143+ /// Schema before `projection` is applied. Contains all columns that may
144+ /// appear in the files, excluding table partition columns that may be added.
145+ /// This is the logical schema that the physical file schema will be
146+ /// mapped onto, and the schema that data sources will expose.
146147 pub file_schema : SchemaRef ,
147148 /// List of files to be processed, grouped into partitions
148149 ///
@@ -180,7 +181,7 @@ pub struct FileScanConfig {
180181 pub file_source_projected_statistics : Statistics ,
181182}
182183
183- /// A builder for [`FileScanConfig`]'s .
184+ /// A builder for [`FileScanConfig`].
184185///
185186/// Example:
186187///
0 commit comments