diff --git a/datafusion/common/src/config.rs b/datafusion/common/src/config.rs index c5ce3540fce5..d71f46a1cef9 100644 --- a/datafusion/common/src/config.rs +++ b/datafusion/common/src/config.rs @@ -15,7 +15,7 @@ // specific language governing permissions and limitations // under the License. -//! DataFusion Configuration Options +//! Runtime configuration, via [`ConfigOptions`] use crate::{DataFusionError, Result}; use std::any::Any; diff --git a/datafusion/core/src/catalog/mod.rs b/datafusion/core/src/catalog/mod.rs index b7843ed66b83..7696856b3276 100644 --- a/datafusion/core/src/catalog/mod.rs +++ b/datafusion/core/src/catalog/mod.rs @@ -15,8 +15,7 @@ // specific language governing permissions and limitations // under the License. -//! This module contains interfaces and default implementations -//! of table namespacing concepts, including catalogs and schemas. +//! Interfaces and default implementations of catalogs and schemas. // TODO(clippy): Having a `catalog::catalog` module path is unclear and ambiguous. // The parent module should probably be renamed to something that more accurately diff --git a/datafusion/core/src/dataframe.rs b/datafusion/core/src/dataframe.rs index 7d0fddcf8226..2834fb571fd7 100644 --- a/datafusion/core/src/dataframe.rs +++ b/datafusion/core/src/dataframe.rs @@ -15,7 +15,7 @@ // specific language governing permissions and limitations // under the License. -//! DataFrame API for building and executing query plans. +//! [`DataFrame`] API for building and executing query plans. 
use std::any::Any; use std::sync::Arc; diff --git a/datafusion/core/src/avro_to_arrow/arrow_array_reader.rs b/datafusion/core/src/datasource/avro_to_arrow/arrow_array_reader.rs similarity index 99% rename from datafusion/core/src/avro_to_arrow/arrow_array_reader.rs rename to datafusion/core/src/datasource/avro_to_arrow/arrow_array_reader.rs index 311e199f28c4..0d950656975f 100644 --- a/datafusion/core/src/avro_to_arrow/arrow_array_reader.rs +++ b/datafusion/core/src/datasource/avro_to_arrow/arrow_array_reader.rs @@ -957,7 +957,7 @@ where mod test { use crate::arrow::array::Array; use crate::arrow::datatypes::{Field, TimeUnit}; - use crate::avro_to_arrow::{Reader, ReaderBuilder}; + use crate::datasource::avro_to_arrow::{Reader, ReaderBuilder}; use arrow::datatypes::DataType; use datafusion_common::cast::{ as_int32_array, as_int64_array, as_list_array, as_timestamp_microsecond_array, diff --git a/datafusion/core/src/avro_to_arrow/mod.rs b/datafusion/core/src/datasource/avro_to_arrow/mod.rs similarity index 92% rename from datafusion/core/src/avro_to_arrow/mod.rs rename to datafusion/core/src/datasource/avro_to_arrow/mod.rs index 8ca7f22ef3b1..af0bb86a3e27 100644 --- a/datafusion/core/src/avro_to_arrow/mod.rs +++ b/datafusion/core/src/datasource/avro_to_arrow/mod.rs @@ -15,7 +15,9 @@ // specific language governing permissions and limitations // under the License. -//! This module contains utilities to manipulate avro metadata. +//! This module contains code for reading [Avro] data into `RecordBatch`es +//! +//! 
[Avro]: https://avro.apache.org/docs/1.2.0/ #[cfg(feature = "avro")] mod arrow_array_reader; diff --git a/datafusion/core/src/avro_to_arrow/reader.rs b/datafusion/core/src/datasource/avro_to_arrow/reader.rs similarity index 96% rename from datafusion/core/src/avro_to_arrow/reader.rs rename to datafusion/core/src/datasource/avro_to_arrow/reader.rs index c5dab22a2d00..5dc53c5c86c8 100644 --- a/datafusion/core/src/avro_to_arrow/reader.rs +++ b/datafusion/core/src/datasource/avro_to_arrow/reader.rs @@ -56,17 +56,21 @@ impl ReaderBuilder { /// # Example /// /// ``` - /// extern crate apache_avro; - /// /// use std::fs::File; /// - /// fn example() -> crate::datafusion::avro_to_arrow::Reader<'static, File> { + /// use datafusion::datasource::avro_to_arrow::{Reader, ReaderBuilder}; + /// + /// fn example() -> Reader<'static, File> { /// let file = File::open("test/data/basic.avro").unwrap(); /// /// // create a builder, inferring the schema with the first 100 records - /// let builder = crate::datafusion::avro_to_arrow::ReaderBuilder::new().read_schema().with_batch_size(100); + /// let builder = ReaderBuilder::new() + /// .read_schema() + /// .with_batch_size(100); /// - /// let reader = builder.build::<File>(file).unwrap(); + /// let reader = builder + /// .build::<File>(file) + /// .unwrap(); /// /// reader /// } diff --git a/datafusion/core/src/avro_to_arrow/schema.rs b/datafusion/core/src/datasource/avro_to_arrow/schema.rs similarity index 100% rename from datafusion/core/src/avro_to_arrow/schema.rs rename to datafusion/core/src/datasource/avro_to_arrow/schema.rs diff --git a/datafusion/core/src/datasource/file_format/avro.rs b/datafusion/core/src/datasource/file_format/avro.rs index ab9f1f5dd000..cfb146e4ff04 100644 --- a/datafusion/core/src/datasource/file_format/avro.rs +++ b/datafusion/core/src/datasource/file_format/avro.rs @@ -27,7 +27,7 @@ use datafusion_physical_expr::PhysicalExpr; use object_store::{GetResult, ObjectMeta, ObjectStore}; use super::FileFormat; -use 
crate::avro_to_arrow::read_avro_schema_from_reader; +use crate::datasource::avro_to_arrow::read_avro_schema_from_reader; use crate::datasource::physical_plan::{AvroExec, FileScanConfig}; use crate::error::Result; use crate::execution::context::SessionState; diff --git a/datafusion/core/src/datasource/mod.rs b/datafusion/core/src/datasource/mod.rs index 683afb7902e5..0f9a8c3d73b9 100644 --- a/datafusion/core/src/datasource/mod.rs +++ b/datafusion/core/src/datasource/mod.rs @@ -15,12 +15,15 @@ // specific language governing permissions and limitations // under the License. -//! DataFusion data sources +//! DataFusion data sources: [`TableProvider`] and [`ListingTable`] +//! +//! [`ListingTable`]: crate::datasource::listing::ListingTable // TODO(clippy): Having a `datasource::datasource` module path is unclear and ambiguous. // The child module should probably be renamed to something that more accurately // describes its content. Something along the lines of `provider`, or `providers`. #![allow(clippy::module_inception)] +pub mod avro_to_arrow; pub mod datasource; pub mod default_table_source; pub mod empty; diff --git a/datafusion/core/src/datasource/physical_plan/avro.rs b/datafusion/core/src/datasource/physical_plan/avro.rs index 704a97ba7e88..0c286ba19c61 100644 --- a/datafusion/core/src/datasource/physical_plan/avro.rs +++ b/datafusion/core/src/datasource/physical_plan/avro.rs @@ -165,6 +165,7 @@ impl ExecutionPlan for AvroExec { #[cfg(feature = "avro")] mod private { use super::*; + use crate::datasource::avro_to_arrow::Reader as AvroReader; use crate::datasource::physical_plan::file_stream::{FileOpenFuture, FileOpener}; use crate::datasource::physical_plan::FileMeta; use bytes::Buf; @@ -179,11 +180,8 @@ mod private { } impl AvroConfig { - fn open( - &self, - reader: R, - ) -> Result> { - crate::avro_to_arrow::Reader::try_new( + fn open(&self, reader: R) -> Result> { + AvroReader::try_new( reader, self.schema.clone(), self.batch_size, diff --git 
a/datafusion/core/src/error.rs b/datafusion/core/src/error.rs index 0a138c80df2a..5a5faa7896e3 100644 --- a/datafusion/core/src/error.rs +++ b/datafusion/core/src/error.rs @@ -15,5 +15,5 @@ // specific language governing permissions and limitations // under the License. -//! DataFusion error types +//! DataFusion error type [`DataFusionError`] and [`Result`]. pub use datafusion_common::{DataFusionError, Result, SharedResult}; diff --git a/datafusion/core/src/lib.rs b/datafusion/core/src/lib.rs index 3e58923c3aad..ca44af7339c5 100644 --- a/datafusion/core/src/lib.rs +++ b/datafusion/core/src/lib.rs @@ -384,11 +384,12 @@ //! and improve compilation times. The crates are: //! //! * [datafusion_common]: Common traits and types -//! * [datafusion_execution]: State needed for execution //! * [datafusion_expr]: [`LogicalPlan`], [`Expr`] and related logical planning structure +//! * [datafusion_execution]: State and structures needed for execution //! * [datafusion_optimizer]: [`OptimizerRule`]s and [`AnalyzerRule`]s //! * [datafusion_physical_expr]: [`PhysicalExpr`] and related expressions -//! * [datafusion_sql]: [`SqlToRel`] SQL planner +//! * [datafusion_row]: Row based representation +//! * [datafusion_sql]: SQL planner ([`SqlToRel`]) //! //! [sqlparser]: https://docs.rs/sqlparser/latest/sqlparser //! 
[`SqlToRel`]: sql::planner::SqlToRel @@ -412,7 +413,6 @@ pub const DATAFUSION_VERSION: &str = env!("CARGO_PKG_VERSION"); extern crate core; extern crate sqlparser; -pub mod avro_to_arrow; pub mod catalog; pub mod dataframe; pub mod datasource; @@ -431,6 +431,7 @@ pub use parquet; // re-export DataFusion crates pub use datafusion_common as common; pub use datafusion_common::config; +pub use datafusion_execution; pub use datafusion_expr as logical_expr; pub use datafusion_optimizer as optimizer; pub use datafusion_physical_expr as physical_expr; diff --git a/datafusion/core/src/physical_optimizer/mod.rs b/datafusion/core/src/physical_optimizer/mod.rs index b4c019d62ba9..48a3d6ade774 100644 --- a/datafusion/core/src/physical_optimizer/mod.rs +++ b/datafusion/core/src/physical_optimizer/mod.rs @@ -15,9 +15,12 @@ // specific language governing permissions and limitations // under the License. -//! This module contains a query optimizer that operates against a physical plan and applies -//! rules to a physical plan, such as "Repartition". - +//! Optimizer that rewrites [`ExecutionPlan`]s. +//! +//! These rules take advantage of physical plan properties, such as +//! "Repartition" or "Sortedness" +//! +//! [`ExecutionPlan`]: crate::physical_plan::ExecutionPlan pub mod aggregate_statistics; pub mod coalesce_batches; pub mod combine_partial_final_agg; diff --git a/datafusion/core/src/prelude.rs b/datafusion/core/src/prelude.rs index ed2c81a69ff1..d01d9c2390d4 100644 --- a/datafusion/core/src/prelude.rs +++ b/datafusion/core/src/prelude.rs @@ -15,7 +15,7 @@ // specific language governing permissions and limitations // under the License. -//! A "prelude" for users of the datafusion crate. +//! DataFusion "prelude" to simplify importing common types. //! //! Like the standard library's prelude, this module simplifies importing of //! common items. 
Unlike the standard prelude, the contents of this module must diff --git a/datafusion/core/src/scalar.rs b/datafusion/core/src/scalar.rs index 29f75096aece..c4f0d80616ee 100644 --- a/datafusion/core/src/scalar.rs +++ b/datafusion/core/src/scalar.rs @@ -15,7 +15,8 @@ // specific language governing permissions and limitations // under the License. -//! ScalarValue reimported from datafusion-common to easy migration -//! when datafusion was split into several different crates - +//! [`ScalarValue`] single value representation. +//! +//! Note this is reimported from the datafusion-common crate for easy +//! migration when datafusion was split into several different crates pub use datafusion_common::{ScalarType, ScalarValue}; diff --git a/datafusion/core/src/variable/mod.rs b/datafusion/core/src/variable/mod.rs index 6efa8eb86211..5ef165313ccf 100644 --- a/datafusion/core/src/variable/mod.rs +++ b/datafusion/core/src/variable/mod.rs @@ -15,6 +15,6 @@ // specific language governing permissions and limitations // under the License. -//! Variable provider +//! Variable provider for `@name` and `@@name` style runtime values. pub use datafusion_physical_expr::var_provider::{VarProvider, VarType}; diff --git a/datafusion/physical-expr/src/var_provider.rs b/datafusion/physical-expr/src/var_provider.rs index faa07665e4f3..e00cf7407237 100644 --- a/datafusion/physical-expr/src/var_provider.rs +++ b/datafusion/physical-expr/src/var_provider.rs @@ -29,7 +29,7 @@ pub enum VarType { UserDefined, } -/// A var provider for @variable +/// A var provider for `@variable` and `@@variable` runtime values. pub trait VarProvider: std::fmt::Debug { /// Get variable value fn get_value(&self, var_names: Vec) -> Result;