diff --git a/pysteps/io/readers.py b/pysteps/io/readers.py
index 30c4d4fc0..e9f0d0804 100644
--- a/pysteps/io/readers.py
+++ b/pysteps/io/readers.py
@@ -11,6 +11,8 @@
     read_timeseries
 """
 
+import warnings
+
 import numpy as np
 import xarray as xr
 
@@ -62,12 +64,20 @@ def read_timeseries(inputfns, importer, timestep=None, **kwargs) -> xr.Dataset |
 
     startdate = min(inputfns[1])
     sorted_dates = sorted(inputfns[1])
-    timestep_dates = int((sorted_dates[1] - sorted_dates[0]).total_seconds())
+    timestep_dates = None
+    if len(sorted_dates) > 1:
+        timestep_dates = int((sorted_dates[1] - sorted_dates[0]).total_seconds())
 
+    if timestep is None and timestep_dates is None:
+        raise ValueError("either provide a timestep or provide more than one inputfn")
     if timestep is None:
         timestep = timestep_dates
-    if timestep != timestep_dates:
-        raise ValueError("given timestep does not match inputfns")
+    if timestep_dates is not None and timestep != timestep_dates:
+        warnings.warn(
+            "Supplied timestep does not match actual timestep spacing in input data, "
+            + "using actual spacing as timestep."
+        )
+        timestep = timestep_dates
     for i in range(len(sorted_dates) - 1):
         if int((sorted_dates[i + 1] - sorted_dates[i]).total_seconds()) != timestep:
             raise ValueError("supplied dates are not evenly spaced")
diff --git a/pysteps/nowcasts/steps.py b/pysteps/nowcasts/steps.py
index b4d050cc8..4fa951c80 100644
--- a/pysteps/nowcasts/steps.py
+++ b/pysteps/nowcasts/steps.py
@@ -255,8 +255,10 @@ class StepsNowcasterParams:
 
 @dataclass
 class StepsNowcasterState:
-    precip_forecast: list[Any] | None = field(default_factory=list)
-    precip_cascades: list[list[np.ndarray]] | None = field(default_factory=list)
+    precip: np.ndarray
+    velocity: np.ndarray
+    precip_forecast: np.ndarray | None = None
+    precip_cascades: np.ndarray | None = None
     precip_decomposed: list[dict[str, Any]] | None = field(default_factory=list)
     # The observation mask (where the radar can observe the precipitation)
     precip_mask: list[Any] | None = field(default_factory=list)
@@ -276,19 +278,22 @@ class StepsNowcasterState:
 
 
 class StepsNowcaster:
-    def __init__(
-        self, precip, velocity, time_steps, steps_config: StepsNowcasterConfig
-    ):
+    def __init__(self, dataset, time_steps, steps_config: StepsNowcasterConfig):
         # Store inputs and optional parameters
-        self.__precip = precip
-        self.__velocity = velocity
         self.__time_steps = time_steps
 
         # Store the config data:
         self.__config = steps_config
 
+        self.__dataset = dataset.copy(deep=True)
+        precip_var = self.__dataset.attrs["precip_var"]
+        precip = self.__dataset[precip_var].values
+        velocity = np.stack(
+            [self.__dataset["velocity_x"], self.__dataset["velocity_y"]]
+        )
+
         # Store the state and params data:
-        self.__state = StepsNowcasterState()
+        self.__state = StepsNowcasterState(precip, velocity)
         self.__params = StepsNowcasterParams()
 
         # Additional variables for time measurement
@@ -303,14 +308,13 @@ def compute_forecast(self):
 
         Parameters
         ----------
-        precip: array-like
-            Array of shape (ar_order+1,m,n) containing the input precipitation fields
-            ordered by timestamp from oldest to newest. The time steps between the
-            inputs are assumed to be regular.
-        velocity: array-like
-            Array of shape (2,m,n) containing the x- and y-components of the advection
-            field. The velocities are assumed to represent one time step between the
-            inputs. All values are required to be finite.
+        dataset: xarray.Dataset
+            Input dataset as described in the documentation of
+            :py:mod:`pysteps.io.importers`. It has to contain the ``velocity_x`` and
+            ``velocity_y`` data variables, as well as any precipitation data variable.
+            The time dimension of the dataset has to be size
+            ``ar_order + 1`` and the precipitation variable has to have this dimension. All
+            velocity values are required to be finite.
         timesteps: int or list of floats
             Number of time steps to forecast or a list of time steps for which the
             forecasts are computed (relative to the input time step). The elements
@@ -320,13 +324,13 @@ def compute_forecast(self):
 
         Returns
         -------
-        out: ndarray
-            If return_output is True, a four-dimensional array of shape
-            (n_ens_members,num_timesteps,m,n) containing a time series of forecast
+        out: xarray.Dataset
+            If return_output is True, a dataset as described in the documentation of
+            :py:mod:`pysteps.io.importers` is returned containing a time series of forecast
             precipitation fields for each ensemble member. Otherwise, a None value
             is returned. The time series starts from t0+timestep, where timestep is
-            taken from the input precipitation fields. If measure_time is True, the
-            return value is a three-element tuple containing the nowcast array, the
+            taken from the metadata of the time coordinate. If measure_time is True, the
+            return value is a three-element tuple containing the nowcast dataset, the
             initialization time of the nowcast generator and the time used in the
             main loop (seconds).
 
@@ -346,7 +350,9 @@ def compute_forecast(self):
             self.__start_time_init = time.time()
 
         # Slice the precipitation field to only use the last ar_order + 1 fields
-        self.__precip = self.__precip[-(self.__config.ar_order + 1) :, :, :].copy()
+        self.__state.precip = self.__state.precip[
+            -(self.__config.ar_order + 1) :, :, :
+        ].copy()
         self.__initialize_nowcast_components()
 
         self.__perform_extrapolation()
@@ -378,14 +384,13 @@ def compute_forecast(self):
                     for j in range(self.__config.n_ens_members)
                 ]
             )
+            output_dataset = convert_output_to_xarray_dataset(
+                self.__dataset, self.__time_steps, self.__state.precip_forecast
+            )
             if self.__config.measure_time:
-                return (
-                    self.__state.precip_forecast,
-                    self.__init_time,
-                    self.__mainloop_time,
-                )
+                return (output_dataset, self.__init_time, self.__mainloop_time)
             else:
-                return self.__state.precip_forecast
+                return output_dataset
         else:
             return None
 
@@ -395,7 +400,7 @@ def __nowcast_main(self):
         to generate forecasts.
         """
         # Isolate the last time slice of observed precipitation
-        precip = self.__precip[
+        precip = self.__state.precip[
             -1, :, :
         ]  # Extract the last available precipitation field
 
@@ -408,7 +413,7 @@ def __nowcast_main(self):
         # Run the nowcast main loop
         self.__state.precip_forecast = nowcast_main_loop(
             precip,
-            self.__velocity,
+            self.__state.velocity,
             state,
             self.__time_steps,
             self.__config.extrapolation_method,
@@ -428,26 +433,26 @@ def __check_inputs(self):
         """
         Validate the inputs to ensure consistency and correct shapes.
         """
-        if self.__precip.ndim != 3:
+        if self.__state.precip.ndim != 3:
             raise ValueError("precip must be a three-dimensional array")
-        if self.__precip.shape[0] < self.__config.ar_order + 1:
+        if self.__state.precip.shape[0] < self.__config.ar_order + 1:
             raise ValueError(
                 f"precip.shape[0] must be at least ar_order+1, "
-                f"but found {self.__precip.shape[0]}"
+                f"but found {self.__state.precip.shape[0]}"
             )
-        if self.__velocity.ndim != 3:
+        if self.__state.velocity.ndim != 3:
             raise ValueError("velocity must be a three-dimensional array")
-        if self.__precip.shape[1:3] != self.__velocity.shape[1:3]:
+        if self.__state.precip.shape[1:3] != self.__state.velocity.shape[1:3]:
             raise ValueError(
                 f"Dimension mismatch between precip and velocity: "
-                f"shape(precip)={self.__precip.shape}, shape(velocity)={self.__velocity.shape}"
+                f"shape(precip)={self.__state.precip.shape}, shape(velocity)={self.__state.velocity.shape}"
             )
         if (
             isinstance(self.__time_steps, list)
             and not sorted(self.__time_steps) == self.__time_steps
         ):
             raise ValueError("timesteps must be in ascending order")
-        if np.any(~np.isfinite(self.__velocity)):
+        if np.any(~np.isfinite(self.__state.velocity)):
             raise ValueError("velocity contains non-finite values")
         if self.__config.mask_method not in ["obs", "sprog", "incremental", None]:
             raise ValueError(
@@ -528,7 +533,9 @@ def __print_forecast_info(self):
 
         print("Inputs")
         print("------")
-        print(f"input dimensions: {self.__precip.shape[1]}x{self.__precip.shape[2]}")
+        print(
+            f"input dimensions: {self.__state.precip.shape[1]}x{self.__state.precip.shape[2]}"
+        )
         if self.__config.kmperpixel is not None:
             print(f"km/pixel:         {self.__config.kmperpixel}")
         if self.__config.timestep is not None:
@@ -599,7 +606,9 @@ def __initialize_nowcast_components(self):
             self.__config.n_ens_members, self.__config.num_workers
         )
 
-        M, N = self.__precip.shape[1:]  # Extract the spatial dimensions (height, width)
+        M, N = self.__state.precip.shape[
+            1:
+        ]  # Extract the spatial dimensions (height, width)
 
         # Initialize FFT method
         self.__params.fft = utils.get_method(
@@ -631,7 +640,10 @@ def __initialize_nowcast_components(self):
 
         # Determine the domain mask from non-finite values in the precipitation data
         self.__params.domain_mask = np.logical_or.reduce(
-            [~np.isfinite(self.__precip[i, :]) for i in range(self.__precip.shape[0])]
+            [
+                ~np.isfinite(self.__state.precip[i, :])
+                for i in range(self.__state.precip.shape[0])
+            ]
         )
 
         print("Nowcast components initialized successfully.")
@@ -645,8 +657,8 @@ def __perform_extrapolation(self):
         if self.__config.conditional:
             self.__state.mask_threshold = np.logical_and.reduce(
                 [
-                    self.__precip[i, :, :] >= self.__config.precip_threshold
-                    for i in range(self.__precip.shape[0])
+                    self.__state.precip[i, :, :] >= self.__config.precip_threshold
+                    for i in range(self.__state.precip.shape[0])
                 ]
             )
         else:
@@ -655,7 +667,7 @@ def __perform_extrapolation(self):
         extrap_kwargs = self.__state.extrapolation_kwargs.copy()
         extrap_kwargs["xy_coords"] = self.__params.xy_coordinates
         extrap_kwargs["allow_nonfinite_values"] = (
-            True if np.any(~np.isfinite(self.__precip)) else False
+            True if np.any(~np.isfinite(self.__state.precip)) else False
         )
 
         res = []
@@ -664,7 +676,7 @@ def __extrapolate_single_field(precip, i):
             # Extrapolate a single precipitation field using the velocity field
             return self.__params.extrapolation_method(
                 precip[i, :, :],
-                self.__velocity,
+                self.__state.velocity,
                 self.__config.ar_order - i,
                 "min",
                 **extrap_kwargs,
@@ -674,17 +686,21 @@ def __extrapolate_single_field(precip, i):
             if (
                 not DASK_IMPORTED
             ):  # If Dask is not available, perform sequential extrapolation
-                self.__precip[i, :, :] = __extrapolate_single_field(self.__precip, i)
+                self.__state.precip[i, :, :] = __extrapolate_single_field(
+                    self.__state.precip, i
+                )
             else:
                 # If Dask is available, accumulate delayed computations for parallel execution
-                res.append(dask.delayed(__extrapolate_single_field)(self.__precip, i))
+                res.append(
+                    dask.delayed(__extrapolate_single_field)(self.__state.precip, i)
+                )
 
         # If Dask is available, perform the parallel computation
         if DASK_IMPORTED and res:
             num_workers_ = min(self.__params.num_ensemble_workers, len(res))
-            self.__precip = np.stack(
+            self.__state.precip = np.stack(
                 list(dask.compute(*res, num_workers=num_workers_))
-                + [self.__precip[-1, :, :]]
+                + [self.__state.precip[-1, :, :]]
             )
 
         print("Extrapolation complete and precipitation fields aligned.")
@@ -696,12 +712,12 @@ def __apply_noise_and_ar_model(self):
         and adds noise perturbations if necessary.
         """
         # Make a copy of the precipitation data and replace non-finite values
-        precip = self.__precip.copy()
-        for i in range(self.__precip.shape[0]):
+        precip = self.__state.precip.copy()
+        for i in range(self.__state.precip.shape[0]):
             # Replace non-finite values with the minimum finite value of the precipitation field
             precip[i, ~np.isfinite(precip[i, :])] = np.nanmin(precip[i, :])
         # Store the precipitation data back in the object
-        self.__precip = precip
+        self.__state.precip = precip
 
         # Initialize the noise generator if the noise_method is provided
         if self.__config.noise_method is not None:
@@ -712,7 +728,7 @@ def __apply_noise_and_ar_model(self):
             self.__params.noise_generator = generate_noise
 
             self.__params.perturbation_generator = init_noise(
-                self.__precip,
+                self.__state.precip,
                 fft_method=self.__params.fft,
                 **self.__params.noise_kwargs,
             )
@@ -726,9 +742,9 @@ def __apply_noise_and_ar_model(self):
                 # Compute noise adjustment coefficients
                 self.__params.noise_std_coefficients = (
                     noise.utils.compute_noise_stddev_adjs(
-                        self.__precip[-1, :, :],
+                        self.__state.precip[-1, :, :],
                         self.__config.precip_threshold,
-                        np.min(self.__precip),
+                        np.min(self.__state.precip),
                         self.__params.bandpass_filter,
                         self.__params.decomposition_method,
                         self.__params.perturbation_generator,
@@ -778,7 +794,7 @@ def __apply_noise_and_ar_model(self):
         self.__state.precip_decomposed = []
         for i in range(self.__config.ar_order + 1):
             precip_ = self.__params.decomposition_method(
-                self.__precip[i, :, :],
+                self.__state.precip[i, :, :],
                 self.__params.bandpass_filter,
                 mask=self.__state.mask_threshold,
                 fft_method=self.__params.fft,
@@ -891,7 +907,7 @@ def __initialize_velocity_perturbations(self):
                     ),
                 }
                 vp = init_vel_noise(
-                    self.__velocity,
+                    self.__state.velocity,
                     1.0 / self.__config.kmperpixel,
                     self.__config.timestep,
                     **kwargs,
@@ -911,8 +927,8 @@ def __initialize_precipitation_mask(self):
 
         if self.__config.probmatching_method == "mean":
             self.__params.precipitation_mean = np.mean(
-                self.__precip[-1, :, :][
-                    self.__precip[-1, :, :] >= self.__config.precip_threshold
+                self.__state.precip[-1, :, :][
+                    self.__state.precip[-1, :, :] >= self.__config.precip_threshold
                 ]
             )
         else:
@@ -920,13 +936,13 @@ def __initialize_precipitation_mask(self):
 
         if self.__config.mask_method is not None:
             self.__state.mask_precip = (
-                self.__precip[-1, :, :] >= self.__config.precip_threshold
+                self.__state.precip[-1, :, :] >= self.__config.precip_threshold
             )
 
             if self.__config.mask_method == "sprog":
                 # Compute the wet area ratio and the precipitation mask
                 self.__params.wet_area_ratio = np.sum(self.__state.mask_precip) / (
-                    self.__precip.shape[1] * self.__precip.shape[2]
+                    self.__state.precip.shape[1] * self.__state.precip.shape[2]
                 )
                 self.__state.precip_mask = [
                     self.__state.precip_cascades[0][i].copy()
@@ -974,7 +990,7 @@ def __initialize_fft_objects(self):
         self.__state.fft_objs = []
         for _ in range(self.__config.n_ens_members):
             fft_obj = utils.get_method(
-                self.__config.fft_method, shape=self.__precip.shape[1:]
+                self.__config.fft_method, shape=self.__state.precip.shape[1:]
             )
             self.__state.fft_objs.append(fft_obj)
         print("FFT objects initialized successfully.")
@@ -1221,21 +1237,6 @@ def __measure_time(self, label, start_time):
             return elapsed_time
         return None
 
-    def reset_states_and_params(self):
-        """
-        Reset the internal state and parameters of the nowcaster to allow multiple forecasts.
-        This method resets the state and params to their initial conditions without reinitializing
-        the inputs like precip, velocity, time_steps, or config.
-        """
-        # Re-initialize the state and parameters
-        self.__state = StepsNowcasterState()
-        self.__params = StepsNowcasterParams()
-
-        # Reset time measurement variables
-        self.__start_time_init = None
-        self.__init_time = None
-        self.__mainloop_time = None
-
 
 # Wrapper function to preserve backward compatibility
 def forecast(
@@ -1494,10 +1495,7 @@ def forecast(
     )
 
     # Create an instance of the new class with all the provided arguments
-    nowcaster = StepsNowcaster(
-        precip, velocity, timesteps, steps_config=nowcaster_config
-    )
+    nowcaster = StepsNowcaster(dataset, timesteps, steps_config=nowcaster_config)
     forecast_steps_nowcast = nowcaster.compute_forecast()
-    nowcaster.reset_states_and_params()
     # Call the appropriate methods within the class
     return forecast_steps_nowcast
diff --git a/pysteps/tests/helpers.py b/pysteps/tests/helpers.py
index 24c58f8d1..12d22cfcf 100644
--- a/pysteps/tests/helpers.py
+++ b/pysteps/tests/helpers.py
@@ -183,7 +183,7 @@ def get_precipitation_fields(
     # Read the radar composites
     importer = io.get_method(importer_name, "importer")
 
-    dataset = io.read_timeseries(fns, importer, **_importer_kwargs)
+    dataset = io.read_timeseries(fns, importer, timestep=timestep, **_importer_kwargs)
 
     if not return_raw:
         precip_var = dataset.attrs["precip_var"]
diff --git a/pysteps/tests/test_motion.py b/pysteps/tests/test_motion.py
index 8d198960a..8b2384cb0 100644
--- a/pysteps/tests/test_motion.py
+++ b/pysteps/tests/test_motion.py
@@ -16,12 +16,12 @@
 Also, they will fail if any modification on the code decrease the quality of
 the retrieval.
 """
-
 from contextlib import contextmanager
+from functools import partial
 
 import numpy as np
 import pytest
-from functools import partial
+import xarray as xr
 from scipy.ndimage import uniform_filter
 
 import pysteps as stp
@@ -38,7 +38,10 @@ def not_raises(_exception):
         raise pytest.fail("DID RAISE {0}".format(_exception))
 
 
-reference_field = get_precipitation_fields(num_prev_files=0)
+reference_dataset = get_precipitation_fields(num_prev_files=0)
+precip_var = reference_dataset.attrs["precip_var"]
+reference_data = reference_dataset[precip_var].values[0][::-1]
+reference_field = np.ma.masked_array(reference_data, reference_data == -15.0)
 
 
 def _create_motion_field(input_precip, motion_type):
@@ -208,6 +211,18 @@ def test_optflow_method_convergence(
     ideal_motion, precip_obs = _create_observations(
         input_precip.copy(), motion_type, num_times=num_times
     )
+    precip_data = precip_obs.data
+    dataset = xr.Dataset(
+        data_vars={
+            precip_var: (
+                ["time", "y", "x"],
+                precip_data,
+                reference_dataset[precip_var].attrs,
+            )
+        },
+        coords={**dict(reference_dataset.coords), "time": np.arange(num_times)},
+        attrs={**reference_dataset.attrs},
+    )
 
     oflow_method = motion.get_method(optflow_method_name)
 
@@ -217,15 +232,18 @@ def test_optflow_method_convergence(
         # To increase the stability of the tests to we increase this value to
         # maxiter=150.
         retrieved_motion = oflow_method(
-            precip_obs, verbose=False, options=dict(maxiter=150)
+            dataset, verbose=False, options=dict(maxiter=150)
         )
     elif optflow_method_name == "proesmans":
-        retrieved_motion = oflow_method(precip_obs)
+        retrieved_motion = oflow_method(dataset)
     else:
-        retrieved_motion = oflow_method(precip_obs, verbose=False)
+        retrieved_motion = oflow_method(dataset, verbose=False)
 
-    precip_data, _ = stp.utils.dB_transform(precip_obs.max(axis=0), inverse=True)
-    precip_data.data[precip_data.mask] = 0
+    precip_dataset = stp.utils.dB_transform(
+        dataset.max(dim="time", keep_attrs=True), inverse=True
+    )
+    precip_data = precip_dataset[precip_var].values
+    precip_data[np.isnan(precip_dataset[precip_var].values)] = 0
 
     precip_mask = (uniform_filter(precip_data, size=20) > 0.1) & ~precip_obs.mask.any(
         axis=0
@@ -236,7 +254,8 @@ def test_optflow_method_convergence(
     # Relative MSE = < (expected_motion - computed_motion)^2 > / <expected_motion^2 >
     # Relative RMSE = sqrt(Relative MSE)
 
-    mse = ((ideal_motion - retrieved_motion)[:, precip_mask] ** 2).mean()
+    motion_array = np.stack([retrieved_motion.velocity_x, retrieved_motion.velocity_y])
+    mse = ((ideal_motion - motion_array)[:, precip_mask] ** 2).mean()
 
     rel_mse = mse / (ideal_motion[:, precip_mask] ** 2).mean()
     rel_rmse = np.sqrt(rel_mse) * 100
@@ -280,8 +299,13 @@ def test_no_precipitation(optflow_method_name, num_times):
     if optflow_method_name == "lk":
         pytest.importorskip("cv2")
     zero_precip = np.zeros((num_times,) + reference_field.shape)
+    dataset = xr.Dataset(
+        data_vars={precip_var: (["time", "y", "x"], zero_precip)},
+        coords={**dict(reference_dataset.coords), "time": np.arange(num_times)},
+        attrs=reference_dataset.attrs,
+    )
     motion_method = motion.get_method(optflow_method_name)
-    uv_motion = motion_method(zero_precip, verbose=False)
+    uv_motion = motion_method(dataset, verbose=False)
 
     assert np.abs(uv_motion).max() < 0.01
 
@@ -313,15 +337,70 @@ def test_input_shape_checks(
 
     with not_raises(Exception):
         for frames in range(minimum_input_frames, maximum_input_frames + 1):
-            motion_method(np.zeros((frames, image_size, image_size)), verbose=False)
+            dataset = xr.Dataset(
+                data_vars={
+                    precip_var: (
+                        ["time", "y", "x"],
+                        np.zeros((frames, image_size, image_size)),
+                    )
+                },
+                coords={
+                    "time": np.arange(frames),
+                    "y": np.arange(image_size),
+                    "x": np.arange(image_size),
+                },
+                attrs=reference_dataset.attrs,
+            )
+            motion_method(dataset, verbose=False)
 
     with pytest.raises(ValueError):
-        motion_method(np.zeros((2,)))
+        motion_method(
+            xr.Dataset(
+                data_vars={precip_var: (["time"], np.zeros((2,)))},
+                coords={"time": np.arange(2)},
+                attrs=reference_dataset.attrs,
+            )
+        )
+        motion_method(
+            xr.Dataset(
+                data_vars={precip_var: (["y", "x"], np.zeros((2, 2)))},
+                coords={"y": np.arange(2), "x": np.arange(2)},
+                attrs=reference_dataset.attrs,
+            )
+        )
         motion_method(np.zeros((2, 2)))
         for frames in range(minimum_input_frames):
-            motion_method(np.zeros((frames, image_size, image_size)), verbose=False)
+            dataset = xr.Dataset(
+                data_vars={
+                    precip_var: (
+                        ["time", "y", "x"],
+                        np.zeros((frames, image_size, image_size)),
+                    )
+                },
+                coords={
+                    "time": np.arange(frames),
+                    "y": np.arange(image_size),
+                    "x": np.arange(image_size),
+                },
+                attrs=reference_dataset.attrs,
+            )
+            motion_method(dataset, verbose=False)
         for frames in range(maximum_input_frames + 1, maximum_input_frames + 4):
-            motion_method(np.zeros((frames, image_size, image_size)), verbose=False)
+            dataset = xr.Dataset(
+                data_vars={
+                    precip_var: (
+                        ["time", "y", "x"],
+                        np.zeros((frames, image_size, image_size)),
+                    )
+                },
+                coords={
+                    "time": np.arange(frames),
+                    "y": np.arange(image_size),
+                    "x": np.arange(image_size),
+                },
+                attrs=reference_dataset.attrs,
+            )
+            motion_method(dataset, verbose=False)
 
 
 def test_vet_padding():
@@ -333,9 +412,16 @@ def test_vet_padding():
     _, precip_obs = _create_observations(
         reference_field.copy(), "linear_y", num_times=2
     )
+    precip_data = precip_obs.data
+    precip_data[precip_obs.mask] = np.nan
+    dataset = xr.Dataset(
+        data_vars={precip_var: (["time", "y", "x"], precip_data)},
+        coords={**dict(reference_dataset.coords), "time": [0, 1]},
+        attrs=reference_dataset.attrs,
+    )
 
     # Use a small region to speed up the test
-    precip_obs = precip_obs[:, 200:427, 250:456]
+    dataset = dataset.isel(y=slice(200, 427), x=slice(250, 456))
     # precip_obs.shape == (227 , 206)
     # 227 is a prime number ; 206 = 2*103
     # Using this shape will force vet to internally pad the input array for the sector's
@@ -354,8 +440,10 @@ def test_vet_padding():
             # we don't care about convergence in this test
         )
 
-        assert precip_obs.shape == vet_method(precip_obs).shape
-        assert precip_obs.shape == vet_method(np.ma.masked_invalid(precip_obs)).shape
+        assert (
+            dataset[precip_var].values.shape
+            == vet_method(dataset)[precip_var].values.shape
+        )
 
 
 def test_vet_cost_function():
@@ -383,7 +471,7 @@ def test_vet_cost_function():
             ideal_motion.shape[1:],  # blocks_shape (same as 2D grid)
             mask_2d,  # Mask
             1e6,  # smooth_gain
-            debug=False,
+            debug=True,
         )
 
     tolerance = 1e-12
@@ -408,11 +496,21 @@ def test_lk_masked_array():
     np.ma.set_fill_value(precip_obs, -15)
     ndarray = precip_obs.filled()
     ndarray[ndarray == -15] = np.nan
-    uv_ndarray = motion_method(ndarray, fd_kwargs={"buffer_mask": 20}, verbose=False)
+    dataset = xr.Dataset(
+        data_vars={precip_var: (["time", "y", "x"], ndarray)},
+        coords={**dict(reference_dataset.coords), "time": [0, 1]},
+        attrs=reference_dataset.attrs,
+    )
+    uv_ndarray = motion_method(dataset, fd_kwargs={"buffer_mask": 20}, verbose=False)
 
     # masked array
     mdarray = np.ma.masked_invalid(ndarray)
     mdarray.data[mdarray.mask] = -15
-    uv_mdarray = motion_method(mdarray, fd_kwargs={"buffer_mask": 20}, verbose=False)
+    dataset = xr.Dataset(
+        data_vars={precip_var: (["time", "y", "x"], ndarray)},
+        coords={**dict(reference_dataset.coords), "time": [0, 1]},
+        attrs=reference_dataset.attrs,
+    )
+    uv_mdarray = motion_method(dataset, fd_kwargs={"buffer_mask": 20}, verbose=False)
 
     assert np.abs(uv_mdarray - uv_ndarray).max() < 0.01
diff --git a/pysteps/tests/test_plt_cartopy.py b/pysteps/tests/test_plt_cartopy.py
index e873e40af..f4dc9f289 100644
--- a/pysteps/tests/test_plt_cartopy.py
+++ b/pysteps/tests/test_plt_cartopy.py
@@ -1,11 +1,11 @@
 # -*- coding: utf-8 -*-
 
+import matplotlib.pyplot as plt
 import pytest
 
-from pysteps.visualization import plot_precip_field
-from pysteps.utils import to_rainrate
 from pysteps.tests.helpers import get_precipitation_fields
-import matplotlib.pyplot as plt
+from pysteps.utils import to_rainrate
+from pysteps.visualization import plot_precip_field
 
 plt_arg_names = ("source", "map_kwargs", "pass_geodata")
 
@@ -25,14 +25,24 @@
 
 @pytest.mark.parametrize(plt_arg_names, plt_arg_values)
 def test_visualization_plot_precip_field(source, map_kwargs, pass_geodata):
-    field, metadata = get_precipitation_fields(0, 0, True, True, None, source)
-    field = field.squeeze()
-    field, __ = to_rainrate(field, metadata)
+    dataset = get_precipitation_fields(0, 0, True, None, source)
+    dataset = to_rainrate(dataset)
 
+    precip_var = dataset.attrs["precip_var"]
+    field = dataset[precip_var].values
+    field = field.squeeze()
+    geodata = {
+        "projection": dataset.attrs["projection"],
+        "x1": dataset.x.values[0],
+        "x2": dataset.x.values[-1],
+        "y1": dataset.y.values[0],
+        "y2": dataset.y.values[-1],
+        "yorigin": "lower",
+    }
     if not pass_geodata:
-        metadata = None
+        geodata = None
 
-    plot_precip_field(field, ptype="intensity", geodata=metadata, map_kwargs=map_kwargs)
+    plot_precip_field(field, ptype="intensity", geodata=geodata, map_kwargs=map_kwargs)
 
 
 if __name__ == "__main__":
diff --git a/pysteps/tests/test_plt_motionfields.py b/pysteps/tests/test_plt_motionfields.py
index d0c7e6414..7f642652b 100644
--- a/pysteps/tests/test_plt_motionfields.py
+++ b/pysteps/tests/test_plt_motionfields.py
@@ -5,9 +5,8 @@
 import pytest
 
 from pysteps import motion
-from pysteps.visualization import plot_precip_field, quiver, streamplot
 from pysteps.tests.helpers import get_precipitation_fields
-
+from pysteps.visualization import plot_precip_field, quiver, streamplot
 
 arg_names_quiver = (
     "source",
@@ -33,12 +32,23 @@ def test_visualization_motionfields_quiver(
 ):
     pytest.importorskip("cv2")
     if source is not None:
-        fields, geodata = get_precipitation_fields(0, 2, False, True, upscale, source)
+        dataset = get_precipitation_fields(0, 2, False, upscale, source)
+        oflow_method = motion.get_method("LK")
+        dataset = oflow_method(dataset)
+        precip_var = dataset.attrs["precip_var"]
+        fields = dataset[precip_var].values
+        geodata = {
+            "projection": dataset.attrs["projection"],
+            "x1": dataset.x.values[0],
+            "x2": dataset.x.values[-1],
+            "y1": dataset.y.values[0],
+            "y2": dataset.y.values[-1],
+            "yorigin": "lower",
+        }
         if not pass_geodata:
             geodata = None
         ax = plot_precip_field(fields[-1], geodata=geodata)
-        oflow_method = motion.get_method("LK")
-        UV = oflow_method(fields)
+        UV = np.stack([dataset.velocity_x.values, dataset.velocity_y.values])
 
     else:
         shape = (100, 100)
@@ -78,12 +88,23 @@ def test_visualization_motionfields_streamplot(
 ):
     pytest.importorskip("cv2")
     if source is not None:
-        fields, geodata = get_precipitation_fields(0, 2, False, True, upscale, source)
+        dataset = get_precipitation_fields(0, 2, False, upscale, source)
+        oflow_method = motion.get_method("LK")
+        dataset = oflow_method(dataset)
+        precip_var = dataset.attrs["precip_var"]
+        fields = dataset[precip_var].values
+        geodata = {
+            "projection": dataset.attrs["projection"],
+            "x1": dataset.x.values[0],
+            "x2": dataset.x.values[-1],
+            "y1": dataset.y.values[0],
+            "y2": dataset.y.values[-1],
+            "yorigin": "lower",
+        }
         if not pass_geodata:
-            pass_geodata = None
+            geodata = None
         ax = plot_precip_field(fields[-1], geodata=geodata)
-        oflow_method = motion.get_method("LK")
-        UV = oflow_method(fields)
+        UV = np.stack([dataset.velocity_x.values, dataset.velocity_y.values])
 
     else:
         shape = (100, 100)
diff --git a/pysteps/tests/test_plt_precipfields.py b/pysteps/tests/test_plt_precipfields.py
index 9cc56fed1..056c24ab6 100644
--- a/pysteps/tests/test_plt_precipfields.py
+++ b/pysteps/tests/test_plt_precipfields.py
@@ -1,13 +1,13 @@
 # -*- coding: utf-8 -*-
 
+import matplotlib.pyplot as plt
+import numpy as np
 import pytest
 
-from pysteps.visualization import plot_precip_field
-from pysteps.utils import conversion
 from pysteps.postprocessing import ensemblestats
 from pysteps.tests.helpers import get_precipitation_fields
-import matplotlib.pyplot as plt
-import numpy as np
+from pysteps.utils import conversion
+from pysteps.visualization import plot_precip_field
 
 plt_arg_names = (
     "source",
@@ -41,20 +41,23 @@ def test_visualization_plot_precip_field(
     source, plot_type, bbox, colorscale, probthr, title, colorbar, axis
 ):
     if plot_type == "intensity":
-        field, metadata = get_precipitation_fields(0, 0, True, True, None, source)
-        field = field.squeeze()
-        field, metadata = conversion.to_rainrate(field, metadata)
+        dataset = get_precipitation_fields(0, 0, True, None, source)
+        dataset = conversion.to_rainrate(dataset)
 
     elif plot_type == "depth":
-        field, metadata = get_precipitation_fields(0, 0, True, True, None, source)
-        field = field.squeeze()
-        field, metadata = conversion.to_raindepth(field, metadata)
+        dataset = get_precipitation_fields(0, 0, True, None, source)
+        dataset = conversion.to_raindepth(dataset)
 
     elif plot_type == "prob":
-        field, metadata = get_precipitation_fields(0, 10, True, True, None, source)
-        field, metadata = conversion.to_rainrate(field, metadata)
+        dataset = get_precipitation_fields(0, 10, True, None, source)
+        dataset = conversion.to_rainrate(dataset)
+
+    precip_var = dataset.attrs["precip_var"]
+    field = dataset[precip_var].values
+    if plot_type == "prob":
         field = ensemblestats.excprob(field, probthr)
 
+    field = field.squeeze()
     field_orig = field.copy()
     ax = plot_precip_field(
         field.copy(),
@@ -63,7 +66,7 @@ def test_visualization_plot_precip_field(
         geodata=None,
         colorscale=colorscale,
         probthr=probthr,
-        units=metadata["unit"],
+        units=dataset[precip_var].attrs["units"],
         title=title,
         colorbar=colorbar,
         axis=axis,
diff --git a/pysteps/tests/test_timeseries_autoregression.py b/pysteps/tests/test_timeseries_autoregression.py
index f1cc76816..baba6345f 100644
--- a/pysteps/tests/test_timeseries_autoregression.py
+++ b/pysteps/tests/test_timeseries_autoregression.py
@@ -1,8 +1,8 @@
 # -*- coding: utf-8 -*-
 
 import os
-import numpy as np
 
+import numpy as np
 import pytest
 
 import pysteps
@@ -211,7 +211,9 @@ def _create_data_multivariate():
     R = []
     for fn in filenames:
         filename = os.path.join(root_path, "20160928", fn)
-        R_, _, _ = pysteps.io.import_fmi_pgm(filename, gzipped=True)
+        dataset = pysteps.io.import_fmi_pgm(filename, gzipped=True)
+        precip_var = dataset.attrs["precip_var"]
+        R_ = dataset[precip_var].values
         R_[~np.isfinite(R_)] = 0.0
         R.append(np.stack([R_, np.roll(R_, 5, axis=0)]))
 
@@ -235,7 +237,9 @@ def _create_data_univariate():
     R = []
     for fn in filenames:
         filename = os.path.join(root_path, "20160928", fn)
-        R_, _, _ = pysteps.io.import_fmi_pgm(filename, gzipped=True)
+        dataset = pysteps.io.import_fmi_pgm(filename, gzipped=True)
+        precip_var = dataset.attrs["precip_var"]
+        R_ = dataset[precip_var].values
         R_[~np.isfinite(R_)] = 0.0
         R.append(R_)
 
diff --git a/pysteps/tests/test_tracking_tdating.py b/pysteps/tests/test_tracking_tdating.py
index 863c27a0f..7b0f6f1ef 100644
--- a/pysteps/tests/test_tracking_tdating.py
+++ b/pysteps/tests/test_tracking_tdating.py
@@ -2,10 +2,11 @@
 
 import numpy as np
 import pytest
+import xarray as xr
 
+from pysteps.tests.helpers import get_precipitation_fields
 from pysteps.tracking.tdating import dating
 from pysteps.utils import to_reflectivity
-from pysteps.tests.helpers import get_precipitation_fields
 
 arg_names = ("source", "dry_input", "output_splits_merges")
 
@@ -27,24 +28,18 @@
 def test_tracking_tdating_dating_multistep(source, len_timesteps, output_splits_merges):
     pytest.importorskip("skimage")
 
-    input_fields, metadata = get_precipitation_fields(
-        0, len_timesteps, True, True, 4000, source
-    )
-    input_fields, __ = to_reflectivity(input_fields, metadata)
-
-    timelist = metadata["timestamps"]
+    dataset_input = get_precipitation_fields(0, len_timesteps, True, 4000, source)
+    dataset_input = to_reflectivity(dataset_input)
 
     # First half of timesteps
     tracks_1, cells, labels = dating(
-        input_fields[0 : len_timesteps // 2],
-        timelist[0 : len_timesteps // 2],
+        dataset_input.isel(time=slice(0, len_timesteps // 2)),
         mintrack=1,
         output_splits_merges=output_splits_merges,
     )
     # Second half of timesteps
     tracks_2, cells, _ = dating(
-        input_fields[len_timesteps // 2 - 2 :],
-        timelist[len_timesteps // 2 - 2 :],
+        dataset_input.isel(time=slice(len_timesteps // 2 - 2, None)),
         mintrack=1,
         start=2,
         cell_list=cells,
@@ -57,7 +52,7 @@ def test_tracking_tdating_dating_multistep(source, len_timesteps, output_splits_
 
     # Tracks should be continuous in time so time difference should not exceed timestep
     max_track_step = max([t.time.diff().max().seconds for t in tracks_2 if len(t) > 1])
-    timestep = np.diff(timelist).max().seconds
+    timestep = np.diff(dataset_input.time.values).max() / np.timedelta64(1, "s")
     assert max_track_step <= timestep
 
     # IDs of unmatched cells should increase in every timestep
@@ -76,20 +71,20 @@ def test_tracking_tdating_dating(source, dry_input, output_splits_merges):
     pandas = pytest.importorskip("pandas")
 
     if not dry_input:
-        input, metadata = get_precipitation_fields(0, 2, True, True, 4000, source)
-        input, __ = to_reflectivity(input, metadata)
+        dataset_input = get_precipitation_fields(0, 2, True, 4000, source)
+        dataset_input = to_reflectivity(dataset_input)
     else:
-        input = np.zeros((3, 50, 50))
-        metadata = {"timestamps": ["00", "01", "02"]}
-
-    timelist = metadata["timestamps"]
+        dataset_input = xr.Dataset(
+            data_vars={"precip_intensity": (["time", "y", "x"], np.zeros((3, 50, 50)))},
+            attrs={"precip_var": "precip_intensity"},
+        )
 
     cell_column_length = 9
     if output_splits_merges:
         cell_column_length = 15
 
     output = dating(
-        input, timelist, mintrack=1, output_splits_merges=output_splits_merges
+        dataset_input, mintrack=1, output_splits_merges=output_splits_merges
     )
 
     # Check output format
@@ -98,12 +93,12 @@ def test_tracking_tdating_dating(source, dry_input, output_splits_merges):
     assert isinstance(output[0], list)
     assert isinstance(output[1], list)
     assert isinstance(output[2], list)
-    assert len(output[1]) == input.shape[0]
-    assert len(output[2]) == input.shape[0]
+    assert len(output[1]) == dataset_input.sizes["time"]
+    assert len(output[2]) == dataset_input.sizes["time"]
     assert isinstance(output[1][0], pandas.DataFrame)
     assert isinstance(output[2][0], np.ndarray)
     assert output[1][0].shape[1] == cell_column_length
-    assert output[2][0].shape == input.shape[1:]
+    assert output[2][0].shape == (dataset_input.sizes["y"], dataset_input.sizes["x"])
     if not dry_input:
         assert len(output[0]) > 0
         assert isinstance(output[0][0], pandas.DataFrame)
diff --git a/pysteps/tests/test_verification_salscores.py b/pysteps/tests/test_verification_salscores.py
index fdaca9d38..fbed23def 100644
--- a/pysteps/tests/test_verification_salscores.py
+++ b/pysteps/tests/test_verification_salscores.py
@@ -4,8 +4,8 @@
 import pytest
 
 from pysteps.tests.helpers import get_precipitation_fields
-from pysteps.verification.salscores import sal
 from pysteps.utils import to_rainrate, to_reflectivity
+from pysteps.verification.salscores import sal
 
 test_data = [
     (to_rainrate, 1 / 15),
@@ -20,10 +20,12 @@ class TestSAL:
 
     def test_sal_zeros(self, converter, thr_factor):
         """Test the SAL verification method."""
-        precip, metadata = get_precipitation_fields(
+        dataset_input = get_precipitation_fields(
             num_prev_files=0, log_transform=False, metadata=True
         )
-        precip, metadata = converter(precip.filled(np.nan), metadata)
+        dataset_input = converter(dataset_input)
+        precip_var = dataset_input.attrs["precip_var"]
+        precip = dataset_input[precip_var].values[0]
         result = sal(precip * 0, precip * 0, thr_factor)
         assert np.isnan(result).all()
         result = sal(precip * 0, precip, thr_factor)
@@ -35,20 +37,24 @@ def test_sal_zeros(self, converter, thr_factor):
 
     def test_sal_same_image(self, converter, thr_factor):
         """Test the SAL verification method."""
-        precip, metadata = get_precipitation_fields(
+        dataset_input = get_precipitation_fields(
             num_prev_files=0, log_transform=False, metadata=True
         )
-        precip, metadata = converter(precip.filled(np.nan), metadata)
+        dataset_input = converter(dataset_input)
+        precip_var = dataset_input.attrs["precip_var"]
+        precip = dataset_input[precip_var].values[0]
         result = sal(precip, precip, thr_factor)
         assert isinstance(result, tuple)
         assert len(result) == 3
         assert np.allclose(result, [0, 0, 0])
 
     def test_sal_translation(self, converter, thr_factor):
-        precip, metadata = get_precipitation_fields(
+        dataset_input = get_precipitation_fields(
             num_prev_files=0, log_transform=False, metadata=True
         )
-        precip, metadata = converter(precip.filled(np.nan), metadata)
+        dataset_input = converter(dataset_input)
+        precip_var = dataset_input.attrs["precip_var"]
+        precip = dataset_input[precip_var].values[0]
         precip_translated = np.roll(precip, 10, axis=0)
         result = sal(precip, precip_translated, thr_factor)
         assert np.allclose(result[0], 0)
diff --git a/pysteps/tracking/tdating.py b/pysteps/tracking/tdating.py
index 97b1de9e4..c2d0cd1ae 100644
--- a/pysteps/tracking/tdating.py
+++ b/pysteps/tracking/tdating.py
@@ -26,8 +26,8 @@
     match
     couple_track
 """
-
 import numpy as np
+import xarray as xr
 
 import pysteps.feature.tstorm as tstorm_detect
 from pysteps import motion
@@ -50,8 +50,7 @@
 
 
 def dating(
-    input_video,
-    timelist,
+    dataset: xr.Dataset,
     mintrack=3,
     cell_list=None,
     label_list=None,
@@ -78,13 +77,12 @@ def dating(
 
     Parameters
     ----------
-    input_video: array-like
-        Array of shape (t,m,n) containing input image, with t being the temporal
-        dimension and m,n the spatial dimensions. Thresholds are tuned to maximum
+    dataset: xarray.Dataset
+        Input dataset as described in the documentation of
+        :py:mod:`pysteps.io.importers`. It has to contain a precipitation data variable.
+        The dataset has to have a time dimension. Thresholds are tuned to maximum
         reflectivity in dBZ with a spatial resolution of 1 km and a temporal resolution
         of 5 min. Nan values are ignored.
-    timelist: list
-        List of length t containing string of time and date of each (m,n) field.
     mintrack: int, optional
         minimum duration of cell-track to be counted. The default is 3 time steps.
     cell_list: list or None, optional
@@ -191,9 +189,14 @@ def dating(
     else:
         if not len(cell_list) == len(label_list):
             raise ValueError("len(cell_list) != len(label_list)")
+
+    timelist = dataset.time.values
     if start > len(timelist):
         raise ValueError("start > len(timelist)")
 
+    precip_var = dataset.attrs["precip_var"]
+    input_video = dataset[precip_var].values
+
     oflow_method = motion.get_method("LK")
     if len(label_list) == 0:
         max_ID = 0
@@ -218,7 +221,8 @@ def dating(
             max_ID = np.nanmax([np.nanmax(cid), max_ID]) + 1
             continue
         if t >= 2:
-            flowfield = oflow_method(input_video[t - 2 : t + 1, :, :])
+            dataset = oflow_method(dataset.isel(time=slice(t - 2, t + 1)))
+            flowfield = np.stack([dataset.velocity_x.values, dataset.velocity_y.values])
             cells_id, max_ID, newlabels, splitted_cells = tracking(
                 cells_id,
                 cell_list[-1],
diff --git a/pysteps/visualization/motionfields.py b/pysteps/visualization/motionfields.py
index 12c647112..5a5f7ac31 100644
--- a/pysteps/visualization/motionfields.py
+++ b/pysteps/visualization/motionfields.py
@@ -121,9 +121,9 @@ def motion_plot(
     x_grid = x_grid[skip]
     y_grid = y_grid[skip]
 
-    # If we have yorigin"="upper" we flip the y axes for the motion field in the y axis.
+    # If we have yorigin"="upper" we flip the y axes of the plot.
     if geodata is None or geodata["yorigin"] == "upper":
-        dy *= -1
+        y_grid = y_grid[::-1]
 
     if plot_type.lower() == "quiver":
         ax.quiver(x_grid, y_grid, dx, dy, angles="xy", zorder=20, **plot_kwargs)
diff --git a/pysteps/xarray_helpers.py b/pysteps/xarray_helpers.py
index 33ec2f40c..7fa6b7c32 100644
--- a/pysteps/xarray_helpers.py
+++ b/pysteps/xarray_helpers.py
@@ -11,6 +11,7 @@
     convert_to_xarray_dataset
 """
 
+import warnings
 from datetime import datetime, timedelta
 
 import numpy as np
@@ -151,9 +152,13 @@ def convert_input_to_xarray_dataset(
         ypixelsize = y_r[1] - y_r[0]
 
     if x_r[1] - x_r[0] != xpixelsize:
-        raise ValueError("xpixelsize does not match x1, x2 and array shape")
+        warnings.warn(
+            "xpixelsize does not match x1, x2 and array shape, using xpixelsize for pixel size"
+        )
     if y_r[1] - y_r[0] != ypixelsize:
-        raise ValueError("ypixelsize does not match y1, y2 and array shape")
+        warnings.warn(
+            "ypixelsize does not match y1, y2 and array shape, using ypixelsize for pixel size"
+        )
 
     # flip yr vector if yorigin is upper
     if metadata["yorigin"] == "upper":