Substantial coverage, consolidation, and dependency upgrades (#18)
* feature: add `ParamDescribedRead`, a read-only variant of `ParamDescribed`
* chore: adjust field visibility where needed
* chore: substantially clean up mzML parsing and improve code coverage
* chore: exercise `ThermoRawReader`
* feature: add an `Any` bound to `MassSpectrometryReadWriteProcess` for better customization; add `MZReaderType::Unknown` for boxing
* chore: address rewiring
* fix: fix MGF indexing
* fix: fix `is_thermo_raw_prefix`, which used the wrong number of trailing bytes
* chore: keep `IonMobilityFrameLike` consistent with `SpectrumLike`
* change: update `mzpeaks` to v1.0.0 and `mzsignal` to v1.0.0
* change: require `ParamDescribed` for `ChromatogramLike`
* fix: fix `DataArray::store_as`
* chore: replace recurring `get_param_by_curie` wrapper methods with the `find_param_method` macro
* chore: update CV and documentation
* chore: documentation updates
* change: update `thermorawfilereader` to v0.3.1 and fix tracking of trailer values
* feature: add `mzsignal` to the exported top-level API
* chore: minor cleanup of some examples
* feature: add `MZReaderBuilder` to the top-level API
* chore: feature-gate new tests that use `mzsignal`
* chore: fix dependency/feature warning after marking `tokio` as a dependency only
* chore: fix documentation warnings

mobiusklein committed Dec 15, 2024
1 parent 07fe0b3 commit cec3477
Showing 41 changed files with 1,966 additions and 575 deletions.
435 changes: 218 additions & 217 deletions Cargo.lock

Large diffs are not rendered by default.

8 changes: 4 additions & 4 deletions Cargo.toml
@@ -101,13 +101,13 @@ chrono = "0.4.37"
bitflags = "2.5.0"
identity-hash = "0.1.0"
thiserror = "2.0.2"
mzpeaks = { version = ">=0.23.0,<1.0.0" }
mzpeaks = { version = ">=1.0.0,<1.1.0" }

# Internal parallelism
rayon = { version = ">=1.8.0,<2.0", optional = true }

# Internal signal processing
mzsignal = { version = ">=0.27.0,<1.0.0", default-features = false, optional = true, features = [
mzsignal = { version = ">=1.0.0,<1.1.0", default-features = false, optional = true, features = [
'avx',
] }

@@ -123,7 +123,7 @@ bytemuck = { version = "1.18.0", features = ["extern_crate_alloc"] }
base64-simd = "0.8.0"

# Async reader features
tokio = { version = "1.32.0", optional = true, features = [
tokio = { version = "1.32", optional = true, features = [
"macros",
"rt",
"fs",
@@ -144,7 +144,7 @@ reqwest = { version = "0.12", features = ["json", "blocking"], optional = true }
futures = { version = "0.3", optional = true }

# Thermo RAW-related features
thermorawfilereader = { version = "0.3.0", default-features = false, optional = true }
thermorawfilereader = { version = "0.3.1", default-features = false, optional = true }

# Bruker TDF-related features
rusqlite = { version = "0.31.0", optional = true }
4 changes: 2 additions & 2 deletions Justfile
@@ -4,7 +4,7 @@ test-units:
cargo nextest run --lib --features nalgebra,parallelism,mzsignal,zlib-ng-compat,thermo

test-coverage:
cargo llvm-cov --lib --tests --features nalgebra,parallelism,mzsignal,zlib-ng-compat,thermo --html
cargo llvm-cov --lib --tests nextest --features nalgebra,parallelism,mzsignal,zlib-ng-compat,thermo,mzmlb --html

alias t := test-units

@@ -15,7 +15,7 @@ quick-docs:
cargo doc --no-deps -p mzdata

docs:
cargo doc --no-deps --features nalgebra,parallelism,mzsignal,mzmlb,zlib-ng-compat,thermo,async -p mzdata
cargo doc --no-deps --features nalgebra,parallelism,mzsignal,mzmlb,zlib-ng-compat,thermo,async,proxi -p mzdata -p mzsignal -p mzpeaks

install-mzdata:
cargo install --path . --features nalgebra,parallelism,mzsignal,mzmlb,zlib-ng-compat,hdf5_static
10 changes: 9 additions & 1 deletion cv/extract_component.py
@@ -9,7 +9,7 @@
from typing import Tuple, Dict, Set, List

import fastobo
from fastobo.term import TermFrame, IsAClause, NameClause, RelationshipClause
from fastobo.term import TermFrame, IsAClause, NameClause, RelationshipClause, DefClause

from fastobo.doc import OboDoc

@@ -113,6 +113,7 @@ def find_name(term: TermFrame):
def make_entry_for(term: TermFrame):
name = None
flags = ValueType.NoType
descr = ""
parents = []
for clause in term:
if isinstance(clause, NameClause):
@@ -122,6 +123,12 @@ def make_entry_for(term: TermFrame):
if isinstance(clause, RelationshipClause):
if str(clause.typedef) == 'has_value_type':
flags |= xsd_to_type[str(clause.term)]
if isinstance(clause, DefClause):
descr = re.sub(
r"(\[|\])",
lambda m: "\\\\" + m.group(1),
str(clause.definition).replace('"', "'"),
)

vname = name
if "-" in vname:
Expand All @@ -145,6 +152,7 @@ def make_entry_for(term: TermFrame):

return f"""
#[term(cv=MS, accession={term.id.local}, name="{name}", flags={{{int(flags)}}}, parents={{{json.dumps(parents)}}})]
#[doc="{name} - {descr}"]
{vname},"""


9 changes: 9 additions & 0 deletions cv/extract_software.py
@@ -12,6 +12,7 @@
TermFrame,
IsAClause,
NameClause,
DefClause,
)

from fastobo.doc import OboDoc
@@ -52,6 +53,7 @@ def make_entry_for(term: TermFrame):
name = None
flags = SoftwareType.NoType
parents = []
descr = ''
for clause in term:
if isinstance(clause, NameClause):
name = str(clause.name)
@@ -63,6 +65,12 @@
flags |= SoftwareType.Analysis
elif clause.term == ACQUISITION_SW:
flags |= SoftwareType.Acquisition
if isinstance(clause, DefClause):
descr = re.sub(
r"(\[|\])",
lambda m: "\\\\" + m.group(1),
str(clause.definition).replace('"', "'"),
)

vname: str = name
if "-" in vname:
@@ -84,6 +92,7 @@

return f"""
#[term(cv=MS, accession={term.id.local}, name="{name}", flags={{{int(flags)}}}, parents={{{json.dumps(parents)}}})]
#[doc="{name} - {descr}"]
{vname},"""


Binary file modified cv/psi-ms.obo.gz
Binary file not shown.
6 changes: 3 additions & 3 deletions docs/reader_tutorial.md
@@ -31,7 +31,7 @@ whether it is random access, or what format it is stored in.

### From a file path

`mzdata` can read files on disk, accessed by path, easily. In this example we'll use the [`MZReader`](crate::io::MZReader) type
`mzdata` can read files on disk, accessed by path, easily. In this example we'll use the [`MZReader`] type
to figure out which reader to use for us automatically.

@@ -50,7 +50,7 @@ fn from_path() -> io::Result<()> {

When reading a file from disk, `mzdata` can make certain assumptions like that the file supports
the [`io::Seek`](std::io::Seek) trait and can read or build indices over the file quickly and guarantee
that the file supports full random access, like [`RandomAccessSpectrumIterator`](crate::io::traits::RandomAccessSpectrumIterator).
that the file supports full random access, like [`RandomAccessSpectrumIterator`].

Additionally, some binary formats like [`ThermoRawReader`] or [`MzMLbReader`] _require_ that there be a file on disk that
exists outside of the Rust model of the file system in order to read it.
@@ -157,7 +157,7 @@ you want to use.

All of the added complexity introduced by the type system can make anything that is flexible over how you come
to open a mass spectrometry data source cumbersome. When you don't _need_ to keep the reader around beyond the
current scope, the [`mz_read!`](crate::mz_read) macro can substantially simplify matters. It is like [`MZReader`](crate::MZReader),
current scope, the [`mz_read!`](crate::mz_read) macro can substantially simplify matters. It is like [`MZReader`],
but it is even more flexible, provided that the reader instance only lives as long as the enclosing scope:

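The tutorial's own code block is collapsed in this view. As a stand-in, here is a minimal sketch of the pattern the surrounding prose describes: open a file by path with `MZReader` and iterate its spectra. It is illustrative only, assumes `MZReader::open_path` and the `SpectrumLike` prelude trait behave as documented, and uses a hypothetical file path.

```rust
// Minimal sketch (not part of this diff): open a file by path and iterate it.
// The path below is hypothetical.
use mzdata::prelude::*;
use mzdata::MZReader;

fn summarize() -> std::io::Result<()> {
    let reader = MZReader::open_path("tests/data/small.mzML")?;
    for spectrum in reader {
        // `id()` and `ms_level()` come from the `SpectrumLike` trait in the prelude.
        println!("{} (MS level {})", spectrum.id(), spectrum.ms_level());
    }
    Ok(())
}
```

The `mz_read!` macro mentioned in the prose covers the same ground when the reader does not need to outlive the enclosing scope.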
2 changes: 1 addition & 1 deletion docs/spectrum_tutorial.md
@@ -102,7 +102,7 @@ appropriate.
# let mut spectrum = reader.get_spectrum_by_index(15).unwrap();
// An error might occur during reprofiling, or there may not be
// any peaks to transform.
spectrum.reprofile_with_shape(0.025, 0.015).unwrap();
spectrum.reprofile_with_shape(0.0025, 0.015).unwrap();
spectrum.description_mut().signal_continuity = SignalContinuity::Profile;
# Ok(())
# }
10 changes: 5 additions & 5 deletions examples/async_mzcat.rs
@@ -1,6 +1,7 @@
use std::time;
use std::{env, io, path};

use futures::StreamExt;
use tokio;
use tokio::fs;

@@ -21,9 +22,11 @@ async fn load_file<P: Into<path::PathBuf> + Clone>(

async fn scan_file(reader: &mut mzml::AsyncMzMLReader<fs::File>) {
let start = time::Instant::now();
let n = reader.len();
let mut i = 0;
while let Some(scan) = reader.get_spectrum_by_index(i).await {

let mut stream = reader.as_stream();

while let Some(scan) = stream.next().await {
if i % 10000 == 0 {
println!(
"\tScan {}: {}|{} ({} seconds)",
@@ -34,9 +37,6 @@ async fn scan_file(reader: &mut mzml::AsyncMzMLReader<fs::File>) {
);
}
i += 1;
if i == n {
break;
}
}
let end = time::Instant::now();
println!(
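For context on the new `use futures::StreamExt;` import above: the `.next().await` call in the rewritten loop is provided by that extension trait, not by the stream type itself. A generic sketch of the pattern, independent of mzdata's types:

```rust
// Generic illustration (not mzdata-specific): drain any futures::Stream by
// repeatedly awaiting `next()`, which StreamExt provides.
use futures::{executor, stream, StreamExt};

async fn drain() {
    let mut items = stream::iter(0..3);
    // `next()` comes from StreamExt, which is why the example imports it.
    while let Some(item) = items.next().await {
        println!("got {item}");
    }
}

fn main() {
    executor::block_on(drain());
}
```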
9 changes: 7 additions & 2 deletions examples/mzconvert.rs
@@ -1,3 +1,4 @@
use std::any::Any;
use std::env;
use std::io;
use std::path::PathBuf;
@@ -6,6 +7,7 @@ use std::sync::mpsc::sync_channel;
use std::thread;
use std::time;

use log::info;
use mzdata::io::MassSpectrometryFormat;
use mzdata::io::{checksum_file, MassSpectrometryReadWriteProcess, Sink, Source};
use mzdata::meta::custom_software_name;
@@ -80,8 +82,9 @@ impl MassSpectrometryReadWriteProcess<CentroidPeak, DeconvolutedPeak> for MZConv
R: RandomAccessSpectrumIterator<CentroidPeak, DeconvolutedPeak>
+ SpectrumSource<CentroidPeak, DeconvolutedPeak>
+ Send
+ Any
+ 'static,
W: SpectrumWriter<CentroidPeak, DeconvolutedPeak> + Send + 'static,
W: SpectrumWriter<CentroidPeak, DeconvolutedPeak> + Send + Any + 'static,
>(
&self,
reader: R,
@@ -95,8 +98,9 @@ impl MassSpectrometryReadWriteProcess<CentroidPeak, DeconvolutedPeak> for MZConv
+ MSDataFileMetadata
+ SpectrumSource<CentroidPeak, DeconvolutedPeak>
+ Send
+ Any
+ 'static,
W: SpectrumWriter<CentroidPeak, DeconvolutedPeak> + MSDataFileMetadata + Send + 'static,
W: SpectrumWriter<CentroidPeak, DeconvolutedPeak> + MSDataFileMetadata + Send + Any + 'static,
>(
&self,
reader: R,
@@ -106,6 +110,7 @@ impl MassSpectrometryReadWriteProcess<CentroidPeak, DeconvolutedPeak> for MZConv
) -> Result<(R, W), Self::ErrorType> {
if self.inpath != "-" {
let pb: PathBuf = self.inpath.clone().into();
info!("Computing checksum for {}", pb.display());
let checksum = checksum_file(&pb)?;
let has_already = reader
.file_description()
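The `+ Any` bounds added above connect to the commit message item about customization: with `R: Any`, an implementation can attempt to recover a concrete reader or writer type at runtime instead of treating it purely generically. A minimal standard-library sketch of that mechanism follows; the `Concrete` type is a made-up stand-in, not part of mzdata.

```rust
// Sketch of what an `Any` bound buys: runtime downcasting from a generic
// parameter to a concrete type. `Concrete` is hypothetical.
use std::any::Any;

struct Concrete {
    name: &'static str,
}

fn inspect<R: Any>(reader: &R) {
    // Coerce to `&dyn Any`, then try the concrete type.
    let any: &dyn Any = reader;
    if let Some(c) = any.downcast_ref::<Concrete>() {
        println!("got a Concrete reader: {}", c.name);
    } else {
        println!("some other reader type");
    }
}

fn main() {
    inspect(&Concrete { name: "demo" });
    inspect(&42_u32);
}
```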
