diff --git a/Cargo.toml b/Cargo.toml index 7d98125d..f877719c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -100,7 +100,7 @@ pyo3-error = { version = "0.5", default-features = false } pyo3-log = { version = "0.12.4", default-features = false } pythonize = { version = "0.25", default-features = false } rand = { version = "0.9.1", default-features = false } -schemars = { version = "=1.0.0-alpha.15", default-features = false } +schemars = { version = "1.0.1", default-features = false } scratch = { version = "1.0", default-features = false } semver = { version = "1.0.23", default-features = false } serde = { version = "1.0.218", default-features = false } @@ -122,7 +122,7 @@ wasmtime = { version = "33.0", default-features = false } wasmtime_runtime_layer = { version = "33.0", default-features = false } wasm-encoder = { version = "0.235", default-features = false } wasm_runtime_layer = { version = "0.5", default-features = false } -wit-bindgen = { version = "0.42", default-features = false } +wit-bindgen = { version = "0.43", default-features = false } wit-component = { version = "0.235", default-features = false } wit-parser = { version = "0.235", default-features = false } wyhash = { version = "0.6", default-features = false } diff --git a/codecs/fourier-network/tests/schema.json b/codecs/fourier-network/tests/schema.json index e6cdafc7..e97ea2dc 100644 --- a/codecs/fourier-network/tests/schema.json +++ b/codecs/fourier-network/tests/schema.json @@ -37,7 +37,7 @@ ], "format": "uint", "minimum": 1, - "description": "The optional mini-batch size used during training\n\n Setting the mini-batch size to `None` disables the use of batching,\n i.e. the network is trained using one large batch that includes the\n full data." + "description": "The optional mini-batch size used during training\n\nSetting the mini-batch size to `None` disables the use of batching,\ni.e. the network is trained using one large batch that includes the\nfull data." }, "seed": { "type": "integer", @@ -61,7 +61,7 @@ "mini_batch_size", "seed" ], - "description": "Fourier network codec which trains and overfits a fourier feature neural\n network on encoding and predicts during decoding.\n\n The approach is based on the papers by Tancik et al. 2020\n ()\n and by Huang and Hoefler 2020 ().", + "description": "Fourier network codec which trains and overfits a fourier feature neural\nnetwork on encoding and predicts during decoding.\n\nThe approach is based on the papers by Tancik et al. 2020\n()\nand by Huang and Hoefler 2020 ().", "title": "FourierNetworkCodec", "$schema": "https://json-schema.org/draft/2020-12/schema" } \ No newline at end of file diff --git a/codecs/jpeg2000/tests/schema.json b/codecs/jpeg2000/tests/schema.json index 59e96fa2..dc712c7c 100644 --- a/codecs/jpeg2000/tests/schema.json +++ b/codecs/jpeg2000/tests/schema.json @@ -54,7 +54,7 @@ "description": "Lossless compression" } ], - "description": "Codec providing compression using JPEG 2000.\n\n Arrays that are higher-dimensional than 2D are encoded by compressing each\n 2D slice with JPEG 2000 independently. Specifically, the array's shape is\n interpreted as `[.., height, width]`. If you want to compress 2D slices\n along two different axes, you can swizzle the array axes beforehand.", + "description": "Codec providing compression using JPEG 2000.\n\nArrays that are higher-dimensional than 2D are encoded by compressing each\n2D slice with JPEG 2000 independently. Specifically, the array's shape is\ninterpreted as `[.., height, width]`. If you want to compress 2D slices\nalong two different axes, you can swizzle the array axes beforehand.", "properties": { "_version": { "type": "string", diff --git a/codecs/pco/tests/schema.json b/codecs/pco/tests/schema.json index 3cb2c33d..ed8f127b 100644 --- a/codecs/pco/tests/schema.json +++ b/codecs/pco/tests/schema.json @@ -21,7 +21,7 @@ 11, 12 ], - "description": "Compression level, ranging from 0 (weak) over 8 (very good) to 12\n (expensive)" + "description": "Compression level, ranging from 0 (weak) over 8 (very good) to 12\n(expensive)" }, "_version": { "type": "string", @@ -46,7 +46,7 @@ "required": [ "mode" ], - "description": "Automatically detects a good mode.\n\n This works well most of the time, but costs some compression time and\n can select a bad mode in adversarial cases." + "description": "Automatically detects a good mode.\n\nThis works well most of the time, but costs some compression time and\ncan select a bad mode in adversarial cases." }, { "type": "object", @@ -63,7 +63,7 @@ }, { "type": "object", - "description": "Tries using the `FloatMult` mode with a given base.\n\n Only applies to floating-point types.", + "description": "Tries using the `FloatMult` mode with a given base.\n\nOnly applies to floating-point types.", "properties": { "float_mult_base": { "type": "number", @@ -82,7 +82,7 @@ }, { "type": "object", - "description": "Tries using the `FloatQuant` mode with the given number of bits of\n quantization.\n\n Only applies to floating-point types.", + "description": "Tries using the `FloatQuant` mode with the given number of bits of\nquantization.\n\nOnly applies to floating-point types.", "properties": { "float_quant_bits": { "type": "integer", @@ -102,7 +102,7 @@ }, { "type": "object", - "description": "Tries using the `IntMult` mode with a given base.\n\n Only applies to integer types.", + "description": "Tries using the `IntMult` mode with a given base.\n\nOnly applies to integer types.", "properties": { "int_mult_base": { "type": "integer", @@ -135,7 +135,7 @@ "required": [ "delta" ], - "description": "Automatically detects a detects a good delta encoding.\n\n This works well most of the time, but costs some compression time and\n can select a bad delta encoding in adversarial cases." + "description": "Automatically detects a detects a good delta encoding.\n\nThis works well most of the time, but costs some compression time and\ncan select a bad delta encoding in adversarial cases." }, { "type": "object", @@ -148,11 +148,11 @@ "required": [ "delta" ], - "description": "Never uses delta encoding.\n\n This is best if your data is in a random order or adjacent numbers have\n no relation to each other." + "description": "Never uses delta encoding.\n\nThis is best if your data is in a random order or adjacent numbers have\nno relation to each other." }, { "type": "object", - "description": "Tries taking nth order consecutive deltas.\n\n Supports a delta encoding order up to 7. For instance, 1st order is\n just regular delta encoding, 2nd is deltas-of-deltas, etc. It is legal\n to use 0th order, but it is identical to None.", + "description": "Tries taking nth order consecutive deltas.\n\nSupports a delta encoding order up to 7. For instance, 1st order is\njust regular delta encoding, 2nd is deltas-of-deltas, etc. It is legal\nto use 0th order, but it is identical to None.", "properties": { "delta_encoding_order": { "type": "integer", @@ -189,7 +189,7 @@ "required": [ "delta" ], - "description": "Tries delta encoding according to an extra latent variable of\n \"lookback\".\n\n This can improve compression ratio when there are nontrivial patterns\n in the array, but reduces compression speed substantially." + "description": "Tries delta encoding according to an extra latent variable of\n\"lookback\".\n\nThis can improve compression ratio when there are nontrivial patterns\nin the array, but reduces compression speed substantially." } ] } @@ -197,7 +197,7 @@ "oneOf": [ { "type": "object", - "description": "Divide the chunk into equal pages of up to this many numbers.\n\n For example, with equal pages up to 100,000, a chunk of 150,000 numbers\n would be divided into 2 pages, each of 75,000 numbers.", + "description": "Divide the chunk into equal pages of up to this many numbers.\n\nFor example, with equal pages up to 100,000, a chunk of 150,000 numbers\nwould be divided into 2 pages, each of 75,000 numbers.", "properties": { "equal_pages_up_to": { "type": "integer", diff --git a/codecs/random-projection/tests/schema.json b/codecs/random-projection/tests/schema.json index b5f7bb63..9f48b20f 100644 --- a/codecs/random-projection/tests/schema.json +++ b/codecs/random-projection/tests/schema.json @@ -17,7 +17,7 @@ "required": [ "seed" ], - "description": "Codec that uses random projections to reduce the dimensionality of high-\n dimensional data to compress it.\n\n A two-dimensional array of shape `$N \\times D$` is encoded as n array of\n shape `$N \\times K$`, where `$K$` is either set explicitly or chosen using\n the the Johnson-Lindenstrauss lemma. For `$K$` to be smaller than `$D$`,\n `$D$` must be quite large. Therefore, this codec should only applied on\n large datasets as it otherwise significantly inflates the data size instead\n of reducing it.\n\n Choosing a lower distortion rate `epsilon` will improve the quality of the\n lossy compression, i.e. reduce the compression error, at the cost of\n increasing `$K$`.\n\n This codec only supports finite floating point data.", + "description": "Codec that uses random projections to reduce the dimensionality of high-\ndimensional data to compress it.\n\nA two-dimensional array of shape `$N \\times D$` is encoded as n array of\nshape `$N \\times K$`, where `$K$` is either set explicitly or chosen using\nthe the Johnson-Lindenstrauss lemma. For `$K$` to be smaller than `$D$`,\n`$D$` must be quite large. Therefore, this codec should only applied on\nlarge datasets as it otherwise significantly inflates the data size instead\nof reducing it.\n\nChoosing a lower distortion rate `epsilon` will improve the quality of the\nlossy compression, i.e. reduce the compression error, at the cost of\nincreasing `$K$`.\n\nThis codec only supports finite floating point data.", "allOf": [ { "oneOf": [ @@ -39,7 +39,7 @@ "reduction", "epsilon" ], - "description": "The reduced dimensionality `$K$` is derived from `epsilon`, as defined\n by the Johnson-Lindenstrauss lemma." + "description": "The reduced dimensionality `$K$` is derived from `epsilon`, as defined\nby the Johnson-Lindenstrauss lemma." }, { "type": "object", @@ -59,7 +59,7 @@ "reduction", "k" ], - "description": "The reduced dimensionality `$K$`, to which the data is projected, is\n given explicitly." + "description": "The reduced dimensionality `$K$`, to which the data is projected, is\ngiven explicitly." } ] }, @@ -76,7 +76,7 @@ "required": [ "projection" ], - "description": "The random projection matrix is dense and its components are sampled\n from `$\\text{N}\\left( 0, \\frac{1}{k} \\right)$`" + "description": "The random projection matrix is dense and its components are sampled\nfrom `$\\text{N}\\left( 0, \\frac{1}{k} \\right)$`" }, { "type": "object", @@ -88,7 +88,7 @@ ], "exclusiveMinimum": 0.0, "maximum": 1.0, - "description": "The `density` of the sparse projection matrix.\n\n Setting `density` to `$\\frac{1}{3}$` reproduces the settings by\n Achlioptas [^1]. If `density` is `None`, it is set to\n `$\\frac{1}{\\sqrt{d}}$`,\n the minimum density as recommended by Li et al [^2].\n\n\n [^1]: Achlioptas, D. (2003). Database-friendly random projections:\n Johnson-Lindenstrauss with binary coins. *Journal of Computer\n and System Sciences*, 66(4), 671-687. Available from:\n [doi:10.1016/S0022-0000(03)00025-4](https://doi.org/10.1016/S0022-0000(03)00025-4).\n\n [^2]: Li, P., Hastie, T. J., and Church, K. W. (2006). Very sparse\n random projections. In *Proceedings of the 12th ACM SIGKDD\n international conference on Knowledge discovery and data\n mining (KDD '06)*. Association for Computing Machinery, New\n York, NY, USA, 287–296. Available from:\n [doi:10.1145/1150402.1150436](https://doi.org/10.1145/1150402.1150436)." + "description": "The `density` of the sparse projection matrix.\n\nSetting `density` to `$\\frac{1}{3}$` reproduces the settings by\nAchlioptas [^1]. If `density` is `None`, it is set to\n`$\\frac{1}{\\sqrt{d}}$`,\nthe minimum density as recommended by Li et al [^2].\n\n\n[^1]: Achlioptas, D. (2003). Database-friendly random projections:\n Johnson-Lindenstrauss with binary coins. *Journal of Computer\n and System Sciences*, 66(4), 671-687. Available from:\n [doi:10.1016/S0022-0000(03)00025-4](https://doi.org/10.1016/S0022-0000(03)00025-4).\n\n[^2]: Li, P., Hastie, T. J., and Church, K. W. (2006). Very sparse\n random projections. In *Proceedings of the 12th ACM SIGKDD\n international conference on Knowledge discovery and data\n mining (KDD '06)*. Association for Computing Machinery, New\n York, NY, USA, 287–296. Available from:\n [doi:10.1145/1150402.1150436](https://doi.org/10.1145/1150402.1150436)." }, "projection": { "type": "string", @@ -98,7 +98,7 @@ "required": [ "projection" ], - "description": "The random projection matrix is sparse where only `density`% of entries\n are non-zero.\n\n The matrix's components are sampled from\n\n - `$-\\sqrt{\\frac{1}{k \\cdot density}}$` with probability\n `$0.5 \\cdot density$`\n - `$0$` with probability `$1 - density$`\n - `$+\\sqrt{\\frac{1}{k \\cdot density}}$` with probability\n `$0.5 \\cdot density$`" + "description": "The random projection matrix is sparse where only `density`% of entries\nare non-zero.\n\nThe matrix's components are sampled from\n\n- `$-\\sqrt{\\frac{1}{k \\cdot density}}$` with probability\n `$0.5 \\cdot density$`\n- `$0$` with probability `$1 - density$`\n- `$+\\sqrt{\\frac{1}{k \\cdot density}}$` with probability\n `$0.5 \\cdot density$`" } ] } diff --git a/codecs/sperr/tests/schema.json b/codecs/sperr/tests/schema.json index ac46d915..d849d6e2 100644 --- a/codecs/sperr/tests/schema.json +++ b/codecs/sperr/tests/schema.json @@ -60,7 +60,7 @@ "description": "Fixed point-wise (absolute) error" } ], - "description": "Codec providing compression using SPERR.\n\n Arrays that are higher-dimensional than 3D are encoded by compressing each\n 3D slice with SPERR independently. Specifically, the array's shape is\n interpreted as `[.., depth, height, width]`. If you want to compress 3D\n slices along three different axes, you can swizzle the array axes\n beforehand.", + "description": "Codec providing compression using SPERR.\n\nArrays that are higher-dimensional than 3D are encoded by compressing each\n3D slice with SPERR independently. Specifically, the array's shape is\ninterpreted as `[.., depth, height, width]`. If you want to compress 3D\nslices along three different axes, you can swizzle the array axes\nbeforehand.", "properties": { "_version": { "type": "string", diff --git a/codecs/sz3/tests/schema.json b/codecs/sz3/tests/schema.json index 70d52aef..c32d21c6 100644 --- a/codecs/sz3/tests/schema.json +++ b/codecs/sz3/tests/schema.json @@ -121,7 +121,7 @@ "oneOf": [ { "type": "object", - "description": "Errors are bounded by *both* the absolute and relative error, i.e. by\n whichever bound is stricter", + "description": "Errors are bounded by *both* the absolute and relative error, i.e. by\nwhichever bound is stricter", "properties": { "eb_abs": { "type": "number", @@ -146,7 +146,7 @@ }, { "type": "object", - "description": "Errors are bounded by *either* the absolute or relative error, i.e. by\n whichever bound is weaker", + "description": "Errors are bounded by *either* the absolute or relative error, i.e. by\nwhichever bound is weaker", "properties": { "eb_abs": { "type": "number", diff --git a/codecs/zfp-classic/tests/schema.json b/codecs/zfp-classic/tests/schema.json index c3427b23..710e8594 100644 --- a/codecs/zfp-classic/tests/schema.json +++ b/codecs/zfp-classic/tests/schema.json @@ -27,7 +27,7 @@ "min_exp": { "type": "integer", "format": "int32", - "description": "Smallest absolute bit plane number encoded.\n\n This parameter applies to floating-point data only and is ignored\n for integer data." + "description": "Smallest absolute bit plane number encoded.\n\nThis parameter applies to floating-point data only and is ignored\nfor integer data." }, "mode": { "type": "string", @@ -44,7 +44,7 @@ }, { "type": "object", - "description": "In fixed-rate mode, each d-dimensional compressed block of `$4^d$`\n values is stored using a fixed number of bits. This number of\n compressed bits per block is amortized over the `$4^d$` values to give\n a rate of `$rate = \\frac{maxbits}{4^d}$` in bits per value.", + "description": "In fixed-rate mode, each d-dimensional compressed block of `$4^d$`\nvalues is stored using a fixed number of bits. This number of\ncompressed bits per block is amortized over the `$4^d$` values to give\na rate of `$rate = \\frac{maxbits}{4^d}$` in bits per value.", "properties": { "rate": { "type": "number", @@ -63,7 +63,7 @@ }, { "type": "object", - "description": "In fixed-precision mode, the number of bits used to encode a block may\n vary, but the number of bit planes (the precision) encoded for the\n transform coefficients is fixed.", + "description": "In fixed-precision mode, the number of bits used to encode a block may\nvary, but the number of bit planes (the precision) encoded for the\ntransform coefficients is fixed.", "properties": { "precision": { "type": "integer", @@ -83,7 +83,7 @@ }, { "type": "object", - "description": "In fixed-accuracy mode, all transform coefficient bit planes up to a\n minimum bit plane number are encoded. The smallest absolute bit plane\n number is chosen such that\n `$minexp = \\text{floor}(\\log_{2}(tolerance))$`.", + "description": "In fixed-accuracy mode, all transform coefficient bit planes up to a\nminimum bit plane number are encoded. The smallest absolute bit plane\nnumber is chosen such that\n`$minexp = \\text{floor}(\\log_{2}(tolerance))$`.", "properties": { "tolerance": { "type": "number", @@ -111,7 +111,7 @@ "required": [ "mode" ], - "description": "Lossless per-block compression that preserves integer and floating point\n bit patterns." + "description": "Lossless per-block compression that preserves integer and floating point\nbit patterns." } ], "description": "Codec providing compression using ZFP (classic)", diff --git a/codecs/zfp/tests/schema.json b/codecs/zfp/tests/schema.json index d63a7f52..da4b2f9b 100644 --- a/codecs/zfp/tests/schema.json +++ b/codecs/zfp/tests/schema.json @@ -27,7 +27,7 @@ "min_exp": { "type": "integer", "format": "int32", - "description": "Smallest absolute bit plane number encoded.\n\n This parameter applies to floating-point data only and is ignored\n for integer data." + "description": "Smallest absolute bit plane number encoded.\n\nThis parameter applies to floating-point data only and is ignored\nfor integer data." }, "mode": { "type": "string", @@ -44,7 +44,7 @@ }, { "type": "object", - "description": "In fixed-rate mode, each d-dimensional compressed block of `$4^d$`\n values is stored using a fixed number of bits. This number of\n compressed bits per block is amortized over the `$4^d$` values to give\n a rate of `$rate = \\frac{maxbits}{4^d}$` in bits per value.", + "description": "In fixed-rate mode, each d-dimensional compressed block of `$4^d$`\nvalues is stored using a fixed number of bits. This number of\ncompressed bits per block is amortized over the `$4^d$` values to give\na rate of `$rate = \\frac{maxbits}{4^d}$` in bits per value.", "properties": { "rate": { "type": "number", @@ -63,7 +63,7 @@ }, { "type": "object", - "description": "In fixed-precision mode, the number of bits used to encode a block may\n vary, but the number of bit planes (the precision) encoded for the\n transform coefficients is fixed.", + "description": "In fixed-precision mode, the number of bits used to encode a block may\nvary, but the number of bit planes (the precision) encoded for the\ntransform coefficients is fixed.", "properties": { "precision": { "type": "integer", @@ -83,7 +83,7 @@ }, { "type": "object", - "description": "In fixed-accuracy mode, all transform coefficient bit planes up to a\n minimum bit plane number are encoded. The smallest absolute bit plane\n number is chosen such that\n `$minexp = \\text{floor}(\\log_{2}(tolerance))$`.", + "description": "In fixed-accuracy mode, all transform coefficient bit planes up to a\nminimum bit plane number are encoded. The smallest absolute bit plane\nnumber is chosen such that\n`$minexp = \\text{floor}(\\log_{2}(tolerance))$`.", "properties": { "tolerance": { "type": "number", @@ -111,7 +111,7 @@ "required": [ "mode" ], - "description": "Lossless per-block compression that preserves integer and floating point\n bit patterns." + "description": "Lossless per-block compression that preserves integer and floating point\nbit patterns." } ], "description": "Codec providing compression using ZFP", diff --git a/crates/numcodecs-python/src/schema.rs b/crates/numcodecs-python/src/schema.rs index f9dc4cee..a2a5868a 100644 --- a/crates/numcodecs-python/src/schema.rs +++ b/crates/numcodecs-python/src/schema.rs @@ -137,7 +137,7 @@ pub fn docs_from_schema(schema: &Schema) -> Option { let mut docs = String::new(); if let Some(Value::String(description)) = schema.get("description") { - docs.push_str(&derust_doc_comment(description)); + docs.push_str(description); docs.push_str("\n\n"); } @@ -307,7 +307,7 @@ fn extend_parameters_from_one_of_schema<'a>( _ => &[], }; let variant_docs = match schema.get("description") { - Some(Value::String(docs)) => Some(derust_doc_comment(docs)), + Some(Value::String(docs)) => Some(docs), _ => None, }; @@ -352,22 +352,6 @@ fn extend_parameters_from_one_of_schema<'a>( } } -fn derust_doc_comment(docs: &str) -> Cow { - if docs.trim() != docs { - return Cow::Borrowed(docs); - } - - if !docs - .split('\n') - .skip(1) - .all(|l| l.trim().is_empty() || l.starts_with(' ')) - { - return Cow::Borrowed(docs); - } - - Cow::Owned(docs.replace("\n ", "\n")) -} - #[derive(Debug, Error)] pub enum SchemaError { #[error("codec class' cached config schema is invalid")] @@ -412,7 +396,7 @@ impl<'a> Parameter<'a> { .any(|r| matches!(r, Value::String(n) if n == name)), default: parameter.get("default"), docs: match parameter.get("description") { - Some(Value::String(docs)) => Some(derust_doc_comment(docs)), + Some(Value::String(docs)) => Some(docs), _ => None, }, } @@ -541,7 +525,7 @@ mod tests { fn schema() { assert_eq!( format!("{}", schema_for!(MyCodec).to_value()), - r#"{"type":"object","properties":{"param":{"type":["integer","null"],"format":"int32","description":"An optional integer value."}},"unevaluatedProperties":false,"oneOf":[{"type":"object","description":"Mode a.\n\n It gets another line.","properties":{"value":{"type":"boolean","description":"A boolean value. And some really, really, really, long first\n line that wraps around.\n\n With multiple lines of comments."},"common":{"type":"string","description":"A common string value.\n\n Something else here."},"mode":{"type":"string","const":"A"}},"required":["mode","value","common"]},{"type":"object","description":"Mode b.","properties":{"common":{"type":"string","description":"A common string value.\n\n Something else here."},"mode":{"type":"string","const":"B"}},"required":["mode","common"]}],"description":"A codec that does something on encoding and decoding.\n\n With multiple lines of comments.","title":"MyCodec","$schema":"https://json-schema.org/draft/2020-12/schema"}"# + r#"{"type":"object","properties":{"param":{"type":["integer","null"],"format":"int32","description":"An optional integer value."}},"unevaluatedProperties":false,"oneOf":[{"type":"object","description":"Mode a.\n\nIt gets another line.","properties":{"value":{"type":"boolean","description":"A boolean value. And some really, really, really, long first\nline that wraps around.\n\nWith multiple lines of comments."},"common":{"type":"string","description":"A common string value.\n\nSomething else here."},"mode":{"type":"string","const":"A"}},"required":["mode","value","common"]},{"type":"object","description":"Mode b.","properties":{"common":{"type":"string","description":"A common string value.\n\nSomething else here."},"mode":{"type":"string","const":"B"}},"required":["mode","common"]}],"description":"A codec that does something on encoding and decoding.\n\nWith multiple lines of comments.","title":"MyCodec","$schema":"https://json-schema.org/draft/2020-12/schema"}"# ); }