Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
70 changes: 53 additions & 17 deletions src/MAT_HDF5.jl
Original file line number Diff line number Diff line change
Expand Up @@ -37,13 +37,13 @@ import Dates
import Tables
import PooledArrays: PooledArray

import ..MAT_types:
import ..MAT_types:
convert_struct_array,
EmptyStruct,
MatlabClassObject,
MatlabClassObject,
MatlabOpaque,
MatlabStructArray,
MatlabTable,
MatlabStructArray,
MatlabTable,
ScalarOrArray,
StructArrayField

Expand Down Expand Up @@ -158,6 +158,7 @@ const sparse_attr_matlab = "MATLAB_sparse"
const int_decode_attr_matlab = "MATLAB_int_decode"
const object_type_attr_matlab = "MATLAB_object_decode"
const object_decode_attr_matlab = "MATLAB_object_decode"
const struct_field_attr_matlab = "MATLAB_fields"

### Reading
function read_complex(dtype::HDF5.Datatype, dset::HDF5.Dataset, ::Type{T}) where T
Expand Down Expand Up @@ -193,8 +194,8 @@ function m_read(dset::HDF5.Dataset, subsys::Subsystem)
elseif mattype == "struct"
# Not sure if this check is necessary but it is checked in
# `m_read(g::HDF5.Group)`
if haskey(dset, "MATLAB_fields")
field_names = [join(n) for n in read_attribute(dset, "MATLAB_fields")]
if haskey(dset, struct_field_attr_matlab)
field_names = [join(n) for n in read_attribute(dset, struct_field_attr_matlab)]
return MatlabStructArray(field_names, tuple(dims...))
else
return Dict{String,Any}()
Expand All @@ -219,7 +220,7 @@ function m_read(dset::HDF5.Dataset, subsys::Subsystem)
if mattype == "FileWrapper__"
return read_cell(dset, subsys)
end
if haskey(dset, "MATLAB_fields")
if haskey(dset, struct_field_attr_matlab)
@warn "Enumeration Instances are not supported currently."
return missing
end
Expand Down Expand Up @@ -292,8 +293,8 @@ function read_sparse_matrix(g::HDF5.Group, mattype::String)
end

function read_struct_as_dict(g::HDF5.Group, subsys::Subsystem)
if haskey(g, "MATLAB_fields")
fn = [join(f) for f in read_attribute(g, "MATLAB_fields")]
if haskey(g, struct_field_attr_matlab)
fn = [join(f) for f in read_attribute(g, struct_field_attr_matlab)]
else
fn = keys(g)
end
Expand Down Expand Up @@ -565,7 +566,7 @@ end
# Write cell arrays
function m_write(mfile::MatlabHDF5File, parent::HDF5Parent, name::String, data::AbstractArray{T}, object_decode::UInt32=UInt32(0)) where T
data = _normalize_arr(data)
refs = _write_references!(mfile, parent, data)
refs = _write_references(mfile, parent, data)
# Write the references as the chosen variable
cset, ctype = create_dataset(parent, name, refs)
try
Expand All @@ -582,7 +583,7 @@ function m_write(mfile::MatlabHDF5File, parent::HDF5Parent, name::String, data::
end
end

function _write_references!(mfile::MatlabHDF5File, parent::HDF5Parent, data::AbstractArray)
function _write_references(mfile::MatlabHDF5File, parent::HDF5Parent, data::AbstractArray)
pathrefs = "/#refs#"
fid = HDF5.file(parent)
local g
Expand Down Expand Up @@ -630,6 +631,41 @@ function _write_references!(mfile::MatlabHDF5File, parent::HDF5Parent, data::Abs
return refs
end

"""
    _write_field_reference(mfile, parent, k)

Write the strings in `k` as a variable-length dataset inside the `/#refs#` group of the
file that `parent` belongs to, and return a `Reference` to that dataset.

The `/#refs#` group is created if the file does not have it yet. The dataset is named
after `mfile.refcounter`, which is incremented first.
"""
function _write_field_reference(mfile::MatlabHDF5File, parent::HDF5Parent, k::Vector{String})
    pathrefs = "/#refs#"
    fid = HDF5.file(parent)
    g = haskey(fid, pathrefs) ? fid[pathrefs] : create_group(fid, pathrefs)
    local ref

    try
        mfile.refcounter += 1
        itemname = string(mfile.refcounter)
        cset, ctype = create_dataset(g, itemname, HDF5.VLen(k))
        try
            write_dataset(cset, ctype, HDF5.VLen(k))
        finally
            # Release the handles explicitly, as the other writers in this file do,
            # instead of leaving them open until garbage collection.
            close(ctype); close(cset)
        end
        tmp = g[itemname]
        try
            ref = Reference(tmp, pathrefs*"/"*itemname)
        finally
            # Close the reopened dataset even if building the reference fails.
            close(tmp)
        end
    finally
        close(g)
    end
    return ref
end

"""
    _write_struct_fields(mfile, parent, fieldnames)

Store `fieldnames` on `parent` under the `struct_field_attr_matlab` attribute.

When the names add up to fewer than 4096 characters they are written inline as a
variable-length attribute. Otherwise they are written through `_write_field_reference`
and the attribute holds the returned reference instead.
"""
function _write_struct_fields(mfile::MatlabHDF5File, parent::Union{HDF5.Group, HDF5.Dataset}, fieldnames::Vector{String})
    # An explicit `init` keeps an empty name list from throwing: `sum(length, String[])`
    # raises an ArgumentError because Base has no empty-reduction rule for `length`.
    total_chars = mapreduce(length, +, fieldnames; init=0)
    if total_chars < 4096
        write_attribute(parent, struct_field_attr_matlab, HDF5.VLen(fieldnames))
    else
        # Too long to store inline: write the names elsewhere and keep a reference here.
        ref = _write_field_reference(mfile, parent, fieldnames)
        write_attribute(parent, struct_field_attr_matlab, ref)
    end
end

# Struct array: Array of Dict => MATLAB struct array
function m_write(mfile::MatlabHDF5File, parent::HDF5Parent, name::String, arr::AbstractArray{<:AbstractDict})
Expand All @@ -646,7 +682,7 @@ function m_write(mfile::MatlabHDF5File, parent::HDF5Parent, name::String, arr::M
try
write_attribute(dset, empty_attr_matlab, 0x01)
write_attribute(dset, name_type_attr_matlab, "struct")
write_attribute(dset, "MATLAB_fields", HDF5.VLen(arr.names))
_write_struct_fields(mfile, dset, arr.names)
write_dataset(dset, dtype, adata)
finally
close(dtype); close(dset)
Expand All @@ -656,14 +692,14 @@ function m_write(mfile::MatlabHDF5File, parent::HDF5Parent, name::String, arr::M
try
if isempty(arr.class)
write_attribute(g, name_type_attr_matlab, "struct")
write_attribute(g, "MATLAB_fields", HDF5.VLen(arr.names))
_write_struct_fields(mfile, g, arr.names)
else
write_attribute(g, name_type_attr_matlab, arr.class)
write_attribute(g, object_decode_attr_matlab, UInt32(2))
write_attribute(g, "MATLAB_fields", HDF5.VLen(arr.names))
_write_struct_fields(mfile, g, arr.names)
end
for (fieldname, field_values) in arr
refs = _write_references!(mfile, parent, field_values)
refs = _write_references(mfile, parent, field_values)
dset, dtype = create_dataset(g, fieldname, refs)
try
write_dataset(dset, dtype, refs)
Expand Down Expand Up @@ -701,7 +737,7 @@ function m_write(mfile::MatlabHDF5File, parent::HDF5Parent, name::String, obj::M
write_attribute(g, name_type_attr_matlab, obj.class)
write_attribute(g, object_decode_attr_matlab, UInt32(2))
all_keys = collect(keys(obj))
write_attribute(g, "MATLAB_fields", HDF5.VLen(all_keys))
_write_struct_fields(mfile, g, all_keys)
for (ki, vi) in zip(all_keys, values(obj))
m_write(mfile, g, ki, vi)
end
Expand Down Expand Up @@ -744,7 +780,7 @@ function m_write(mfile::MatlabHDF5File, parent::HDF5Parent, name::String, k::Vec
for i = 1:length(k)
m_write(mfile, g, k[i], v[i])
end
write_attribute(g, "MATLAB_fields", HDF5.VLen(k))
_write_struct_fields(mfile, g, k)
finally
close(g)
end
Expand Down
17 changes: 11 additions & 6 deletions src/MAT_types.jl
Original file line number Diff line number Diff line change
Expand Up @@ -176,7 +176,8 @@ function Base.:(==)(m1::MatlabStructArray{N}, m2::MatlabStructArray{N}) where {N
end

function Base.isapprox(m1::MatlabStructArray, m2::MatlabStructArray; kwargs...)
return isequal(m1.class, m2.class) && isequal(m1.names, m2.names) && isapprox(m1.values, m2.values; kwargs...)
return isequal(m1.class, m2.class) && issetequal(m1.names, m2.names) &&
key_based_isapprox(m1.names, m1, m2; kwargs...)
end

function find_index(m::MatlabStructArray, s::AbstractString)
Expand Down Expand Up @@ -413,14 +414,18 @@ function Base.isapprox(m1::MatlabOpaque, m2::MatlabOpaque; kwargs...)
end

function dict_isapprox(d1::AbstractDict{T}, d2::AbstractDict{T}; kwargs...) where T
keys(d1) == keys(d2) || return false
for k in keys(d1)
v1, v2 = d1[k], d2[k]
value_isapprox(v1, v2) || return false
issetequal(keys(d1), keys(d2)) || return false
return key_based_isapprox(keys(d1), d1, d2; kwargs...)
end
dict_isapprox(d1::AbstractDict{T1}, d2::AbstractDict{T2}; kwargs...) where {T1,T2} = false

"""
    key_based_isapprox(keys, collection1, collection2; kwargs...)

Return `true` when, for every `k` in `keys`, `collection1[k]` and `collection2[k]` compare
as approximately equal under `value_isapprox`. The keyword arguments are forwarded to each
comparison. Stops at the first mismatch.
"""
function key_based_isapprox(keys, collection1, collection2; kwargs...)
    for key in keys
        left = collection1[key]
        right = collection2[key]
        if !value_isapprox(left, right; kwargs...)
            return false
        end
    end
    return true
end
dict_isapprox(d1::AbstractDict{T1}, d2::AbstractDict{T2}; kwargs...) where {T1,T2} = false

# Strings are compared exactly; any tolerance keywords are accepted but ignored.
function value_isapprox(x1::AbstractString, x2::AbstractString; kwargs...)
    return isequal(x1, x2)
end

# Every other pair of values defers to `isapprox`, forwarding the keywords.
function value_isapprox(x1, x2; kwargs...)
    return isapprox(x1, x2; kwargs...)
end
Expand Down
2 changes: 2 additions & 0 deletions test/types.jl
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@ using Dates
@test isapprox(MatlabStructArray(["a"], [[0.1, 0.2]]), MatlabStructArray(["a"], [[0.1+eps(0.1), 0.2]]))
@test !isapprox(MatlabStructArray(["a"], [[0.1, 0.2]]), MatlabStructArray(["b"], [[0.1, 0.2]]))
@test !isapprox(MatlabStructArray(["a"], [[0.1, 0.2]]), MatlabStructArray(["a"], [[0.11, 0.2]]))
# name order shouldn't matter in isapprox
@test isapprox(MatlabStructArray(["a", "b"], [[0.1], [0.2]]), MatlabStructArray(["b", "a"], [[0.2], [0.1]]))

# empty struct array constructor
s_arr = MatlabStructArray(["x", "y"], (0,1))
Expand Down
36 changes: 28 additions & 8 deletions test/write.jl
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ using SparseArrays, LinearAlgebra

tmpfile = string(tempname(), ".mat")

function test_write(data; kwargs...)
function test_write_data(data; approx = false, kwargs...)
matwrite(tmpfile, data; kwargs...)

fid = matopen(tmpfile, "r")
Expand All @@ -14,18 +14,22 @@ function test_write(data; kwargs...)
close(fid)
end

@test isequal(result, data)
if approx
@test MAT.MAT_types.dict_isapprox(result, data)
else
@test isequal(result, data)
end
end

function test_write(data)
test_write(data; compress = false)
test_write(data; compress = true)
function test_write(data; kwargs...)
test_write_data(data; compress = false, kwargs...)
test_write_data(data; compress = true, kwargs...)
end

function test_compression_effective(data)
test_write(data; compress = false)
test_write_data(data; compress = false)
sizeUncompressed = stat(tmpfile).size
test_write(data; compress = true)
test_write_data(data; compress = true)
sizeCompressed = stat(tmpfile).size

if sizeCompressed >= sizeUncompressed
Expand Down Expand Up @@ -92,13 +96,19 @@ test_write(Dict(
"cell" => Any[1 2.01 "string" Any["string1" "string2"]]
))

# Struct fixture with 526 fields named "field1" … "field526", each holding its own index.
s3 = Dict{Any,Any}("field$i" => i for i in 1:526)

test_write(Dict(
"s" => Dict(
"a" => 1.0,
"b" => [1.0 2.0],
"c" => [1.0 2.0 3.0]
),
"s2" => Dict("a" => [1.0 2.0])
"s2" => Dict("a" => [1.0 2.0]),
"s3" => s3
))

test_write(Dict(
Expand Down Expand Up @@ -213,6 +223,16 @@ end
matwrite(tmpfile, Dict("class_array" => carr))
carr_read = matread(tmpfile)["class_array"]
@test carr_read == MatlabStructArray(carr)

s_large1 = Dict()
s_large2 = Dict()
for i in 1:600
s_large1["field$i"] = i
s_large2["field$i"] = i + 1000
end
sarr = Dict{String, Any}[s_large1, s_large2]
# note: name/key order doesn't seem to be preserved for some reason
test_write(Dict{String,Any}("s_array_large" => MatlabStructArray(sarr)); approx=true)
end

@testset "MatlabOpaque simple" begin
Expand Down
Loading