Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions CHANGES.rst
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
4.4 (unreleased)
================

- The dada reader now supports reading Meerkat MKBF files, which store
data in heaps of 256 samples each (with the samples along the
fastest axis). [#542]

4.3.1 (unreleased)
==================
Expand Down
82 changes: 43 additions & 39 deletions baseband/dada/header.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,46 +62,45 @@ class DADAHeader(OrderedDict):
'time')
"""Properties accessible/usable in initialisation for all headers."""

_defaults = [('HEADER', 'DADA'),
('HDR_VERSION', '1.0'),
('HDR_SIZE', 4096),
('DADA_VERSION', '1.0'),
('OBS_ID', 'unset'),
('PRIMARY', 'unset'),
('SECONDARY', 'unset'),
('FILE_NAME', 'unset'),
('FILE_NUMBER', 0),
('FILE_SIZE', 0),
('OBS_OFFSET', 0),
('OBS_OVERLAP', 0),
('SOURCE', 'unset'),
('TELESCOPE', 'unset'),
('INSTRUMENT', 'unset'),
('RECEIVER', 'unset'),
('NBIT', 8),
('NDIM', 1),
('NPOL', 1),
('NCHAN', 1),
('RESOLUTION', 1),
('DSB', 1)]
_defaults = {
'HEADER': 'DADA',
'HDR_VERSION': '1.0',
'HDR_SIZE': 4096,
'DADA_VERSION': '1.0',
'OBS_ID': 'unset',
'PRIMARY': 'unset',
'SECONDARY': 'unset',
'FILE_NAME': 'unset',
'FILE_NUMBER': 0,
'FILE_SIZE': 0,
'OBS_OFFSET': 0,
'OBS_OVERLAP': 0,
'SOURCE': 'unset',
'TELESCOPE': 'unset',
'INSTRUMENT': 'unset',
'RECEIVER': 'unset',
'NBIT': 8,
'NDIM': 1,
'NPOL': 1,
'NCHAN': 1,
'RESOLUTION': 1,
'DSB': 1,
}

def __init__(self, *args, verify=True, mutable=True, **kwargs):
self.mutable = True
self.comments = {}
if len(args) == 1 and isinstance(args[0], str):
args = (self._fromlines(args[0].split('\n')),)

super().__init__(*args, **kwargs)
super().__init__(**self._fromlines(args[0].split('\n')), **kwargs)
else:
super().__init__(*args, **kwargs)
self.mutable = mutable
if verify and (args or kwargs):
self.verify()

def verify(self):
"""Basic check of integrity."""
assert self['HEADER'] == 'DADA'
assert all(key in self for key in ('HDR_VERSION',
'HDR_SIZE',
'DADA_VERSION'))
assert len(set(self.keys()).intersection(self._defaults.keys())) > 10

def copy(self):
"""Create a mutable and independent copy of the header."""
Expand All @@ -118,7 +117,7 @@ def __copy__(self):
@staticmethod
def _fromlines(lines):
"""Interpret a list of lines as a header, converting its values."""
args = []
kwargs = {}
for line_no, line in enumerate(lines):
split = line.strip().split('#')
comment = split[1].strip() if (len(split) > 1
Expand All @@ -135,9 +134,9 @@ def _fromlines(lines):
elif key in ('FREQ', 'BW', 'TSAMP'):
value = float(value)

args.append((key, (value, comment)))
kwargs[key] = (value, comment)

return args
return kwargs

def _tolines(self):
"""Write header to a list of strings."""
Expand Down Expand Up @@ -198,7 +197,7 @@ def fromfile(cls, fh, verify=True):
else:
fh.seek(start_pos + hdr_size)

return cls(cls._fromlines(lines), verify=verify, mutable=False)
return cls(**cls._fromlines(lines), verify=verify, mutable=False)

def tofile(self, fh):
"""Write DADA file header to filehandle.
Expand Down Expand Up @@ -248,7 +247,7 @@ def fromvalues(cls, **kwargs):

Furthermore, some header defaults are set in ``DADAHeader._defaults``.
"""
self = cls(cls._defaults, verify=False)
self = cls(**cls._defaults, verify=False)
self.update(**kwargs)
return self

Expand Down Expand Up @@ -400,11 +399,16 @@ def offset(self, offset):
@property
def start_time(self):
"""Start time of the observation."""
mjd_int, frac = self['MJD_START'].split('.')
mjd_int = int(mjd_int)
frac = float('.' + frac)
# replace '-' between date and time with a 'T' and convert to Time
return Time(mjd_int, frac, scale='utc', format='mjd')
if "MJD_START" in self:
mjd_int, frac = self['MJD_START'].split('.')
mjd_int = int(mjd_int)
frac = float('.' + frac)
return Time(mjd_int, frac, scale='utc', format='mjd')
else:
# replace '-' between date and time with a 'T' and convert to Time
t0 = self['UTC_START']
t0 = t0[:10] + 'T' + t0[11:]
return Time(t0, scale='utc', format='isot')

@start_time.setter
def start_time(self, start_time):
Expand Down
46 changes: 45 additions & 1 deletion baseband/dada/payload.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
from ..base.payload import PayloadBase


__all__ = ['DADAPayload']
__all__ = ['DADAPayload', "MKBFPayload"]


def decode_8bit(words):
Expand Down Expand Up @@ -43,3 +43,47 @@ class DADAPayload(PayloadBase):
8: encode_8bit}
_memmap = True
_sample_shape_maker = namedtuple('SampleShape', 'npol, nchan')

def __new__(cls, words, *, header=None, **kwargs):
    """Create a payload instance, dispatching on the header's instrument.

    If a header is given and its INSTRUMENT keyword equals "MKBF",
    instantiate the MKBFPayload subclass instead of the requested class,
    so that the heap-ordered sample layout is handled transparently.
    All other arguments are dealt with in ``__init__``.
    """
    # Override instrument if header is given.
    if header is not None and header.get("INSTRUMENT") == "MKBF":
        cls = MKBFPayload
    return super().__new__(cls)


class MKBFPayload(DADAPayload):
    """Container for decoding and encoding MKBF DADA payloads.

    Subclass of `~baseband.dada.DADAPayload` that takes into account
    that the samples are organized in heaps of 256 samples, with order
    (nheap, npol, nchan, nsub=256).

    Some information on the instrument writing it can be found in
    `Van der Byl et al. 2021 <https://doi.org/10.1117/1.JATIS.8.1.011006>`_

    """
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # Change view of words to indicate one element contains 256
        # samples for each part of the sample_shape (this also ensures
        # bpw=bits-per-word is correct in _item_to_slices).
        # Each word becomes one full heap: a "heaps" field of complex
        # (real, imag) int pairs with subarray shape sample_shape + (256,).
        self._dtype_word = np.dtype(
            [("heaps",
              [("real", f"int{self.bps}"), ("imag", f"int{self.bps}")],
              self.sample_shape + (256,))])
        self.words = self.words.view(self._dtype_word)

    def _decode(self, words, data_slice=()):
        # words["heaps"] has shape (nheap,) + sample_shape + (256,);
        # move the per-heap sample axis next to the heap axis and flatten,
        # yielding time-ordered samples of shape (-1,) + sample_shape,
        # then apply the requested slice before the parent decode.
        words = np.moveaxis(words["heaps"], -1, 1).ravel().reshape(
            -1, *self.sample_shape)[data_slice]
        return super()._decode(words)

    def _encode(self, data):
        # Inverse of _decode: regroup time-ordered samples into heaps of
        # 256 and move the 256-sample axis to the fastest (last) position.
        # Assumes the number of samples is a multiple of 256 — presumably
        # guaranteed by the payload size; confirm against callers.
        data = np.moveaxis(data.reshape(-1, 256, *self.sample_shape), 1, -1)
        return super()._encode(data)

    def __getitem__(self, item=()):
        # Translate the item into a slice of heap-words plus a slice of
        # decoded samples, since one word holds 256 samples.
        words_slice, data_slice = self._item_to_slices(item)
        return self._decode(self.words[words_slice], data_slice)

    data = property(__getitem__, doc="Full decoded payload.")
66 changes: 65 additions & 1 deletion baseband/dada/tests/test_dada.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,14 @@
import pytest
import numpy as np
import astropy.units as u
from numpy.testing import assert_array_equal
from astropy.time import Time
from ... import dada
from ...helpers import sequentialfile as sf
from ..base import DADAFileNameSequencer
from ...data import SAMPLE_DADA as SAMPLE_FILE, SAMPLE_MEERKAT_DADA
from ..payload import decode_8bit
from ...data import (
SAMPLE_DADA as SAMPLE_FILE, SAMPLE_MEERKAT_DADA, SAMPLE_MKBF_DADA)


class TestDADA:
Expand Down Expand Up @@ -810,3 +813,64 @@ def test_meerkat_data():
with dada.open(SAMPLE_MEERKAT_DADA, 'rs') as fh:
data = fh.read()
assert data.shape == (16384 - 4096 // 2, 2)


class TestMKBF:
    """Tests for reading and writing MeerKAT MKBF-flavoured DADA files."""

    def test_header(self):
        # Check that header can be read and time is correct.
        # NOTE(review): the sample lacks MJD_START, so start_time is
        # presumably derived from UTC_START — confirm against header.py.
        with dada.open(SAMPLE_MKBF_DADA, 'rb') as fh:
            header = fh.read_header()
        assert header.sample_shape == (2, 1024)
        assert header["NPOL"] == 2
        assert header["NCHAN"] == 1024
        assert header.start_time == Time("2023-07-19T15:24:04")

    def test_data(self):
        # Stream-read the full payload and verify seeking is consistent.
        with dada.open(SAMPLE_MKBF_DADA, 'rs') as fh:
            data = fh.read()
            fh.seek(10)
            d10 = fh.read(1)
        assert_array_equal(d10, data[10:11])

        # Independently decode the raw payload bytes (after the 4096-byte
        # header) and undo the heap ordering by hand: raw layout is
        # (npol=2, nchan=1024, nsub=256) with samples on the fastest axis.
        with open(SAMPLE_MKBF_DADA, 'rb') as fh:
            fh.seek(4096)
            raw_words = np.frombuffer(fh.read(-1), dtype="u1")

        pd = decode_8bit(raw_words).view("c8").reshape(2, 1024, 256)
        check = np.moveaxis(pd, -1, 0).reshape(data.shape)
        assert_array_equal(check, data)

    @pytest.mark.parametrize("nheap", [1, 3, 6])
    def test_writing(self, nheap, tmpdir):
        # Round-trip: write 6 heaps' worth of data in unevenly sized
        # chunks (including splits inside a heap) with a payload size of
        # ``nheap`` heaps per frame, then read back and compare.
        with dada.open(SAMPLE_MKBF_DADA, 'rs') as fh:
            header = fh.header0
            data = fh.read()
        test_file = str(tmpdir.join('test_mkbf.dada'))
        # swap real and imaginary.
        other_data = data.view("f4")[..., ::-1].copy().view("c8")
        assert not np.all(other_data == data)
        new_header = header.copy()
        new_header.payload_nbytes *= nheap
        with dada.open(test_file, "ws", header0=new_header) as fw:
            fw.write(data)
            fw.write(other_data)
            fw.write(other_data[:200])
            fw.write(data[200:])
            fw.write(np.concatenate([data, other_data, data]))

        with dada.open(test_file, "rs") as fr:
            assert fr.header0 == new_header
            out = fr.read()
            # With nheap=6 the 6 heaps fill the last frame exactly, so its
            # header matches; otherwise the final frame is a partial one.
            if nheap == 6:
                assert fr._last_header == new_header
            else:
                assert fr._last_header != new_header

        # Verify each written chunk landed at the expected sample offsets.
        assert out.shape == (6*256, 2, 1024)
        assert_array_equal(out[:256], data)
        assert_array_equal(out[256:512], other_data)
        assert_array_equal(out[512:712], other_data[:200])
        assert_array_equal(out[712:768], data[200:])
        assert_array_equal(out[768:1024], data)
        assert_array_equal(out[1024:1280], other_data)
        assert_array_equal(out[1280:], data)
7 changes: 7 additions & 0 deletions baseband/data/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,13 @@ def _full_path(name, dirname=_path.dirname(_path.abspath(__file__))):
with many ctrl-0.
"""

SAMPLE_MKBF_DADA = _full_path('sample_mkbf.dada')
"""DADA sample in custom Meerkat MKBF format, adapted to shortened size.

The sample does not have the standard "HEADER" keyword, and stores data
in chunks of 256 samples (hardcoded, not inferable from header).
"""

SAMPLE_PUPPI = _full_path('sample_puppi.raw')
"""GUPPI/PUPPI sample, npol=2, nchan=4.

Expand Down
Binary file added baseband/data/sample_mkbf.dada
Binary file not shown.
6 changes: 6 additions & 0 deletions docs/dada/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,12 @@ single :term:`data frame` consisting of an ASCII :term:`header` of typically
<http://psrdada.sourceforge.net/manuals/Specification.pdf>`_ and
actual usage; files are described by an :ref:`ASCII header <dada_header>`.

.. note::
Since the specification is also by actual usage, some files are not
immediately readable (e.g., in Meerkat's MKBF files, the samples are not
stored consecutively, but in heaps). Please raise an issue if you
encounter problems, ideally with links to format definitions.

.. _dada_usage:

Usage
Expand Down