diff --git a/CHANGES.rst b/CHANGES.rst
index 9545eb06..30a15dbd 100644
--- a/CHANGES.rst
+++ b/CHANGES.rst
@@ -1,6 +1,9 @@
 4.4 (unreleased)
 ================
 
+- The dada reader now supports reading MeerKAT MKBF files, which store
+  data in heaps of 256 samples each (with the samples along the
+  fastest axis). [#542]
 
 4.3.1 (unreleased)
 ==================
 
diff --git a/baseband/dada/header.py b/baseband/dada/header.py
index 67818187..819fcde4 100644
--- a/baseband/dada/header.py
+++ b/baseband/dada/header.py
@@ -62,46 +62,45 @@ class DADAHeader(OrderedDict):
                    'time')
     """Properties accessible/usable in initialisation for all headers."""
 
-    _defaults = [('HEADER', 'DADA'),
-                 ('HDR_VERSION', '1.0'),
-                 ('HDR_SIZE', 4096),
-                 ('DADA_VERSION', '1.0'),
-                 ('OBS_ID', 'unset'),
-                 ('PRIMARY', 'unset'),
-                 ('SECONDARY', 'unset'),
-                 ('FILE_NAME', 'unset'),
-                 ('FILE_NUMBER', 0),
-                 ('FILE_SIZE', 0),
-                 ('OBS_OFFSET', 0),
-                 ('OBS_OVERLAP', 0),
-                 ('SOURCE', 'unset'),
-                 ('TELESCOPE', 'unset'),
-                 ('INSTRUMENT', 'unset'),
-                 ('RECEIVER', 'unset'),
-                 ('NBIT', 8),
-                 ('NDIM', 1),
-                 ('NPOL', 1),
-                 ('NCHAN', 1),
-                 ('RESOLUTION', 1),
-                 ('DSB', 1)]
+    _defaults = {
+        'HEADER': 'DADA',
+        'HDR_VERSION': '1.0',
+        'HDR_SIZE': 4096,
+        'DADA_VERSION': '1.0',
+        'OBS_ID': 'unset',
+        'PRIMARY': 'unset',
+        'SECONDARY': 'unset',
+        'FILE_NAME': 'unset',
+        'FILE_NUMBER': 0,
+        'FILE_SIZE': 0,
+        'OBS_OFFSET': 0,
+        'OBS_OVERLAP': 0,
+        'SOURCE': 'unset',
+        'TELESCOPE': 'unset',
+        'INSTRUMENT': 'unset',
+        'RECEIVER': 'unset',
+        'NBIT': 8,
+        'NDIM': 1,
+        'NPOL': 1,
+        'NCHAN': 1,
+        'RESOLUTION': 1,
+        'DSB': 1,
+    }
 
     def __init__(self, *args, verify=True, mutable=True, **kwargs):
         self.mutable = True
         self.comments = {}
         if len(args) == 1 and isinstance(args[0], str):
-            args = (self._fromlines(args[0].split('\n')),)
-
-        super().__init__(*args, **kwargs)
+            super().__init__(**self._fromlines(args[0].split('\n')), **kwargs)
+        else:
+            super().__init__(*args, **kwargs)
         self.mutable = mutable
         if verify and (args or kwargs):
             self.verify()
 
     def verify(self):
         """Basic check of integrity."""
-        assert self['HEADER'] == 'DADA'
-        assert all(key in self for key in ('HDR_VERSION',
-                                           'HDR_SIZE',
-                                           'DADA_VERSION'))
+        assert len(set(self.keys()).intersection(self._defaults.keys())) > 10
 
     def copy(self):
         """Create a mutable and independent copy of the header."""
@@ -118,7 +117,7 @@ def __copy__(self):
     @staticmethod
     def _fromlines(lines):
         """Interpret a list of lines as a header, converting its values."""
-        args = []
+        kwargs = {}
         for line_no, line in enumerate(lines):
             split = line.strip().split('#')
             comment = split[1].strip() if (len(split) > 1
@@ -135,9 +134,9 @@
             elif key in ('FREQ', 'BW', 'TSAMP'):
                 value = float(value)
 
-            args.append((key, (value, comment)))
+            kwargs[key] = (value, comment)
 
-        return args
+        return kwargs
 
     def _tolines(self):
         """Write header to a list of strings."""
@@ -198,7 +197,7 @@ def fromfile(cls, fh, verify=True):
         else:
             fh.seek(start_pos + hdr_size)
 
-        return cls(cls._fromlines(lines), verify=verify, mutable=False)
+        return cls(**cls._fromlines(lines), verify=verify, mutable=False)
 
     def tofile(self, fh):
         """Write DADA file header to filehandle.
@@ -248,7 +247,7 @@ def fromvalues(cls, **kwargs):
 
         Furthermore, some header defaults are set in ``DADAHeader._defaults``.
""" - self = cls(cls._defaults, verify=False) + self = cls(**cls._defaults, verify=False) self.update(**kwargs) return self @@ -400,11 +399,16 @@ def offset(self, offset): @property def start_time(self): """Start time of the observation.""" - mjd_int, frac = self['MJD_START'].split('.') - mjd_int = int(mjd_int) - frac = float('.' + frac) - # replace '-' between date and time with a 'T' and convert to Time - return Time(mjd_int, frac, scale='utc', format='mjd') + if "MJD_START" in self: + mjd_int, frac = self['MJD_START'].split('.') + mjd_int = int(mjd_int) + frac = float('.' + frac) + return Time(mjd_int, frac, scale='utc', format='mjd') + else: + # replace '-' between date and time with a 'T' and convert to Time + t0 = self['UTC_START'] + t0 = t0[:10] + 'T' + t0[11:] + return Time(t0, scale='utc', format='isot') @start_time.setter def start_time(self, start_time): diff --git a/baseband/dada/payload.py b/baseband/dada/payload.py index 0fd92b12..2c4f6fb1 100644 --- a/baseband/dada/payload.py +++ b/baseband/dada/payload.py @@ -7,7 +7,7 @@ from ..base.payload import PayloadBase -__all__ = ['DADAPayload'] +__all__ = ['DADAPayload', "MKBFPayload"] def decode_8bit(words): @@ -43,3 +43,47 @@ class DADAPayload(PayloadBase): 8: encode_8bit} _memmap = True _sample_shape_maker = namedtuple('SampleShape', 'npol, nchan') + + def __new__(cls, words, *, header=None, **kwargs): + # Override instrument if header is given. + if header is not None and header.get("INSTRUMENT") == "MKBF": + cls = MKBFPayload + return super().__new__(cls) + + +class MKBFPayload(DADAPayload): + """Container for decoding and encoding MKBF DADA payloads. + + Subclass of `~baseband.dada.DADAPayload` that takes into account + that the samples are organized in heaps of 256 samples, with order + (nheap, npol, nchan, nsub=256). + + Some information on the instrument writing it can be found in + `Van der Byl et al. 2021 `_ + + """ + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + # Change view of words to indicate one element containts 256 + # samples for each part of the sample_shape (this also ensures + # bpw=bits-per-word is correct in _item_to_slices). + self._dtype_word = np.dtype( + [("heaps", + [("real", f"int{self.bps}"), ("imag", f"int{self.bps}")], + self.sample_shape + (256,))]) + self.words = self.words.view(self._dtype_word) + + def _decode(self, words, data_slice=()): + words = np.moveaxis(words["heaps"], -1, 1).ravel().reshape( + -1, *self.sample_shape)[data_slice] + return super()._decode(words) + + def _encode(self, data): + data = np.moveaxis(data.reshape(-1, 256, *self.sample_shape), 1, -1) + return super()._encode(data) + + def __getitem__(self, item=()): + words_slice, data_slice = self._item_to_slices(item) + return self._decode(self.words[words_slice], data_slice) + + data = property(__getitem__, doc="Full decoded payload.") diff --git a/baseband/dada/tests/test_dada.py b/baseband/dada/tests/test_dada.py index 65eb802c..ee248a30 100644 --- a/baseband/dada/tests/test_dada.py +++ b/baseband/dada/tests/test_dada.py @@ -6,11 +6,14 @@ import pytest import numpy as np import astropy.units as u +from numpy.testing import assert_array_equal from astropy.time import Time from ... 
 from ...helpers import sequentialfile as sf
 from ..base import DADAFileNameSequencer
-from ...data import SAMPLE_DADA as SAMPLE_FILE, SAMPLE_MEERKAT_DADA
+from ..payload import decode_8bit
+from ...data import (
+    SAMPLE_DADA as SAMPLE_FILE, SAMPLE_MEERKAT_DADA, SAMPLE_MKBF_DADA)
 
 class TestDADA:
@@ -810,3 +813,64 @@ def test_meerkat_data():
     with dada.open(SAMPLE_MEERKAT_DADA, 'rs') as fh:
         data = fh.read()
         assert data.shape == (16384 - 4096 // 2, 2)
+
+
+class TestMKBF:
+    def test_header(self):
+        # Check that header can be read and time is correct.
+        with dada.open(SAMPLE_MKBF_DADA, 'rb') as fh:
+            header = fh.read_header()
+        assert header.sample_shape == (2, 1024)
+        assert header["NPOL"] == 2
+        assert header["NCHAN"] == 1024
+        assert header.start_time == Time("2023-07-19T15:24:04")
+
+    def test_data(self):
+        with dada.open(SAMPLE_MKBF_DADA, 'rs') as fh:
+            data = fh.read()
+            fh.seek(10)
+            d10 = fh.read(1)
+        assert_array_equal(d10, data[10:11])
+
+        with open(SAMPLE_MKBF_DADA, 'rb') as fh:
+            fh.seek(4096)
+            raw_words = np.frombuffer(fh.read(-1), dtype="u1")
+
+        pd = decode_8bit(raw_words).view("c8").reshape(2, 1024, 256)
+        check = np.moveaxis(pd, -1, 0).reshape(data.shape)
+        assert_array_equal(check, data)
+
+    @pytest.mark.parametrize("nheap", [1, 3, 6])
+    def test_writing(self, nheap, tmpdir):
+        with dada.open(SAMPLE_MKBF_DADA, 'rs') as fh:
+            header = fh.header0
+            data = fh.read()
+        test_file = str(tmpdir.join('test_mkbf.dada'))
+        # Swap real and imaginary parts.
+        other_data = data.view("f4")[..., ::-1].copy().view("c8")
+        assert not np.all(other_data == data)
+        new_header = header.copy()
+        new_header.payload_nbytes *= nheap
+        with dada.open(test_file, "ws", header0=new_header) as fw:
+            fw.write(data)
+            fw.write(other_data)
+            fw.write(other_data[:200])
+            fw.write(data[200:])
+            fw.write(np.concatenate([data, other_data, data]))
+
+        with dada.open(test_file, "rs") as fr:
+            assert fr.header0 == new_header
+            out = fr.read()
+            if nheap == 6:
+                assert fr._last_header == new_header
+            else:
+                assert fr._last_header != new_header
+
+        assert out.shape == (6*256, 2, 1024)
+        assert_array_equal(out[:256], data)
+        assert_array_equal(out[256:512], other_data)
+        assert_array_equal(out[512:712], other_data[:200])
+        assert_array_equal(out[712:768], data[200:])
+        assert_array_equal(out[768:1024], data)
+        assert_array_equal(out[1024:1280], other_data)
+        assert_array_equal(out[1280:], data)
diff --git a/baseband/data/__init__.py b/baseband/data/__init__.py
index 78af79e8..f9a29785 100644
--- a/baseband/data/__init__.py
+++ b/baseband/data/__init__.py
@@ -112,6 +112,13 @@ def _full_path(name, dirname=_path.dirname(_path.abspath(__file__))):
 with many ctrl-0.
 """
 
+SAMPLE_MKBF_DADA = _full_path('sample_mkbf.dada')
+"""DADA sample in the custom MeerKAT MKBF format, truncated in size.
+
+The sample does not have the standard "HEADER" keyword, and stores data
+in chunks of 256 samples (hardcoded, not inferable from the header).
+"""
+
 SAMPLE_PUPPI = _full_path('sample_puppi.raw')
 """GUPPI/PUPPI sample, npol=2, nchan=4.
 
diff --git a/baseband/data/sample_mkbf.dada b/baseband/data/sample_mkbf.dada
new file mode 100644
index 00000000..0c645285
Binary files /dev/null and b/baseband/data/sample_mkbf.dada differ
diff --git a/docs/dada/index.rst b/docs/dada/index.rst
index 17074f53..2bc108ea 100644
--- a/docs/dada/index.rst
+++ b/docs/dada/index.rst
@@ -11,6 +11,12 @@
 single :term:`data frame` consisting of an ASCII :term:`header` of typically
 `_ and actual usage; files are described by an :ref:`ASCII header `.
 
+.. note::
+   Since the specification is partly defined by actual usage, some files are
+   not immediately readable (e.g., in MeerKAT's MKBF files, samples are not
+   stored consecutively, but in heaps of 256). Please raise an issue if you
+   encounter problems, ideally with links to format definitions.
+
 .. _dada_usage:
 
 Usage
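
For reference, a minimal numpy sketch of the heap reordering that the new `MKBFPayload` performs. The shapes (npol=2, nchan=1024, heaps of 256 samples) follow the sample file used in the new tests; the variable names, the number of heaps, and the random data are illustrative only and not part of the patch.

```python
import numpy as np

# Shapes from the sample file used in the tests: 2 polarizations,
# 1024 channels, heaps of 256 samples; nheap and the data are made up.
npol, nchan, nsub = 2, 1024, 256
nheap = 3

# Fake 8-bit real/imag pairs in on-disk heap order (nheap, npol, nchan, 256).
rng = np.random.default_rng(0)
raw = rng.integers(-128, 128, size=(nheap, npol, nchan, nsub, 2),
                   dtype=np.int8)
complex_heaps = (raw[..., 0].astype(np.float32)
                 + 1j * raw[..., 1].astype(np.float32))

# Put time on the leading axis to get the usual (nsample, npol, nchan)
# stream order; this mirrors the moveaxis/reshape in MKBFPayload._decode.
data = np.moveaxis(complex_heaps, -1, 1).reshape(-1, npol, nchan)
assert data.shape == (nheap * nsub, npol, nchan)

# Returning to heap order (cf. MKBFPayload._encode) is the inverse operation.
heaps_again = np.moveaxis(data.reshape(nheap, nsub, npol, nchan), 1, -1)
assert np.array_equal(heaps_again, complex_heaps)
```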
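
And a short usage sketch of reading the new sample file, assuming the patch is applied so that `SAMPLE_MKBF_DADA` is importable from `baseband.data`; the expected sample shape and start time are the values checked in `TestMKBF.test_header`.

```python
from astropy.time import Time

from baseband import dada
from baseband.data import SAMPLE_MKBF_DADA  # added by this patch

with dada.open(SAMPLE_MKBF_DADA, 'rs') as fh:
    # Sample shape and start time as checked in TestMKBF.test_header.
    assert fh.header0.sample_shape == (2, 1024)
    assert fh.header0.start_time == Time("2023-07-19T15:24:04")
    # Read the first two heaps' worth of samples.
    data = fh.read(512)

print(data.shape)  # (512, 2, 1024)
```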