Skip to content

Commit dc197b6

Browse files
authored
Merge pull request #644 from benjeffery/top-level-metadata-python
Add tree sequence metadata and schema to low-level python
2 parents 0280623 + a042938 commit dc197b6

File tree

2 files changed

+168
-48
lines changed

2 files changed

+168
-48
lines changed

python/_tskitmodule.c

Lines changed: 114 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -1786,6 +1786,24 @@ parse_table_collection_dict(tsk_table_collection_t *tables, PyObject *tables_dic
17861786
return ret;
17871787
}
17881788

1789+
static const char *
1790+
parse_metadata_schema_arg(PyObject *arg, Py_ssize_t* metadata_schema_length)
1791+
{
1792+
const char *ret = NULL;
1793+
if (arg == NULL) {
1794+
PyErr_Format(
1795+
PyExc_AttributeError,
1796+
"Cannot del metadata_schema, set to empty string (\"\") to clear.");
1797+
goto out;
1798+
}
1799+
ret = PyUnicode_AsUTF8AndSize(arg, metadata_schema_length);
1800+
if (ret == NULL) {
1801+
goto out;
1802+
}
1803+
out:
1804+
return ret;
1805+
}
1806+
17891807
static int
17901808
write_table_arrays(tsk_table_collection_t *tables, PyObject *dict)
17911809
{
@@ -2519,16 +2537,10 @@ IndividualTable_set_metadata_schema(IndividualTable *self, PyObject *arg, void *
25192537
const char *metadata_schema;
25202538
Py_ssize_t metadata_schema_length;
25212539

2522-
if (arg == NULL) {
2523-
PyErr_Format(
2524-
PyExc_AttributeError,
2525-
"Cannot del metadata_schema, set to empty string (\"\") to clear.");
2526-
goto out;
2527-
}
25282540
if (IndividualTable_check_state(self) != 0) {
25292541
goto out;
25302542
}
2531-
metadata_schema = PyUnicode_AsUTF8AndSize(arg, &metadata_schema_length);
2543+
metadata_schema = parse_metadata_schema_arg(arg, &metadata_schema_length);
25322544
if (metadata_schema == NULL) {
25332545
goto out;
25342546
}
@@ -2995,16 +3007,10 @@ NodeTable_set_metadata_schema(NodeTable *self, PyObject *arg, void *closure)
29953007
const char *metadata_schema;
29963008
Py_ssize_t metadata_schema_length;
29973009

2998-
if (arg == NULL) {
2999-
PyErr_Format(
3000-
PyExc_AttributeError,
3001-
"Cannot del metadata_schema, set to empty string (\"\") to clear.");
3002-
goto out;
3003-
}
30043010
if (NodeTable_check_state(self) != 0) {
30053011
goto out;
30063012
}
3007-
metadata_schema = PyUnicode_AsUTF8AndSize(arg, &metadata_schema_length);
3013+
metadata_schema = parse_metadata_schema_arg(arg, &metadata_schema_length);
30083014
if (metadata_schema == NULL) {
30093015
goto out;
30103016
}
@@ -3488,15 +3494,10 @@ EdgeTable_set_metadata_schema(EdgeTable *self, PyObject *arg, void *closure)
34883494
const char *metadata_schema;
34893495
Py_ssize_t metadata_schema_length;
34903496

3491-
if (arg == NULL) {
3492-
PyErr_Format(PyExc_AttributeError,
3493-
"Cannot del metadata_schema, set to empty string (\"\") to clear.");
3494-
goto out;
3495-
}
34963497
if (EdgeTable_check_state(self) != 0) {
34973498
goto out;
34983499
}
3499-
metadata_schema = PyUnicode_AsUTF8AndSize(arg, &metadata_schema_length);
3500+
metadata_schema = parse_metadata_schema_arg(arg, &metadata_schema_length);
35003501
if (metadata_schema == NULL) {
35013502
goto out;
35023503
}
@@ -3989,16 +3990,10 @@ MigrationTable_set_metadata_schema(MigrationTable *self, PyObject *arg, void *cl
39893990
const char *metadata_schema;
39903991
Py_ssize_t metadata_schema_length;
39913992

3992-
if (arg == NULL) {
3993-
PyErr_Format(
3994-
PyExc_AttributeError,
3995-
"Cannot del metadata_schema, set to empty string (\"\") to clear.");
3996-
goto out;
3997-
}
39983993
if (MigrationTable_check_state(self) != 0) {
39993994
goto out;
40003995
}
4001-
metadata_schema = PyUnicode_AsUTF8AndSize(arg, &metadata_schema_length);
3996+
metadata_schema = parse_metadata_schema_arg(arg, &metadata_schema_length);
40023997
if (metadata_schema == NULL) {
40033998
goto out;
40043999
}
@@ -4453,16 +4448,10 @@ SiteTable_set_metadata_schema(SiteTable *self, PyObject *arg, void *closure)
44534448
const char *metadata_schema;
44544449
Py_ssize_t metadata_schema_length;
44554450

4456-
if (arg == NULL) {
4457-
PyErr_Format(
4458-
PyExc_AttributeError,
4459-
"Cannot del metadata_schema, set to empty string (\"\") to clear.");
4460-
goto out;
4461-
}
44624451
if (SiteTable_check_state(self) != 0) {
44634452
goto out;
44644453
}
4465-
metadata_schema = PyUnicode_AsUTF8AndSize(arg, &metadata_schema_length);
4454+
metadata_schema = parse_metadata_schema_arg(arg, &metadata_schema_length);
44664455
if (metadata_schema == NULL) {
44674456
goto out;
44684457
}
@@ -4956,16 +4945,10 @@ MutationTable_set_metadata_schema(MutationTable *self, PyObject *arg, void *clos
49564945
const char *metadata_schema;
49574946
Py_ssize_t metadata_schema_length;
49584947

4959-
if (arg == NULL) {
4960-
PyErr_Format(
4961-
PyExc_AttributeError,
4962-
"Cannot del metadata_schema, set to empty string (\"\") to clear.");
4963-
goto out;
4964-
}
49654948
if (MutationTable_check_state(self) != 0) {
49664949
goto out;
49674950
}
4968-
metadata_schema = PyUnicode_AsUTF8AndSize(arg, &metadata_schema_length);
4951+
metadata_schema = parse_metadata_schema_arg(arg, &metadata_schema_length);
49694952
if (metadata_schema == NULL) {
49704953
goto out;
49714954
}
@@ -5375,16 +5358,10 @@ PopulationTable_set_metadata_schema(PopulationTable *self, PyObject *arg, void *
53755358
const char *metadata_schema;
53765359
Py_ssize_t metadata_schema_length;
53775360

5378-
if (arg == NULL) {
5379-
PyErr_Format(
5380-
PyExc_AttributeError,
5381-
"Cannot del metadata_schema, set to empty string (\"\") to clear.");
5382-
goto out;
5383-
}
53845361
if (PopulationTable_check_state(self) != 0) {
53855362
goto out;
53865363
}
5387-
metadata_schema = PyUnicode_AsUTF8AndSize(arg, &metadata_schema_length);
5364+
metadata_schema = parse_metadata_schema_arg(arg, &metadata_schema_length);
53885365
if (metadata_schema == NULL) {
53895366
goto out;
53905367
}
@@ -6083,6 +6060,71 @@ TableCollection_get_file_uuid(TableCollection *self, void *closure)
60836060
return Py_BuildValue("s", self->tables->file_uuid);
60846061
}
60856062

6063+
static PyObject *
6064+
TableCollection_get_metadata(TableCollection *self, void *closure)
6065+
{
6066+
return PyBytes_FromStringAndSize(self->tables->metadata, self->tables->metadata_length);
6067+
}
6068+
6069+
static int
6070+
TableCollection_set_metadata(TableCollection *self, PyObject *arg, void *closure)
6071+
{
6072+
int ret = -1;
6073+
int err;
6074+
char *metadata;
6075+
Py_ssize_t metadata_length;
6076+
6077+
if (arg == NULL) {
6078+
PyErr_Format(
6079+
PyExc_AttributeError,
6080+
"Cannot del metadata, set to empty string (b\"\") to clear.");
6081+
goto out;
6082+
}
6083+
err = PyBytes_AsStringAndSize(arg, &metadata, &metadata_length);
6084+
if (err != 0) {
6085+
goto out;
6086+
}
6087+
err = tsk_table_collection_set_metadata(
6088+
self->tables, metadata, metadata_length);
6089+
if (err != 0) {
6090+
handle_library_error(err);
6091+
goto out;
6092+
}
6093+
ret = 0;
6094+
out:
6095+
return ret;
6096+
}
6097+
6098+
static PyObject *
6099+
TableCollection_get_metadata_schema(TableCollection *self, void *closure)
6100+
{
6101+
return make_Py_Unicode_FromStringAndLength(
6102+
self->tables->metadata_schema, self->tables->metadata_schema_length);
6103+
}
6104+
6105+
static int
6106+
TableCollection_set_metadata_schema(TableCollection *self, PyObject *arg, void *closure)
6107+
{
6108+
int ret = -1;
6109+
int err;
6110+
const char *metadata_schema;
6111+
Py_ssize_t metadata_schema_length;
6112+
6113+
metadata_schema = parse_metadata_schema_arg(arg, &metadata_schema_length);
6114+
if (metadata_schema == NULL) {
6115+
goto out;
6116+
}
6117+
err = tsk_table_collection_set_metadata_schema(
6118+
self->tables, metadata_schema, metadata_schema_length);
6119+
if (err != 0) {
6120+
handle_library_error(err);
6121+
goto out;
6122+
}
6123+
ret = 0;
6124+
out:
6125+
return ret;
6126+
}
6127+
60866128
static PyObject *
60876129
TableCollection_simplify(TableCollection *self, PyObject *args, PyObject *kwds)
60886130
{
@@ -6340,6 +6382,12 @@ static PyGetSetDef TableCollection_getsetters[] = {
63406382
(setter) TableCollection_set_sequence_length, "The sequence length."},
63416383
{"file_uuid", (getter) TableCollection_get_file_uuid, NULL,
63426384
"The UUID of the corresponding file."},
6385+
{"metadata",
6386+
(getter) TableCollection_get_metadata,
6387+
(setter) TableCollection_set_metadata, "The metadata."},
6388+
{"metadata_schema",
6389+
(getter) TableCollection_get_metadata_schema,
6390+
(setter) TableCollection_set_metadata_schema, "The metadata schema."},
63436391
{NULL} /* Sentinel */
63446392
};
63456393

@@ -6680,6 +6728,20 @@ TreeSequence_get_site(TreeSequence *self, PyObject *args)
66806728
return ret;
66816729
}
66826730

6731+
static PyObject *
6732+
TreeSequence_get_metadata(TreeSequence * self) {
6733+
return PyBytes_FromStringAndSize(
6734+
self->tree_sequence->tables->metadata,
6735+
self->tree_sequence->tables->metadata_length);
6736+
}
6737+
6738+
static PyObject *
6739+
TreeSequence_get_metadata_schema(TreeSequence * self) {
6740+
return make_Py_Unicode_FromStringAndLength(
6741+
self->tree_sequence->tables->metadata_schema,
6742+
self->tree_sequence->tables->metadata_schema_length);
6743+
}
6744+
66836745
static PyObject *
66846746
TreeSequence_get_table_metadata_schemas(TreeSequence *self) {
66856747
PyObject *ret = NULL;
@@ -8173,6 +8235,10 @@ static PyMethodDef TreeSequence_methods[] = {
81738235
METH_NOARGS, "Returns the tree breakpoints as a numpy array." },
81748236
{"get_file_uuid", (PyCFunction) TreeSequence_get_file_uuid,
81758237
METH_NOARGS, "Returns the UUID of the underlying file, if present." },
8238+
{"get_metadata", (PyCFunction) TreeSequence_get_metadata, METH_NOARGS,
8239+
"Returns the metadata for the tree sequence"},
8240+
{"get_metadata_schema", (PyCFunction) TreeSequence_get_metadata_schema, METH_NOARGS,
8241+
"Returns the metadata schema for the tree sequence metadata"},
81768242
{"get_num_sites", (PyCFunction) TreeSequence_get_num_sites,
81778243
METH_NOARGS, "Returns the number of sites" },
81788244
{"get_num_mutations", (PyCFunction) TreeSequence_get_num_mutations, METH_NOARGS,

python/tests/test_lowlevel.py

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -197,6 +197,38 @@ def test_set_sequence_length(self):
197197
tables.sequence_length = value
198198
self.assertEqual(tables.sequence_length, value)
199199

200+
def test_set_metadata_errors(self):
    # ``metadata`` cannot be deleted, and only bytes values are accepted.
    tables = _tskit.TableCollection(1)
    self.assertRaises(AttributeError, delattr, tables, "metadata")
    for bad_value in ("bytes only", 59, 43.4, None, []):
        self.assertRaises(TypeError, setattr, tables, "metadata", bad_value)
207+
208+
def test_set_metadata(self):
    # Metadata starts out empty and round-trips arbitrary bytes (including
    # an embedded NUL); writing metadata_schema must not disturb it.
    tables = _tskit.TableCollection(1)
    self.assertEqual(tables.metadata, b"")
    payloads = (b"foo", b"", "💩".encode(), b"null char \0 in string")
    for payload in payloads:
        tables.metadata = payload
        tables.metadata_schema = "Test we have two separate fields"
        self.assertEqual(tables.metadata, payload)
215+
216+
def test_set_metadata_schema_errors(self):
    # ``metadata_schema`` cannot be deleted, and only str values are accepted.
    tables = _tskit.TableCollection(1)
    self.assertRaises(AttributeError, delattr, tables, "metadata_schema")
    for bad_value in (59, 43.4, None, []):
        self.assertRaises(
            TypeError, setattr, tables, "metadata_schema", bad_value
        )
223+
224+
def test_set_metadata_schema(self):
    # The schema starts out empty and round-trips arbitrary text (including
    # an embedded NUL); writing metadata must not disturb it.
    tables = _tskit.TableCollection(1)
    self.assertEqual(tables.metadata_schema, "")
    schemas = ("foo", "", "💩", "null char \0 in string")
    for schema in schemas:
        tables.metadata_schema = schema
        tables.metadata = b"Test we have two separate fields"
        self.assertEqual(tables.metadata_schema, schema)
231+
200232
def test_simplify_bad_args(self):
201233
ts = msprime.simulate(10, random_seed=1)
202234
tc = ts.tables.ll_tables
@@ -478,6 +510,28 @@ def test_metadata_schemas(self):
478510
for table_name in self.metadata_tables:
479511
self.assertEqual(getattr(schemas, table_name), "")
480512

513+
def test_metadata(self):
    # A tree sequence reports the metadata of the tables it was loaded from.
    tables = _tskit.TableCollection(1)
    tree_sequence = _tskit.TreeSequence()
    tree_sequence.load_tables(tables)
    self.assertEqual(tree_sequence.get_metadata(), b"")
    payloads = (b"foo", b"", "💩".encode(), b"null char \0 in string")
    for payload in payloads:
        tables.metadata = payload
        # Load into a fresh TreeSequence for every value.
        tree_sequence = _tskit.TreeSequence()
        tree_sequence.load_tables(tables)
        self.assertEqual(tree_sequence.get_metadata(), payload)
523+
524+
def test_metadata_schema(self):
    # A tree sequence reports the metadata schema of the tables it was
    # loaded from.
    tables = _tskit.TableCollection(1)
    tree_sequence = _tskit.TreeSequence()
    tree_sequence.load_tables(tables)
    self.assertEqual(tree_sequence.get_metadata_schema(), "")
    schemas = ("foo", "", "💩", "null char \0 in string")
    for schema in schemas:
        tables.metadata_schema = schema
        # Load into a fresh TreeSequence for every value.
        tree_sequence = _tskit.TreeSequence()
        tree_sequence.load_tables(tables)
        self.assertEqual(tree_sequence.get_metadata_schema(), schema)
534+
481535
def test_kc_distance_errors(self):
482536
ts1 = self.get_example_tree_sequence(10)
483537
self.assertRaises(TypeError, ts1.get_kc_distance)

0 commit comments

Comments
 (0)