Skip to content

Commit 1f28c11

Browse files
nspopemergify[bot]
authored andcommitted
Initial C implementation of pair_coalescence_counts
1 parent beafeba commit 1f28c11

File tree

15 files changed

+1293
-112
lines changed

15 files changed

+1293
-112
lines changed

.github/workflows/docs.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@ jobs:
3838
use-mamba: true
3939

4040
- name: Cache Conda env
41-
uses: actions/cache@v3
41+
uses: actions/cache@v4
4242
with:
4343
path: ${{ env.CONDA }}/envs
4444
key: conda-${{ runner.os }}--${{ runner.arch }}--${{ hashFiles(env.REQUIREMENTS) }}-${{ env.CACHE_NUMBER }}
@@ -65,4 +65,4 @@ jobs:
6565
curl -X POST https://api.github.com/repos/tskit-dev/tskit-site/dispatches \
6666
-H 'Accept: application/vnd.github.everest-preview+json' \
6767
-u AdminBot-tskit:${{ secrets.ADMINBOT_TOKEN }} \
68-
--data '{"event_type":"build-docs"}'
68+
--data '{"event_type":"build-docs"}'

c/.gitignore

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1 +1,3 @@
11
build
2+
.*.swp
3+
.*.swo

c/tests/test_stats.c

Lines changed: 103 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -304,6 +304,94 @@ verify_divergence_matrix(tsk_treeseq_t *ts, tsk_flags_t options)
304304
}
305305
}
306306

307+
/* Check coalescence counts against naive implementation */
308+
static void
309+
verify_pair_coalescence_counts(tsk_treeseq_t *ts, tsk_flags_t options)
310+
{
311+
int ret;
312+
const tsk_size_t n = tsk_treeseq_get_num_samples(ts);
313+
const tsk_size_t N = tsk_treeseq_get_num_nodes(ts);
314+
const tsk_size_t T = tsk_treeseq_get_num_trees(ts);
315+
const tsk_id_t *samples = tsk_treeseq_get_samples(ts);
316+
const double *breakpoints = tsk_treeseq_get_breakpoints(ts);
317+
const tsk_size_t P = 2;
318+
const tsk_size_t I = P * (P + 1) / 2;
319+
tsk_id_t sample_sets[n];
320+
tsk_size_t sample_set_sizes[P];
321+
tsk_id_t index_tuples[2 * I];
322+
tsk_id_t node_output_map[N];
323+
tsk_size_t dim = T * N * I;
324+
double C1[dim];
325+
tsk_size_t i, j, k;
326+
327+
for (i = 0; i < n; i++) {
328+
sample_sets[i] = samples[i];
329+
}
330+
331+
for (i = 0; i < P; i++) {
332+
sample_set_sizes[i] = 0;
333+
}
334+
for (j = 0; j < n; j++) {
335+
i = j / (n / P);
336+
sample_set_sizes[i]++;
337+
}
338+
339+
for (j = 0, i = 0; j < P; j++) {
340+
for (k = j; k < P; k++) {
341+
index_tuples[i++] = (tsk_id_t) j;
342+
index_tuples[i++] = (tsk_id_t) k;
343+
}
344+
}
345+
346+
for (i = 0; i < N; i++) {
347+
node_output_map[i] = (tsk_id_t) i;
348+
}
349+
350+
ret = tsk_treeseq_pair_coalescence_stat(ts, P, sample_set_sizes, sample_sets, I,
351+
index_tuples, T, breakpoints, N, node_output_map, options, C1);
352+
CU_ASSERT_EQUAL_FATAL(ret, 0);
353+
/* TODO: test against naive pairs per node per tree */
354+
355+
/* cover errors */
356+
double bad_breakpoints[2] = { breakpoints[1], 0.0 };
357+
ret = tsk_treeseq_pair_coalescence_stat(ts, P, sample_set_sizes, sample_sets, I,
358+
index_tuples, 1, bad_breakpoints, N, node_output_map, options, C1);
359+
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_WINDOWS);
360+
361+
index_tuples[0] = (tsk_id_t) P;
362+
ret = tsk_treeseq_pair_coalescence_stat(ts, P, sample_set_sizes, sample_sets, I,
363+
index_tuples, 1, breakpoints, N, node_output_map, options, C1);
364+
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_SAMPLE_SET_INDEX);
365+
index_tuples[0] = 0;
366+
367+
tsk_size_t tmp = sample_set_sizes[0];
368+
sample_set_sizes[0] = 0;
369+
ret = tsk_treeseq_pair_coalescence_stat(ts, P, sample_set_sizes, sample_sets, I,
370+
index_tuples, 1, breakpoints, N, node_output_map, options, C1);
371+
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_EMPTY_SAMPLE_SET);
372+
sample_set_sizes[0] = tmp;
373+
374+
sample_sets[1] = 0;
375+
ret = tsk_treeseq_pair_coalescence_stat(ts, P, sample_set_sizes, sample_sets, I,
376+
index_tuples, 1, breakpoints, N, node_output_map, options, C1);
377+
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_DUPLICATE_SAMPLE);
378+
sample_sets[1] = 1;
379+
380+
ret = tsk_treeseq_pair_coalescence_stat(ts, P, sample_set_sizes, sample_sets, I,
381+
index_tuples, 1, breakpoints, N - 1, node_output_map, options, C1);
382+
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_NODE_OUTPUT_MAP_DIM);
383+
384+
ret = tsk_treeseq_pair_coalescence_stat(ts, P, sample_set_sizes, sample_sets, I,
385+
index_tuples, 1, breakpoints, 0, node_output_map, options, C1);
386+
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_NODE_OUTPUT_MAP_DIM);
387+
388+
node_output_map[0] = -2;
389+
ret = tsk_treeseq_pair_coalescence_stat(ts, P, sample_set_sizes, sample_sets, I,
390+
index_tuples, 1, breakpoints, N, node_output_map, options, C1);
391+
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_NODE_OUTPUT_MAP);
392+
node_output_map[0] = 0;
393+
}
394+
307395
typedef struct {
308396
int call_count;
309397
int error_on;
@@ -2786,6 +2874,19 @@ test_multiroot_divergence_matrix(void)
27862874
tsk_treeseq_free(&ts);
27872875
}
27882876

2877+
static void
2878+
test_pair_coalescence_counts(void)
2879+
{
2880+
tsk_treeseq_t ts;
2881+
tsk_treeseq_from_text(&ts, 100, nonbinary_ex_nodes, nonbinary_ex_edges, NULL,
2882+
nonbinary_ex_sites, nonbinary_ex_mutations, NULL, NULL, 0);
2883+
verify_pair_coalescence_counts(&ts, TSK_STAT_NODE);
2884+
verify_pair_coalescence_counts(&ts, TSK_STAT_NODE | TSK_STAT_SPAN_NORMALISE);
2885+
verify_pair_coalescence_counts(&ts, 0);
2886+
verify_pair_coalescence_counts(&ts, TSK_STAT_SPAN_NORMALISE);
2887+
tsk_treeseq_free(&ts);
2888+
}
2889+
27892890
int
27902891
main(int argc, char **argv)
27912892
{
@@ -2884,6 +2985,8 @@ main(int argc, char **argv)
28842985
test_simplest_divergence_matrix_internal_sample },
28852986
{ "test_multiroot_divergence_matrix", test_multiroot_divergence_matrix },
28862987

2988+
{ "test_pair_coalescence_counts", test_pair_coalescence_counts },
2989+
28872990
{ NULL, NULL },
28882991
};
28892992
return test_main(tests, argc, argv);

c/tskit/core.c

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -484,6 +484,14 @@ tsk_strerror_internal(int err)
484484
ret = "Insufficient weights provided (at least 1 required). "
485485
"(TSK_ERR_INSUFFICIENT_WEIGHTS)";
486486
break;
487+
case TSK_ERR_BAD_NODE_OUTPUT_MAP:
488+
ret = "Node output map contains values less than TSK_NULL. "
489+
"(TSK_ERR_BAD_NODE_OUTPUT_MAP)";
490+
break;
491+
case TSK_ERR_BAD_NODE_OUTPUT_MAP_DIM:
492+
ret = "Maximum index in node output map does not match "
493+
"output dimension. (TSK_ERR_BAD_NODE_OUTPUT_MAP_DIM)";
494+
break;
487495

488496
/* Mutation mapping errors */
489497
case TSK_ERR_GENOTYPES_ALL_MISSING:

c/tskit/core.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -694,6 +694,14 @@ not support it.
694694
Insufficient weights were provided.
695695
*/
696696
#define TSK_ERR_INSUFFICIENT_WEIGHTS -913
697+
/**
698+
The node output map contains a value less than TSK_NULL.
699+
*/
700+
#define TSK_ERR_BAD_NODE_OUTPUT_MAP -914
701+
/**
702+
Maximum index in node output map does not match output dimension.
703+
*/
704+
#define TSK_ERR_BAD_NODE_OUTPUT_MAP_DIM -915
697705
/** @} */
698706

699707
/**

0 commit comments

Comments
 (0)