Skip to content

time windows in statistics #2948

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 27 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
aeda4dc
example test for time windows
petrelharp May 9, 2024
0a30696
a little more flailing at beginning tests
petrelharp May 17, 2024
1a988c0
first simple test for AFS in branch mode with time windows
tforest Jun 18, 2024
18ffda0
AFS non-naive version with time windows + Some tests
tforest Jul 15, 2024
c6f9562
Improve AFS branch mode
tforest Jul 18, 2024
7a3149f
fix AFS tests
tforest Jul 18, 2024
71da7ad
Fix naive branch general stat
tforest Jul 22, 2024
2a44909
intermediate changes to C AFS implementation with time windows
tforest Aug 23, 2024
4460db9
tw afs C implementation
tforest Sep 23, 2024
bbec6a9
Better dimension drop with time windows
tforest Oct 11, 2024
da5f205
AFS branch mode with time windows to review
tforest Oct 21, 2024
6b3ab4f
Adapting some tests with new time_windows parameter
tforest Oct 30, 2024
f2d857b
time_windows addition after code rebase
tforest Oct 30, 2024
b8f4ba5
Fix some tests failing because of time_windows
tforest Oct 31, 2024
c9f6c06
Remove unused parameters in afs function
tforest Oct 31, 2024
59ea266
Fixing a memory issue in tsk_treeseq_update_branch_afs
tforest Oct 31, 2024
96ac0ce
Fixing some time_windows issues with AFS
tforest Nov 19, 2024
26a7f09
Adjusting some stats tests using time_windows
tforest Nov 19, 2024
8a8c05b
Adjusting some tests trying to pass codecov
tforest Dec 11, 2024
0d48891
Updating naive_branch_... and branch_allele_freq... in tests
tforest Jan 18, 2025
8abdd6e
disable drop dimension for branch afs
tforest Apr 9, 2025
783a63b
Merge branch 'tskit-dev:main' into time_windows
tforest Apr 9, 2025
53a4a5b
Update tskit to 0.6.2 / branch:time_windows
tforest Apr 9, 2025
1a48aaf
update branch_general_stat in tests
tforest Apr 10, 2025
d226c43
Merge branch 'time_windows' of github.com:tforest/tskit into time_win…
tforest Apr 10, 2025
869ac01
Merge branch 'tskit-dev:main' into time_windows
tforest Apr 10, 2025
1bec4a9
fixing AFS branch tw interval
tforest Aug 9, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
105 changes: 95 additions & 10 deletions c/tests/test_stats.c
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
*/

#include "testlib.h"
#include <math.h>
#include <tskit/stats.h>

#include <unistd.h>
Expand Down Expand Up @@ -868,6 +869,84 @@ verify_one_way_stat_func_errors(tsk_treeseq_t *ts, one_way_sample_stat_method *m
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_WINDOWS);
}

// Temporary definition for time_windows in tsk_treeseq_allele_frequency_spectrum
typedef int one_way_sample_stat_method_tw(const tsk_treeseq_t *self,
tsk_size_t num_sample_sets, const tsk_size_t *sample_set_sizes,
const tsk_id_t *sample_sets, tsk_size_t num_windows, const double *windows,
tsk_size_t num_time_windows, const double *time_windows, tsk_flags_t options,
double *result);

// Temporary duplicate for time-windows-having methods
static void
verify_one_way_stat_func_errors_tw(
tsk_treeseq_t *ts, one_way_sample_stat_method_tw *method)
{
int ret;
tsk_id_t num_nodes = (tsk_id_t) tsk_treeseq_get_num_nodes(ts);
tsk_id_t samples[] = { 0, 1, 2, 3 };
tsk_size_t sample_set_sizes = 4;
double windows[] = { 0, 0, 0 };
double time_windows[] = { -1, 0.5, INFINITY };
double result;

ret = method(ts, 0, &sample_set_sizes, samples, 0, NULL, 0, NULL, 0, &result);
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_INSUFFICIENT_SAMPLE_SETS);

samples[0] = TSK_NULL;
ret = method(ts, 1, &sample_set_sizes, samples, 0, NULL, 0, NULL, 0, &result);
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_NODE_OUT_OF_BOUNDS);
samples[0] = -10;
ret = method(ts, 1, &sample_set_sizes, samples, 0, NULL, 0, NULL, 0, &result);
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_NODE_OUT_OF_BOUNDS);
samples[0] = num_nodes;
ret = method(ts, 1, &sample_set_sizes, samples, 0, NULL, 0, NULL, 0, &result);
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_NODE_OUT_OF_BOUNDS);
samples[0] = num_nodes + 1;
ret = method(ts, 1, &sample_set_sizes, samples, 0, NULL, 0, NULL, 0, &result);
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_NODE_OUT_OF_BOUNDS);

samples[0] = num_nodes - 1;
ret = method(ts, 1, &sample_set_sizes, samples, 0, NULL, 0, NULL, 0, &result);
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_SAMPLES);

samples[0] = 1;
ret = method(ts, 1, &sample_set_sizes, samples, 0, NULL, 0, NULL, 0, &result);
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_DUPLICATE_SAMPLE);

samples[0] = 0;
sample_set_sizes = 0;
ret = method(ts, 1, &sample_set_sizes, samples, 0, NULL, 0, NULL, 0, &result);
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_EMPTY_SAMPLE_SET);

sample_set_sizes = 4;
/* Window errors */
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

should also have time window errors here?

ret = method(ts, 1, &sample_set_sizes, samples, 0, windows, 0, NULL, 0, &result);
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_NUM_WINDOWS);

ret = method(ts, 1, &sample_set_sizes, samples, 2, windows, 0, NULL, 0, &result);
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_WINDOWS);

/* Time window errors */
ret = method(
ts, 1, &sample_set_sizes, samples, 0, NULL, 0, time_windows, 0, &result);
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_TIME_WINDOWS_DIM);

ret = method(
ts, 1, &sample_set_sizes, samples, 0, NULL, 2, time_windows, 0, &result);
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_TIME_WINDOWS);

time_windows[0] = 0.1;
ret = method(
ts, 1, &sample_set_sizes, samples, 0, NULL, 2, time_windows, 0, &result);
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_TIME_WINDOWS);

time_windows[0] = 0;
time_windows[1] = 0;
ret = method(
ts, 1, &sample_set_sizes, samples, 0, NULL, 2, time_windows, 0, &result);
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_TIME_WINDOWS);
}

static void
verify_two_way_stat_func_errors(
tsk_treeseq_t *ts, general_sample_stat_method *method, tsk_flags_t options)
Expand Down Expand Up @@ -1206,23 +1285,24 @@ verify_afs(tsk_treeseq_t *ts)
sample_set_sizes[0] = n - 2;
sample_set_sizes[1] = 2;
ret = tsk_treeseq_allele_frequency_spectrum(
ts, 2, sample_set_sizes, samples, 0, NULL, 0, result);
ts, 2, sample_set_sizes, samples, 0, NULL, 0, NULL, 0, result);
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

uh-oh, are these tabs?

CU_ASSERT_EQUAL_FATAL(ret, 0);

ret = tsk_treeseq_allele_frequency_spectrum(
ts, 2, sample_set_sizes, samples, 0, NULL, TSK_STAT_POLARISED, result);
ts, 2, sample_set_sizes, samples, 0, NULL, 0, NULL, TSK_STAT_POLARISED, result);
CU_ASSERT_EQUAL_FATAL(ret, 0);

ret = tsk_treeseq_allele_frequency_spectrum(ts, 2, sample_set_sizes, samples, 0,
NULL, TSK_STAT_POLARISED | TSK_STAT_SPAN_NORMALISE, result);
NULL, 0, NULL, TSK_STAT_POLARISED | TSK_STAT_SPAN_NORMALISE, result);
CU_ASSERT_EQUAL_FATAL(ret, 0);

ret = tsk_treeseq_allele_frequency_spectrum(ts, 2, sample_set_sizes, samples, 0,
NULL, TSK_STAT_BRANCH | TSK_STAT_POLARISED | TSK_STAT_SPAN_NORMALISE, result);
NULL, 0, NULL, TSK_STAT_BRANCH | TSK_STAT_POLARISED | TSK_STAT_SPAN_NORMALISE,
result);
CU_ASSERT_EQUAL_FATAL(ret, 0);

ret = tsk_treeseq_allele_frequency_spectrum(ts, 2, sample_set_sizes, samples, 0,
NULL, TSK_STAT_BRANCH | TSK_STAT_SPAN_NORMALISE, result);
NULL, 0, NULL, TSK_STAT_BRANCH | TSK_STAT_SPAN_NORMALISE, result);
CU_ASSERT_EQUAL_FATAL(ret, 0);

free(result);
Expand Down Expand Up @@ -2416,21 +2496,26 @@ test_paper_ex_afs_errors(void)
tsk_size_t sample_set_sizes[] = { 2, 2 };
tsk_id_t samples[] = { 0, 1, 2, 3 };
double result[10]; /* not thinking too hard about the actual value needed */
double time_windows[] = { 0, 1 };
int ret;

tsk_treeseq_from_text(&ts, 10, paper_ex_nodes, paper_ex_edges, NULL, paper_ex_sites,
paper_ex_mutations, paper_ex_individuals, NULL, 0);

verify_one_way_stat_func_errors(&ts, tsk_treeseq_allele_frequency_spectrum);
verify_one_way_stat_func_errors_tw(&ts, tsk_treeseq_allele_frequency_spectrum);

ret = tsk_treeseq_allele_frequency_spectrum(
&ts, 2, sample_set_sizes, samples, 0, NULL, TSK_STAT_NODE, result);
&ts, 2, sample_set_sizes, samples, 0, NULL, 0, NULL, TSK_STAT_NODE, result);
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_UNSUPPORTED_STAT_MODE);

ret = tsk_treeseq_allele_frequency_spectrum(&ts, 2, sample_set_sizes, samples, 0,
NULL, TSK_STAT_BRANCH | TSK_STAT_SITE, result);
NULL, 0, NULL, TSK_STAT_BRANCH | TSK_STAT_SITE, result);
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_MULTIPLE_STAT_MODES);

ret = tsk_treeseq_allele_frequency_spectrum(&ts, 2, sample_set_sizes, samples, 0,
NULL, 1, time_windows, TSK_STAT_SITE, result);
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_UNSUPPORTED_STAT_MODE);

tsk_treeseq_free(&ts);
}

Expand All @@ -2448,14 +2533,14 @@ test_paper_ex_afs(void)
/* we have two singletons and one tripleton */

ret = tsk_treeseq_allele_frequency_spectrum(
&ts, 1, sample_set_sizes, samples, 0, NULL, 0, result);
&ts, 1, sample_set_sizes, samples, 0, NULL, 0, NULL, 0, result);
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_EQUAL_FATAL(result[0], 0);
CU_ASSERT_EQUAL_FATAL(result[1], 3.0);
CU_ASSERT_EQUAL_FATAL(result[2], 0);

ret = tsk_treeseq_allele_frequency_spectrum(
&ts, 1, sample_set_sizes, samples, 0, NULL, TSK_STAT_POLARISED, result);
&ts, 1, sample_set_sizes, samples, 0, NULL, 0, NULL, TSK_STAT_POLARISED, result);
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_EQUAL_FATAL(result[0], 0);
CU_ASSERT_EQUAL_FATAL(result[1], 2.0);
Expand Down
6 changes: 3 additions & 3 deletions c/tests/test_trees.c
Original file line number Diff line number Diff line change
Expand Up @@ -8182,13 +8182,13 @@ test_time_uncalibrated(void)
CU_ASSERT_EQUAL_FATAL(ret, 0);

ret = tsk_treeseq_allele_frequency_spectrum(
&ts2, 2, sample_set_sizes, samples, 0, NULL, TSK_STAT_SITE, result);
&ts2, 2, sample_set_sizes, samples, 0, NULL, 0, NULL, TSK_STAT_SITE, result);
CU_ASSERT_EQUAL_FATAL(ret, 0);
ret = tsk_treeseq_allele_frequency_spectrum(
&ts2, 2, sample_set_sizes, samples, 0, NULL, TSK_STAT_BRANCH, result);
&ts2, 2, sample_set_sizes, samples, 0, NULL, 0, NULL, TSK_STAT_BRANCH, result);
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_TIME_UNCALIBRATED);
ret = tsk_treeseq_allele_frequency_spectrum(&ts2, 2, sample_set_sizes, samples, 0,
NULL, TSK_STAT_BRANCH | TSK_STAT_ALLOW_TIME_UNCALIBRATED, result);
NULL, 0, NULL, TSK_STAT_BRANCH | TSK_STAT_ALLOW_TIME_UNCALIBRATED, result);
CU_ASSERT_EQUAL_FATAL(ret, 0);

sigma = tsk_calloc(tsk_treeseq_get_num_nodes(&ts2), sizeof(double));
Expand Down
2 changes: 1 addition & 1 deletion c/tskit/core.c
Original file line number Diff line number Diff line change
Expand Up @@ -519,7 +519,7 @@ tsk_strerror_internal(int err)
"(TSK_ERR_BAD_SAMPLE_PAIR_TIMES)";
break;
case TSK_ERR_BAD_TIME_WINDOWS:
ret = "Time windows must be strictly increasing and end at infinity. "
ret = "Time windows must be strictly increasing. "
"(TSK_ERR_BAD_TIME_WINDOWS)";
break;
case TSK_ERR_BAD_NODE_TIME_WINDOW:
Expand Down
Loading