-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathexport_allele_frequencies_missingness.sh
53 lines (36 loc) · 1.54 KB
/
export_allele_frequencies_missingness.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
#!/bin/bash
shopt -s expand_aliases
# this script is meant to be run in the main analysis folder after harmonization of the target genotypes and ancestry scoring.
# it will create a .tar.gz file that you can then share
if ! command -v plink2 &> /dev/null; then
echo "could not find plink2 command, trying bin/plink2"
alias plink2=bin/plink2
fi
set -e
if [[ -z "${BIOBANK}" ]]; then
echo "Error: BIOBANK undefined, please set the BIOBANK environment variable, e.g.: export BIOBANK='ukbb'"
exit 1
fi
keepfiles=results/${BIOBANK}/Ancestry_identifier/outlier_detection/*keep
echo "found keep files: "
wc -l $keepfiles
mkdir -p results/${BIOBANK}/afreq
for k in $keepfiles; do
if [[ $(wc -l < $k) -ge 20 ]]; then
outfile=$(basename ${k})
outfile=results/${BIOBANK}/afreq/${outfile%%.keep}
echo "Using output prefix '${outfile}'"
for chrom in {1..22}; do
if [ ! -f ${outfile}_chr${chrom}.acount ]; then
plink2 --bfile custom_input/${BIOBANK}/genotypes/chr${chrom} --keep ${k} --missing variant-only --freq counts --out ${outfile}_chr${chrom} --memory 7500
else
echo "output file already exists: ${outfile}_chr${chrom}.acount, skipping."
fi
done
else
echo "fewer than 20 individuals inside {$keepfile}, skipping."
fi
done
echo "packing..."
tar -zcvf ${BIOBANK}_hapmap3_1kg_allele_frequencies.tar.gz results/${BIOBANK}/afreq
echo "Done. Allele frequency files are packaged in ${BIOBANK}_hapmap3_1kg_allele_frequencies.tar.gz"