Skip to content

Commit

Permalink
Continued work for making the 'representative coding/noncoding/etc' g…
Browse files Browse the repository at this point in the history
…tf/gff3
  • Loading branch information
Kris Alavattam committed Apr 27, 2023
1 parent f42d527 commit 70e661a
Show file tree
Hide file tree
Showing 2 changed files with 362 additions and 102 deletions.
23 changes: 11 additions & 12 deletions results/2023-0215/tutorial_collapse-intersecting-regions.R
Original file line number Diff line number Diff line change
Expand Up @@ -8,12 +8,11 @@

library(plyr)


d <- data.frame(
chrom = c(1, 1, 1, 14, 16, 16),
name = c("a", "b", "c", "d", "e", "f"),
start = as.numeric(c(70001, 70203, 70060, 40004, 50000872, 50000872)),
stop = as.numeric(c(71200, 80001, 71051, 42004, 50000890, 51000952))
end = as.numeric(c(71200, 80001, 71051, 42004, 50000890, 51000952))
)


Expand All @@ -22,9 +21,9 @@ d <- data.frame(
# d <- d[order(d$start), ]
#
# # Check if a record should be linked with the previous
# d$previous_stop <- c(NA, d$stop[-nrow(d)])
# d$previous_stop <- cummax(ifelse(is.na(d$previous_stop), 0, d$previous_stop))
# d$new_group <- is.na(d$previous_stop) | d$start >= d$previous_stop
# d$previous_end <- c(NA, d$end[-nrow(d)])
# d$previous_end <- cummax(ifelse(is.na(d$previous_end), 0, d$previous_end))
# d$new_group <- is.na(d$previous_end) | d$start >= d$previous_end
#
# # The number of the current group of records is the number of times we have
# #+ switched to a new group
Expand All @@ -36,32 +35,32 @@ d <- data.frame(
# "group",
# summarize,
# start = min(start),
# stop = max(stop),
# end = max(end),
# name = paste(name, collapse = ",")
# )


# Don't ignore chromosome ----------------------------------------------------
# But this ignores the chrom column: To account for it, do the same thing for
#+ each chromosome separately
d <- d[order(d$chrom, d$start), ]
d <- d[order(d$chrom, d$start, d$end), ]
d <- plyr::ddply(
d,
"chrom",
function(u) {
# Check if a record should be linked with the previous record
x <- c(NA, u$stop[-nrow(u)])
x <- c(NA, u$end[-nrow(u)])
y <- ifelse(is.na(x), 0, x)
y <- cummax(y)
y[is.na(x)] <- NA
u$previous_stop <- y
u$previous_end <- y

return(u)
}
)

# Identify new groups based on 'previous_stop'
d$new_group <- is.na(d$previous_stop) | d$start >= d$previous_stop
# Identify new groups based on 'previous_end'
d$new_group <- is.na(d$previous_end) | d$start >= d$previous_end
d$group <- cumsum(d$new_group)

# Aggregate the data
Expand All @@ -70,6 +69,6 @@ plyr::ddply(
.(chrom, group),
plyr::summarize,
start = min(start),
stop = max(stop),
end = max(end),
name = paste(name, collapse = ",")
)
Loading

0 comments on commit 70e661a

Please sign in to comment.