r - Append to list of lists in for loop -
i have 2 data frames:
data<-structure(list(sample = structure(c(1l, 2l, 2l, 1l, 1l, 1l, 1l, 2l, 2l, 2l), .label = c("s1", "s2"), class = "factor"), chrom = structure(c(1l, 1l, 1l, 2l, 2l, 2l, 1l, 1l, 1l, 2l), .label = c("2l", "2r"), class = "factor"), pos = c(318351l, 605574l, 1014043l, 2031592l, 2886957l, 2910379l, 2218351l, 105574l, 1344043l, 216957l)), .names = c("sample", "chrom", "pos"), row.names = c(na, 10l), class = "data.frame") > arrange(data, chrom,sample) sample chrom pos 1 s1 2l 318351 2 s1 2l 2218351 3 s2 2l 605574 4 s2 2l 1014043 5 s2 2l 105574 6 s2 2l 1344043 7 s1 2r 2031592 8 s1 2r 2886957 9 s1 2r 2910379 10 s2 2r 216957
svbreaks<-structure(list(sample = structure(c(1l, 1l, 1l, 2l, 2l, 2l, 1l, 1l, 2l, 1l), .label = c("s1", "s2"), class = "factor"), chrom = structure(c(1l, 1l, 1l, 1l, 1l, 1l, 2l, 2l, 2l, 2l), .label = c("2l", "2r"), class = "factor"), bp = c(2425901l, 2426025l, 6694426l, 6694566l, 8387755l, 8387927l, 8963713l, 963799l, 980364l, 980521l), gene = structure(c(3l, 3l, 5l, 5l, 4l, 4l, 2l, 2l, 1l, 1l), .label = c("cg8213", "cg8216", "intergenic", "pdm3", "tsp"), class = "factor"), type = structure(c(2l, 1l, 2l, 1l, 3l, 3l, 3l, 4l, 4l, 3l ), .label = c("del", "dup", "inv", "tandup"), class = "factor")), row.names = c(na, 10l), .names = c("sample", "chrom", "bp", "gene", "type"), class = "data.frame") > arrange(svbreaks, chrom, sample) sample chrom bp gene type 1 s1 2l 2425901 intergenic dup 2 s1 2l 2426025 intergenic del 3 s1 2l 6694426 tsp dup 4 s2 2l 6694566 tsp del 5 s2 2l 8387755 pdm3 inv 6 s2 2l 8387927 pdm3 inv 7 s1 2r 8963713 cg8216 inv 8 s1 2r 963799 cg8216 tandup 9 s1 2r 980521 cg8213 inv 10 s2 2r 980364 cg8213 tandup
and function calculates distance between pos
in data
, bp
in svbreaks
on same chrom
each sample:
fun3 <- function(p) { index<-which.min(abs(sv_df$bp - p)) closestbp<-as.numeric(sv_df$bp[index]) chrom<-as.character(sv_df$chrom[index]) gene<-as.character(sv_df$gene[index]) sample<-as.character(sv_df$sample[index]) type<-as.character(sv_df$type[index]) dist<-(p-closestbp) list(p, closestbp, dist, chrom, gene, type, sample) }
i want loop through each chrom
, , call function each sample
, build data frame containing sample
s , chrom
s. here's i've got far:
l <- list() (c in levels(data$chrom)){ (s in levels(data$sample)){ # filter in chromosome , sample df<-filter(data, chrom == c & sample == s) sv_df<-filter(svbreaks, chrom == c & sample == s) # apply funciton dist2bp<-lapply(df$pos, fun3) dist2bp<-do.call(rbind, dist2bp) dist2bp<-as.data.frame(dist2bp) colnames(dist2bp)=c("snp", "closest_bp", "min_dist", "chrom", "closest_gene", "type", "sample") l[[s]] <- dist2bp } }
however, not give me expected output, , saves second level of chrom
- here's resulting data frame:
> levels(data$chrom) [1] "2l" "2r" > levels(data$sample) [1] "s1" "s2" snp closest_bp min_dist chrom closest_gene type sample s1.1 2031592 980521 1051071 2r cg8213 inv s1 s1.2 2886957 980521 1906436 2r cg8213 inv s1 s1.3 2910379 980521 1929858 2r cg8213 inv s1 s2 216957 980364 -763407 2r cg8213 tandup s2
if add list in chrom
loop, output not correct:
for (c in levels(data$chrom)){ (s in levels(data$sample)){ [...] l[[s]] <- dist2bp } l[[c]] <- dist2bp } snp closest_bp min_dist chrom closest_gene type sample s1.1 2031592 980521 1051071 2r cg8213 inv s1 # chrom level 2, s1 s1.2 2886957 980521 1906436 2r cg8213 inv s1 s1.3 2910379 980521 1929858 2r cg8213 inv s1 s2 216957 980364 -763407 2r cg8213 tandup s2 # chrom level 2, s2 2l.1 605574 6694566 -6088992 2l tsp del s2 # chrom level 1, s2 2l.2 1014043 6694566 -5680523 2l tsp del s2 2l.3 105574 6694566 -6588992 2l tsp del s2 2l.4 1344043 6694566 -5350523 2l tsp del s2 2r 216957 980364 -763407 2r cg8213 tandup s2 # not sure why here, # chrom level 1, s1 missing
can point me in right direction?
Comments
Post a Comment