'using for loop or lapply to append to rows of data frame in r from the ttest
Alright I have checked the internet, and I am still creating a data frame of replicated lines. I have a for loop that is creating the welch t-test results, I have saved the values like such:
gene <- biomarkers$Symbol
pval <- ttest$p.value
tstat <- ttest$statistic
I tried to iterate with the for loop to add the results to a data frame which created at the start of the chunk
df2 <- data.frame(pathol=(character()),
genes=character(),
p_value=character(),
t_stat=character(),
stringsAsFactors=FALSE)
for (gene in biomarkers$Symbol) {
print(gene)
ddat <- degs[degs$Symbol== gene & degs$pathol=="mitosis",]
ttest <- t.test(logFC ~ value, data = ddat)
print (ttest)
df2[nrow(df2) + 1,] #(this added the 10 genes only to the rows, not to the column)
then I realised I need to use lapply...which I tried this:
prova <- lapply(biomarkers$Symbol, function(gene) {
append = (gene)
#ttest <- t.test(logFC ~ value, data = ddat)
})
do.call(rbind, prova)
this created a list of the ten genes, however when I uncomment the variable 'ttest', it just adds a list of:
[1,] "logFC by value"
[2,] "logFC by value"
[3,] "logFC by value"
[4,] "logFC by value"
[5,] "logFC by value"
[6,] "logFC by value"
[7,] "logFC by value"
[8,] "logFC by value"
[9,] "logFC by value"
[10,] "logFC by value"
I would like to end up with a data frame that looks like this:
| pathol | genes | p_value | t_stat |
|---|---|---|---|
| mitosis | PBK | 0.05 | 000.4 |
| mitosis | PLK4 | 0.02 | 000.9 |
#the gene will be the same for all of the rows. any help would be great!
EDIT
dput output:
dput(head(ddat))
structure(list(experiment = c("FP001RO_15_HI", "FP001RO_15_HI",
"FP001RO_15_HI", "FP001RO_15_LOW", "FP001RO_15_LOW", "FP001RO_15_LOW"
), Human.Gene.entrezID = c("57405", "57405", "57405", "57405",
"57405", "57405"), Symbol = c("SPC25", "SPC25", "SPC25", "SPC25",
"SPC25", "SPC25"), description = c("SPC25 component of NDC80 kinetochore complex",
"SPC25 component of NDC80 kinetochore complex", "SPC25 component of NDC80 kinetochore complex",
"SPC25 component of NDC80 kinetochore complex", "SPC25 component of NDC80 kinetochore complex",
"SPC25 component of NDC80 kinetochore complex"), score = c(3.867,
3.867, 3.867, 3.867, 3.867, 3.867), type = c("WGGNC", "WGGNC",
"WGGNC", "WGGNC", "WGGNC", "WGGNC"), pathol = c("mitosis", "mitosis",
"mitosis", "mitosis", "mitosis", "mitosis"), Probeid = c("295661_at",
"295661_at", "295661_at", "295661_at", "295661_at", "295661_at"
), logFC = c(-0.0641349806730976, -0.0641349806730976, -0.0641349806730976,
-0.0324566291924587, -0.0324566291924587, -0.0324566291924587
), AveExpr = c(4.1541958195567, 4.1541958195567, 4.1541958195567,
4.17003499529702, 4.17003499529702, 4.17003499529702), t = c(-0.567682269120301,
-0.567682269120301, -0.567682269120301, -0.214562465957216, -0.214562465957216,
-0.214562465957216), P.Value = c(0.580865708246137, 0.580865708246137,
0.580865708246137, 0.833648126277364, 0.833648126277364, 0.833648126277364
), adj.P.Val = c(0.828914361133465, 0.828914361133465, 0.828914361133465,
0.999594589241814, 0.999594589241814, 0.999594589241814), B = c(-6.4683360535952,
-6.4683360535952, -6.4683360535952, -5.45944975240508, -5.45944975240508,
-5.45944975240508), cpd = c("FP001RO", "FP001RO", "FP001RO",
"FP001RO", "FP001RO", "FP001RO"), time = c(15, 15, 15, 15, 15,
15), dose = c("HI", "HI", "HI", "LOW", "LOW", "LOW"), entrezgene_rat = c(295661,
295661, 295661, 295661, 295661, 295661), external_gene_name_rat = c("Spc25",
"Spc25", "Spc25", "Spc25", "Spc25", "Spc25"), external_gene_name_human = c("SPC25",
"SPC25", "SPC25", "SPC25", "SPC25", "SPC25"), entrezGene_probes_human = c("57405_at",
"57405_at", "57405_at", "57405_at", "57405_at", "57405_at"),
inMap_human_withGrey = c(1, 1, 1, 1, 1, 1), inMap_rat_withGrey = c(1,
1, 1, 1, 1, 1), variable = structure(c(11L, 12L, 10L, 10L,
11L, 12L), .Label = c("Necrosis1", "Necrosis2", "Necrosis3",
"hyperpl1", "hyperpl2", "hyperpl3", "fibrosis", "hypertrophy1",
"hypertrophy2", "mitosis1", "mitosis2", "mitosis3", "vacuolation1",
"vacuolation2", "vacuolation3"), class = "factor"), value = structure(c(1L,
1L, 1L, 1L, 1L, 1L), .Label = c("0", "1"), class = "factor"),
pathology = c("mitosis", "mitosis", "mitosis", "mitosis",
"mitosis", "mitosis")), row.names = c(13L, 14L, 15L, 55L,
56L, 57L), class = "data.frame")
dput of gene
dput(biomarkers$Symbol)
c("PBK", "PLK4", "CDKN3", "CDCA3", "PRC1", "CDK1", "TCF19", "SHCBP1",
"CENPK", "SPC25")
Solution 1:[1]
We may use
prova <- lapply(biomarkers$Symbol, function(gene) {
ddat <- subset(degs, Symbol == gene & pathol =="mitosis")
fmla <- reformulate('value', response = 'logFC')
if(nlevels(droplevels(ddat$val)) >=2) {
ttest <- t.test(fmla, data = ddat)
data.frame(pathol = 'mitosis', genes = gene,
p_value = ttest$p.value, t_stat = ttest$statistic)
} else NULL
})
names(prova) <- biomarkers$Symbol
out <- do.call(rbind, prova)
Solution 2:[2]
Addendum to your question, remember to perform multiple hypothesis testing correction when doing lots of tests at once.
Working off of akrun's answer, here's BH correction:
ord = order(prova$p.value)
prova$fdr = prova$p.value*nrow(prova)/ord
This will reduce the number of false positives.
Sources
This article follows the attribution requirements of Stack Overflow and is licensed under CC BY-SA 3.0.
Source: Stack Overflow
| Solution | Source |
|---|---|
| Solution 1 | |
| Solution 2 |
