What's new

groupby and compute a t.test for each combination of groups

Fever

Active member
Staff member
I am trying to perform a t test for each of the groups in the data however I cannot seem to get it correct.

x %>%
group_by(sector, quarter) %>%
drop_na(sector) %>%
nest() %>%
spread(key = quarter, value = data) %>%
mutate(
t_test = map2(`1`, `2`, ~{t.test(.x$value, .y$value) %>% tidy()})
)


I am following this example here and here

Using the second method I have:

x %>%
pivot_wider(names_from = sector, values_from = mean) %>%
drop_na(quantiles) %>%
group_by(quarter, quantiles) %>%
summarise_each(funs(t.test., WholesaleTrade, pair = TRUE)$p.value, vars = Accommodation_and_FoodServices:Utilities)


Data:

structure(list(sector = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 16L, 16L, 16L, 16L, 16L, 16L, 16L,
16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L), .Label = c("Accommodation_and_FoodServices",
"Administrative_Support_and_WasteManagement", "Agriculture",
"ArtsEntertainment_and_Recreation", "EducationalServices", "Finance_and_Insurance",
"HealthCase_and_SocialAssistance", "Information", "Manufacturing",
"Mining", "OtherServices", "ProfessionalScientific_and_Technical",
"RealEstateRental_and_Leasing", "RetailTrade", "Transportation_and_Warehousing",
"Utilities", "WholesaleTrade"), class = "factor"), quarter = c(1L,
1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 4L, 4L, 4L, 4L, 4L, 1L, 1L,
1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 4L, 4L, 4L, 4L, 4L, 1L, 1L, 1L,
1L, 1L, 2L, 2L, 2L, 2L, 2L, 4L, 4L, 4L, 4L, 4L), quantitles = c(1L,
2L, 3L, 4L, 5L, 1L, 2L, 3L, 4L, 5L, 1L, 2L, 3L, 4L, 5L, 1L, 2L,
3L, 4L, 5L, 1L, 2L, 3L, 4L, 5L, 1L, 2L, 3L, 4L, 5L, 1L, 2L, 3L,
4L, 5L, 1L, 2L, 3L, 4L, 5L, 1L, 2L, 3L, 4L, 5L), mean = c(-0.000848481704318871,
-0.0026420727647309, 0.00245840107615485, 0.0045025566876799,
-0.00470304939350074, -0.000918582128316931, 0.00039382864210062,
-0.00105807496143496, 0.000521420406174422, 0.000353402914604492,
-0.000599300935646763, 0.000769987495857269, 0.000798216387633237,
-0.000721519158311539, 0.000598817828569857, -0.000975357032871162,
0.00140710698955251, 0.00275658323647339, -0.0050080691188512,
-0.00310397183523041, 0.0023014166265359, -0.00165402601584748,
-0.00087449305266732, 0.000305906121179782, -0.00390265399553028,
0.0048288459889199, 0.00265486458585242, 0.00134927708059983,
-0.00141242453991298, 0.00421637246849654, -0.00824950394615583,
0.0324871469911992, -0.000664499471897156, 0.00150142317261284,
-0.00181089433316555, -0.00110831570113468, -0.000910050322363531,
-0.00123530308787296, 0.00080685752952673, 0.00295592416541709,
0.00235712325715245, -0.00021229673461101, -0.000519026702048537,
0.00292403580192125, -0.00114799380466835)), class = c("tbl_df",
"tbl", "data.frame"), row.names = c(NA, -45L))

Continue reading...
 
Top