Convert month numbers to month.abb abbreviations in a column of a dataframe?
I have a dataframe called data_subset. In the aggregation_period column, I want to replace every value that holds a month number (e.g., 3, 4, 5, 6, 7) with its month.abb abbreviation (e.g., Mar, Apr, May, Jun, Jul) without affecting the other rows containing AMJ and MAMJJ. I have tried the code chunks below, but could not find a way to reach the required results.
Any thoughts and ideas, please?
> data_subset <- nor_ind_final_s1 %>% filter(source == 'modis')
> head(data_subset, 10)
year aggregation_period value source statistic variable
1 2001 3 0.44 modis mean NDVI
2 2001 4 0.57 modis mean NDVI
3 2001 5 0.62 modis mean NDVI
4 2001 6 0.75 modis mean NDVI
5 2001 7 0.62 modis mean NDVI
6 2002 3 0.28 modis mean NDVI
7 2002 4 0.43 modis mean NDVI
8 2002 5 0.70 modis mean NDVI
9 2002 6 0.80 modis mean NDVI
10 2002 7 0.75 modis mean NDVI
> tail(data_subset)
year aggregation_period value source statistic variable
114 2012 AMJ 0.7000 modis mean NDVI
115 2013 AMJ 0.6450 modis mean NDVI
116 2014 AMJ 0.5825 modis mean NDVI
117 2015 AMJ 0.6500 modis mean NDVI
118 2016 AMJ 0.4375 modis mean NDVI
119 2017 AMJ 0.6575 modis mean NDVI
> # check the levels
> levels(as.factor(data_subset$aggregation_period))
[1] "3" "4" "5" "6" "7" "AMJ" "MAMJJ"
> # create a vector of integers
> m <- c("3", "4", "5", "6", "7")
> # convert months to month.abb
> m.abb <- month.abb[as.numeric(m)]
> # check which rows contain months
> mons <- which(data_subset$aggregation_period == m)
Warning message:
In data_subset$aggregation_period == m :
longer object length is not a multiple of shorter object length
> mons
[1] 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25
[26] 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50
[51] 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75
[76] 76 77 78 79 80 81 82 83 84 85
> # replace months names to month abbreviations
Here is the dput() of my data.
> dput(data_subset)
structure(list(year = c(2001, 2001, 2001, 2001, 2001, 2002, 2002,
2002, 2002, 2002, 2003, 2003, 2003, 2003, 2003, 2004, 2004, 2004,
2004, 2004, 2005, 2005, 2005, 2005, 2005, 2006, 2006, 2006, 2006,
2006, 2007, 2007, 2007, 2007, 2007, 2008, 2008, 2008, 2008, 2008,
2009, 2009, 2009, 2009, 2009, 2010, 2010, 2010, 2010, 2010, 2011,
2011, 2011, 2011, 2011, 2012, 2012, 2012, 2012, 2012, 2013, 2013,
2013, 2013, 2013, 2014, 2014, 2014, 2014, 2014, 2015, 2015, 2015,
2015, 2015, 2016, 2016, 2016, 2016, 2016, 2017, 2017, 2017, 2017,
2017, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010,
2011, 2012, 2013, 2014, 2015, 2016, 2017, 2001, 2002, 2003, 2004,
2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015,
2016, 2017), aggregation_period = c("3", "4", "5", "6", "7",
"3", "4", "5", "6", "7", "3", "4", "5", "6", "7", "3", "4", "5",
"6", "7", "3", "4", "5", "6", "7", "3", "4", "5", "6", "7", "3",
"4", "5", "6", "7", "3", "4", "5", "6", "7", "3", "4", "5", "6",
"7", "3", "4", "5", "6", "7", "3", "4", "5", "6", "7", "3", "4",
"5", "6", "7", "3", "4", "5", "6", "7", "3", "4", "5", "6", "7",
"3", "4", "5", "6", "7", "3", "4", "5", "6", "7", "3", "4", "5",
"6", "7", "MAMJJ", "MAMJJ", "MAMJJ", "MAMJJ", "MAMJJ", "MAMJJ",
"MAMJJ", "MAMJJ", "MAMJJ", "MAMJJ", "MAMJJ", "MAMJJ", "MAMJJ",
"MAMJJ", "MAMJJ", "MAMJJ", "MAMJJ", "AMJ", "AMJ", "AMJ", "AMJ",
"AMJ", "AMJ", "AMJ", "AMJ", "AMJ", "AMJ", "AMJ", "AMJ", "AMJ",
"AMJ", "AMJ", "AMJ", "AMJ"), value = c(0.44, 0.57, 0.62, 0.75,
0.62, 0.28, 0.43, 0.7, 0.8, 0.75, 0.4, 0.46, 0.76, 0.76, 0.58,
0.31, 0.46, 0.59, 0.74, 0.67, 0.4, 0.28, 0.71, 0.75, 0.76, 0.41,
0.33, 0.56, 0.76, 0.62, 0.35, 0.4, 0.66, 0.76, 0.71, 0.39, 0.56,
0.68, 0.78, 0.66, 0.64, 0.44, 0.71, 0.82, 0.65, 0.35, 0.41, 0.71,
0.77, 0.67, 0.59, 0.37, 0.61, 0.72, 0.78, 0.56, 0.6, 0.69, 0.74,
0.77, 0.59, 0.51, 0.56, 0.7, 0.81, 0.31, 0.32, 0.6, 0.68, 0.73,
0.45, 0.48, 0.59, 0.73, 0.8, 0.41, 0.36, 0.22, 0.68, 0.49, 0.42,
0.63, 0.45, 0.79, 0.76, 0.6, 0.592, 0.592, 0.554, 0.58, 0.536,
0.576, 0.614, 0.652, 0.582, 0.614, 0.672, 0.634, 0.528, 0.61,
0.432, 0.61, 0.64, 0.67, 0.64, 0.615, 0.625, 0.5675, 0.6325,
0.67, 0.655, 0.64, 0.62, 0.7, 0.645, 0.5825, 0.65, 0.4375, 0.6575
), source = c("modis", "modis", "modis", "modis", "modis", "modis",
"modis", "modis", "modis", "modis", "modis", "modis", "modis",
"modis", "modis", "modis", "modis", "modis", "modis", "modis",
"modis", "modis", "modis", "modis", "modis", "modis", "modis",
"modis", "modis", "modis", "modis", "modis", "modis", "modis",
"modis", "modis", "modis", "modis", "modis", "modis", "modis",
"modis", "modis", "modis", "modis", "modis", "modis", "modis",
"modis", "modis", "modis", "modis", "modis", "modis", "modis",
"modis", "modis", "modis", "modis", "modis", "modis", "modis",
"modis", "modis", "modis", "modis", "modis", "modis", "modis",
"modis", "modis", "modis", "modis", "modis", "modis", "modis",
"modis", "modis", "modis", "modis", "modis", "modis", "modis",
"modis", "modis", "modis", "modis", "modis", "modis", "modis",
"modis", "modis", "modis", "modis", "modis", "modis", "modis",
"modis", "modis", "modis", "modis", "modis", "modis", "modis",
"modis", "modis", "modis", "modis", "modis", "modis", "modis",
"modis", "modis", "modis", "modis", "modis", "modis", "modis",
"modis"), statistic = c("mean", "mean", "mean", "mean", "mean",
"mean", "mean", "mean", "mean", "mean", "mean", "mean", "mean",
"mean", "mean", "mean", "mean", "mean", "mean", "mean", "mean",
"mean", "mean", "mean", "mean", "mean", "mean", "mean", "mean",
"mean", "mean", "mean", "mean", "mean", "mean", "mean", "mean",
"mean", "mean", "mean", "mean", "mean", "mean", "mean", "mean",
"mean", "mean", "mean", "mean", "mean", "mean", "mean", "mean",
"mean", "mean", "mean", "mean", "mean", "mean", "mean", "mean",
"mean", "mean", "mean", "mean", "mean", "mean", "mean", "mean",
"mean", "mean", "mean", "mean", "mean", "mean", "mean", "mean",
"mean", "mean", "mean", "mean", "mean", "mean", "mean", "mean",
"mean", "mean", "mean", "mean", "mean", "mean", "mean", "mean",
"mean", "mean", "mean", "mean", "mean", "mean", "mean", "mean",
"mean", "mean", "mean", "mean", "mean", "mean", "mean", "mean",
"mean", "mean", "mean", "mean", "mean", "mean", "mean", "mean",
"mean", "mean"), variable = c("NDVI", "NDVI", "NDVI", "NDVI",
"NDVI", "NDVI", "NDVI", "NDVI", "NDVI", "NDVI", "NDVI", "NDVI",
"NDVI", "NDVI", "NDVI", "NDVI", "NDVI", "NDVI", "NDVI", "NDVI",
"NDVI", "NDVI", "NDVI", "NDVI", "NDVI", "NDVI", "NDVI", "NDVI",
"NDVI", "NDVI", "NDVI", "NDVI", "NDVI", "NDVI", "NDVI", "NDVI",
"NDVI", "NDVI", "NDVI", "NDVI", "NDVI", "NDVI", "NDVI", "NDVI",
"NDVI", "NDVI", "NDVI", "NDVI", "NDVI", "NDVI", "NDVI", "NDVI",
"NDVI", "NDVI", "NDVI", "NDVI", "NDVI", "NDVI", "NDVI", "NDVI",
"NDVI", "NDVI", "NDVI", "NDVI", "NDVI", "NDVI", "NDVI", "NDVI",
"NDVI", "NDVI", "NDVI", "NDVI", "NDVI", "NDVI", "NDVI", "NDVI",
"NDVI", "NDVI", "NDVI", "NDVI", "NDVI", "NDVI", "NDVI", "NDVI",
"NDVI", "NDVI", "NDVI", "NDVI", "NDVI", "NDVI", "NDVI", "NDVI",
"NDVI", "NDVI", "NDVI", "NDVI", "NDVI", "NDVI", "NDVI", "NDVI",
"NDVI", "NDVI", "NDVI", "NDVI", "NDVI", "NDVI", "NDVI", "NDVI",
"NDVI", "NDVI", "NDVI", "NDVI", "NDVI", "NDVI", "NDVI", "NDVI",
"NDVI", "NDVI", "NDVI")), row.names = c(NA, -119L), class = "data.frame")
Solution 1:[1]
Using replace on a predefined vector, no warnings.
# Logical index: TRUE where aggregation_period is a month number (1-12).
rp <- data_subset$aggregation_period %in% 1:12
# Replace only the flagged entries with their month.abb abbreviation; the
# remaining labels (e.g. "AMJ", "MAMJJ") are left unchanged. Because only the
# flagged subset is converted to numeric, no coercion warning is raised.
data_subset |>
  transform(
    aggregation_period = replace(
      x = aggregation_period,
      list = rp,
      values = month.abb[as.numeric(aggregation_period[rp])]
    )
  )
# year aggregation_period value source statistic variable
# 1 2001 Mar 0.4400 modis mean NDVI
# 2 2001 Apr 0.5700 modis mean NDVI
# 3 2001 May 0.6200 modis mean NDVI
# 4 2001 Jun 0.7500 modis mean NDVI
# 5 2001 Jul 0.6200 modis mean NDVI
# 6 2002 Mar 0.2800 modis mean NDVI
# ...
# 114 2012 AMJ 0.7000 modis mean NDVI
# 115 2013 AMJ 0.6450 modis mean NDVI
# 116 2014 AMJ 0.5825 modis mean NDVI
# 117 2015 AMJ 0.6500 modis mean NDVI
# 118 2016 AMJ 0.4375 modis mean NDVI
# 119 2017 AMJ 0.6575 modis mean NDVI
Solution 2:[2]
You can use an ifelse statement on the aggregation_period column: if a value is a month number between 1 and 12, it is replaced by the corresponding entry of month.abb; all other values are left unchanged.
head() and tail() of the results are pasted here.
library(dplyr)
# `df` is the data frame from the question (there called `data_subset`).
# The `yes` argument of ifelse() is computed for the whole column, so coercing
# it with as.integer() would warn "NAs introduced by coercion" on the
# "AMJ"/"MAMJJ" rows. match() looks up the month numbers without that
# coercion and simply returns NA for the non-month labels, which ifelse()
# never selects because their test is FALSE.
df %>%
  mutate(
    aggregation_period = ifelse(
      aggregation_period %in% 1:12,
      month.abb[match(aggregation_period, 1:12)],
      aggregation_period
    )
  )
year aggregation_period value source statistic variable
1 2001 Mar 0.4400 modis mean NDVI
2 2001 Apr 0.5700 modis mean NDVI
3 2001 May 0.6200 modis mean NDVI
4 2001 Jun 0.7500 modis mean NDVI
5 2001 Jul 0.6200 modis mean NDVI
6 2002 Mar 0.2800 modis mean NDVI
7 2002 Apr 0.4300 modis mean NDVI
8 2002 May 0.7000 modis mean NDVI
9 2002 Jun 0.8000 modis mean NDVI
10 2002 Jul 0.7500 modis mean NDVI
11 2012 AMJ 0.7000 modis mean NDVI
12 2013 AMJ 0.6450 modis mean NDVI
13 2014 AMJ 0.5825 modis mean NDVI
14 2015 AMJ 0.6500 modis mean NDVI
15 2016 AMJ 0.4375 modis mean NDVI
16 2017 AMJ 0.6575 modis mean NDVI
Sources
This article follows the attribution requirements of Stack Overflow and is licensed under CC BY-SA 3.0.
Source: Stack Overflow
| Solution | Source |
|---|---|
| Solution 1 | jay.sf |
| Solution 2 |
