'Identifying episodes with R

I'm working with an unbalanced panel that contains information across various countries for several years. One of the variables I have is dist, which represents the log-deviations from HP-filtered GDP. If dist>1, then dummy=1.

structure(list(country = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 9L, 9L, 
9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 10L, 10L, 
10L, 10L, 10L, 10L), .Label = c("Bangladesh", "Barbados", "Benin", 
"Burundi", "Cameroon", "Chile", "Cyprus", "Ecuador", "Equatorial Guinea", 
"Gabon", "Ghana", "Guatemala", "Guinea", "Guyana", "Haiti", "India", 
"Jamaica", "Jordan", "Lebanon", "Liberia", "Madagascar", "Mali", 
"Mexico", "Morocco", "Mozambique", "Nepal", "Nicaragua", "Niger", 
"Oman", "Pakistan", "Panama", "Papua New Guinea", "Peru", "Rwanda", 
"Senegal", "Seychelles", "Sierra Leone", "Singapore", "Sri Lanka", 
"Sudan", "Togo", "Trinidad and Tobago", "Tunisia", "Turkey", 
"Uganda", "Zambia", "Zimbabwe"), class = c("pseries", "factor"
)), date = structure(c(12L, 13L, 14L, 15L, 16L, 17L, 18L, 19L, 
20L, 21L, 22L, 23L, 24L, 25L, 26L, 27L, 28L, 29L, 30L, 31L, 40L, 
41L, 42L, 43L, 44L, 45L, 46L, 47L, 48L, 49L, 50L, 51L, 52L, 53L, 
54L, 1L, 2L, 3L, 4L, 5L, 6L), .Label = c("1965", "1966", "1967", 
"1968", "1969", "1970", "1971", "1972", "1973", "1974", "1975", 
"1976", "1977", "1978", "1979", "1980", "1981", "1982", "1983", 
"1984", "1985", "1986", "1987", "1988", "1989", "1990", "1991", 
"1992", "1993", "1994", "1995", "1996", "1997", "1998", "1999", 
"2000", "2001", "2002", "2003", "2004", "2005", "2006", "2007", 
"2008", "2009", "2010", "2011", "2012", "2013", "2014", "2015", 
"2016", "2017", "2018"), class = c("pseries", "factor")), dist = c(-2.44153863355447, 
-0.565497304455015, 1.4222058591902, 1.18583114700364, 0.936095936859405, 
-0.40582369098349, 2.08296049774769, -1.59377116576285, 0.316969269882462, 
-0.562808458278515, -0.438155688763343, 0.824980940313303, -0.60293744113302, 
0.397182279339227, 1.34777053122572, 0.00539983501313633, 0.0528824118570846, 
-0.838507946421168, 0.220969370384196, -1.06945294612116, 0.157324834050942, 
0.244080457191975, 0.215931770884978, 0.226176571912671, 0.197663973781873, 
0.183399695771442, 0.334605869075331, 0.0683789169881243, -0.0432432418415798, 
-0.191834311017179, -0.368292801016647, -0.572222256415503, -0.838601784768107, 
-1.15213567684887, -1.49844386483361, 2.39265920977342, 1.90573467650352, 
1.3362714819111, 0.158391029649629, 1.12699318386612, 2.38996532864524
), dummy = c(0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 
1, 0, 1, 1)), row.names = c(1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 
10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L, 19L, 20L, 400L, 
401L, 402L, 403L, 404L, 405L, 406L, 407L, 408L, 409L, 410L, 411L, 
412L, 413L, 414L, 415L, 416L, 417L, 418L, 419L, 420L), class = "data.frame")

I would like to identify episodes in the following way based on the values of dist and dummy. An episode is defined as the time spam for which values of dist>0.5, conditioned on dist>= 1 at some point. In the example above, the episode from Bangladesh lasts for 3 years from 1978-1980 and the one for Guatemala lasts for another 3 years from 2006 to 2008 Any help would be much appreciated!



Sources

This article follows the attribution requirements of Stack Overflow and is licensed under CC BY-SA 3.0.

Source: Stack Overflow

Solution Source