'R Tidyverse - Counting the number a word appears in a list by group

I am currently working on the following:

I have two dataframes. One dataframe contains a number of inventors per company and I would like to know how often their name appears in another dataframe in the same company.The company identifier (df_itemnumber_rounded) in both dataframes is called the same and present in both dataframes.

Example:

First dataframe includes:

df_itemnumber_rounded <- c(df_2002_77, df_2002_77, df_2002_77, df_2002_78
,df_2002_767)

assignees_split <- c("DSM IP ASSETS BV","DSM NV,GIST BROCADES NV","INRA INST NAT RECH AGRONOMIQUE","DE FORENEDE BRYGGERIER AS", "FORENEDE BRYGGERIER" )

Second dataframe includes:

df_itemnumber_rounded <- c(df_2002_77,df_2002_77,df_2002_77,df_2002_77,df_2002_77)

citedp_assignee <- c("LANGEJAN AREND","PELLETIER RENE FRANCOIS ROGER ","LESAFFRE &amp; CIE","GIST BROCADES NV ", "DISTILLERS CO YEAST LTD ")

The aim is to have the first dataframe that includes a new variable ("counts") that shows how often a name appears within a company.

So like this:

df_itemnumber_rounded assignees_split count
df_2002_77 DSM IP ASSETS BV 0
df_2002_77 GIST BROCADES NV 1

I have tried to work on it using str_detect and sum but I don't kow how to do it by group instead of letting it run over the whole dataframe.

counts <- test_distinct_cleaned %>% 
  group_by(df_itemnumber_rounded,assignees_split) %>% 
mutate(counts=map_int(tolower(test_distinct_cleaned$assignees_split),~sum(str_detect(tolower(match_with_cleaned$citedp_assignee),.x))))

However, it takes a long time and does not seem to come to an end. I have tried the solution above without the group_by function on a smaller df where it counts all the appearances of a name and not just the one of the same company. So I am not sure whether this works at all with the group_by and if there is a faster way to do it. The first df has 17000 lines and the second to match with has over 150000 lines...

Below there is a real example of the data:

Dataframe 1 are the assignees.

    structure(list(df_itemnumber_rounded = c("df_2012_2175", "df_2012_2175", 
"df_2012_2175", "df_2012_2175", "df_2012_2175", "df_2012_2175", 
"df_2012_2175", "df_2002_4897", "df_2002_4897", "df_2012_9460", 
"df_2012_9460", "df_2012_9460", "df_2012_9460", "df_2016_6247", 
"df_2016_6247", "df_2016_6247", "df_2016_6248", "df_2016_6248", 
"df_2016_6248", "df_2016_6248", "df_2016_6248", "df_2016_6248", 
"df_2012_9459", "df_2012_9459", "df_2016_14178", "df_2016_14178", 
"df_2016_14689", "df_2016_14689", "df_2016_15814", "df_2016_15814", 
"df_2012_2360", "df_2012_2360", "df_2012_2360", "df_2012_2360", 
"df_2012_2360", "df_2012_2360", "df_2012_2360", "df_2012_8944", 
"df_2012_8944", "df_2012_8944", "df_2012_3604", "df_2012_3604", 
"df_2012_3604", "df_2012_3604", "df_2012_4967", "df_2012_4967", 
"df_2012_4967", "df_2012_7883", "df_2012_7883", "df_2012_7883", 
"df_2012_4836", "df_2012_4836", "df_2012_4836", "df_2002_18770", 
"df_2012_1305", "df_2012_2576", "df_2012_10710", "df_2012_5541", 
"df_2012_5578", "df_2012_5635", "df_2012_6044", "df_2012_6219", 
"df_2012_6317", "df_2012_6488", "df_2012_6500", "df_2012_6613", 
"df_2012_6615", "df_2012_6679", "df_2012_6928", "df_2012_6977", 
"df_2012_7489", "df_2012_7552", "df_2012_7667", "df_2012_8017", 
"df_2012_8302", "df_2012_8555", "df_2012_8809", "df_2012_9085", 
"df_2012_9171", "df_2012_9298", "df_2012_9813", "df_2012_10236", 
"df_2012_10437", "df_2012_10532", "df_2012_10602", "df_2012_11037", 
"df_2012_11070", "df_2012_11183", "df_2012_11606", "df_2012_12362", 
"df_2012_12618", "df_2016_678", "df_2016_790", "df_2016_1079", 
"df_2016_1414", "df_2016_1539", "df_2016_2074", "df_2016_2167", 
"df_2016_2314", "df_2016_2769"), `Publication Number` = c("WO2006046567A1", 
"WO2006046567A1", "WO2006046567A1", "WO2006046567A1", "WO2006046567A1", 
"WO2006046567A1", "WO2006046567A1", "DE3149931A1", "DE3149931A1", 
"WO2013002007A1", "WO2013002007A1", "WO2013002007A1", "WO2013002007A1", 
"WO2016114276A1", "WO2016114276A1", "WO2016114276A1", "WO2016114277A1", 
"WO2016114277A1", "WO2016114277A1", "WO2016114277A1", "WO2016114277A1", 
"WO2016114277A1", "JP2013005781A", "JP2013005781A", "WO2017094654A1", 
"WO2017094654A1", "JP2017112924A", "JP2017112924A", "WO2017169107A1", 
"WO2017169107A1", "WO2006070828A1", "WO2006070828A1", "WO2006070828A1", 
"WO2006070828A1", "WO2006070828A1", "WO2006070828A1", "WO2006070828A1", 
"JP2012183063A", "JP2012183063A", "JP2012183063A", "WO2007097088A1", 
"WO2007097088A1", "WO2007097088A1", "WO2007097088A1", "WO2009017116A1", 
"WO2009017116A1", "WO2009017116A1", "WO2011145670A1", "WO2011145670A1", 
"WO2011145670A1", "WO2008153118A1", "WO2008153118A1", "WO2008153118A1", 
"JP2013066497A", "JP2011030577A", "JP2011142922A", "JP2012105673A", 
"JP2009213393A", "JP2009225740A", "JP2009254247A", "AU2008297027A1", 
"JP2010130902A", "JP2010136658A", "JP2010207213A", "JP2010207217A", 
"JP2010220529A", "JP2010220536A", "JP2010252640A", "JP2011036129A", 
"JP2011030517A", "JP2011135841A", "JP2011142890A", "JP2011206047A", 
"JP2012024081A", "JP2012055235A", "JP2012105572A", "JP2012147775A", 
"JP2012213373A", "WO2012147465A1", "JP2012244965A", "JP2013042751A", 
"WO2013073628A1", "JP2013143938A", "JP2013150602A", "JP2013165707A", 
"JP2013255490A", "JP2013243970A", "JP2014000055A", "JP2014057537A", 
"EP2737810A1", "JP2014128251A", "JP2014217315A", "WO2014192826A1", 
"JP2015015926A", "WO2015029605A1", "JP2015053920A", "WO2015064748A1", 
"JP2015116187A", "JP2015100294A", "WO2015098744A1"), assignees_split = structure(c(19L, 
15L, 17L, 1L, 4L, 5L, 18L, 20L, 16L, 21L, 11L, 12L, 14L, 21L, 
12L, 14L, 21L, 12L, 15L, 2L, 3L, 8L, 21L, 14L, 21L, 14L, 21L, 
14L, 21L, 14L, 21L, 15L, 17L, 6L, 7L, 9L, 10L, 22L, 15L, 16L, 
23L, 13L, 15L, 16L, 23L, 15L, 16L, 23L, 15L, 16L, 24L, 15L, 16L, 
25L, 25L, 25L, 25L, 25L, 25L, 25L, 25L, 25L, 25L, 25L, 25L, 25L, 
25L, 25L, 25L, 25L, 25L, 25L, 25L, 25L, 25L, 25L, 25L, 25L, 25L, 
25L, 25L, 25L, 25L, 25L, 25L, 25L, 25L, 25L, 25L, 25L, 25L, 25L, 
25L, 25L, 25L, 25L, 25L, 25L, 25L, 25L), .Label = c(" FURUKUBO S ", 
" IBUSUKI D ", " ICHIMURA A ", " IZUMI T ", " KAKUDO Y ", " KIMURA M ", 
" MAKI H ", " MIYAO Y", " NAGAO K ", " NAKAHARA K", " SUNTORY BEVERAGE&FOOD LTD ", 
" SUNTORY FOOD & BEVERAGE CO LTD ", " SUNTORY HOLDINGS CO LTD ", 
" SUNTORY HOLDINGS LTD", " SUNTORY HOLDINGS LTD ", " SUNTORY LTD", 
" SUNTORY LTD ", " TAKAOKA S", "KOMATSU MFG CO LTD ", "SUN CHEM CORP ", 
"SUNTORY BEVERAGE & FOOD LTD ", "SUNTORY FOOD & BEVERAGE CO LTD ", 
"SUNTORY HOLDING LTD ", "SUNTORY HOLDINGS CO LTD ", "SUNTORY HOLDINGS LTD"
), class = "factor")), row.names = c(NA, -100L), class = c("tbl_df", 
"tbl", "data.frame"))

Dataframe are the assignees of the cited patents so cited_assignees.

structure(list(df_itemnumber_rounded = c("df_2012_2175", "df_2012_2175", 
"df_2012_2175", "df_2012_2175", "df_2012_2175", "df_2012_2175", 
"df_2002_4897", "df_2012_9460", "df_2012_9460", "df_2012_9460", 
"df_2012_9460", "df_2012_9460", "df_2012_9460", "df_2012_9460", 
"df_2016_6247", "df_2016_6247", "df_2016_6247", "df_2016_6247", 
"df_2016_6247", "df_2016_6247", "df_2016_6248", "df_2016_6248", 
"df_2016_6248", "df_2016_6248", "df_2016_6248", "df_2016_6248", 
"df_2012_9459", "df_2012_9459", "df_2016_14178", "df_2016_14178", 
"df_2016_14178", "df_2016_14178", "df_2016_14178", "df_2016_14178", 
"df_2016_14178", "df_2016_14178", "df_2016_14178", "df_2016_14178", 
"df_2016_14178", "df_2016_14178", "df_2016_14178", "df_2016_14689", 
"df_2016_14689", "df_2016_14689", "df_2016_14689", "df_2016_14689", 
"df_2016_14689", "df_2016_15814", "df_2016_15814", "df_2016_15814", 
"df_2016_15814", "df_2016_15814", "df_2012_2360", "df_2012_2360", 
"df_2012_2360", "df_2012_2360", "df_2012_2360", "df_2012_2360", 
"df_2012_2360", "df_2012_2360", "df_2012_2360", "df_2012_2360", 
"df_2012_2360", "df_2012_2360", "df_2012_8944", "df_2012_3604", 
"df_2012_3604", "df_2012_3604", "df_2012_3604", "df_2012_3604", 
"df_2012_3604", "df_2012_3604", "df_2012_3604", "df_2012_3604", 
"df_2012_4967", "df_2012_4967", "df_2012_4967", "df_2012_4967", 
"df_2012_4967", "df_2012_4967", "df_2012_4967", "df_2012_4967", 
"df_2012_4967", "df_2012_4967", "df_2012_4967", "df_2012_4967", 
"df_2012_4967", "df_2012_4967", "df_2012_4967", "df_2012_4967", 
"df_2012_4967", "df_2012_4967", "df_2012_4967", "df_2012_4967", 
"df_2012_7883", "df_2012_7883", "df_2012_7883", "df_2012_7883", 
"df_2012_7883", "df_2012_4836"), citedp_assignee = structure(c(40L, 
23L, 48L, 6L, 15L, 13L, 12L, 30L, 1L, 52L, 53L, 58L, 56L, 3L, 
52L, 52L, 16L, 66L, 52L, 67L, 16L, 66L, 52L, 68L, 52L, 51L, 63L, 
3L, 60L, 27L, 45L, 52L, 9L, 73L, 33L, 25L, 52L, 50L, 62L, 73L, 
64L, 2L, 59L, 28L, 28L, 52L, 46L, 4L, 42L, 1L, 42L, 43L, 31L, 
70L, 32L, 57L, 39L, 14L, 35L, 11L, 56L, 32L, 32L, 55L, 17L, 22L, 
22L, 56L, 56L, 5L, 36L, 56L, 41L, 21L, 20L, 29L, 18L, 19L, 47L, 
7L, 63L, 4L, 38L, 44L, 65L, 56L, 44L, 26L, 34L, 65L, 69L, 10L, 
24L, 8L, 71L, 49L, 54L, 61L, 72L, 37L), .Label = c("  ", "ABURADA MASAKI,TOKIWA PHYTOCHEMICAL CO LTD ", 
"ASAHI BREWERIES LTD", "ASAHI BREWERIES LTD ", "ASAHI BREWERIES LTD,JIBIKI MAKIKO ", 
"CERESTAR HOLDING BV ", "CHOYA UMESHU CO LTD ", "CHUKO SHUZO KK", 
"CREAGRI INC,CREA ROBERTO,MATTEUZZI FRANCESCO,ASTORE STEFANO,MILIONI IVANO ", 
"DEZAINAA FOODS KYOKAI KK ", "FUKUI HISASHI ", "GULF OIL CORP", 
"HOKKAIDO WAIN KK", "IKEDA SHOKKEN KK ", "INSUCHI PUROBUREMU NADEJINOSUT ", 
"ITO EN LTD ", "ITOEN KK", "JAPAN MAIZE PROD ", "KANEBO LTD ", 
"KANEBO LTD,HASEGAWA T CO LTD ", "KIRIN BREWERY", "KIRIN BREWERY ", 
"KIRIN BREWERY,JAPAN MAIZE PROD ", "KOHJIN CO ", "LION CORP ", 
"MANNS WINE CO LTD ", "MARUNAKA SHOKUHIN KK ", "MARUZEN PHARMA ", 
"MATSUSHITA ELECTRIC IND CO LTD ", "MATSUTANI KAGAKU KOGYO KK ", 
"MEIJI DAIRIES CORP ", "MEIJI MILK PROD CO LTD ", "MILLER BREWING INTERNATIONAL INC,LUSK LANCE T,RYDER DAVID S ", 
"MITSUBISHI CHEM IND ", "MITSUBISHI HEAVY IND LTD,KADO TAKASHI,ISOZAKI TOSHIKAZU ", 
"MIYAGI PREFECTURE,MIYAGIKEN SHUZO KYODO KUMIAI ", "NAT TAX ADMINISTRATION AGENCY ", 
"NIPPON BEET SUGAR MFG ", "OGAWA &amp; CO LTD ", "OKURA SYUZO KK ", 
"OZEKI KK ", "PEPSICO INC ", "PEPSICO INC,LEE THOMAS,NATTRESS LAURA ANN,RIHA WILLIAM", 
"POLA CHEM IND INC ", "RIVERSON KK ", "SAN EI GEN FFI INC", "SAN EI GEN FFI INC ", 
"SAPPORO BREWERIES ", "SAPPORO BREWERIES,OONO MASAO,SANO TOMOHIRO ", 
"SHOWA PHARM CHEM IND ", "SUNTORY HOLDINGS LTD", "SUNTORY HOLDINGS LTD ", 
"SUNTORY HOLDINGS LTD,IDO YOSHIHIRO,KOMINE TETSUYA ", "SUNTORY HOLDINGS LTD,KAGEYAMA NORIHIKO,INUI TAKAKO,TAKAGI DAISUKE ", 
"SUNTORY LTD", "SUNTORY LTD ", "SUNTORY LTD,KAGEYAMA NORIHIKO,NAKAHARA KOICHI,INUI TAKAKO,TAKAOKA SEISUKE,NAGAMI KENZO ", 
"SUNTORY LTD,WATANABE TOKUTOMI,DAIDO HIROMI,YOSHIHIRO AKIRA ", 
"SYMRISE GMBH &amp; CO KG ", "TAISHO PHARMA CO LTD ", "TAKARA HOLDINGS INC ", 
"TAKARA SHOKUHIN KK ", "TAKARA SHUZO CO ", "TAKASAGO PERFUMERY CO LTD", 
"TOKYO SHIBAURA ELECTRIC CO ", "TOYO SEIKAN KAISHA LTD ", "TROPICANA PROD INC", 
"TROPICANA PROD INC ", "TSUKISHIMA KIKAI CO ", "UNILEVER PLC,UNILEVER NV,LEVER HINDUSTAN LTD ", 
"WOONGJIN FOODS CO LTD ", "YAKULT HONSHA KK,KUMAMOTOKEN KAJITSU NOGYO KYOD", 
"YAMADA YASUYUKI "), class = "factor")), row.names = c(NA, -100L
), class = c("tbl_df", "tbl", "data.frame"))


Sources

This article follows the attribution requirements of Stack Overflow and is licensed under CC BY-SA 3.0.

Source: Stack Overflow

Solution Source