'Multiple variable distribution plot using ggplot2

I have different categorical variable which I would like to show in terms of distribution.

the plot im trying to recreate

So in my data-frame I have like 147 patients and their traits such as age,gender,disease subtypes etc etc.

This is my dataframe subset

head(plot_meta)
       patient    Sex FAB Diagnosis-Age Bone-Marrow-Blast-Percentage                                                          Cytogenetics
1 TCGA-AB-2856   Male  M4            63                           82                                                             46,XY[20]
2 TCGA-AB-2849   Male  M0            39                           83 47,XY,del(5)(q22q33),t(10;11)(p13~p15;q22~23),i(17)(q10)[3]/46,XY[17]
3 TCGA-AB-2971 Female  M4            76                           91                                                             46,XX[20]
4 TCGA-AB-2930 Female  M2            62                           72                                                            46, XX[20]
5 TCGA-AB-2891   Male  M1            42                           68           45,XY,der(7)(t:7;12)(p11.1;p11.2),-12,-13,+mar[19]/46,XY[1]
6 TCGA-AB-2872   Male  M3            42                           88         46,XY,del(9)(q13:q22),t(11:21)(p13;q22),t(15;17)(q22;q210[20]
  Cytogenetic-Code--Other-     Induction
1         Normal Karyotype 7+3+Genasense
2     Complex Cytogenetics         7+3+3
3         Normal Karyotype           7+3
4         Normal Karyotype       7+3+AMD
5     Complex Cytogenetics         7+3+3
6                 PML-RARA      7+3+ATRA

So my goal is to show the patient traits distribution as the above figure.

a <- dput(head(plot_meta))
structure(list(patient = structure(c(36L, 33L, 122L, 95L, 66L, 
49L), .Label = c("TCGA-AB-2805", "TCGA-AB-2806", "TCGA-AB-2808", 
"TCGA-AB-2810", "TCGA-AB-2811", "TCGA-AB-2812", "TCGA-AB-2813", 
"TCGA-AB-2814", "TCGA-AB-2815", "TCGA-AB-2817", "TCGA-AB-2818", 
"TCGA-AB-2819", "TCGA-AB-2820", "TCGA-AB-2821", "TCGA-AB-2822", 
"TCGA-AB-2823", "TCGA-AB-2825", "TCGA-AB-2826", "TCGA-AB-2828", 
"TCGA-AB-2830", "TCGA-AB-2834", "TCGA-AB-2835", "TCGA-AB-2836", 
"TCGA-AB-2839", "TCGA-AB-2840", "TCGA-AB-2841", "TCGA-AB-2842", 
"TCGA-AB-2843", "TCGA-AB-2844", "TCGA-AB-2845", "TCGA-AB-2846", 
"TCGA-AB-2847", "TCGA-AB-2849", "TCGA-AB-2851", "TCGA-AB-2853", 
"TCGA-AB-2856", "TCGA-AB-2857", "TCGA-AB-2858", "TCGA-AB-2859", 
"TCGA-AB-2861", "TCGA-AB-2862", "TCGA-AB-2863", "TCGA-AB-2865", 
"TCGA-AB-2866", "TCGA-AB-2867", "TCGA-AB-2869", "TCGA-AB-2870", 
"TCGA-AB-2871", "TCGA-AB-2872", "TCGA-AB-2873", "TCGA-AB-2874", 
"TCGA-AB-2875", "TCGA-AB-2876", "TCGA-AB-2877", "TCGA-AB-2878", 
"TCGA-AB-2880", "TCGA-AB-2881", "TCGA-AB-2882", "TCGA-AB-2883", 
"TCGA-AB-2884", "TCGA-AB-2885", "TCGA-AB-2886", "TCGA-AB-2888", 
"TCGA-AB-2889", "TCGA-AB-2890", "TCGA-AB-2891", "TCGA-AB-2892", 
"TCGA-AB-2893", "TCGA-AB-2894", "TCGA-AB-2895", "TCGA-AB-2896", 
"TCGA-AB-2897", "TCGA-AB-2898", "TCGA-AB-2899", "TCGA-AB-2900", 
"TCGA-AB-2901", "TCGA-AB-2908", "TCGA-AB-2910", "TCGA-AB-2911", 
"TCGA-AB-2912", "TCGA-AB-2913", "TCGA-AB-2914", "TCGA-AB-2915", 
"TCGA-AB-2916", "TCGA-AB-2917", "TCGA-AB-2918", "TCGA-AB-2919", 
"TCGA-AB-2920", "TCGA-AB-2921", "TCGA-AB-2924", "TCGA-AB-2925", 
"TCGA-AB-2927", "TCGA-AB-2928", "TCGA-AB-2929", "TCGA-AB-2930", 
"TCGA-AB-2931", "TCGA-AB-2932", "TCGA-AB-2933", "TCGA-AB-2934", 
"TCGA-AB-2935", "TCGA-AB-2936", "TCGA-AB-2937", "TCGA-AB-2938", 
"TCGA-AB-2939", "TCGA-AB-2940", "TCGA-AB-2941", "TCGA-AB-2942", 
"TCGA-AB-2943", "TCGA-AB-2944", "TCGA-AB-2946", "TCGA-AB-2948", 
"TCGA-AB-2949", "TCGA-AB-2950", "TCGA-AB-2952", "TCGA-AB-2955", 
"TCGA-AB-2956", "TCGA-AB-2959", "TCGA-AB-2963", "TCGA-AB-2965", 
"TCGA-AB-2966", "TCGA-AB-2970", "TCGA-AB-2971", "TCGA-AB-2973", 
"TCGA-AB-2975", "TCGA-AB-2976", "TCGA-AB-2977", "TCGA-AB-2979", 
"TCGA-AB-2980", "TCGA-AB-2981", "TCGA-AB-2982", "TCGA-AB-2983", 
"TCGA-AB-2984", "TCGA-AB-2986", "TCGA-AB-2987", "TCGA-AB-2988", 
"TCGA-AB-2990", "TCGA-AB-2991", "TCGA-AB-2992", "TCGA-AB-2994", 
"TCGA-AB-2995", "TCGA-AB-2996", "TCGA-AB-2998", "TCGA-AB-2999", 
"TCGA-AB-3000", "TCGA-AB-3001", "TCGA-AB-3002", "TCGA-AB-3007", 
"TCGA-AB-3008", "TCGA-AB-3009", "TCGA-AB-3011", "TCGA-AB-3012"
), class = "factor"), Sex = structure(c(2L, 2L, 1L, 1L, 2L, 2L
), .Label = c("Female", "Male"), class = "factor"), FAB = structure(c(5L, 
1L, 5L, 3L, 2L, 4L), .Label = c("M0", "M1", "M2", "M3", "M4", 
"M5", "M6", "M7", "nc"), class = "factor"), `Diagnosis-Age` = c(63L, 
39L, 76L, 62L, 42L, 42L), `Bone-Marrow-Blast-Percentage` = c(82L, 
83L, 91L, 72L, 68L, 88L), Cytogenetics = structure(c(75L, 93L, 
51L, 27L, 21L, 57L), .Label = c("37~49,XY,+Y,der(1)add(1)(p13)del(1)(q21q25),-5,der(7)inv(7)(p15q11.2)?inv(7)(q22q32),+17,add(17)(p13),+21,+mar[cp20]", 
"39~47,XX,del(5)(q13q33),-7,der(8)t(8;?8;8)(p23;?p11.2p23;q11.2),der(14)t(1;14)(p12;p11.2)der(1)t(7;16)(p15;q22),+2mar[cp19]", 
"41~44,X,?i(X)(p10),-7,der(12)t(8;12)(q11.2;p11.2),-8 [cp11]/46,XX[8[", 
"42,XY,-5,-7,add(12)(p13),t(14;15)(q10;q10),der(17)t(5;17)(p13;p11.2),-18[6]/40,idem,-11,-add(12)(p13),der(12)t(?;12)(?;p13),-19[6]/41,idem,-der(17)[3]/41,idem,-der(17),+mar1,+mar[3]/41,idem,der(1)der(1)(p12)add(1)(p12),+der(1)(q21)add(1)(q21),-3,-8[2]", 
"43,XY-3,del(5)(q12q33),-7,der(10)t(10;11)(q26;q13),-12,-18,+2mar[20]", 
"44-45,X,-Y,-5,add(16)(q22),-17,-18,iso(21),+mars[cp5]/82-84,XX,-Y,-3,-4,-11,-12,-19,-21,+21[cp5}", 
"44~46,XX,del(11)(q23),der(19)?t(11;19)(q23;p13.1)[cp11]/44~45,XX,-19[cp4]/46,XX [5]", 
"44~47,XX,t(1;15)(q32;q26)[14],del(5)(q13q33)[19],-7[20],+8[7],del(12)(p11.2p11.2)[15],del(17)(q21)[8],der(22)t(1;22)(p13;p11.2)[20],+mar[13][cp20]", 
"44~47,XY,del(5)(q22q35)[20],-7[14],-8[6],der(12)t(10;12)(p11.2q21)[2],add(14)(p12)[11],-17[13],der(17)t(10;17)(q11.2;p13)[14],-18[7],add(18)(p11.2)[7],-21[10],i(21)(q10)[4],-22[4],+mar[10],+mar1x2[6][cp20]", 
"45,X,-X,t(8;21)(q22;q22)[20]", "45,X,-Y, t(8;7;21)(q22;p15;q22[22]/46,XY[3]", 
"45,X,-Y,t(8;21)(q22;q22)[13]/45,idem,del(9)(q22;q32)[7]", "45,X,-Y,t(8;21)(q22;q22)[19]/46,XY[1]", 
"45,X,-Y[3]/46,XY [17]", "45,XX-7[5]-only 5 metaphases", "45,XX,-7,t(9;11)(p22;q23)[19]/46,XX[1]", 
"45,XX,-7[12]/46,XX[8]", "45,XX,-7[20]", "45,XY,-7, t(9;22)(q34;q11.20) [19]/46,XY[1]", 
"45,XY,-7[20]", "45,XY,der(7)(t:7;12)(p11.1;p11.2),-12,-13,+mar[19]/46,XY[1]", 
"45~46,XY,add(X)(q22)[7],Y[4],der(5)t(5;17)(q13;21)[18],-7[18],+8[17],del(12)(q23)[16],-17[18],add(18)(p11.2)[14][cp18]", 
"46, XX[14]", "46, XX[15]", "46, XX[16]", "46, XX[19]", "46, XX[20]", 
"46, XY[15]", "46, XY[20]", "46,XX,1~50dmin[12]/46,idem,der(6)t(6;?)(q22;?)[2]/46,XX[6]", 
"46,XX,9qh+[20]", "46,XX,del(3)(q23q26.2),der(7)t(1:7)(q32;q32),del(10)(q22q25),t(13;16)(q34;p11.2)dup(21)(q22)[cp20]", 
"46,XX,del(5)(q11.2q33)[1]/48~52,idem,+1,+?del(5)(q15q33),+11,+11,?t(12;22)(p13;q12),-13,-17,+i(22)(q10),+i(22)(q10),+mar[cp19]", 
"46,XX,del(5)(q22q33)[4]/46,XX[16]", "46,XX,i(17)(q10)[1]/45,sl-7[2]/48,sl,+13,+19[3]/46,XX[15]", 
"46,XX,inv(16)(p13q22)[15]/46,XX[2]", "46,XX,inv(16)(p13q22)[19]/46,XX[1]", 
"46,XX,inv(16)(p13q22)[20]", "46,XX,inv(16)(p13q22)[5]/46,idem,t(3;3)(p13;q?28)[5]/46,XX[6]", 
"46,XX,t(15;17)(q22;q21.1)[19]/47,idem,+8 [1]", "46,XX,t(15;17)(q22;q21),t(16;19)(p13.3;p13.1)[17]/46,XX[3]", 
"46,XX,t(15;17)(q22;q21)[11]/46,XX[9]", "46,XX,t(15;17)(q22;q21)[12]/46,XX[8]", 
"46,XX,t(15;17)(q22;q21)[20]", "46,XX,t(8;21)(q22;q22)[17]/46,XX[3]", 
"46,XX,t(8;21)(q22;q22)[20]", "46,XX,t(8;21)[15]/46,idem,del(9)(q12q22)[5]", 
"46,XX[15]", "46,XX[18]", "46,XX[19]/46,XX,add(7)(p?22)[1]", 
"46,XX[20]", "46,XX+13,21[cp17]/46,XX[3]", "46,XY,9qh+[19]", 
"46,XY,del(11)(p12)[2]/46,XY[18]", "46,XY,del(20)(q11.2)[23]/92,XXYY,del(20)(q11.2)x2[2]/46,XY[3]", 
"46,XY,del(7)(q21q36)[18]/46,XY[2]", "46,XY,del(9)(q13:q22),t(11:21)(p13;q22),t(15;17)(q22;q210[20]", 
"46,XY,i(17)(q10)[15]/47,XY,idem+13[3]/46,XY[2]", "46,XY,inv(16)(p13;q22)[20]", 
"46,XY,inv(16)(p13q22)[17]/46,XY[3]", "46,XY,inv(16)(p13q22)[9]/46,XY[10]", 
"46,XY,t(11;19)(q23;p13)[17]/46,XY,t(11;19)(q23;p13),inv(12)(p12p13)[3]", 
"46,XY,t(11;19)(q23;p13)[20]", "46,XY,t(15;17)(q22;q21)[19]/46,XY[1]", 
"46,XY,t(15;17)(q22;q21)[20]", "46,XY,t(15;17)(q22:q21)[11]/46,XY[9]", 
"46,XY,t(2;4)(q34;q21)inv(16)(p13q22) [20]", "46,XY,t(6;11)(q27;q23)[15]", 
"46,XY,t(9;11)(p22;q23)[7]/47,XY,t(9;11)(p22;q23)[7]/46,XY[4]", 
"46,XY,t(9;22)(q34;q11.2)[13]/34~37,idem,-3,del(4),-4,-5,-7,-9,-10,t?(11;12),-12,-14,-14,-16,-17,-22[cp6]/46,XY[1]", 
"46,XY,t(9;22)(q34;q11.2[4]/50,idem,+8,+10,+21,+der(22)(t(9;22)(q34;q11.2)[16]", 
"46,XY[13]", "46,XY[15]", "46,XY[19]", "46,XY[20]", "46,XY[30]", 
"46~49,XY,del(3)(p14),del(5)(p11.2q33),del(17)(q21q21),add(21)(p11.2),+22,mar[cp20]", 
"47,XX,+der(5)t(2;5)(p11.2;q11.2)?,t(8;16)(p11.2;p13.3)[19]", 
"47,XX,i(11)(q10)[18]/46,XX [2]", "47,XX,t(15;17)(q22:q21)+mar[20]", 
"47,XX+11 [20]", "47,XX+8 [20]", "47,XXY [17]", "47,XY,+13[5]/46,XY[15]", 
"47,XY,+21 [6]/46,XY[13]", "47,XY,+21[11]/48,XY,+3,+21[8]", "47,XY,+22[10]/47,XY,+8[7]/45,XY,del(3)(p21),del(4)(p12p15),-7,?dup(7)(q11.2q36)[3]", 
"47,XY,+8 [10]/46,XY [10]", "47,XY,+8 [19]", "47,XY,+8 [20]", 
"47,XY,+8[15]/46,+8,-17[3]", "47,XY,+9[10]/46,XY[10]", "47,XY,del(5)(q22q33),t(10;11)(p13~p15;q22~23),i(17)(q10)[3]/46,XY[17]", 
"47,XY,del(7)(q22),+8,t(15;17)(q22;q21)[18]/46,XY,del(7)(q22),t(15;17)(q22;q21)[2]", 
"47,XY+8 [15]/48,XY+8+8[4]/46,XY[1]", "48,XY,+8,+8[16]/46,XY[4]", 
"52~54,XY,+2,+4,+6,+8,del(11)(q23),+19,+19,+21[17]/46,XY[3]", 
"53~56,XY,+1,del(2)(q33q34),+8,+10,+11x2,+13x1-2,+14,del(17)(p11.2),+19,add(21)(q22),+22[cp20]", 
"incomplete-46,XY,del(12)(p11.20[2]/46,XY[3]", "N.D.", "ND", 
"Outside hospital with inv(16)"), class = "factor"), `Cytogenetic-Code--Other-` = structure(c(8L, 
3L, 8L, 8L, 3L, 9L), .Label = c("BCR-ABL1", "CBFB-MYH11", "Complex Cytogenetics", 
"Intermediate Risk Cytogenetic Abnormality", "MLL translocation, poor risk", 
"MLL translocation, t(9;11)", "N.D.", "Normal Karyotype", "PML-RARA", 
"Poor Risk Cytogenetic Abnormality", "RUNX1-RUNX1T1"), class = "factor"), 
    Induction = structure(c(11L, 4L, 1L, 8L, 4L, 9L), .Label = c("7+3", 
    "7+3, dauna", "7+3, IT", "7+3+3", "7+3+3, gleevec", "7+3+3, then 5+2+2", 
    "7+3+3+PSC", "7+3+AMD", "7+3+ATRA", "7+3+dauno", "7+3+Genasense", 
    "7+3+study drug", "7+4+ATRA", "Azacitidine", "CLAM", "Cytarabine only", 
    "Decitabine", "Decitabine then 7+3", "Hydrea & Idarubicin", 
    "Hydrea, ATRA started", "hydrea, didn't get add'l chemo", 
    "LBH/Decitabine", "low dose Ara C", "no treatment", "Revlimid", 
    "Revlmd then Decitbne,7+3,5+2"), class = "factor")), row.names = c(NA, 
6L), class = "data.frame")

My attempt so far

df <- plot_meta %>% group_by(patient) %>% count(Sex,FAB,`Diagnosis-Age`) %>% mutate(Percent = n / sum(n)*100)
head(df)

ggplot(df, aes(x = Sex, y = Percent, fill = FAB))+
  geom_bar(stat = "identity")+
  geom_text(aes(label = paste(Percent,"%"), y = Percent), 
            position = position_stack(vjust = 0.5))+
  coord_flip()+
  labs(x = "Sex", y = "Percentage",fill = "FAB")

The figure Im getting is this

How do I incorporate other variables distribution in the same plot as above? I would like to add the percentage scale on the axis such that a common percent scale for all the variables is there on one axis.

Any help or suggestion would be really appreciated

this figure



Sources

This article follows the attribution requirements of Stack Overflow and is licensed under CC BY-SA 3.0.

Source: Stack Overflow

Solution Source