Spark SQL: eliminate non-repeating values across tables

I have three tables, and the expected output is highlighted below in green. The row with values A, 2, 4, adhoc should not appear in the output, because it is not repeated in the other tables. I need to show only the values that are repeated across the tables.

enter image description here

// package com.allaboutscala.chapter.one.tutorial_04

//     val spark = SparkSession.builder.master("local[2]").appName("kafkaConsumer").getOrCreate()
    import org.apache.spark.sql.{DataFrame, Dataset, SparkSession}
    import org.apache.spark.sql.functions._
    import spark.implicits._

    /** One row of the sample tables; the field names T1..T4 become the DataFrame column names. */
    case class Person(
      T1: String,
      T2: String,
      T3: String,
      T4: String
    )
//    val sqlContext = new org.apache.spark.sql.SQLContext(sc)
//    import spark.implicits._
    import org.apache.spark.sql.functions._
    // Table1 is the driving table with three rows; Table2 and Table3 each
    // repeat exactly one of those rows, so the (A, 2, 4, adhoc) row appears
    // only in Table1.
    val Table1 = Seq(
      Person("A", "1", "3", "adhoc"),
      Person("A", "2", "4", "adhoc"),
      Person("A", "3", "5", "adhoc")
    ).toDF()
    val Table2 = Seq(
      Person("A", "1", "3", "adhoc")
    ).toDF()
    val Table3 = Seq(
      Person("A", "3", "5", "adhoc")
    ).toDF()

//    val a=Table1.toDF()
//    val b=Table2.toDF()
//    val c=Table3.toDF()

    // Register each DataFrame under the short alias used by the SQL queries below.
    Seq("a" -> Table1, "b" -> Table2, "c" -> Table3).foreach {
      case (viewName, table) => table.createOrReplaceTempView(viewName)
    }

    // Left-join view `a` to views `b` and `c`, matching on all four columns
    // (T1..T4). Every row of `a` is kept; the tbl2_* / tbl3_* columns are null
    // for a row of `a` that has no equal row in `b` / `c` respectively.
    // Note the aliases for T1 end in `_ta` (not `_t1`); the query on `b1c`
    // further down refers to them by those names.
    // `select distinct` removes duplicate result rows.
    val b1=spark.sql(""" select distinct
    a.T1 as tbl1_ta , a.T2 as tbl1_t2 ,a.T3 as tbl1_t3 ,a.T4 as tbl1_t4,
    b.T1 as tbl2_ta , b.T2 as tbl2_t2 ,b.T3 as tbl2_t3 ,b.T4 as tbl2_t4,
    c.T1 as tbl3_ta , c.T2 as tbl3_t2 ,c.T3 as tbl3_t3 ,c.T4 as tbl3_t4   from  a left join  b on a.T1=b.T1 and a.T2=b.T2 and a.T3=b.T3 and a.T4=b.T4 left join  c
on a.T1=c.T1 and a.T2=c.T2 and a.T3=c.T3 and a.T4=c.T4 """)

// Print the column names and types of the joined result.
b1.printSchema()

// Keep only the rows of `a` that are repeated in at least one other table.
//
// The earlier attempt grouped by every column and used `having count(*) > 1`.
// Because `b1` is already `select distinct`, each group holds exactly one row,
// so that filter can never match and the result is always empty. (That
// attempt also listed tbl3_t4 twice and left out tbl3_t3.)
//
// A left join leaves the right-hand columns null when no match exists, and the
// join condition `a.T1 = b.T1` can only succeed for a non-null b.T1. So a
// non-null tbl2_ta / tbl3_ta means the row was found in `b` / `c`.
b1.createOrReplaceTempView("b1c")
spark.sql(
  """select tbl1_ta, tbl1_t2, tbl1_t3, tbl1_t4,
    |       tbl2_ta, tbl2_t2, tbl2_t3, tbl2_t4,
    |       tbl3_ta, tbl3_t2, tbl3_t3, tbl3_t4
    |from b1c
    |where tbl2_ta is not null or tbl3_ta is not null
    |""".stripMargin
).show(false)

The left join returns all rows. I tried a group by with count > 1 to exclude the non-repeating rows. I am using a Databricks Community Edition cluster. Can anyone give me some idea? enter image description here



Sources

This article follows the attribution requirements of Stack Overflow and is licensed under CC BY-SA 3.0.

Source: Stack Overflow

Solution Source