Spark SQL: eliminate non-repeating values across tables
I have three tables, and the expected output is highlighted below in green. The row (A, 2, 4, adhoc) should not appear in the output because it does not repeat in the other tables. I need to show only the values that repeat across the tables.
// package com.allaboutscala.chapter.one.tutorial_04
// val spark = SparkSession.builder.master("local[2]").appName("kafkaConsumer").getOrCreate()
import org.apache.spark.sql.{DataFrame, Dataset, SparkSession}
import org.apache.spark.sql.functions._
import spark.implicits._
/** One row of the sample tables: four string columns named T1 through T4. */
case class Person(
  T1: String,
  T2: String,
  T3: String,
  T4: String
)
// val sqlContext = new org.apache.spark.sql.SQLContext(sc)
// import spark.implicits._
import org.apache.spark.sql.functions._
// Sample data. Table1 holds three rows; Table2 and Table3 each hold a single
// row that is also present in Table1. The (A, 2, 4, adhoc) row exists only in Table1.
val Table1 = Seq(
  Person("A", "1", "3", "adhoc"),
  Person("A", "2", "4", "adhoc"),
  Person("A", "3", "5", "adhoc")
).toDF()

val Table2 = Seq(
  Person("A", "1", "3", "adhoc")
).toDF()

val Table3 = Seq(
  Person("A", "3", "5", "adhoc")
).toDF()
// val a=Table1.toDF()
// val b=Table2.toDF()
// val c=Table3.toDF()
// Register each DataFrame as a temp view (a, b, c, in that order) so the SQL
// statements can refer to the tables by name.
Seq("a" -> Table1, "b" -> Table2, "c" -> Table3).foreach {
  case (viewName, frame) => frame.createOrReplaceTempView(viewName)
}
// Left-join view a to views b and c on all four columns. Being a left join,
// every distinct row of a is kept; the tbl2_* / tbl3_* columns are null for a
// row of a that has no match in b / c respectively.
val b1 = {
  val joinAllTables = """ select distinct
a.T1 as tbl1_ta , a.T2 as tbl1_t2 ,a.T3 as tbl1_t3 ,a.T4 as tbl1_t4,
b.T1 as tbl2_ta , b.T2 as tbl2_t2 ,b.T3 as tbl2_t3 ,b.T4 as tbl2_t4,
c.T1 as tbl3_ta , c.T2 as tbl3_t2 ,c.T3 as tbl3_t3 ,c.T4 as tbl3_t4 from a left join b on a.T1=b.T1 and a.T2=b.T2 and a.T3=b.T3 and a.T4=b.T4 left join c
on a.T1=c.T1 and a.T2=c.T2 and a.T3=c.T3 and a.T4=c.T4 """
  spark.sql(joinAllTables)
}

// Show the joined column names and types, then expose the result to SQL as b1c.
b1.printSchema()
b1.createOrReplaceTempView("b1c")
// Show only the Table1 rows that also occur in at least one of the other tables.
//
// b1c comes from a left join, so a Table1 row with no counterpart in b (or c)
// carries nulls in the tbl2_* (or tbl3_*) columns. The join condition compares
// T1 with '=', which never matches a null, so tbl2_ta / tbl3_ta is non-null
// exactly when a match was found. Filtering on those two columns therefore
// drops rows such as (A, 2, 4, adhoc) that exist only in Table1.
//
// The earlier "group by ... having count(*) > 1" could never return anything:
// b1c is built with "select distinct", so grouping by every column yields
// groups of exactly one row. The column list also repeated tbl3_t4 where
// tbl3_t3 was intended.
spark.sql(""" select tbl1_ta,tbl1_t2,tbl1_t3,tbl1_t4,tbl2_ta,tbl2_t2,tbl2_t3,tbl2_t4,tbl3_ta,tbl3_t2,tbl3_t3,tbl3_t4 from b1c
where tbl2_ta is not null or tbl3_ta is not null """).show(false)
The left join returns all rows. I tried a group by with count > 1 to exclude the non-repeating ones — is that the right approach?
I used a Databricks Community Edition cluster. Can anyone give me some ideas?

Sources
This article follows the attribution requirements of Stack Overflow and is licensed under CC BY-SA 3.0.
Source: Stack Overflow
| Solution | Source |
|---|---|

