Spark SQL encoder problem with custom data types
I've encountered a problem when trying to use my custom classes/interfaces with Spark Datasets (Apache Spark 3.1.2) and Encoders.bean(). First of all, I want to use a bean encoder instead of Kryo, because I still need to do some column operations.
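For context, the Kryo alternative I'm trying to avoid would look roughly like the sketch below (standard Spark API; processFunction is the flat-map function defined in the repro code further down). With Kryo the whole object is serialized into a single binary column, so schema-based column operations on the Dataset are no longer possible:
// Kryo-based alternative (avoided here): the resulting Dataset has a single binary
// "value" column instead of one column per bean property, so column operations are out.
Dataset<Feature> kryoFeatures = dataset.flatMap(processFunction, Encoders.kryo(Feature.class));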
Here is the exception I'm getting:
2022-05-04 17:42:34 ERROR CodeGenerator:94 - failed to compile: org.codehaus.commons.compiler.CompileException: File 'generated.java', Line 95, Column 52: Unknown variable or type "testing.codegen.FeatureAttribution$.MODULE$"
org.codehaus.commons.compiler.CompileException: File 'generated.java', Line 95, Column 52: Unknown variable or type "testing.codegen.FeatureAttribution$.MODULE$"
at org.codehaus.janino.UnitCompiler.compileError(UnitCompiler.java:12021)
at org.codehaus.janino.UnitCompiler.getType2(UnitCompiler.java:6905)
at org.codehaus.janino.UnitCompiler.access$14000(UnitCompiler.java:226)
at org.codehaus.janino.UnitCompiler$22.visitPackage(UnitCompiler.java:6493)
at org.codehaus.janino.UnitCompiler$22.visitPackage(UnitCompiler.java:6490)
at org.codehaus.janino.Java$Package.accept(Java.java:4453)
at org.codehaus.janino.UnitCompiler.getType(UnitCompiler.java:6490)
at org.codehaus.janino.UnitCompiler.getType2(UnitCompiler.java:6900)
at org.codehaus.janino.UnitCompiler.access$14600(UnitCompiler.java:226)
at org.codehaus.janino.UnitCompiler$22$2$1.visitAmbiguousName(UnitCompiler.java:6518)
at org.codehaus.janino.UnitCompiler$22$2$1.visitAmbiguousName(UnitCompiler.java:6515)
at org.codehaus.janino.Java$AmbiguousName.accept(Java.java:4429)
at org.codehaus.janino.UnitCompiler$22$2.visitLvalue(UnitCompiler.java:6515)
at org.codehaus.janino.UnitCompiler$22$2.visitLvalue(UnitCompiler.java:6511)
at org.codehaus.janino.Java$Lvalue.accept(Java.java:4353)
at org.codehaus.janino.UnitCompiler$22.visitRvalue(UnitCompiler.java:6511)
at org.codehaus.janino.UnitCompiler$22.visitRvalue(UnitCompiler.java:6490)
at org.codehaus.janino.Java$Rvalue.accept(Java.java:4321)
at org.codehaus.janino.UnitCompiler.getType(UnitCompiler.java:6490)
at org.codehaus.janino.UnitCompiler.findIMethod(UnitCompiler.java:9110)
at org.codehaus.janino.UnitCompiler.compileGet2(UnitCompiler.java:5055)
at org.codehaus.janino.UnitCompiler.access$9100(UnitCompiler.java:226)
at org.codehaus.janino.UnitCompiler$16.visitMethodInvocation(UnitCompiler.java:4482)
at org.codehaus.janino.UnitCompiler$16.visitMethodInvocation(UnitCompiler.java:4455)
at org.codehaus.janino.Java$MethodInvocation.accept(Java.java:5286)
at org.codehaus.janino.UnitCompiler.compileGet(UnitCompiler.java:4455)
at org.codehaus.janino.UnitCompiler.compileGetValue(UnitCompiler.java:5683)
at org.codehaus.janino.UnitCompiler.compileGet2(UnitCompiler.java:4672)
at org.codehaus.janino.UnitCompiler.access$8800(UnitCompiler.java:226)
at org.codehaus.janino.UnitCompiler$16.visitConditionalExpression(UnitCompiler.java:4479)
at org.codehaus.janino.UnitCompiler$16.visitConditionalExpression(UnitCompiler.java:4455)
at org.codehaus.janino.Java$ConditionalExpression.accept(Java.java:4728)
at org.codehaus.janino.UnitCompiler.compileGet(UnitCompiler.java:4455)
at org.codehaus.janino.UnitCompiler.compileGetValue(UnitCompiler.java:5683)
at org.codehaus.janino.UnitCompiler.compile2(UnitCompiler.java:2581)
at org.codehaus.janino.UnitCompiler.access$2700(UnitCompiler.java:226)
at org.codehaus.janino.UnitCompiler$6.visitLocalVariableDeclarationStatement(UnitCompiler.java:1506)
at org.codehaus.janino.UnitCompiler$6.visitLocalVariableDeclarationStatement(UnitCompiler.java:1490)
at org.codehaus.janino.Java$LocalVariableDeclarationStatement.accept(Java.java:3712)
at org.codehaus.janino.UnitCompiler.compile(UnitCompiler.java:1490)
at org.codehaus.janino.UnitCompiler.compileStatements(UnitCompiler.java:1573)
at org.codehaus.janino.UnitCompiler.compile2(UnitCompiler.java:1559)
at org.codehaus.janino.UnitCompiler.access$1700(UnitCompiler.java:226)
at org.codehaus.janino.UnitCompiler$6.visitBlock(UnitCompiler.java:1496)
at org.codehaus.janino.UnitCompiler$6.visitBlock(UnitCompiler.java:1490)
at org.codehaus.janino.Java$Block.accept(Java.java:2969)
at org.codehaus.janino.UnitCompiler.compile(UnitCompiler.java:1490)
at org.codehaus.janino.UnitCompiler.compile2(UnitCompiler.java:2478)
at org.codehaus.janino.UnitCompiler.access$1900(UnitCompiler.java:226)
at org.codehaus.janino.UnitCompiler$6.visitIfStatement(UnitCompiler.java:1498)
at org.codehaus.janino.UnitCompiler$6.visitIfStatement(UnitCompiler.java:1490)
at org.codehaus.janino.Java$IfStatement.accept(Java.java:3140)
at org.codehaus.janino.UnitCompiler.compile(UnitCompiler.java:1490)
at org.codehaus.janino.UnitCompiler.compileStatements(UnitCompiler.java:1573)
at org.codehaus.janino.UnitCompiler.compile2(UnitCompiler.java:1559)
at org.codehaus.janino.UnitCompiler.access$1700(UnitCompiler.java:226)
at org.codehaus.janino.UnitCompiler$6.visitBlock(UnitCompiler.java:1496)
at org.codehaus.janino.UnitCompiler$6.visitBlock(UnitCompiler.java:1490)
at org.codehaus.janino.Java$Block.accept(Java.java:2969)
at org.codehaus.janino.UnitCompiler.compile(UnitCompiler.java:1490)
at org.codehaus.janino.UnitCompiler.compile2(UnitCompiler.java:1848)
at org.codehaus.janino.UnitCompiler.access$2200(UnitCompiler.java:226)
at org.codehaus.janino.UnitCompiler$6.visitWhileStatement(UnitCompiler.java:1501)
at org.codehaus.janino.UnitCompiler$6.visitWhileStatement(UnitCompiler.java:1490)
at org.codehaus.janino.Java$WhileStatement.accept(Java.java:3245)
at org.codehaus.janino.UnitCompiler.compile(UnitCompiler.java:1490)
at org.codehaus.janino.UnitCompiler.compileStatements(UnitCompiler.java:1573)
at org.codehaus.janino.UnitCompiler.compile2(UnitCompiler.java:1559)
at org.codehaus.janino.UnitCompiler.access$1700(UnitCompiler.java:226)
at org.codehaus.janino.UnitCompiler$6.visitBlock(UnitCompiler.java:1496)
at org.codehaus.janino.UnitCompiler$6.visitBlock(UnitCompiler.java:1490)
at org.codehaus.janino.Java$Block.accept(Java.java:2969)
at org.codehaus.janino.UnitCompiler.compile(UnitCompiler.java:1490)
at org.codehaus.janino.UnitCompiler.compile2(UnitCompiler.java:2486)
at org.codehaus.janino.UnitCompiler.access$1900(UnitCompiler.java:226)
at org.codehaus.janino.UnitCompiler$6.visitIfStatement(UnitCompiler.java:1498)
at org.codehaus.janino.UnitCompiler$6.visitIfStatement(UnitCompiler.java:1490)
at org.codehaus.janino.Java$IfStatement.accept(Java.java:3140)
at org.codehaus.janino.UnitCompiler.compile(UnitCompiler.java:1490)
at org.codehaus.janino.UnitCompiler.compileStatements(UnitCompiler.java:1573)
at org.codehaus.janino.UnitCompiler.compile(UnitCompiler.java:3420)
at org.codehaus.janino.UnitCompiler.compileDeclaredMethods(UnitCompiler.java:1362)
at org.codehaus.janino.UnitCompiler.compileDeclaredMethods(UnitCompiler.java:1335)
at org.codehaus.janino.UnitCompiler.compile2(UnitCompiler.java:807)
at org.codehaus.janino.UnitCompiler.compile2(UnitCompiler.java:975)
at org.codehaus.janino.UnitCompiler.access$700(UnitCompiler.java:226)
at org.codehaus.janino.UnitCompiler$2.visitMemberClassDeclaration(UnitCompiler.java:392)
at org.codehaus.janino.UnitCompiler$2.visitMemberClassDeclaration(UnitCompiler.java:384)
at org.codehaus.janino.Java$MemberClassDeclaration.accept(Java.java:1445)
at org.codehaus.janino.UnitCompiler.compile(UnitCompiler.java:384)
at org.codehaus.janino.UnitCompiler.compileDeclaredMemberTypes(UnitCompiler.java:1312)
at org.codehaus.janino.UnitCompiler.compile2(UnitCompiler.java:833)
at org.codehaus.janino.UnitCompiler.compile2(UnitCompiler.java:410)
at org.codehaus.janino.UnitCompiler.access$400(UnitCompiler.java:226)
at org.codehaus.janino.UnitCompiler$2.visitPackageMemberClassDeclaration(UnitCompiler.java:389)
at org.codehaus.janino.UnitCompiler$2.visitPackageMemberClassDeclaration(UnitCompiler.java:384)
at org.codehaus.janino.Java$PackageMemberClassDeclaration.accept(Java.java:1594)
at org.codehaus.janino.UnitCompiler.compile(UnitCompiler.java:384)
at org.codehaus.janino.UnitCompiler.compile2(UnitCompiler.java:362)
at org.codehaus.janino.UnitCompiler.access$000(UnitCompiler.java:226)
at org.codehaus.janino.UnitCompiler$1.visitCompilationUnit(UnitCompiler.java:336)
at org.codehaus.janino.UnitCompiler$1.visitCompilationUnit(UnitCompiler.java:333)
at org.codehaus.janino.Java$CompilationUnit.accept(Java.java:363)
at org.codehaus.janino.UnitCompiler.compileUnit(UnitCompiler.java:333)
at org.codehaus.janino.SimpleCompiler.cook(SimpleCompiler.java:235)
at org.codehaus.janino.SimpleCompiler.compileToClassLoader(SimpleCompiler.java:464)
at org.codehaus.janino.ClassBodyEvaluator.compileToClass(ClassBodyEvaluator.java:314)
at org.codehaus.janino.ClassBodyEvaluator.cook(ClassBodyEvaluator.java:237)
at org.codehaus.janino.SimpleCompiler.cook(SimpleCompiler.java:205)
at org.codehaus.commons.compiler.Cookable.cook(Cookable.java:80)
at org.apache.spark.sql.catalyst.expressions.codegen.CodeGenerator$.org$apache$spark$sql$catalyst$expressions$codegen$CodeGenerator$$doCompile(CodeGenerator.scala:1404)
at org.apache.spark.sql.catalyst.expressions.codegen.CodeGenerator$$anon$1.load(CodeGenerator.scala:1501)
at org.apache.spark.sql.catalyst.expressions.codegen.CodeGenerator$$anon$1.load(CodeGenerator.scala:1498)
at org.sparkproject.guava.cache.LocalCache$LoadingValueReference.loadFuture(LocalCache.java:3599)
at org.sparkproject.guava.cache.LocalCache$Segment.loadSync(LocalCache.java:2379)
at org.sparkproject.guava.cache.LocalCache$Segment.lockedGetOrLoad(LocalCache.java:2342)
at org.sparkproject.guava.cache.LocalCache$Segment.get(LocalCache.java:2257)
at org.sparkproject.guava.cache.LocalCache.get(LocalCache.java:4000)
at org.sparkproject.guava.cache.LocalCache.getOrLoad(LocalCache.java:4004)
at org.sparkproject.guava.cache.LocalCache$LocalLoadingCache.get(LocalCache.java:4874)
at org.apache.spark.sql.catalyst.expressions.codegen.CodeGenerator$.compile(CodeGenerator.scala:1352)
at org.apache.spark.sql.catalyst.expressions.codegen.GenerateSafeProjection$.create(GenerateSafeProjection.scala:205)
at org.apache.spark.sql.catalyst.expressions.codegen.GenerateSafeProjection$.create(GenerateSafeProjection.scala:39)
at org.apache.spark.sql.catalyst.expressions.codegen.CodeGenerator.generate(CodeGenerator.scala:1278)
at org.apache.spark.sql.catalyst.expressions.SafeProjection$.createCodeGeneratedObject(Projection.scala:170)
at org.apache.spark.sql.catalyst.expressions.SafeProjection$.createCodeGeneratedObject(Projection.scala:167)
at org.apache.spark.sql.catalyst.expressions.CodeGeneratorWithInterpretedFallback.createObject(CodeGeneratorWithInterpretedFallback.scala:52)
at org.apache.spark.sql.catalyst.expressions.SafeProjection$.create(Projection.scala:193)
at org.apache.spark.sql.catalyst.encoders.ExpressionEncoder$Deserializer.apply(ExpressionEncoder.scala:180)
at org.apache.spark.sql.catalyst.encoders.ExpressionEncoder$Deserializer.apply(ExpressionEncoder.scala:173)
at scala.collection.TraversableLike.$anonfun$map$1(TraversableLike.scala:238)
at scala.collection.IndexedSeqOptimized.foreach(IndexedSeqOptimized.scala:36)
at scala.collection.IndexedSeqOptimized.foreach$(IndexedSeqOptimized.scala:33)
at scala.collection.mutable.ArrayOps$ofRef.foreach(ArrayOps.scala:198)
at scala.collection.TraversableLike.map(TraversableLike.scala:238)
at scala.collection.TraversableLike.map$(TraversableLike.scala:231)
at scala.collection.mutable.ArrayOps$ofRef.map(ArrayOps.scala:198)
at org.apache.spark.sql.Dataset.collectFromPlan(Dataset.scala:3696)
at org.apache.spark.sql.Dataset.$anonfun$collectAsList$1(Dataset.scala:2977)
at org.apache.spark.sql.Dataset.$anonfun$withAction$1(Dataset.scala:3687)
at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$5(SQLExecution.scala:103)
at org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:163)
at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$1(SQLExecution.scala:90)
at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:775)
at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:64)
at org.apache.spark.sql.Dataset.withAction(Dataset.scala:3685)
at org.apache.spark.sql.Dataset.collectAsList(Dataset.scala:2976)
at testing.codegen.Runner.run(Runner.java:54)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(Unknown Source)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(Unknown Source)
at java.lang.reflect.Method.invoke(Unknown Source)
at org.junit.runners.model.FrameworkMethod$1.runReflectiveCall(FrameworkMethod.java:59)
at org.junit.internal.runners.model.ReflectiveCallable.run(ReflectiveCallable.java:12)
at org.junit.runners.model.FrameworkMethod.invokeExplosively(FrameworkMethod.java:56)
at org.junit.internal.runners.statements.InvokeMethod.evaluate(InvokeMethod.java:17)
at org.junit.runners.ParentRunner$3.evaluate(ParentRunner.java:306)
at org.junit.runners.BlockJUnit4ClassRunner$1.evaluate(BlockJUnit4ClassRunner.java:100)
at org.junit.runners.ParentRunner.runLeaf(ParentRunner.java:366)
at org.junit.runners.BlockJUnit4ClassRunner.runChild(BlockJUnit4ClassRunner.java:103)
at org.junit.runners.BlockJUnit4ClassRunner.runChild(BlockJUnit4ClassRunner.java:63)
at org.junit.runners.ParentRunner$4.run(ParentRunner.java:331)
at org.junit.runners.ParentRunner$1.schedule(ParentRunner.java:79)
at org.junit.runners.ParentRunner.runChildren(ParentRunner.java:329)
at org.junit.runners.ParentRunner.access$100(ParentRunner.java:66)
at org.junit.runners.ParentRunner$2.evaluate(ParentRunner.java:293)
at org.junit.runners.ParentRunner$3.evaluate(ParentRunner.java:306)
at org.junit.runners.ParentRunner.run(ParentRunner.java:413)
at org.eclipse.jdt.internal.junit4.runner.JUnit4TestReference.run(JUnit4TestReference.java:93)
at org.eclipse.jdt.internal.junit.runner.TestExecution.run(TestExecution.java:40)
at org.eclipse.jdt.internal.junit.runner.RemoteTestRunner.runTests(RemoteTestRunner.java:529)
at org.eclipse.jdt.internal.junit.runner.RemoteTestRunner.runTests(RemoteTestRunner.java:756)
at org.eclipse.jdt.internal.junit.runner.RemoteTestRunner.run(RemoteTestRunner.java:452)
at org.eclipse.jdt.internal.junit.runner.RemoteTestRunner.main(RemoteTestRunner.java:210)
2022-05-04 17:42:34 INFO CodeGenerator:57 -
/* 001 */ public java.lang.Object generate(Object[] references) {
/* 002 */ return new SpecificSafeProjection(references);
/* 003 */ }
/* 004 */
/* 005 */ class SpecificSafeProjection extends org.apache.spark.sql.catalyst.expressions.codegen.BaseProjection {
/* 006 */
/* 007 */ private Object[] references;
/* 008 */ private InternalRow mutableRow;
/* 009 */ private InternalRow value_MapObject_lambda_variable_1;
/* 010 */ private boolean isNull_MapObject_lambda_variable_1;
/* 011 */ private boolean globalIsNull_0;
/* 012 */
/* 013 */ public SpecificSafeProjection(Object[] references) {
/* 014 */ this.references = references;
/* 015 */ mutableRow = (InternalRow) references[references.length - 1];
/* 016 */
/* 017 */
/* 018 */ }
/* 019 */
/* 020 */ public void initialize(int partitionIndex) {
/* 021 */
/* 022 */ }
/* 023 */
/* 024 */ public java.lang.Object apply(java.lang.Object _i) {
/* 025 */ InternalRow i = (InternalRow) _i;
/* 026 */ final testing.codegen.Feature value_1 = false ?
/* 027 */ null : new testing.codegen.Feature();
/* 028 */ testing.codegen.Feature javaBean_0 = value_1;
/* 029 */ if (!false) {
/* 030 */
/* 031 */ ArrayData value_10 = MapObjects_0(i);
/* 032 */ boolean isNull_2 = true;
/* 033 */ testing.codegen.FeatureAttribution[] value_2 = null;
/* 034 */ if (!globalIsNull_0) {
/* 035 */
/* 036 */ isNull_2 = false;
/* 037 */ if (!isNull_2) {
/* 038 */
/* 039 */ Object funcResult_0 = null;
/* 040 */ funcResult_0 = value_10.array();
/* 041 */
/* 042 */ if (funcResult_0 != null) {
/* 043 */ value_2 = (testing.codegen.FeatureAttribution[]) funcResult_0;
/* 044 */ } else {
/* 045 */ isNull_2 = true;
/* 046 */ }
/* 047 */
/* 048 */
/* 049 */ }
/* 050 */ }
/* 051 */ if (!isNull_2) {
/* 052 */ javaBean_0.setFeatureAttributes(value_2);
/* 053 */ }
/* 054 */
/* 055 */ }
/* 056 */ if (false) {
/* 057 */ mutableRow.setNullAt(0);
/* 058 */ } else {
/* 059 */
/* 060 */ mutableRow.update(0, value_1);
/* 061 */ }
/* 062 */
/* 063 */ return mutableRow;
/* 064 */ }
/* 065 */
/* 066 */
/* 067 */ private ArrayData MapObjects_0(InternalRow i) {
/* 068 */ boolean isNull_4 = i.isNullAt(0);
/* 069 */ ArrayData value_4 = isNull_4 ?
/* 070 */ null : (i.getArray(0));
/* 071 */ ArrayData value_3 = null;
/* 072 */
/* 073 */ if (!isNull_4) {
/* 074 */
/* 075 */ int dataLength_0 = value_4.numElements();
/* 076 */
/* 077 */ testing.codegen.FeatureAttribution[] convertedArray_0 = null;
/* 078 */ convertedArray_0 = new testing.codegen.FeatureAttribution[dataLength_0];
/* 079 */
/* 080 */
/* 081 */ int loopIndex_0 = 0;
/* 082 */
/* 083 */ while (loopIndex_0 < dataLength_0) {
/* 084 */ value_MapObject_lambda_variable_1 = (InternalRow) (value_4.getStruct(loopIndex_0, 0));
/* 085 */ isNull_MapObject_lambda_variable_1 = value_4.isNullAt(loopIndex_0);
/* 086 */
/* 087 */ boolean isNull_5 = false;
/* 088 */ testing.codegen.FeatureAttribution value_5 = null;
/* 089 */ if (!false && isNull_MapObject_lambda_variable_1) {
/* 090 */
/* 091 */ isNull_5 = true;
/* 092 */ value_5 = ((testing.codegen.FeatureAttribution)null);
/* 093 */ } else {
/* 094 */ final testing.codegen.FeatureAttribution value_9 = false ?
/* 095 */ null : testing.codegen.FeatureAttribution$.MODULE$.apply();
/* 096 */ testing.codegen.FeatureAttribution javaBean_1 = value_9;
/* 097 */ if (!false) {
/* 098 */
/* 099 */ }
/* 100 */ isNull_5 = false;
/* 101 */ value_5 = value_9;
/* 102 */ }
/* 103 */ if (isNull_5) {
/* 104 */ convertedArray_0[loopIndex_0] = null;
/* 105 */ } else {
/* 106 */ convertedArray_0[loopIndex_0] = value_5;
/* 107 */ }
/* 108 */
/* 109 */ loopIndex_0 += 1;
/* 110 */ }
/* 111 */
/* 112 */ value_3 = new org.apache.spark.sql.catalyst.util.GenericArrayData(convertedArray_0);
/* 113 */ }
/* 114 */ globalIsNull_0 = isNull_4;
/* 115 */ return value_3;
/* 116 */ }
/* 117 */
/* 118 */ }
Note that the failing line /* 095 */ in the generated code above is where it tries to instantiate my Java interface via testing.codegen.FeatureAttribution$.MODULE$.apply(), i.e. Scala companion-object syntax, which does not exist for a plain Java interface.
Here is simplified code that reproduces what I'm doing:
StructType schema = new StructType(new StructField[] {
        new StructField("TYPE", DataTypes.IntegerType, false, Metadata.empty()),
        new StructField("SOME", DataTypes.StringType, false, Metadata.empty()) });
List<Row> rowsList = new ArrayList<>();
rowsList.add(RowFactory.create(1, "TEST1"));
rowsList.add(RowFactory.create(2, "TEST2"));
SparkSession spark = SparkSession.builder().config("spark.master", "local").appName("test").getOrCreate();
Dataset<Row> dataset = spark.createDataFrame(rowsList, schema);
List<FeatureAttribution> reqAttributions = Arrays.asList(TypeAttribution.PROTOTYPE, SomeAttribution.PROTOTYPE);
FlatMapFunction<Row, Feature> processFunction = r -> rowToFeatures(r, reqAttributions);
Dataset<Feature> tiledFeatures = dataset.flatMap(processFunction, Encoders.bean(Feature.class));
tiledFeatures.foreach(f -> {
    LOG.info("feature_dataset: {}", f);
});
List<Feature> res = tiledFeatures.collectAsList();
res.forEach(f -> {
    LOG.info("feature_collected: {}", f);
});
public static Iterator<Feature> rowToFeatures(Row row, List<FeatureAttribution> requiredAttributions) {
    List<Feature> features = new ArrayList<>();
    List<FeatureAttribution> featureAttributions = new ArrayList<>();
    for (FeatureAttribution reqAttr : requiredAttributions) {
        featureAttributions.add(reqAttr.createNew(row));
    }
    Feature newFeature = new Feature();
    newFeature.setFeatureAttributes(featureAttributions.toArray(new FeatureAttribution[0]));
    features.add(newFeature);
    return features.iterator();
}
classes:
public class Feature extends AbstractFeature {

    private FeatureAttribution[] featureAttributes;

    public FeatureAttribution[] getFeatureAttributes() {
        return featureAttributes;
    }

    public void setFeatureAttributes(FeatureAttribution[] featureAttributes) {
        this.featureAttributes = featureAttributes;
    }

    @Override
    public String toString() {
        return "Feature [featureAttributes=" + Arrays.toString(featureAttributes) + "]";
    }
}

public interface FeatureAttribution extends Serializable {
    FeatureAttribution createNew(Row row);
}

public class TypeAttribution implements FeatureAttribution {
    public static TypeAttribution PROTOTYPE = new TypeAttribution();
    private int type;
    ...
}

public class SomeAttribution implements FeatureAttribution {
    public static SomeAttribution PROTOTYPE = new SomeAttribution();
    private String someData;
    ...
}
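The elided parts are ordinary POJO code. As a minimal sketch, TypeAttribution roughly contains the following (the accessor names and the exact column lookup are illustrative, not my exact code; SomeAttribution is analogous with a String field):
public class TypeAttribution implements FeatureAttribution {
    public static TypeAttribution PROTOTYPE = new TypeAttribution();

    private int type;

    public TypeAttribution() { } // no-arg constructor

    // illustrative accessors; the real names are elided above
    public int getType() { return type; }
    public void setType(int type) { this.type = type; }

    @Override
    public FeatureAttribution createNew(Row row) {
        TypeAttribution created = new TypeAttribution();
        created.setType(row.getInt(row.fieldIndex("TYPE"))); // reads the TYPE column
        return created;
    }

    @Override
    public String toString() {
        return "TypeAttribution [type=" + type + "]";
    }
}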
The first log (from the foreach) gives me what I expect:
Runner:51 - feature_dataset: Feature [featureAttributes=[TypeAttribution [type=1], SomeAttribution [someData=TEST1]]]
Runner:51 - feature_dataset: Feature [featureAttributes=[TypeAttribution [type=2], SomeAttribution [someData=TEST2]]]
Then collectAsList() fails with that Spark encoder code-generation problem. Evidently the generated code is not correct. Is there a way around it? When using known types in Feature, e.g. a List instead of FeatureAttribution[], everything works, but a custom type (especially an interface) is evidently a problem.
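For example, a variant of Feature along these lines encodes and collects without any problem (a minimal sketch; the property name is illustrative):
// Works with Encoders.bean(): the bean property uses a built-in supported element type (java.util.List of String)
public class Feature extends AbstractFeature {
    private List<String> attributeNames;

    public List<String> getAttributeNames() { return attributeNames; }
    public void setAttributeNames(List<String> attributeNames) { this.attributeNames = attributeNames; }
}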
Sources
This article follows the attribution requirements of Stack Overflow and is licensed under CC BY-SA 3.0.
Source: Stack Overflow