SPARK-29561: Large Case Statement Code Generation OOM


Details

    • Type: Bug
    • Status: Resolved
    • Priority: Major
    • Resolution: Incomplete
    • Affects Version/s: 2.3.0
    • Fix Version/s: None
    • Component/s: SQL

    Description

      Spark Configuration

      spark.driver.memory = 1g
      spark.master = "local"
      spark.deploy.mode = "client"

      Try to execute a CASE statement with 3000+ branches (the full SQL statement is attached as apacheSparkCase.sql).
      Spark runs for a while before it OOMs; a reproduction sketch is shown below, followed by the stack trace it fails with.
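
      A minimal reproduction sketch, assuming the SparkSession above and a single string column named "value" (the column name, view name, branch values, and output path are assumptions; the attached apacheSparkCase.sql is the actual statement used):

      import spark.implicits._

      // Build a CASE expression with a few thousand branches, similar in shape to
      // the generated code below (a string input compared against integer literals,
      // each branch returning the negated literal), then run it so that
      // whole-stage codegen has to compile one enormous method.
      val branches = (1 to 3000).map(i => s"WHEN value = $i THEN ${-i}").mkString(" ")
      val query = s"SELECT CASE $branches ELSE 0 END AS result FROM input"

      Seq("1", "2", "3").toDF("value").createOrReplaceTempView("input")
      spark.sql(query).write.mode("overwrite").parquet("/tmp/case-when-oom")  // arbitrary output path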

      java.lang.OutOfMemoryError: GC overhead limit exceeded
      	at org.apache.spark.ContextCleaner$$anonfun$org$apache$spark$ContextCleaner$$keepCleaning$1.apply$mcV$sp(ContextCleaner.scala:182)
      	at org.apache.spark.util.Utils$.tryOrStopSparkContext(Utils.scala:1320)
      	at org.apache.spark.ContextCleaner.org$apache$spark$ContextCleaner$$keepCleaning(ContextCleaner.scala:178)
      	at org.apache.spark.ContextCleaner$$anon$1.run(ContextCleaner.scala:73)
      19/10/22 16:19:54 ERROR FileFormatWriter: Aborting job null.
      java.lang.OutOfMemoryError: GC overhead limit exceeded
      	at java.util.HashMap.newNode(HashMap.java:1750)
      	at java.util.HashMap.putVal(HashMap.java:631)
      	at java.util.HashMap.putMapEntries(HashMap.java:515)
      	at java.util.HashMap.putAll(HashMap.java:785)
      	at org.codehaus.janino.UnitCompiler.buildLocalVariableMap(UnitCompiler.java:3345)
      	at org.codehaus.janino.UnitCompiler.access$5000(UnitCompiler.java:212)
      	at org.codehaus.janino.UnitCompiler$8.visitLocalVariableDeclarationStatement(UnitCompiler.java:3230)
      	at org.codehaus.janino.UnitCompiler$8.visitLocalVariableDeclarationStatement(UnitCompiler.java:3198)
      	at org.codehaus.janino.Java$LocalVariableDeclarationStatement.accept(Java.java:3351)
      	at org.codehaus.janino.UnitCompiler.buildLocalVariableMap(UnitCompiler.java:3197)
      	at org.codehaus.janino.UnitCompiler.buildLocalVariableMap(UnitCompiler.java:3254)
      	at org.codehaus.janino.UnitCompiler.access$3900(UnitCompiler.java:212)
      	at org.codehaus.janino.UnitCompiler$8.visitBlock(UnitCompiler.java:3216)
      	at org.codehaus.janino.UnitCompiler$8.visitBlock(UnitCompiler.java:3198)
      	at org.codehaus.janino.Java$Block.accept(Java.java:2756)
      	at org.codehaus.janino.UnitCompiler.buildLocalVariableMap(UnitCompiler.java:3197)
      	at org.codehaus.janino.UnitCompiler.buildLocalVariableMap(UnitCompiler.java:3260)
      	at org.codehaus.janino.UnitCompiler.access$4000(UnitCompiler.java:212)
      	at org.codehaus.janino.UnitCompiler$8.visitDoStatement(UnitCompiler.java:3217)
      	at org.codehaus.janino.UnitCompiler$8.visitDoStatement(UnitCompiler.java:3198)
      	at org.codehaus.janino.Java$DoStatement.accept(Java.java:3304)
      	at org.codehaus.janino.UnitCompiler.buildLocalVariableMap(UnitCompiler.java:3197)
      	at org.codehaus.janino.UnitCompiler.buildLocalVariableMap(UnitCompiler.java:3186)
      	at org.codehaus.janino.UnitCompiler.compile(UnitCompiler.java:3009)
      	at org.codehaus.janino.UnitCompiler.compileDeclaredMethods(UnitCompiler.java:1336)
      	at org.codehaus.janino.UnitCompiler.compileDeclaredMethods(UnitCompiler.java:1309)
      	at org.codehaus.janino.UnitCompiler.compile2(UnitCompiler.java:799)
      	at org.codehaus.janino.UnitCompiler.compile2(UnitCompiler.java:958)
      	at org.codehaus.janino.UnitCompiler.access$700(UnitCompiler.java:212)
      	at org.codehaus.janino.UnitCompiler$2.visitMemberClassDeclaration(UnitCompiler.java:393)
      	at org.codehaus.janino.UnitCompiler$2.visitMemberClassDeclaration(UnitCompiler.java:385)
      	at org.codehaus.janino.Java$MemberClassDeclaration.accept(Java.java:1286)
      19/10/22 16:19:54 ERROR Utils: throw uncaught fatal error in thread Spark Context Cleaner
      java.lang.OutOfMemoryError: GC overhead limit exceeded
      	at org.apache.spark.ContextCleaner$$anonfun$org$apache$spark$ContextCleaner$$keepCleaning$1.apply$mcV$sp(ContextCleaner.scala:182)
      	at org.apache.spark.util.Utils$.tryOrStopSparkContext(Utils.scala:1320)
      	at org.apache.spark.ContextCleaner.org$apache$spark$ContextCleaner$$keepCleaning(ContextCleaner.scala:178)
      	at org.apache.spark.ContextCleaner$$anon$1.run(ContextCleaner.scala:73)

       The generated code looks like this:

      /* 029 */   private void project_doConsume(InternalRow scan_row, UTF8String project_expr_0, boolean project_exprIsNull_0) throws java.io.IOException {
      /* 030 */     byte project_caseWhenResultState = -1;
      /* 031 */     do {
      /* 032 */       boolean project_isNull1 = true;
      /* 033 */       boolean project_value1 = false;
      /* 034 */
      /* 035 */       boolean project_isNull2 = project_exprIsNull_0;
      /* 036 */       int project_value2 = -1;
      /* 037 */       if (!project_exprIsNull_0) {
      /* 038 */         UTF8String.IntWrapper project_intWrapper = new UTF8String.IntWrapper();
      /* 039 */         if (project_expr_0.toInt(project_intWrapper)) {
      /* 040 */           project_value2 = project_intWrapper.value;
      /* 041 */         } else {
      /* 042 */           project_isNull2 = true;
      /* 043 */         }
      /* 044 */         project_intWrapper = null;
      /* 045 */
      /* 046 */       }

      ... roughly 78,000 similar generated lines omitted ...

      /* 78047 */       if (!project_isNull15002) {
      /* 78048 */         project_isNull15001 = false; // resultCode could change nullability.
      /* 78049 */         project_value15001 = project_value15002 == 3000;
      /* 78050 */
      /* 78051 */       }
      /* 78052 */       if (!project_isNull15001 && project_value15001) {
      /* 78053 */         project_caseWhenResultState = (byte)(false ? 1 : 0);
      /* 78054 */         project_project_value = -3000;
      /* 78055 */         continue;
      /* 78056 */       }
      /* 78057 */
      /* 78058 */     } while (false);
      /* 78059 */     // TRUE if any condition is met and the result is null, or no any condition is met.
      /* 78060 */     final boolean project_isNull = (project_caseWhenResultState != 0);
      /* 78061 */     project_mutableStateArray2[0].zeroOutNullBytes();
      /* 78062 */
      /* 78063 */     if (project_isNull) {
      /* 78064 */       project_mutableStateArray2[0].setNullAt(0);
      /* 78065 */     } else {
      /* 78066 */       project_mutableStateArray2[0].write(0, project_project_value);
      /* 78067 */     }
      /* 78068 */     append(project_mutableStateArray[0]);
      /* 78069 */
      /* 78070 */   }
      /* 78071 */
      /* 78072 */   protected void processNext() throws java.io.IOException {
      /* 78073 */     while (scan_mutableStateArray[0].hasNext()) {
      /* 78074 */       InternalRow scan_row = (InternalRow) scan_mutableStateArray[0].next();
      /* 78075 */       ((org.apache.spark.sql.execution.metric.SQLMetric) references[0] /* numOutputRows */).add(1);
      /* 78076 */       boolean scan_isNull = scan_row.isNullAt(0);
      /* 78077 */       UTF8String scan_value = scan_isNull ? null : (scan_row.getUTF8String(0));
      /* 78078 */
      /* 78079 */       project_doConsume(scan_row, scan_value, scan_isNull);
      /* 78080 */       if (shouldStop()) return;
      /* 78081 */     }
      /* 78082 */   }
      /* 78083 */
      /* 78084 */ }

       

      Attachments

        1. apacheSparkCase.sql
          83 kB
          Michael Chen


          People

            Assignee: Unassigned
            Reporter: Michael Chen (mikechen)
            Votes: 0
            Watchers: 2

            Dates

              Created:
              Updated:
              Resolved: