Uploaded image for project: 'Spark'
  1. Spark
  2. SPARK-29044

Resolved attribute(s) R#661751,residue#661752 missing from ipi#660814,residue#660731,exper_set#660827,R#660730,description#660815,sequence#660817,exper#660828,symbol#660816

    XMLWordPrintableJSON

Details

    • Bug
    • Status: Resolved
    • Major
    • Resolution: Incomplete
    • 2.4.3
    • None
    • PySpark, SQL

    Description

      SELECT group_averages.*
      FROM group_averages
      NATURAL INNER JOIN (
          SELECT MAX(R) AS max_R, ipi AS ipi, description AS description, symbol AS symbol, residue
          FROM group_averages
          GROUP BY ipi, description, symbol, residue
      ) AS all_rows_bigger_than_four
      WHERE all_rows_bigger_than_four.max_R >= 4.0
      

      causes,

      ---------------------------------------------------------------------------
      Py4JJavaError                             Traceback (most recent call last)
      /usr/local/spark/python/pyspark/sql/utils.py in deco(*a, **kw)
           62         try:
      ---> 63             return f(*a, **kw)
           64         except py4j.protocol.Py4JJavaError as e:
      
      /usr/local/spark/python/lib/py4j-0.10.7-src.zip/py4j/protocol.py in get_return_value(answer, gateway_client, target_id, name)
          327                     "An error occurred while calling {0}{1}{2}.\n".
      --> 328                     format(target_id, ".", name), value)
          329             else:
      
      Py4JJavaError: An error occurred while calling o21.sql.
      : org.apache.spark.sql.AnalysisException: Resolved attribute(s) R#661751,residue#661752 missing from ipi#660814,residue#660731,exper_set#660827,R#660730,description#660815,sequence#660817,exper#660828,symbol#660816 in operator !Project [ipi#660814, symbol#660816, description#660815, sequence#660817, R#661751, exper#660828, exper_set#660827, residue#661752]. Attribute(s) with the same name appear in the operation: R,residue. Please check if the right attribute(s) are used.;;
      Project [ipi#660546, description#660547, symbol#660548, residue#660731, group_description#660716, total_residues_detected#660809L, num_datasets#660810L, R#660811]
      +- Filter (max_R#661746 >= cast(4.0 as double))
         +- Project [ipi#660546, description#660547, symbol#660548, residue#660731, group_description#660716, total_residues_detected#660809L, num_datasets#660810L, R#660811, max_R#661746]
            +- Join Inner, ((((ipi#660546 = ipi#661747) && (description#660547 = description#661748)) && (symbol#660548 = symbol#661749)) && (residue#660731 = residue#661752))
               :- SubqueryAlias `group_averages`
               :  +- Filter (num_datasets#660810L > cast(1 as bigint))
               :     +- Aggregate [ipi#660546, description#660547, symbol#660548, residue#660731, exper_set#660559, group_description#660716, total_residues_detected#660809L], [ipi#660546, description#660547, symbol#660548, residue#660731, group_description#660716, total_residues_detected#660809L, count(R#660758) AS num_datasets#660810L, CASE WHEN (stddev_samp(R#660758) < (cast(0.6 as double) * avg(R#660758))) THEN avg(R#660758) ELSE CASE WHEN (min(R#660758) < cast(4 as double)) THEN min(R#660758) ELSE avg(R#660758) END END AS R#660811]
               :        +- Project [ipi#660546, description#660547, symbol#660548, exper_set#660559, exper#660560, residue#660731, group_description#660716, R#660758, total_residues_detected#660809L]
               :           +- Join Inner, (((ipi#660546 = ipi#660814) && (description#660547 = description#660815)) && (symbol#660548 = symbol#660816))
               :              :- SubqueryAlias `table_by_residue`
               :              :  +- Aggregate [ipi#660546, description#660547, symbol#660548, residue#660731, exper_set#660559, exper#660560, group_description#660716], [ipi#660546, description#660547, symbol#660548, exper_set#660559, exper#660560, residue#660731, group_description#660716, CASE WHEN (stddev_samp(R#660730) < (cast(0.6 as double) * avg(R#660730))) THEN avg(R#660730) ELSE CASE WHEN (min(R#660730) < cast(4 as double)) THEN min(R#660730) ELSE avg(R#660730) END END AS R#660758]
               :              :     +- Join Inner, (exper#660560 = Cimage link#660715)
               :              :        :- SubqueryAlias `table_by_peptide`
               :              :        :  +- Project [ipi#660546, symbol#660548, description#660547, sequence#660549, R#660730, exper#660560, exper_set#660559, residue#660731]
               :              :        :     +- Sort [ipi#660546 ASC NULLS FIRST], true
               :              :        :        +- Aggregate [exper#660560, ipi#660546, ((instr(protein_sequence#660699, regexp_replace(sequence#660549, [.*-], )) + instr(sequence#660549, *)) - 3), symbol#660548, exper_set#660559, sp#660696, sequence#660549, charge#660551, description#660547], [ipi#660546, symbol#660548, description#660547, sequence#660549, avg(cast(IR#660553 as double)) AS R#660730, exper#660560, exper_set#660559, ((instr(protein_sequence#660699, regexp_replace(sequence#660549, [.*-], )) + instr(sequence#660549, *)) - 3) AS residue#660731]
               :              :        :           +- Filter (((exper_set#660559 = Metabolites) && NOT sp#660696 LIKE Reverse%) && (sequence#660549 LIKE %C*% && (cast(R2#660556 as double) > cast(0.8 as double))))
               :              :        :              +- Join Inner, (ipi#660546 = ipi#660697)
               :              :        :                 :- SubqueryAlias `uniprot_sequences`
               :              :        :                 :  +- Relation[sp#660696,ipi#660697,origin_protein_description#660698,protein_sequence#660699] csv
               :              :        :                 +- SubqueryAlias `joined_spectral`
               :              :        :                    +- Project [ipi#660546, description#660547, symbol#660548, sequence#660549, mass#660550, charge#660551, segment#660552, IR#660553, INT#660554, NP#660555, R2#660556, entry#660557, exper#660560, exper_set#660559]
               :              :        :                       +- Filter (NOT ipi#660546 LIKE %Reverse% && ((spectral_counts#660646L > cast(1 as bigint)) || (cast(IR#660553 as int) < 20)))
               :              :        :                          +- Join Inner, (((sequence#660549 = sequence#660591) && (exper_set#660559 = exper_set#660592)) && (exper#660560 = exper#660593))
               :              :        :                             :- SubqueryAlias `raw_output_dta`
               :              :        :                             :  +- Relation[index#660545,ipi#660546,description#660547,symbol#660548,sequence#660549,mass#660550,charge#660551,segment#660552,IR#660553,INT#660554,NP#660555,R2#660556,entry#660557,link#660558,exper_set#660559,exper#660560] parquet
               :              :        :                             +- SubqueryAlias `spectral_data`
               :              :        :                                +- Aggregate [exper_set#660592, exper#660593, Sequence#660591], [Sequence#660591, exper#660593, exper_set#660592, count(Sequence#660591) AS spectral_counts#660646L]
               :              :        :                                   +- SubqueryAlias `clean`
               :              :        :                                      +- Project [locus#660577, sequence_count#660578, spectrum_count#660579, sequence_coverage#660580, length#660581, molwt#660582, pi#660583, validation_status#660584, nsaf#660585, empai#660586, descriptive_name#660587, hredundancy#660588, lredundancy#660589, mredundancy#660590, Sequence#660591, exper_set#660592, exper#660593]
               :              :        :                                         +- Filter Sequence#660591 RLIKE ^[KR-].[^.]+(?:[KR]..|..-)$
               :              :        :                                            +- SubqueryAlias `raw_spectral_dta`
               :              :        :                                               +- Relation[locus#660577,sequence_count#660578,spectrum_count#660579,sequence_coverage#660580,length#660581,molwt#660582,pi#660583,validation_status#660584,nsaf#660585,empai#660586,descriptive_name#660587,hredundancy#660588,lredundancy#660589,mredundancy#660590,Sequence#660591,exper_set#660592,exper#660593] parquet
               :              :        +- SubqueryAlias `group_names_separated`
               :              :           +- Relation[Group ##660714,Cimage link#660715,group_description#660716,cell_type#660717] csv
               :              +- SubqueryAlias `occurances_table`
               :                 +- Aggregate [ipi#660814, description#660815, symbol#660816], [count(distinct residue#660731) AS total_residues_detected#660809L, ipi#660814, description#660815, symbol#660816]
               :                    +- SubqueryAlias `table_by_residue`
               :                       +- Aggregate [ipi#660814, description#660815, symbol#660816, residue#660731, exper_set#660827, exper#660828, group_description#660716], [ipi#660814, description#660815, symbol#660816, exper_set#660827, exper#660828, residue#660731, group_description#660716, CASE WHEN (stddev_samp(R#660730) < (cast(0.6 as double) * avg(R#660730))) THEN avg(R#660730) ELSE CASE WHEN (min(R#660730) < cast(4 as double)) THEN min(R#660730) ELSE avg(R#660730) END END AS R#660758]
               :                          +- Join Inner, (exper#660828 = Cimage link#660715)
               :                             :- SubqueryAlias `table_by_peptide`
               :                             :  +- Project [ipi#660814, symbol#660816, description#660815, sequence#660817, R#660730, exper#660828, exper_set#660827, residue#660731]
               :                             :     +- Sort [ipi#660814 ASC NULLS FIRST], true
               :                             :        +- Aggregate [exper#660828, ipi#660814, ((instr(protein_sequence#660699, regexp_replace(sequence#660817, [.*-], )) + instr(sequence#660817, *)) - 3), symbol#660816, exper_set#660827, sp#660696, sequence#660817, charge#660819, description#660815], [ipi#660814, symbol#660816, description#660815, sequence#660817, avg(cast(IR#660821 as double)) AS R#660730, exper#660828, exper_set#660827, ((instr(protein_sequence#660699, regexp_replace(sequence#660817, [.*-], )) + instr(sequence#660817, *)) - 3) AS residue#660731]
               :                             :           +- Filter (((exper_set#660827 = Metabolites) && NOT sp#660696 LIKE Reverse%) && (sequence#660817 LIKE %C*% && (cast(R2#660824 as double) > cast(0.8 as double))))
               :                             :              +- Join Inner, (ipi#660814 = ipi#660697)
               :                             :                 :- SubqueryAlias `uniprot_sequences`
               :                             :                 :  +- Relation[sp#660696,ipi#660697,origin_protein_description#660698,protein_sequence#660699] csv
               :                             :                 +- SubqueryAlias `joined_spectral`
               :                             :                    +- Project [ipi#660814, description#660815, symbol#660816, sequence#660817, mass#660818, charge#660819, segment#660820, IR#660821, INT#660822, NP#660823, R2#660824, entry#660825, exper#660828, exper_set#660827]
               :                             :                       +- Filter (NOT ipi#660814 LIKE %Reverse% && ((spectral_counts#660646L > cast(1 as bigint)) || (cast(IR#660821 as int) < 20)))
               :                             :                          +- Join Inner, (((sequence#660817 = sequence#660591) && (exper_set#660827 = exper_set#660592)) && (exper#660828 = exper#660593))
               :                             :                             :- SubqueryAlias `raw_output_dta`
               :                             :                             :  +- Relation[index#660813,ipi#660814,description#660815,symbol#660816,sequence#660817,mass#660818,charge#660819,segment#660820,IR#660821,INT#660822,NP#660823,R2#660824,entry#660825,link#660826,exper_set#660827,exper#660828] parquet
               :                             :                             +- SubqueryAlias `spectral_data`
               :                             :                                +- Aggregate [exper_set#660592, exper#660593, Sequence#660591], [Sequence#660591, exper#660593, exper_set#660592, count(Sequence#660591) AS spectral_counts#660646L]
               :                             :                                   +- SubqueryAlias `clean`
               :                             :                                      +- Project [locus#660577, sequence_count#660578, spectrum_count#660579, sequence_coverage#660580, length#660581, molwt#660582, pi#660583, validation_status#660584, nsaf#660585, empai#660586, descriptive_name#660587, hredundancy#660588, lredundancy#660589, mredundancy#660590, Sequence#660591, exper_set#660592, exper#660593]
               :                             :                                         +- Filter Sequence#660591 RLIKE ^[KR-].[^.]+(?:[KR]..|..-)$
               :                             :                                            +- SubqueryAlias `raw_spectral_dta`
               :                             :                                               +- Relation[locus#660577,sequence_count#660578,spectrum_count#660579,sequence_coverage#660580,length#660581,molwt#660582,pi#660583,validation_status#660584,nsaf#660585,empai#660586,descriptive_name#660587,hredundancy#660588,lredundancy#660589,mredundancy#660590,Sequence#660591,exper_set#660592,exper#660593] parquet
               :                             +- SubqueryAlias `group_names_separated`
               :                                +- Relation[Group ##660714,Cimage link#660715,group_description#660716,cell_type#660717] csv
               +- SubqueryAlias `all_rows_bigger_than_four`
                  +- Aggregate [ipi#660546, description#660547, symbol#660548, residue#661752], [max(R#660811) AS max_R#661746, ipi#660546 AS ipi#661747, description#660547 AS description#661748, symbol#660548 AS symbol#661749, residue#661752]
                     +- SubqueryAlias `group_averages`
                        +- Filter (num_datasets#660810L > cast(1 as bigint))
                           +- Aggregate [ipi#660546, description#660547, symbol#660548, residue#661752, exper_set#660559, group_description#660716, total_residues_detected#660809L], [ipi#660546, description#660547, symbol#660548, residue#661752, group_description#660716, total_residues_detected#660809L, count(R#660758) AS num_datasets#660810L, CASE WHEN (stddev_samp(R#660758) < (cast(0.6 as double) * avg(R#660758))) THEN avg(R#660758) ELSE CASE WHEN (min(R#660758) < cast(4 as double)) THEN min(R#660758) ELSE avg(R#660758) END END AS R#660811]
                              +- Project [ipi#660546, description#660547, symbol#660548, exper_set#660559, exper#660560, residue#661752, group_description#660716, R#660758, total_residues_detected#660809L]
                                 +- Join Inner, (((ipi#660546 = ipi#660814) && (description#660547 = description#660815)) && (symbol#660548 = symbol#660816))
                                    :- SubqueryAlias `table_by_residue`
                                    :  +- Aggregate [ipi#660546, description#660547, symbol#660548, residue#661752, exper_set#660559, exper#660560, group_description#660716], [ipi#660546, description#660547, symbol#660548, exper_set#660559, exper#660560, residue#661752, group_description#660716, CASE WHEN (stddev_samp(R#661751) < (cast(0.6 as double) * avg(R#661751))) THEN avg(R#661751) ELSE CASE WHEN (min(R#661751) < cast(4 as double)) THEN min(R#661751) ELSE avg(R#661751) END END AS R#660758]
                                    :     +- Join Inner, (exper#660560 = Cimage link#660715)
                                    :        :- SubqueryAlias `table_by_peptide`
                                    :        :  +- Project [ipi#660546, symbol#660548, description#660547, sequence#660549, R#661751, exper#660560, exper_set#660559, residue#661752]
                                    :        :     +- Sort [ipi#660546 ASC NULLS FIRST], true
                                    :        :        +- Aggregate [exper#660560, ipi#660546, ((instr(protein_sequence#660699, regexp_replace(sequence#660549, [.*-], )) + instr(sequence#660549, *)) - 3), symbol#660548, exper_set#660559, sp#660696, sequence#660549, charge#660551, description#660547], [ipi#660546, symbol#660548, description#660547, sequence#660549, avg(cast(IR#660553 as double)) AS R#661751, exper#660560, exper_set#660559, ((instr(protein_sequence#660699, regexp_replace(sequence#660549, [.*-], )) + instr(sequence#660549, *)) - 3) AS residue#661752]
                                    :        :           +- Filter (((exper_set#660559 = Metabolites) && NOT sp#660696 LIKE Reverse%) && (sequence#660549 LIKE %C*% && (cast(R2#660556 as double) > cast(0.8 as double))))
                                    :        :              +- Join Inner, (ipi#660546 = ipi#660697)
                                    :        :                 :- SubqueryAlias `uniprot_sequences`
                                    :        :                 :  +- Relation[sp#660696,ipi#660697,origin_protein_description#660698,protein_sequence#660699] csv
                                    :        :                 +- SubqueryAlias `joined_spectral`
                                    :        :                    +- Project [ipi#660546, description#660547, symbol#660548, sequence#660549, mass#660550, charge#660551, segment#660552, IR#660553, INT#660554, NP#660555, R2#660556, entry#660557, exper#660560, exper_set#660559]
                                    :        :                       +- Filter (NOT ipi#660546 LIKE %Reverse% && ((spectral_counts#660646L > cast(1 as bigint)) || (cast(IR#660553 as int) < 20)))
                                    :        :                          +- Join Inner, (((sequence#660549 = sequence#660591) && (exper_set#660559 = exper_set#660592)) && (exper#660560 = exper#660593))
                                    :        :                             :- SubqueryAlias `raw_output_dta`
                                    :        :                             :  +- Relation[index#660545,ipi#660546,description#660547,symbol#660548,sequence#660549,mass#660550,charge#660551,segment#660552,IR#660553,INT#660554,NP#660555,R2#660556,entry#660557,link#660558,exper_set#660559,exper#660560] parquet
                                    :        :                             +- SubqueryAlias `spectral_data`
                                    :        :                                +- Aggregate [exper_set#660592, exper#660593, Sequence#660591], [Sequence#660591, exper#660593, exper_set#660592, count(Sequence#660591) AS spectral_counts#660646L]
                                    :        :                                   +- SubqueryAlias `clean`
                                    :        :                                      +- Project [locus#660577, sequence_count#660578, spectrum_count#660579, sequence_coverage#660580, length#660581, molwt#660582, pi#660583, validation_status#660584, nsaf#660585, empai#660586, descriptive_name#660587, hredundancy#660588, lredundancy#660589, mredundancy#660590, Sequence#660591, exper_set#660592, exper#660593]
                                    :        :                                         +- Filter Sequence#660591 RLIKE ^[KR-].[^.]+(?:[KR]..|..-)$
                                    :        :                                            +- SubqueryAlias `raw_spectral_dta`
                                    :        :                                               +- Relation[locus#660577,sequence_count#660578,spectrum_count#660579,sequence_coverage#660580,length#660581,molwt#660582,pi#660583,validation_status#660584,nsaf#660585,empai#660586,descriptive_name#660587,hredundancy#660588,lredundancy#660589,mredundancy#660590,Sequence#660591,exper_set#660592,exper#660593] parquet
                                    :        +- SubqueryAlias `group_names_separated`
                                    :           +- Relation[Group ##660714,Cimage link#660715,group_description#660716,cell_type#660717] csv
                                    +- SubqueryAlias `occurances_table`
                                       +- Aggregate [ipi#660814, description#660815, symbol#660816], [count(distinct residue#661752) AS total_residues_detected#660809L, ipi#660814, description#660815, symbol#660816]
                                          +- SubqueryAlias `table_by_residue`
                                             +- Aggregate [ipi#660814, description#660815, symbol#660816, residue#661752, exper_set#660827, exper#660828, group_description#660716], [ipi#660814, description#660815, symbol#660816, exper_set#660827, exper#660828, residue#661752, group_description#660716, CASE WHEN (stddev_samp(R#661751) < (cast(0.6 as double) * avg(R#661751))) THEN avg(R#661751) ELSE CASE WHEN (min(R#661751) < cast(4 as double)) THEN min(R#661751) ELSE avg(R#661751) END END AS R#660758]
                                                +- Join Inner, (exper#660828 = Cimage link#660715)
                                                   :- SubqueryAlias `table_by_peptide`
                                                   :  +- !Project [ipi#660814, symbol#660816, description#660815, sequence#660817, R#661751, exper#660828, exper_set#660827, residue#661752]
                                                   :     +- Sort [ipi#660814 ASC NULLS FIRST], true
                                                   :        +- Aggregate [exper#660828, ipi#660814, ((instr(protein_sequence#660699, regexp_replace(sequence#660817, [.*-], )) + instr(sequence#660817, *)) - 3), symbol#660816, exper_set#660827, sp#660696, sequence#660817, charge#660819, description#660815], [ipi#660814, symbol#660816, description#660815, sequence#660817, avg(cast(IR#660821 as double)) AS R#660730, exper#660828, exper_set#660827, ((instr(protein_sequence#660699, regexp_replace(sequence#660817, [.*-], )) + instr(sequence#660817, *)) - 3) AS residue#660731]
                                                   :           +- Filter (((exper_set#660827 = Metabolites) && NOT sp#660696 LIKE Reverse%) && (sequence#660817 LIKE %C*% && (cast(R2#660824 as double) > cast(0.8 as double))))
                                                   :              +- Join Inner, (ipi#660814 = ipi#660697)
                                                   :                 :- SubqueryAlias `uniprot_sequences`
                                                   :                 :  +- Relation[sp#660696,ipi#660697,origin_protein_description#660698,protein_sequence#660699] csv
                                                   :                 +- SubqueryAlias `joined_spectral`
                                                   :                    +- Project [ipi#660814, description#660815, symbol#660816, sequence#660817, mass#660818, charge#660819, segment#660820, IR#660821, INT#660822, NP#660823, R2#660824, entry#660825, exper#660828, exper_set#660827]
                                                   :                       +- Filter (NOT ipi#660814 LIKE %Reverse% && ((spectral_counts#660646L > cast(1 as bigint)) || (cast(IR#660821 as int) < 20)))
                                                   :                          +- Join Inner, (((sequence#660817 = sequence#660591) && (exper_set#660827 = exper_set#660592)) && (exper#660828 = exper#660593))
                                                   :                             :- SubqueryAlias `raw_output_dta`
                                                   :                             :  +- Relation[index#660813,ipi#660814,description#660815,symbol#660816,sequence#660817,mass#660818,charge#660819,segment#660820,IR#660821,INT#660822,NP#660823,R2#660824,entry#660825,link#660826,exper_set#660827,exper#660828] parquet
                                                   :                             +- SubqueryAlias `spectral_data`
                                                   :                                +- Aggregate [exper_set#660592, exper#660593, Sequence#660591], [Sequence#660591, exper#660593, exper_set#660592, count(Sequence#660591) AS spectral_counts#660646L]
                                                   :                                   +- SubqueryAlias `clean`
                                                   :                                      +- Project [locus#660577, sequence_count#660578, spectrum_count#660579, sequence_coverage#660580, length#660581, molwt#660582, pi#660583, validation_status#660584, nsaf#660585, empai#660586, descriptive_name#660587, hredundancy#660588, lredundancy#660589, mredundancy#660590, Sequence#660591, exper_set#660592, exper#660593]
                                                   :                                         +- Filter Sequence#660591 RLIKE ^[KR-].[^.]+(?:[KR]..|..-)$
                                                   :                                            +- SubqueryAlias `raw_spectral_dta`
                                                   :                                               +- Relation[locus#660577,sequence_count#660578,spectrum_count#660579,sequence_coverage#660580,length#660581,molwt#660582,pi#660583,validation_status#660584,nsaf#660585,empai#660586,descriptive_name#660587,hredundancy#660588,lredundancy#660589,mredundancy#660590,Sequence#660591,exper_set#660592,exper#660593] parquet
                                                   +- SubqueryAlias `group_names_separated`
                                                      +- Relation[Group ##660714,Cimage link#660715,group_description#660716,cell_type#660717] csv
      
      	at org.apache.spark.sql.catalyst.analysis.CheckAnalysis$class.failAnalysis(CheckAnalysis.scala:42)
      	at org.apache.spark.sql.catalyst.analysis.Analyzer.failAnalysis(Analyzer.scala:95)
      	at org.apache.spark.sql.catalyst.analysis.CheckAnalysis$$anonfun$checkAnalysis$1.apply(CheckAnalysis.scala:326)
      	at org.apache.spark.sql.catalyst.analysis.CheckAnalysis$$anonfun$checkAnalysis$1.apply(CheckAnalysis.scala:85)
      	at org.apache.spark.sql.catalyst.trees.TreeNode.foreachUp(TreeNode.scala:127)
      	at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$foreachUp$1.apply(TreeNode.scala:126)
      	at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$foreachUp$1.apply(TreeNode.scala:126)
      	at scala.collection.immutable.List.foreach(List.scala:392)
      	at org.apache.spark.sql.catalyst.trees.TreeNode.foreachUp(TreeNode.scala:126)
      	at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$foreachUp$1.apply(TreeNode.scala:126)
      	at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$foreachUp$1.apply(TreeNode.scala:126)
      	at scala.collection.immutable.List.foreach(List.scala:392)
      	at org.apache.spark.sql.catalyst.trees.TreeNode.foreachUp(TreeNode.scala:126)
      	at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$foreachUp$1.apply(TreeNode.scala:126)
      	at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$foreachUp$1.apply(TreeNode.scala:126)
      	at scala.collection.immutable.List.foreach(List.scala:392)
      	at org.apache.spark.sql.catalyst.trees.TreeNode.foreachUp(TreeNode.scala:126)
      	at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$foreachUp$1.apply(TreeNode.scala:126)
      	at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$foreachUp$1.apply(TreeNode.scala:126)
      	at scala.collection.immutable.List.foreach(List.scala:392)
      	at org.apache.spark.sql.catalyst.trees.TreeNode.foreachUp(TreeNode.scala:126)
      	at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$foreachUp$1.apply(TreeNode.scala:126)
      	at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$foreachUp$1.apply(TreeNode.scala:126)
      	at scala.collection.immutable.List.foreach(List.scala:392)
      	at org.apache.spark.sql.catalyst.trees.TreeNode.foreachUp(TreeNode.scala:126)
      	at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$foreachUp$1.apply(TreeNode.scala:126)
      	at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$foreachUp$1.apply(TreeNode.scala:126)
      	at scala.collection.immutable.List.foreach(List.scala:392)
      	at org.apache.spark.sql.catalyst.trees.TreeNode.foreachUp(TreeNode.scala:126)
      	at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$foreachUp$1.apply(TreeNode.scala:126)
      	at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$foreachUp$1.apply(TreeNode.scala:126)
      	at scala.collection.immutable.List.foreach(List.scala:392)
      	at org.apache.spark.sql.catalyst.trees.TreeNode.foreachUp(TreeNode.scala:126)
      	at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$foreachUp$1.apply(TreeNode.scala:126)
      	at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$foreachUp$1.apply(TreeNode.scala:126)
      	at scala.collection.immutable.List.foreach(List.scala:392)
      	at org.apache.spark.sql.catalyst.trees.TreeNode.foreachUp(TreeNode.scala:126)
      	at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$foreachUp$1.apply(TreeNode.scala:126)
      	at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$foreachUp$1.apply(TreeNode.scala:126)
      	at scala.collection.immutable.List.foreach(List.scala:392)
      	at org.apache.spark.sql.catalyst.trees.TreeNode.foreachUp(TreeNode.scala:126)
      	at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$foreachUp$1.apply(TreeNode.scala:126)
      	at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$foreachUp$1.apply(TreeNode.scala:126)
      	at scala.collection.immutable.List.foreach(List.scala:392)
      	at org.apache.spark.sql.catalyst.trees.TreeNode.foreachUp(TreeNode.scala:126)
      	at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$foreachUp$1.apply(TreeNode.scala:126)
      	at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$foreachUp$1.apply(TreeNode.scala:126)
      	at scala.collection.immutable.List.foreach(List.scala:392)
      	at org.apache.spark.sql.catalyst.trees.TreeNode.foreachUp(TreeNode.scala:126)
      	at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$foreachUp$1.apply(TreeNode.scala:126)
      	at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$foreachUp$1.apply(TreeNode.scala:126)
      	at scala.collection.immutable.List.foreach(List.scala:392)
      	at org.apache.spark.sql.catalyst.trees.TreeNode.foreachUp(TreeNode.scala:126)
      	at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$foreachUp$1.apply(TreeNode.scala:126)
      	at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$foreachUp$1.apply(TreeNode.scala:126)
      	at scala.collection.immutable.List.foreach(List.scala:392)
      	at org.apache.spark.sql.catalyst.trees.TreeNode.foreachUp(TreeNode.scala:126)
      	at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$foreachUp$1.apply(TreeNode.scala:126)
      	at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$foreachUp$1.apply(TreeNode.scala:126)
      	at scala.collection.immutable.List.foreach(List.scala:392)
      	at org.apache.spark.sql.catalyst.trees.TreeNode.foreachUp(TreeNode.scala:126)
      	at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$foreachUp$1.apply(TreeNode.scala:126)
      	at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$foreachUp$1.apply(TreeNode.scala:126)
      	at scala.collection.immutable.List.foreach(List.scala:392)
      	at org.apache.spark.sql.catalyst.trees.TreeNode.foreachUp(TreeNode.scala:126)
      	at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$foreachUp$1.apply(TreeNode.scala:126)
      	at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$foreachUp$1.apply(TreeNode.scala:126)
      	at scala.collection.immutable.List.foreach(List.scala:392)
      	at org.apache.spark.sql.catalyst.trees.TreeNode.foreachUp(TreeNode.scala:126)
      	at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$foreachUp$1.apply(TreeNode.scala:126)
      	at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$foreachUp$1.apply(TreeNode.scala:126)
      	at scala.collection.immutable.List.foreach(List.scala:392)
      	at org.apache.spark.sql.catalyst.trees.TreeNode.foreachUp(TreeNode.scala:126)
      	at org.apache.spark.sql.catalyst.analysis.CheckAnalysis$class.checkAnalysis(CheckAnalysis.scala:85)
      	at org.apache.spark.sql.catalyst.analysis.Analyzer.checkAnalysis(Analyzer.scala:95)
      	at org.apache.spark.sql.catalyst.analysis.Analyzer$$anonfun$executeAndCheck$1.apply(Analyzer.scala:108)
      	at org.apache.spark.sql.catalyst.analysis.Analyzer$$anonfun$executeAndCheck$1.apply(Analyzer.scala:105)
      	at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper$.markInAnalyzer(AnalysisHelper.scala:201)
      	at org.apache.spark.sql.catalyst.analysis.Analyzer.executeAndCheck(Analyzer.scala:105)
      	at org.apache.spark.sql.execution.QueryExecution.analyzed$lzycompute(QueryExecution.scala:57)
      	at org.apache.spark.sql.execution.QueryExecution.analyzed(QueryExecution.scala:55)
      	at org.apache.spark.sql.execution.QueryExecution.assertAnalyzed(QueryExecution.scala:47)
      	at org.apache.spark.sql.Dataset$.ofRows(Dataset.scala:78)
      	at org.apache.spark.sql.SparkSession.sql(SparkSession.scala:642)
      	at sun.reflect.GeneratedMethodAccessor90.invoke(Unknown Source)
      	at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
      	at java.lang.reflect.Method.invoke(Method.java:498)
      	at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
      	at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)
      	at py4j.Gateway.invoke(Gateway.java:282)
      	at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
      	at py4j.commands.CallCommand.execute(CallCommand.java:79)
      	at py4j.GatewayConnection.run(GatewayConnection.java:238)
      	at java.lang.Thread.run(Thread.java:748)
      
      
      During handling of the above exception, another exception occurred:
      
      AnalysisException                         Traceback (most recent call last)
      <ipython-input-295-d3f078949c8c> in <module>
            9 ) AS all_rows_bigger_than_four
           10 WHERE all_rows_bigger_than_four.max_R >= 4.0
      ---> 11 """)
           12 filtered_group_averages.registerTempTable("filtered_group_averages")
           13 sql.sql("""SELECT * FROM filtered_group_averages
      
      /usr/local/spark/python/pyspark/sql/context.py in sql(self, sqlQuery)
          356         [Row(f1=1, f2=u'row1'), Row(f1=2, f2=u'row2'), Row(f1=3, f2=u'row3')]
          357         """
      --> 358         return self.sparkSession.sql(sqlQuery)
          359 
          360     @since(1.0)
      
      /usr/local/spark/python/pyspark/sql/session.py in sql(self, sqlQuery)
          765         [Row(f1=1, f2=u'row1'), Row(f1=2, f2=u'row2'), Row(f1=3, f2=u'row3')]
          766         """
      --> 767         return DataFrame(self._jsparkSession.sql(sqlQuery), self._wrapped)
          768 
          769     @since(2.0)
      
      /usr/local/spark/python/lib/py4j-0.10.7-src.zip/py4j/java_gateway.py in __call__(self, *args)
         1255         answer = self.gateway_client.send_command(command)
         1256         return_value = get_return_value(
      -> 1257             answer, self.gateway_client, self.target_id, self.name)
         1258 
         1259         for temp_arg in temp_args:
      
      /usr/local/spark/python/pyspark/sql/utils.py in deco(*a, **kw)
           67                                              e.java_exception.getStackTrace()))
           68             if s.startswith('org.apache.spark.sql.AnalysisException: '):
      ---> 69                 raise AnalysisException(s.split(': ', 1)[1], stackTrace)
           70             if s.startswith('org.apache.spark.sql.catalyst.analysis'):
           71                 raise AnalysisException(s.split(': ', 1)[1], stackTrace)
      
      AnalysisException: 'Resolved attribute(s) R#661751,residue#661752 missing from ipi#660814,residue#660731,exper_set#660827,R#660730,description#660815,sequence#660817,exper#660828,symbol#660816 in operator !Project [ipi#660814, symbol#660816, description#660815, sequence#660817, R#661751, exper#660828, exper_set#660827, residue#661752]. Attribute(s) with the same name appear in the operation: R,residue. Please check if the right attribute(s) are used.;;\nProject [ipi#660546, description#660547, symbol#660548, residue#660731, group_description#660716, total_residues_detected#660809L, num_datasets#660810L, R#660811]\n+- Filter (max_R#661746 >= cast(4.0 as double))\n   +- Project [ipi#660546, description#660547, symbol#660548, residue#660731, group_description#660716, total_residues_detected#660809L, num_datasets#660810L, R#660811, max_R#661746]\n      +- Join Inner, ((((ipi#660546 = ipi#661747) && (description#660547 = description#661748)) && (symbol#660548 = symbol#661749)) && (residue#660731 = residue#661752))\n         :- SubqueryAlias `group_averages`\n         :  +- Filter (num_datasets#660810L > cast(1 as bigint))\n         :     +- Aggregate [ipi#660546, description#660547, symbol#660548, residue#660731, exper_set#660559, group_description#660716, total_residues_detected#660809L], [ipi#660546, description#660547, symbol#660548, residue#660731, group_description#660716, total_residues_detected#660809L, count(R#660758) AS num_datasets#660810L, CASE WHEN (stddev_samp(R#660758) < (cast(0.6 as double) * avg(R#660758))) THEN avg(R#660758) ELSE CASE WHEN (min(R#660758) < cast(4 as double)) THEN min(R#660758) ELSE avg(R#660758) END END AS R#660811]\n         :        +- Project [ipi#660546, description#660547, symbol#660548, exper_set#660559, exper#660560, residue#660731, group_description#660716, R#660758, total_residues_detected#660809L]\n         :           +- Join Inner, (((ipi#660546 = ipi#660814) && (description#660547 = description#660815)) && (symbol#660548 = symbol#660816))\n         :              :- SubqueryAlias `table_by_residue`\n         :              :  +- Aggregate [ipi#660546, description#660547, symbol#660548, residue#660731, exper_set#660559, exper#660560, group_description#660716], [ipi#660546, description#660547, symbol#660548, exper_set#660559, exper#660560, residue#660731, group_description#660716, CASE WHEN (stddev_samp(R#660730) < (cast(0.6 as double) * avg(R#660730))) THEN avg(R#660730) ELSE CASE WHEN (min(R#660730) < cast(4 as double)) THEN min(R#660730) ELSE avg(R#660730) END END AS R#660758]\n         :              :     +- Join Inner, (exper#660560 = Cimage link#660715)\n         :              :        :- SubqueryAlias `table_by_peptide`\n         :              :        :  +- Project [ipi#660546, symbol#660548, description#660547, sequence#660549, R#660730, exper#660560, exper_set#660559, residue#660731]\n         :              :        :     +- Sort [ipi#660546 ASC NULLS FIRST], true\n         :              :        :        +- Aggregate [exper#660560, ipi#660546, ((instr(protein_sequence#660699, regexp_replace(sequence#660549, [.*-], )) + instr(sequence#660549, *)) - 3), symbol#660548, exper_set#660559, sp#660696, sequence#660549, charge#660551, description#660547], [ipi#660546, symbol#660548, description#660547, sequence#660549, avg(cast(IR#660553 as double)) AS R#660730, exper#660560, exper_set#660559, ((instr(protein_sequence#660699, regexp_replace(sequence#660549, [.*-], )) + instr(sequence#660549, *)) - 3) AS residue#660731]\n         :              :        :           +- Filter (((exper_set#660559 = Metabolites) && NOT sp#660696 LIKE Reverse%) && (sequence#660549 LIKE %C*% && (cast(R2#660556 as double) > cast(0.8 as double))))\n         :              :        :              +- Join Inner, (ipi#660546 = ipi#660697)\n         :              :        :                 :- SubqueryAlias `uniprot_sequences`\n         :              :        :                 :  +- Relation[sp#660696,ipi#660697,origin_protein_description#660698,protein_sequence#660699] csv\n         :              :        :                 +- SubqueryAlias `joined_spectral`\n         :              :        :                    +- Project [ipi#660546, description#660547, symbol#660548, sequence#660549, mass#660550, charge#660551, segment#660552, IR#660553, INT#660554, NP#660555, R2#660556, entry#660557, exper#660560, exper_set#660559]\n         :              :        :                       +- Filter (NOT ipi#660546 LIKE %Reverse% && ((spectral_counts#660646L > cast(1 as bigint)) || (cast(IR#660553 as int) < 20)))\n         :              :        :                          +- Join Inner, (((sequence#660549 = sequence#660591) && (exper_set#660559 = exper_set#660592)) && (exper#660560 = exper#660593))\n         :              :        :                             :- SubqueryAlias `raw_output_dta`\n         :              :        :                             :  +- Relation[index#660545,ipi#660546,description#660547,symbol#660548,sequence#660549,mass#660550,charge#660551,segment#660552,IR#660553,INT#660554,NP#660555,R2#660556,entry#660557,link#660558,exper_set#660559,exper#660560] parquet\n         :              :        :                             +- SubqueryAlias `spectral_data`\n         :              :        :                                +- Aggregate [exper_set#660592, exper#660593, Sequence#660591], [Sequence#660591, exper#660593, exper_set#660592, count(Sequence#660591) AS spectral_counts#660646L]\n         :              :        :                                   +- SubqueryAlias `clean`\n         :              :        :                                      +- Project [locus#660577, sequence_count#660578, spectrum_count#660579, sequence_coverage#660580, length#660581, molwt#660582, pi#660583, validation_status#660584, nsaf#660585, empai#660586, descriptive_name#660587, hredundancy#660588, lredundancy#660589, mredundancy#660590, Sequence#660591, exper_set#660592, exper#660593]\n         :              :        :                                         +- Filter Sequence#660591 RLIKE ^[KR-].[^.]+(?:[KR]..|..-)$\n         :              :        :                                            +- SubqueryAlias `raw_spectral_dta`\n         :              :        :                                               +- Relation[locus#660577,sequence_count#660578,spectrum_count#660579,sequence_coverage#660580,length#660581,molwt#660582,pi#660583,validation_status#660584,nsaf#660585,empai#660586,descriptive_name#660587,hredundancy#660588,lredundancy#660589,mredundancy#660590,Sequence#660591,exper_set#660592,exper#660593] parquet\n         :              :        +- SubqueryAlias `group_names_separated`\n         :              :           +- Relation[Group ##660714,Cimage link#660715,group_description#660716,cell_type#660717] csv\n         :              +- SubqueryAlias `occurances_table`\n         :                 +- Aggregate [ipi#660814, description#660815, symbol#660816], [count(distinct residue#660731) AS total_residues_detected#660809L, ipi#660814, description#660815, symbol#660816]\n         :                    +- SubqueryAlias `table_by_residue`\n         :                       +- Aggregate [ipi#660814, description#660815, symbol#660816, residue#660731, exper_set#660827, exper#660828, group_description#660716], [ipi#660814, description#660815, symbol#660816, exper_set#660827, exper#660828, residue#660731, group_description#660716, CASE WHEN (stddev_samp(R#660730) < (cast(0.6 as double) * avg(R#660730))) THEN avg(R#660730) ELSE CASE WHEN (min(R#660730) < cast(4 as double)) THEN min(R#660730) ELSE avg(R#660730) END END AS R#660758]\n         :                          +- Join Inner, (exper#660828 = Cimage link#660715)\n         :                             :- SubqueryAlias `table_by_peptide`\n         :                             :  +- Project [ipi#660814, symbol#660816, description#660815, sequence#660817, R#660730, exper#660828, exper_set#660827, residue#660731]\n         :                             :     +- Sort [ipi#660814 ASC NULLS FIRST], true\n         :                             :        +- Aggregate [exper#660828, ipi#660814, ((instr(protein_sequence#660699, regexp_replace(sequence#660817, [.*-], )) + instr(sequence#660817, *)) - 3), symbol#660816, exper_set#660827, sp#660696, sequence#660817, charge#660819, description#660815], [ipi#660814, symbol#660816, description#660815, sequence#660817, avg(cast(IR#660821 as double)) AS R#660730, exper#660828, exper_set#660827, ((instr(protein_sequence#660699, regexp_replace(sequence#660817, [.*-], )) + instr(sequence#660817, *)) - 3) AS residue#660731]\n         :                             :           +- Filter (((exper_set#660827 = Metabolites) && NOT sp#660696 LIKE Reverse%) && (sequence#660817 LIKE %C*% && (cast(R2#660824 as double) > cast(0.8 as double))))\n         :                             :              +- Join Inner, (ipi#660814 = ipi#660697)\n         :                             :                 :- SubqueryAlias `uniprot_sequences`\n         :                             :                 :  +- Relation[sp#660696,ipi#660697,origin_protein_description#660698,protein_sequence#660699] csv\n         :                             :                 +- SubqueryAlias `joined_spectral`\n         :                             :                    +- Project [ipi#660814, description#660815, symbol#660816, sequence#660817, mass#660818, charge#660819, segment#660820, IR#660821, INT#660822, NP#660823, R2#660824, entry#660825, exper#660828, exper_set#660827]\n         :                             :                       +- Filter (NOT ipi#660814 LIKE %Reverse% && ((spectral_counts#660646L > cast(1 as bigint)) || (cast(IR#660821 as int) < 20)))\n         :                             :                          +- Join Inner, (((sequence#660817 = sequence#660591) && (exper_set#660827 = exper_set#660592)) && (exper#660828 = exper#660593))\n         :                             :                             :- SubqueryAlias `raw_output_dta`\n         :                             :                             :  +- Relation[index#660813,ipi#660814,description#660815,symbol#660816,sequence#660817,mass#660818,charge#660819,segment#660820,IR#660821,INT#660822,NP#660823,R2#660824,entry#660825,link#660826,exper_set#660827,exper#660828] parquet\n         :                             :                             +- SubqueryAlias `spectral_data`\n         :                             :                                +- Aggregate [exper_set#660592, exper#660593, Sequence#660591], [Sequence#660591, exper#660593, exper_set#660592, count(Sequence#660591) AS spectral_counts#660646L]\n         :                             :                                   +- SubqueryAlias `clean`\n         :                             :                                      +- Project [locus#660577, sequence_count#660578, spectrum_count#660579, sequence_coverage#660580, length#660581, molwt#660582, pi#660583, validation_status#660584, nsaf#660585, empai#660586, descriptive_name#660587, hredundancy#660588, lredundancy#660589, mredundancy#660590, Sequence#660591, exper_set#660592, exper#660593]\n         :                             :                                         +- Filter Sequence#660591 RLIKE ^[KR-].[^.]+(?:[KR]..|..-)$\n         :                             :                                            +- SubqueryAlias `raw_spectral_dta`\n         :                             :                                               +- Relation[locus#660577,sequence_count#660578,spectrum_count#660579,sequence_coverage#660580,length#660581,molwt#660582,pi#660583,validation_status#660584,nsaf#660585,empai#660586,descriptive_name#660587,hredundancy#660588,lredundancy#660589,mredundancy#660590,Sequence#660591,exper_set#660592,exper#660593] parquet\n         :                             +- SubqueryAlias `group_names_separated`\n         :                                +- Relation[Group ##660714,Cimage link#660715,group_description#660716,cell_type#660717] csv\n         +- SubqueryAlias `all_rows_bigger_than_four`\n            +- Aggregate [ipi#660546, description#660547, symbol#660548, residue#661752], [max(R#660811) AS max_R#661746, ipi#660546 AS ipi#661747, description#660547 AS description#661748, symbol#660548 AS symbol#661749, residue#661752]\n               +- SubqueryAlias `group_averages`\n                  +- Filter (num_datasets#660810L > cast(1 as bigint))\n                     +- Aggregate [ipi#660546, description#660547, symbol#660548, residue#661752, exper_set#660559, group_description#660716, total_residues_detected#660809L], [ipi#660546, description#660547, symbol#660548, residue#661752, group_description#660716, total_residues_detected#660809L, count(R#660758) AS num_datasets#660810L, CASE WHEN (stddev_samp(R#660758) < (cast(0.6 as double) * avg(R#660758))) THEN avg(R#660758) ELSE CASE WHEN (min(R#660758) < cast(4 as double)) THEN min(R#660758) ELSE avg(R#660758) END END AS R#660811]\n                        +- Project [ipi#660546, description#660547, symbol#660548, exper_set#660559, exper#660560, residue#661752, group_description#660716, R#660758, total_residues_detected#660809L]\n                           +- Join Inner, (((ipi#660546 = ipi#660814) && (description#660547 = description#660815)) && (symbol#660548 = symbol#660816))\n                              :- SubqueryAlias `table_by_residue`\n                              :  +- Aggregate [ipi#660546, description#660547, symbol#660548, residue#661752, exper_set#660559, exper#660560, group_description#660716], [ipi#660546, description#660547, symbol#660548, exper_set#660559, exper#660560, residue#661752, group_description#660716, CASE WHEN (stddev_samp(R#661751) < (cast(0.6 as double) * avg(R#661751))) THEN avg(R#661751) ELSE CASE WHEN (min(R#661751) < cast(4 as double)) THEN min(R#661751) ELSE avg(R#661751) END END AS R#660758]\n                              :     +- Join Inner, (exper#660560 = Cimage link#660715)\n                              :        :- SubqueryAlias `table_by_peptide`\n                              :        :  +- Project [ipi#660546, symbol#660548, description#660547, sequence#660549, R#661751, exper#660560, exper_set#660559, residue#661752]\n                              :        :     +- Sort [ipi#660546 ASC NULLS FIRST], true\n                              :        :        +- Aggregate [exper#660560, ipi#660546, ((instr(protein_sequence#660699, regexp_replace(sequence#660549, [.*-], )) + instr(sequence#660549, *)) - 3), symbol#660548, exper_set#660559, sp#660696, sequence#660549, charge#660551, description#660547], [ipi#660546, symbol#660548, description#660547, sequence#660549, avg(cast(IR#660553 as double)) AS R#661751, exper#660560, exper_set#660559, ((instr(protein_sequence#660699, regexp_replace(sequence#660549, [.*-], )) + instr(sequence#660549, *)) - 3) AS residue#661752]\n                              :        :           +- Filter (((exper_set#660559 = Metabolites) && NOT sp#660696 LIKE Reverse%) && (sequence#660549 LIKE %C*% && (cast(R2#660556 as double) > cast(0.8 as double))))\n                              :        :              +- Join Inner, (ipi#660546 = ipi#660697)\n                              :        :                 :- SubqueryAlias `uniprot_sequences`\n                              :        :                 :  +- Relation[sp#660696,ipi#660697,origin_protein_description#660698,protein_sequence#660699] csv\n                              :        :                 +- SubqueryAlias `joined_spectral`\n                              :        :                    +- Project [ipi#660546, description#660547, symbol#660548, sequence#660549, mass#660550, charge#660551, segment#660552, IR#660553, INT#660554, NP#660555, R2#660556, entry#660557, exper#660560, exper_set#660559]\n                              :        :                       +- Filter (NOT ipi#660546 LIKE %Reverse% && ((spectral_counts#660646L > cast(1 as bigint)) || (cast(IR#660553 as int) < 20)))\n                              :        :                          +- Join Inner, (((sequence#660549 = sequence#660591) && (exper_set#660559 = exper_set#660592)) && (exper#660560 = exper#660593))\n                              :        :                             :- SubqueryAlias `raw_output_dta`\n                              :        :                             :  +- Relation[index#660545,ipi#660546,description#660547,symbol#660548,sequence#660549,mass#660550,charge#660551,segment#660552,IR#660553,INT#660554,NP#660555,R2#660556,entry#660557,link#660558,exper_set#660559,exper#660560] parquet\n                              :        :                             +- SubqueryAlias `spectral_data`\n                              :        :                                +- Aggregate [exper_set#660592, exper#660593, Sequence#660591], [Sequence#660591, exper#660593, exper_set#660592, count(Sequence#660591) AS spectral_counts#660646L]\n                              :        :                                   +- SubqueryAlias `clean`\n                              :        :                                      +- Project [locus#660577, sequence_count#660578, spectrum_count#660579, sequence_coverage#660580, length#660581, molwt#660582, pi#660583, validation_status#660584, nsaf#660585, empai#660586, descriptive_name#660587, hredundancy#660588, lredundancy#660589, mredundancy#660590, Sequence#660591, exper_set#660592, exper#660593]\n                              :        :                                         +- Filter Sequence#660591 RLIKE ^[KR-].[^.]+(?:[KR]..|..-)$\n                              :        :                                            +- SubqueryAlias `raw_spectral_dta`\n                              :        :                                               +- Relation[locus#660577,sequence_count#660578,spectrum_count#660579,sequence_coverage#660580,length#660581,molwt#660582,pi#660583,validation_status#660584,nsaf#660585,empai#660586,descriptive_name#660587,hredundancy#660588,lredundancy#660589,mredundancy#660590,Sequence#660591,exper_set#660592,exper#660593] parquet\n                              :        +- SubqueryAlias `group_names_separated`\n                              :           +- Relation[Group ##660714,Cimage link#660715,group_description#660716,cell_type#660717] csv\n                              +- SubqueryAlias `occurances_table`\n                                 +- Aggregate [ipi#660814, description#660815, symbol#660816], [count(distinct residue#661752) AS total_residues_detected#660809L, ipi#660814, description#660815, symbol#660816]\n                                    +- SubqueryAlias `table_by_residue`\n                                       +- Aggregate [ipi#660814, description#660815, symbol#660816, residue#661752, exper_set#660827, exper#660828, group_description#660716], [ipi#660814, description#660815, symbol#660816, exper_set#660827, exper#660828, residue#661752, group_description#660716, CASE WHEN (stddev_samp(R#661751) < (cast(0.6 as double) * avg(R#661751))) THEN avg(R#661751) ELSE CASE WHEN (min(R#661751) < cast(4 as double)) THEN min(R#661751) ELSE avg(R#661751) END END AS R#660758]\n                                          +- Join Inner, (exper#660828 = Cimage link#660715)\n                                             :- SubqueryAlias `table_by_peptide`\n                                             :  +- !Project [ipi#660814, symbol#660816, description#660815, sequence#660817, R#661751, exper#660828, exper_set#660827, residue#661752]\n                                             :     +- Sort [ipi#660814 ASC NULLS FIRST], true\n                                             :        +- Aggregate [exper#660828, ipi#660814, ((instr(protein_sequence#660699, regexp_replace(sequence#660817, [.*-], )) + instr(sequence#660817, *)) - 3), symbol#660816, exper_set#660827, sp#660696, sequence#660817, charge#660819, description#660815], [ipi#660814, symbol#660816, description#660815, sequence#660817, avg(cast(IR#660821 as double)) AS R#660730, exper#660828, exper_set#660827, ((instr(protein_sequence#660699, regexp_replace(sequence#660817, [.*-], )) + instr(sequence#660817, *)) - 3) AS residue#660731]\n                                             :           +- Filter (((exper_set#660827 = Metabolites) && NOT sp#660696 LIKE Reverse%) && (sequence#660817 LIKE %C*% && (cast(R2#660824 as double) > cast(0.8 as double))))\n                                             :              +- Join Inner, (ipi#660814 = ipi#660697)\n                                             :                 :- SubqueryAlias `uniprot_sequences`\n                                             :                 :  +- Relation[sp#660696,ipi#660697,origin_protein_description#660698,protein_sequence#660699] csv\n                                             :                 +- SubqueryAlias `joined_spectral`\n                                             :                    +- Project [ipi#660814, description#660815, symbol#660816, sequence#660817, mass#660818, charge#660819, segment#660820, IR#660821, INT#660822, NP#660823, R2#660824, entry#660825, exper#660828, exper_set#660827]\n                                             :                       +- Filter (NOT ipi#660814 LIKE %Reverse% && ((spectral_counts#660646L > cast(1 as bigint)) || (cast(IR#660821 as int) < 20)))\n                                             :                          +- Join Inner, (((sequence#660817 = sequence#660591) && (exper_set#660827 = exper_set#660592)) && (exper#660828 = exper#660593))\n                                             :                             :- SubqueryAlias `raw_output_dta`\n                                             :                             :  +- Relation[index#660813,ipi#660814,description#660815,symbol#660816,sequence#660817,mass#660818,charge#660819,segment#660820,IR#660821,INT#660822,NP#660823,R2#660824,entry#660825,link#660826,exper_set#660827,exper#660828] parquet\n                                             :                             +- SubqueryAlias `spectral_data`\n                                             :                                +- Aggregate [exper_set#660592, exper#660593, Sequence#660591], [Sequence#660591, exper#660593, exper_set#660592, count(Sequence#660591) AS spectral_counts#660646L]\n                                             :                                   +- SubqueryAlias `clean`\n                                             :                                      +- Project [locus#660577, sequence_count#660578, spectrum_count#660579, sequence_coverage#660580, length#660581, molwt#660582, pi#660583, validation_status#660584, nsaf#660585, empai#660586, descriptive_name#660587, hredundancy#660588, lredundancy#660589, mredundancy#660590, Sequence#660591, exper_set#660592, exper#660593]\n                                             :                                         +- Filter Sequence#660591 RLIKE ^[KR-].[^.]+(?:[KR]..|..-)$\n                                             :                                            +- SubqueryAlias `raw_spectral_dta`\n                                             :                                               +- Relation[locus#660577,sequence_count#660578,spectrum_count#660579,sequence_coverage#660580,length#660581,molwt#660582,pi#660583,validation_status#660584,nsaf#660585,empai#660586,descriptive_name#660587,hredundancy#660588,lredundancy#660589,mredundancy#660590,Sequence#660591,exper_set#660592,exper#660593] parquet\n                                             +- SubqueryAlias `group_names_separated`\n                                                +- Relation[Group ##660714,Cimage link#660715,group_description#660716,cell_type#660717] csv\n'
      

      while,

       

      SELECT group_averages.*
      FROM group_averages
      NATURAL INNER JOIN (
          SELECT MAX(R) AS max_R, ipi AS ipi, description AS description, symbol AS symbol, residue AS residue
          FROM group_averages
          GROUP BY ipi, description, symbol, residue
      ) AS all_rows_bigger_than_four
      WHERE all_rows_bigger_than_four.max_R >= 4.0
      

      works

       

       

      Attachments

        Activity

          People

            Unassigned Unassigned
            krusty-senkane Kristine Senkane
            Votes:
            0 Vote for this issue
            Watchers:
            1 Start watching this issue

            Dates

              Created:
              Updated:
              Resolved: