Uploaded image for project: 'IMPALA'
  1. IMPALA
  2. IMPALA-12898

Tidy up test matrix of test_scanner.py

    XML | Word | Printable | JSON

Details

    • Test
    • Status: Resolved
    • Minor
    • Resolution: Fixed
    • None
    • Impala 4.4.0
    • Infrastructure
    • None

    Description

      Several test classes in test_scanner.py seemingly declare test dimensions that are ignored by some of their tests.

      For example, TestScannersAllTableFormats

      class TestScannersAllTableFormats(ImpalaTestSuite):
        """Scanner tests for the 'functional-query' workload.

        The test matrix is extended with 'batch_size', 'debug_action' and 'mt_dop'
        dimensions. Only test_scanners and test_many_nulls copy those dimension values
        into 'exec_option'; test_hdfs_scanner_profile and test_string_escaping pass the
        vector to run_test_case unchanged.
        """
        # Values used to build the 'batch_size' test dimension in add_test_dimensions().
        BATCH_SIZES = [0, 1, 16]
      
        @classmethod
        def get_workload(cls):
          """Return the workload name, 'functional-query'."""
          return 'functional-query'
      
        @classmethod
        def add_test_dimensions(cls):
          """Extend the parent's test matrix with this class's dimensions.

          Adds 'batch_size', 'debug_action' and 'mt_dop' dimensions. Under the 'core'
          exploration strategy, a table-info dimension built with 'pairwise' is added
          as well.
          """
          super(TestScannersAllTableFormats, cls).add_test_dimensions()
          if cls.exploration_strategy() == 'core':
            # The purpose of this test is to get some base coverage of all the file formats.
            # Even in 'core', we'll test each format by using the pairwise strategy.
            cls.ImpalaTestMatrix.add_dimension(cls.create_table_info_dimension('pairwise'))
          cls.ImpalaTestMatrix.add_dimension(
              ImpalaTestDimension('batch_size', *TestScannersAllTableFormats.BATCH_SIZES))
          cls.ImpalaTestMatrix.add_dimension(
              ImpalaTestDimension('debug_action', *DEBUG_ACTION_DIMS))
          cls.ImpalaTestMatrix.add_dimension(ImpalaTestDimension('mt_dop', *MT_DOP_VALUES))
      
        def test_scanners(self, vector):
          """Run 'QueryTest/scanners' with the dimension values applied as query options."""
          # Work on a deep copy so the incoming vector is left unmodified.
          new_vector = deepcopy(vector)
          # Copy over test dimensions to the matching query options.
          new_vector.get_value('exec_option')['batch_size'] = vector.get_value('batch_size')
          new_vector.get_value('exec_option')['debug_action'] = vector.get_value('debug_action')
          new_vector.get_value('exec_option')['mt_dop'] = vector.get_value('mt_dop')
          self.run_test_case('QueryTest/scanners', new_vector)
      
        def test_many_nulls(self, vector):
          """Run 'QueryTest/scanners-many-nulls' with the dimension values applied as
          query options. Skipped when the table format is 'hbase'."""
          if vector.get_value('table_format').file_format == 'hbase':
            # manynulls table not loaded for HBase
            pytest.skip()
          # Copy over test dimensions to the matching query options.
          new_vector = deepcopy(vector)
          new_vector.get_value('exec_option')['batch_size'] = vector.get_value('batch_size')
          new_vector.get_value('exec_option')['debug_action'] = vector.get_value('debug_action')
          new_vector.get_value('exec_option')['mt_dop'] = vector.get_value('mt_dop')
          self.run_test_case('QueryTest/scanners-many-nulls', new_vector)
      
        def test_hdfs_scanner_profile(self, vector):
          """Run 'QueryTest/hdfs_scanner_profile' with the vector passed through unchanged.

          Skipped when the table format is 'kudu' or 'hbase', or when the 'num_nodes'
          exec option is not 0.
          """
          # NOTE(review): unlike test_scanners, the 'batch_size', 'debug_action' and
          # 'mt_dop' dimension values are not copied into 'exec_option' here, so this
          # test presumably repeats the same run for every combination of them.
          if vector.get_value('table_format').file_format in ('kudu', 'hbase') or \
             vector.get_value('exec_option')['num_nodes'] != 0:
            pytest.skip()
          self.run_test_case('QueryTest/hdfs_scanner_profile', vector)
      
        def test_string_escaping(self, vector):
          """Test handling of string escape sequences."""
          # NOTE(review): the 'batch_size', 'debug_action' and 'mt_dop' dimension values
          # are not copied into 'exec_option' here; the vector is used as-is.
          if vector.get_value('table_format').file_format == 'rc':
            # IMPALA-7778: RCFile scanner incorrectly ignores escapes for now.
            self.run_test_case('QueryTest/string-escaping-rcfile-bug', vector)
          else:
            self.run_test_case('QueryTest/string-escaping', vector)
      

      test_scanners and test_many_nulls correctly copy exec_option values from the test vector. But test_hdfs_scanner_profile and test_string_escaping do not, and are unnecessarily run multiple times even though they do not permute their exec_option. This and other test classes inside test_scanner.py can benefit from refactoring and dimension reduction.

      Attachments

        Issue Links

          Activity

            People

              rizaon Riza Suminto
              rizaon Riza Suminto
              Votes:
              0 Vote for this issue
              Watchers:
              2 Start watching this issue

              Dates

                Created:
                Updated:
                Resolved: