Uploaded image for project: 'Beam'
  1. Beam
  2. BEAM-9386

_ReadRange in filebasedsource.py fails with IndexError

Details

    Description

      We are using Apache Beam 2.17.0 (Python SDK on Python 3.7) with the Dataflow runner on Google Cloud Platform.

      We frequently get an "index out of range" exception (IndexError) in filebasedsource.py, specifically at this line: https://github.com/apache/beam/blob/release-2.17.0/sdks/python/apache_beam/io/filebasedsource.py#L370

      The whole stack trace:

      Traceback (most recent call last):
        File "/usr/local/lib/python3.7/site-packages/dataflow_worker/batchworker.py", line 650, in do_work
          work_executor.execute()
        File "/usr/local/lib/python3.7/site-packages/dataflow_worker/executor.py", line 176, in execute
          op.start()
        File "dataflow_worker/shuffle_operations.py", line 50, in dataflow_worker.shuffle_operations.GroupedShuffleReadOperation.start
        File "dataflow_worker/shuffle_operations.py", line 51, in dataflow_worker.shuffle_operations.GroupedShuffleReadOperation.start
        File "dataflow_worker/shuffle_operations.py", line 66, in dataflow_worker.shuffle_operations.GroupedShuffleReadOperation.start
        File "dataflow_worker/shuffle_operations.py", line 67, in dataflow_worker.shuffle_operations.GroupedShuffleReadOperation.start
        File "dataflow_worker/shuffle_operations.py", line 71, in dataflow_worker.shuffle_operations.GroupedShuffleReadOperation.start
        File "apache_beam/runners/worker/operations.py", line 256, in apache_beam.runners.worker.operations.Operation.output
        File "apache_beam/runners/worker/operations.py", line 143, in apache_beam.runners.worker.operations.SingletonConsumerSet.receive
        File "dataflow_worker/shuffle_operations.py", line 234, in dataflow_worker.shuffle_operations.BatchGroupAlsoByWindowsOperation.process
        File "dataflow_worker/shuffle_operations.py", line 241, in dataflow_worker.shuffle_operations.BatchGroupAlsoByWindowsOperation.process
        File "apache_beam/runners/worker/operations.py", line 256, in apache_beam.runners.worker.operations.Operation.output
        File "apache_beam/runners/worker/operations.py", line 143, in apache_beam.runners.worker.operations.SingletonConsumerSet.receive
        File "apache_beam/runners/worker/operations.py", line 593, in apache_beam.runners.worker.operations.DoOperation.process
        File "apache_beam/runners/worker/operations.py", line 594, in apache_beam.runners.worker.operations.DoOperation.process
        File "apache_beam/runners/common.py", line 776, in apache_beam.runners.common.DoFnRunner.receive
        File "apache_beam/runners/common.py", line 782, in apache_beam.runners.common.DoFnRunner.process
        File "apache_beam/runners/common.py", line 834, in apache_beam.runners.common.DoFnRunner._reraise_augmented
        File "apache_beam/runners/common.py", line 780, in apache_beam.runners.common.DoFnRunner.process
        File "apache_beam/runners/common.py", line 440, in apache_beam.runners.common.SimpleInvoker.invoke_process
        File "apache_beam/runners/common.py", line 919, in apache_beam.runners.common._OutputProcessor.process_outputs
        File "apache_beam/runners/worker/operations.py", line 143, in apache_beam.runners.worker.operations.SingletonConsumerSet.receive
        File "apache_beam/runners/worker/operations.py", line 593, in apache_beam.runners.worker.operations.DoOperation.process
        File "apache_beam/runners/worker/operations.py", line 594, in apache_beam.runners.worker.operations.DoOperation.process
        File "apache_beam/runners/common.py", line 776, in apache_beam.runners.common.DoFnRunner.receive
        File "apache_beam/runners/common.py", line 782, in apache_beam.runners.common.DoFnRunner.process
        File "apache_beam/runners/common.py", line 834, in apache_beam.runners.common.DoFnRunner._reraise_augmented
        File "apache_beam/runners/common.py", line 780, in apache_beam.runners.common.DoFnRunner.process
        File "apache_beam/runners/common.py", line 440, in apache_beam.runners.common.SimpleInvoker.invoke_process
        File "apache_beam/runners/common.py", line 919, in apache_beam.runners.common._OutputProcessor.process_outputs
        File "apache_beam/runners/worker/operations.py", line 143, in apache_beam.runners.worker.operations.SingletonConsumerSet.receive
        File "apache_beam/runners/worker/operations.py", line 593, in apache_beam.runners.worker.operations.DoOperation.process
        File "apache_beam/runners/worker/operations.py", line 594, in apache_beam.runners.worker.operations.DoOperation.process
        File "apache_beam/runners/common.py", line 776, in apache_beam.runners.common.DoFnRunner.receive
        File "apache_beam/runners/common.py", line 782, in apache_beam.runners.common.DoFnRunner.process
        File "apache_beam/runners/common.py", line 849, in apache_beam.runners.common.DoFnRunner._reraise_augmented
        File "/usr/local/lib/python3.7/site-packages/future/utils/__init__.py", line 421, in raise_with_traceback
          raise exc.with_traceback(traceback)
        File "apache_beam/runners/common.py", line 780, in apache_beam.runners.common.DoFnRunner.process
        File "apache_beam/runners/common.py", line 440, in apache_beam.runners.common.SimpleInvoker.invoke_process
        File "apache_beam/runners/common.py", line 895, in apache_beam.runners.common._OutputProcessor.process_outputs
        File "/usr/local/lib/python3.7/site-packages/apache_beam/io/filebasedsource.py", line 370, in process
          source = list(source.split(float('inf')))[0].source
      IndexError: list index out of range [while running 'example/new_data/read/ReadAllFiles/ReadRange']
      

      Attachments

        Activity

          People

            Unassigned Unassigned
            fabian.fdc Fabian Rosenthal
            Votes:
            0 Vote for this issue
            Watchers:
            3 Start watching this issue

            Dates

              Created:
              Updated:
              Resolved: