I have tested the Kafka server by creating a Kafka-to-Kafka pipeline, and it works fine.
I have configured all the necessary options (a sketch of how I set them follows this list):
-streaming
-project
-region
-staging_location
-network
-subnetwork
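
For reference, this is roughly how I build those options in the notebook; the project, region, bucket, network and subnetwork values below are placeholders, not my real ones:

    from apache_beam.options.pipeline_options import PipelineOptions

    # Placeholder values; the real job uses my own project, bucket and network.
    options = PipelineOptions(
        streaming=True,
        runner='DataflowRunner',
        project='my-project',
        region='europe-west1',
        staging_location='gs://my-bucket/staging',
        network='my-network',
        subnetwork='regions/europe-west1/subnetworks/my-subnet',
    )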
I am using Beam 2.50 and pyarrow 13, with the Dataflow runner on GCP.
The full stack trace is:
Traceback (most recent call last):
  File "/tmp/ipykernel_981/2428808391.py", line 2, in <module>
    pipeline = streaming_pipeline()
  File "/tmp/ipykernel_981/1611686763.py", line 19, in streaming_pipeline
    kafka_msg = (p | 'read kafka' >> ReadFromKafka(consumer_config={'bootstrap.servers':'34.77.114.15:9094',
  File "/jupyter/.kernels/apache-beam-2.50.0/lib/python3.8/site-packages/apache_beam/pvalue.py", line 137, in __or__
    return self.pipeline.apply(ptransform, self)
  File "/jupyter/.kernels/apache-beam-2.50.0/lib/python3.8/site-packages/apache_beam/pipeline.py", line 655, in apply
    return self.apply(
  File "/jupyter/.kernels/apache-beam-2.50.0/lib/python3.8/site-packages/apache_beam/pipeline.py", line 666, in apply
    return self.apply(transform, pvalueish)
  File "/jupyter/.kernels/apache-beam-2.50.0/lib/python3.8/site-packages/apache_beam/pipeline.py", line 712, in apply
    pvalueish_result = self.runner.apply(transform, pvalueish, self._options)
  File "/jupyter/.kernels/apache-beam-2.50.0/lib/python3.8/site-packages/apache_beam/runners/dataflow/dataflow_runner.py", line 102, in apply
    return super().apply(transform, input, options)
  File "/jupyter/.kernels/apache-beam-2.50.0/lib/python3.8/site-packages/apache_beam/runners/runner.py", line 201, in apply
    return self.apply_PTransform(transform, input, options)
  File "/jupyter/.kernels/apache-beam-2.50.0/lib/python3.8/site-packages/apache_beam/runners/runner.py", line 205, in apply_PTransform
    return transform.expand(input)
  File "/jupyter/.kernels/apache-beam-2.50.0/lib/python3.8/site-packages/apache_beam/io/parquetio.py", line 537, in expand
    return pcoll | ParDo(
  File "/jupyter/.kernels/apache-beam-2.50.0/lib/python3.8/site-packages/apache_beam/pvalue.py", line 137, in __or__
    return self.pipeline.apply(ptransform, self)
  File "/jupyter/.kernels/apache-beam-2.50.0/lib/python3.8/site-packages/apache_beam/pipeline.py", line 712, in apply
    pvalueish_result = self.runner.apply(transform, pvalueish, self._options)
  File "/jupyter/.kernels/apache-beam-2.50.0/lib/python3.8/site-packages/apache_beam/runners/dataflow/dataflow_runner.py", line 102, in apply
    return super().apply(transform, input, options)
  File "/jupyter/.kernels/apache-beam-2.50.0/lib/python3.8/site-packages/apache_beam/runners/runner.py", line 201, in apply
    return self.apply_PTransform(transform, input, options)
  File "/jupyter/.kernels/apache-beam-2.50.0/lib/python3.8/site-packages/apache_beam/runners/runner.py", line 205, in apply_PTransform
    return transform.expand(input)
  File "/jupyter/.kernels/apache-beam-2.50.0/lib/python3.8/site-packages/apache_beam/io/iobase.py", line 1076, in expand
    return pcoll | WriteImpl(self.sink)
  File "/jupyter/.kernels/apache-beam-2.50.0/lib/python3.8/site-packages/apache_beam/pvalue.py", line 137, in __or__
    return self.pipeline.apply(ptransform, self)
  File "/jupyter/.kernels/apache-beam-2.50.0/lib/python3.8/site-packages/apache_beam/pipeline.py", line 712, in apply
    pvalueish_result = self.runner.apply(transform, pvalueish, self._options)
  File "/jupyter/.kernels/apache-beam-2.50.0/lib/python3.8/site-packages/apache_beam/runners/dataflow/dataflow_runner.py", line 102, in apply
    return super().apply(transform, input, options)
  File "/jupyter/.kernels/apache-beam-2.50.0/lib/python3.8/site-packages/apache_beam/runners/runner.py", line 201, in apply
    return self.apply_PTransform(transform, input, options)
  File "/jupyter/.kernels/apache-beam-2.50.0/lib/python3.8/site-packages/apache_beam/runners/runner.py", line 205, in apply_PTransform
    return transform.expand(input)
  File "/jupyter/.kernels/apache-beam-2.50.0/lib/python3.8/site-packages/apache_beam/io/iobase.py", line 1155, in expand
    pcoll
  File "/jupyter/.kernels/apache-beam-2.50.0/lib/python3.8/site-packages/apache_beam/pvalue.py", line 137, in __or__
    return self.pipeline.apply(ptransform, self)
  File "/jupyter/.kernels/apache-beam-2.50.0/lib/python3.8/site-packages/apache_beam/pipeline.py", line 712, in apply
    pvalueish_result = self.runner.apply(transform, pvalueish, self._options)
  File "/jupyter/.kernels/apache-beam-2.50.0/lib/python3.8/site-packages/apache_beam/runners/dataflow/dataflow_runner.py", line 102, in apply
    return super().apply(transform, input, options)
  File "/jupyter/.kernels/apache-beam-2.50.0/lib/python3.8/site-packages/apache_beam/runners/runner.py", line 201, in apply
    return self.apply_PTransform(transform, input, options)
  File "/jupyter/.kernels/apache-beam-2.50.0/lib/python3.8/site-packages/apache_beam/runners/runner.py", line 205, in apply_PTransform
    return transform.expand(input)
  File "/jupyter/.kernels/apache-beam-2.50.0/lib/python3.8/site-packages/apache_beam/transforms/core.py", line 3083, in expand
    raise ValueError(
ValueError: GroupByKey cannot be applied to an unbounded PCollection with global windowing and a default trigger
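
Judging from the traceback, the error is raised at construction time when the Parquet sink is applied to the unbounded Kafka stream (the sink's internal GroupByKey rejects global windowing with a default trigger). Below is a minimal sketch of the pipeline shape I believe triggers it, assuming the Kafka read and Parquet write shown in the traceback; the topic name, schema and output path are placeholders, not my exact code:

    import apache_beam as beam
    import pyarrow
    from apache_beam.io.kafka import ReadFromKafka
    from apache_beam.io.parquetio import WriteToParquet

    def streaming_pipeline():
        # 'options' is the PipelineOptions object from the snippet above.
        p = beam.Pipeline(options=options)
        # Unbounded read from Kafka (broker address taken from the traceback).
        kafka_msg = (p | 'read kafka' >> ReadFromKafka(
            consumer_config={'bootstrap.servers': '34.77.114.15:9094'},
            topics=['my-topic']))  # placeholder topic
        # Applying the Parquet sink directly to the unbounded stream is where
        # the GroupByKey ValueError above is raised.
        (kafka_msg | 'write parquet' >> WriteToParquet(
            'gs://my-bucket/output/part',                      # placeholder path
            pyarrow.schema([('payload', pyarrow.binary())])))  # placeholder schema
        return p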
Could you please run the code you suggested on Dataflow to verify that it works for you?