-
Notifications
You must be signed in to change notification settings - Fork 53
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Fence writer zombies (breaking change) #255
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -19,7 +19,6 @@ | |
package io.tabular.iceberg.connect.channel; | ||
|
||
import static java.util.stream.Collectors.toList; | ||
import static java.util.stream.Collectors.toMap; | ||
|
||
import io.tabular.iceberg.connect.IcebergSinkConfig; | ||
import io.tabular.iceberg.connect.data.Offset; | ||
|
@@ -29,7 +28,6 @@ | |
import java.util.Map; | ||
import java.util.Optional; | ||
import java.util.UUID; | ||
import java.util.concurrent.ExecutionException; | ||
import org.apache.iceberg.catalog.Catalog; | ||
import org.apache.iceberg.connect.events.DataComplete; | ||
import org.apache.iceberg.connect.events.DataWritten; | ||
|
@@ -42,11 +40,8 @@ | |
import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList; | ||
import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap; | ||
import org.apache.iceberg.relocated.com.google.common.collect.Lists; | ||
import org.apache.kafka.clients.admin.ListConsumerGroupOffsetsOptions; | ||
import org.apache.kafka.clients.admin.ListConsumerGroupOffsetsResult; | ||
import org.apache.kafka.clients.consumer.ConsumerGroupMetadata; | ||
import org.apache.kafka.common.TopicPartition; | ||
import org.apache.kafka.connect.errors.ConnectException; | ||
import org.apache.kafka.connect.sink.SinkTaskContext; | ||
import org.slf4j.Logger; | ||
import org.slf4j.LoggerFactory; | ||
|
@@ -57,6 +52,7 @@ public class CommitterImpl extends Channel implements Committer, AutoCloseable { | |
private final SinkTaskContext context; | ||
private final IcebergSinkConfig config; | ||
private final Optional<CoordinatorThread> maybeCoordinatorThread; | ||
private final ConsumerGroupMetadata consumerGroupMetadata; | ||
|
||
public CommitterImpl(SinkTaskContext context, IcebergSinkConfig config, Catalog catalog) { | ||
this(context, config, catalog, new KafkaClientFactory(config.kafkaProps())); | ||
|
@@ -92,11 +88,14 @@ private CommitterImpl( | |
|
||
this.maybeCoordinatorThread = coordinatorThreadFactory.create(context, config); | ||
|
||
// The source-of-truth for source-topic offsets is the control-group-id | ||
Map<TopicPartition, Long> stableConsumerOffsets = | ||
fetchStableConsumerOffsets(config.controlGroupId()); | ||
// Rewind kafka connect consumer to avoid duplicates | ||
context.offset(stableConsumerOffsets); | ||
ConsumerGroupMetadata groupMetadata; | ||
try { | ||
groupMetadata = KafkaUtils.consumerGroupMetadata(context); | ||
} catch (IllegalArgumentException e) { | ||
LOG.warn("Could not extract ConsumerGroupMetadata from consumer inside Kafka Connect, falling back to simple ConsumerGroupMetadata which can result in duplicates from zombie tasks"); | ||
groupMetadata = new ConsumerGroupMetadata(config.connectGroupId()); | ||
} | ||
Comment on lines
+92
to
+97
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We fetch the consumer-group-metadata via reflection from inside the Kafka Connect framework. This is technically unsafe as we are relying on private, implementation details. Hence I also implemented falling back to simple ConsumerGroupMetadata (which is basically what we were doing previously) and does not do zombie fencing. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why not just fail? |
||
this.consumerGroupMetadata = groupMetadata; | ||
|
||
consumeAvailable( | ||
// initial poll with longer duration so the consumer will initialize... | ||
|
@@ -108,20 +107,6 @@ private CommitterImpl( | |
() -> new Committable(ImmutableMap.of(), ImmutableList.of()))); | ||
} | ||
|
||
private Map<TopicPartition, Long> fetchStableConsumerOffsets(String groupId) { | ||
try { | ||
ListConsumerGroupOffsetsResult response = | ||
admin() | ||
.listConsumerGroupOffsets( | ||
groupId, new ListConsumerGroupOffsetsOptions().requireStable(true)); | ||
return response.partitionsToOffsetAndMetadata().get().entrySet().stream() | ||
.filter(entry -> context.assignment().contains(entry.getKey())) | ||
.collect(toMap(Map.Entry::getKey, entry -> entry.getValue().offset())); | ||
} catch (InterruptedException | ExecutionException e) { | ||
throw new ConnectException(e); | ||
} | ||
} | ||
|
||
private void throwExceptionIfCoordinatorIsTerminated() { | ||
if (maybeCoordinatorThread.map(CoordinatorThread::isTerminated).orElse(false)) { | ||
throw new IllegalStateException("Coordinator unexpectedly terminated"); | ||
|
@@ -183,8 +168,7 @@ private void sendCommitResponse(UUID commitId, CommittableSupplier committableSu | |
events.add(commitReady); | ||
|
||
Map<TopicPartition, Offset> offsets = committable.offsetsByTopicPartition(); | ||
send(events, offsets, new ConsumerGroupMetadata(config.controlGroupId())); | ||
send(ImmutableList.of(), offsets, new ConsumerGroupMetadata(config.connectGroupId())); | ||
send(events, offsets, consumerGroupMetadata); | ||
Comment on lines
-186
to
+171
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Notice how we commit offsets against only one consumer group now; the We no longer commit source topic offsets to the |
||
} | ||
|
||
@Override | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -19,11 +19,17 @@ | |
package io.tabular.iceberg.connect.channel; | ||
|
||
import java.util.concurrent.ExecutionException; | ||
|
||
import org.apache.iceberg.common.DynFields; | ||
import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList; | ||
import org.apache.kafka.clients.admin.Admin; | ||
import org.apache.kafka.clients.admin.ConsumerGroupDescription; | ||
import org.apache.kafka.clients.admin.DescribeConsumerGroupsResult; | ||
import org.apache.kafka.clients.consumer.Consumer; | ||
import org.apache.kafka.clients.consumer.ConsumerGroupMetadata; | ||
import org.apache.kafka.connect.errors.ConnectException; | ||
import org.apache.kafka.connect.runtime.WorkerSinkTaskContext; | ||
import org.apache.kafka.connect.sink.SinkTaskContext; | ||
|
||
public class KafkaUtils { | ||
|
||
|
@@ -40,5 +46,18 @@ public static ConsumerGroupDescription consumerGroupDescription( | |
} | ||
} | ||
|
||
private static final String WorkerSinkTaskContextClassName = | ||
WorkerSinkTaskContext.class.getName(); | ||
|
||
@SuppressWarnings("unchecked") | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. worth a comment around using reflection to get at some very specific implementation detail stuff here but otherwise 👍 |
||
public static ConsumerGroupMetadata consumerGroupMetadata(SinkTaskContext sinkTaskContext) { | ||
return ((Consumer<byte[], byte[]>) DynFields | ||
.builder() | ||
.hiddenImpl(WorkerSinkTaskContextClassName, "consumer") | ||
.build(sinkTaskContext) | ||
.get()) | ||
.groupMetadata(); | ||
} | ||
|
||
private KafkaUtils() {} | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Nice docs. Thanks for looking out for the users.