Source code for kafka.admin.client

"""KafkaAdminClient - high-level Kafka cluster administration."""

import copy
import logging
import selectors
import socket
import time

import kafka.errors as Errors
from kafka.errors import KafkaConfigurationError, UnrecognizedBrokerVersion
from kafka.metrics import MetricConfig, Metrics
from kafka.net.compat import KafkaNetClient
from kafka.protocol.metadata import MetadataRequest, FindCoordinatorRequest, CoordinatorType
from kafka.version import __version__

from kafka.admin._acls import ACLAdminMixin
from kafka.admin._cluster import ClusterAdminMixin
from kafka.admin._configs import ConfigAdminMixin
from kafka.admin._groups import GroupAdminMixin
from kafka.admin._partitions import PartitionAdminMixin
from kafka.admin._topics import TopicAdminMixin
from kafka.admin._transactions import TransactionsAdminMixin
from kafka.admin._users import UserAdminMixin

log = logging.getLogger(__name__)


[docs] class KafkaAdminClient( ACLAdminMixin, ClusterAdminMixin, ConfigAdminMixin, GroupAdminMixin, PartitionAdminMixin, TopicAdminMixin, TransactionsAdminMixin, UserAdminMixin, ): """A class for administering the Kafka cluster. Keyword Arguments: bootstrap_servers: 'host[:port]' string (or list of 'host[:port]' strings) that the consumer should contact to bootstrap initial cluster metadata. This does not have to be the full node list. It just needs to have at least one broker that will respond to a Metadata API Request. Default port is 9092. If no servers are specified, will default to localhost:9092. client_id (str): a name for this client. This string is passed in each request to servers and can be used to identify specific server-side log entries that correspond to this client. Also submitted to GroupCoordinator for logging with respect to consumer group administration. Default: 'kafka-python-{version}' reconnect_backoff_ms (int): The amount of time in milliseconds to wait before attempting to reconnect to a given host. Default: 50. reconnect_backoff_max_ms (int): The maximum amount of time in milliseconds to backoff/wait when reconnecting to a broker that has repeatedly failed to connect. If provided, the backoff per host will increase exponentially for each consecutive connection failure, up to this maximum. Once the maximum is reached, reconnection attempts will continue periodically with this fixed rate. To avoid connection storms, a randomization factor of 0.2 will be applied to the backoff resulting in a random range between 20% below and 20% above the computed value. Default: 30000. request_timeout_ms (int): Client request timeout in milliseconds. Default: 30000. connections_max_idle_ms: Close idle connections after the number of milliseconds specified by this config. The broker closes idle connections after connections.max.idle.ms, so this avoids hitting unexpected socket disconnected errors on the client. Default: 540000 retry_backoff_ms (int): Milliseconds to backoff when retrying on errors. Default: 100. max_in_flight_requests_per_connection (int): Requests are pipelined to kafka brokers up to this number of maximum requests per broker connection. Default: 5. receive_message_max_bytes (int): Maximum allowed network frame size. Used to avoid OOM when decoding malformed network message header. Default: 1000000. receive_buffer_bytes (int): The size of the TCP receive buffer (SO_RCVBUF) to use when reading data. Default: None (relies on system defaults). Java client defaults to 32768. send_buffer_bytes (int): The size of the TCP send buffer (SO_SNDBUF) to use when sending data. Default: None (relies on system defaults). Java client defaults to 131072. socket_options (list): List of tuple-arguments to socket.setsockopt to apply to broker connection sockets. Default: [(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1)] metadata_max_age_ms (int): The period of time in milliseconds after which we force a refresh of metadata even if we haven't seen any partition leadership changes to proactively discover any new brokers or partitions. Default: 300000 security_protocol (str): Protocol used to communicate with brokers. Valid values are: PLAINTEXT, SSL, SASL_PLAINTEXT, SASL_SSL. Default: PLAINTEXT. ssl_context (ssl.SSLContext): Pre-configured SSLContext for wrapping socket connections. If provided, all other ssl_* configurations will be ignored. Default: None. ssl_check_hostname (bool): Flag to configure whether SSL handshake should verify that the certificate matches the broker's hostname. Default: True. ssl_cafile (str): Optional filename of CA file to use in certificate verification. Default: None. ssl_certfile (str): Optional filename of file in PEM format containing the client certificate, as well as any CA certificates needed to establish the certificate's authenticity. Default: None. ssl_keyfile (str): Optional filename containing the client private key. Default: None. ssl_password (str): Optional password to be used when loading the certificate chain. Default: None. ssl_crlfile (str): Optional filename containing the CRL to check for certificate expiration. By default, no CRL check is done. When providing a file, only the leaf certificate will be checked against this CRL. Default: None. api_version (tuple): Specify which Kafka API version to use. If set to None, the client will infer the broker version from the results of ApiVersionsRequest API. For brokers earlier than 0.10, which do not support the ApiVersionsRequest API, api_version is required. Note: Dynamic version checking is performed eagerly during __init__ and can raise KafkaTimeoutError if no connection can be made before timeout (see bootstrap_timeout_ms below). Different versions enable different functionality. Examples:: (4, 3) most recent broker release, enable all supported features (2, 7) support SCRAM user credential apis (0, 11) enables message format v2 (internal) (0, 10, 0) enables sasl authentication and message format v1 (0, 9) enables full group coordination features with automatic partition assignment and rebalancing, (0, 8, 2) enables kafka-storage offset commits with manual partition assignment only, (0, 8, 1) enables zookeeper-storage offset commits with manual partition assignment only, (0, 8, 0) enables basic functionality but requires manual partition assignment and offset management. Default: None bootstrap_timeout_ms (int): number of milliseconds to wait for first successful cluster bootstrap. If provided, an attempt to bootstrap will raise KafkaTimeoutError if it is unable to fetch cluster metadata before the configured timeout. Note that bootstrap is called eagerly from __init__(). Default: 30000 selector (selectors.BaseSelector): Provide a specific selector implementation to use for I/O multiplexing. Default: selectors.DefaultSelector metrics (kafka.metrics.Metrics): Optionally provide a metrics instance for capturing network IO stats. Default: None. metric_group_prefix (str): Prefix for metric names. Default: '' sasl_mechanism (str): Authentication mechanism when security_protocol is configured for SASL_PLAINTEXT or SASL_SSL. Valid values are: PLAIN, GSSAPI, OAUTHBEARER, SCRAM-SHA-256, SCRAM-SHA-512. sasl_plain_username (str): username for sasl PLAIN and SCRAM authentication. Required if sasl_mechanism is PLAIN or one of the SCRAM mechanisms. sasl_plain_password (str): password for sasl PLAIN and SCRAM authentication. Required if sasl_mechanism is PLAIN or one of the SCRAM mechanisms. sasl_kerberos_name (str or gssapi.Name): Constructed gssapi.Name for use with sasl mechanism handshake. If provided, sasl_kerberos_service_name and sasl_kerberos_domain name are ignored. Default: None. sasl_kerberos_service_name (str): Service name to include in GSSAPI sasl mechanism handshake. Default: 'kafka' sasl_kerberos_domain_name (str): kerberos domain name to use in GSSAPI sasl mechanism handshake. Default: one of bootstrap servers sasl_oauth_token_provider (kafka.net.sasl.oauth.AbstractTokenProvider): OAuthBearer token provider instance. Default: None proxy_url (str): URL to proxy socket connections through. Supports SOCKS5 only. Requires scheme:// (e.g., socks5://foo.bar/). Default: None kafka_client (callable): Custom class / callable for creating KafkaNetClient instances """ DEFAULT_CONFIG = { # client configs 'bootstrap_servers': 'localhost', 'client_id': 'kafka-python-' + __version__, 'request_timeout_ms': 30000, 'connections_max_idle_ms': 9 * 60 * 1000, 'reconnect_backoff_ms': 50, 'reconnect_backoff_max_ms': 30000, 'max_in_flight_requests_per_connection': 5, 'receive_message_max_bytes': 1000000, 'receive_buffer_bytes': None, 'send_buffer_bytes': None, 'socket_options': [(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1)], 'retry_backoff_ms': 100, 'metadata_max_age_ms': 300000, 'client_dns_lookup': 'use_all_dns_ips', 'security_protocol': 'PLAINTEXT', 'ssl_context': None, 'ssl_check_hostname': True, 'ssl_cafile': None, 'ssl_certfile': None, 'ssl_keyfile': None, 'ssl_password': None, 'ssl_crlfile': None, 'api_version': None, 'bootstrap_timeout_ms': 30000, 'selector': selectors.DefaultSelector, 'sasl_mechanism': None, 'sasl_plain_username': None, 'sasl_plain_password': None, 'sasl_kerberos_name': None, 'sasl_kerberos_service_name': 'kafka', 'sasl_kerberos_domain_name': None, 'sasl_oauth_token_provider': None, 'proxy_url': None, 'socks5_proxy': None, # deprecated # metrics configs 'metric_reporters': [], 'metrics_num_samples': 2, 'metrics_sample_window_ms': 30000, 'kafka_client': KafkaNetClient, } def __init__(self, **configs): log.debug("Starting KafkaAdminClient with configuration: %s", configs) extra_configs = set(configs).difference(self.DEFAULT_CONFIG) if extra_configs: raise KafkaConfigurationError("Unrecognized configs: {}".format(extra_configs)) self.config = copy.copy(self.DEFAULT_CONFIG) self.config.update(configs) # Configure metrics metrics_tags = {'client-id': self.config['client_id']} metric_config = MetricConfig(samples=self.config['metrics_num_samples'], time_window_ms=self.config['metrics_sample_window_ms'], tags=metrics_tags) reporters = [reporter() for reporter in self.config['metric_reporters']] self._metrics = Metrics(metric_config, reporters) self._client = self.config['kafka_client']( metrics=self._metrics, metric_group_prefix='admin', **self.config ) # Goal: migrate all self._client calls -> self._manager (skipping compat layer) self._manager = self._client._manager self._net = self._manager._net # Run all IO on a dedicated background thread; public admin methods # block on cross-thread Events via self._manager.run(...). self._net.start() # Bootstrap on __init__ self._manager.bootstrap(self.config['bootstrap_timeout_ms']) self._closed = False self._controller_id = None log.debug("KafkaAdminClient started.") def __enter__(self): return self def __exit__(self, exc_type, exc_val, exc_tb): self.close()
[docs] def close(self): """Close the KafkaAdminClient connection to the Kafka broker.""" if not hasattr(self, '_closed') or self._closed: log.info("KafkaAdminClient already closed.") return self._closed = True self._metrics.close() self._client.close() self._net.stop() log.debug("KafkaAdminClient is now closed.")
def _validate_timeout(self, timeout_ms): """Validate the timeout is set or use the configuration default.""" return timeout_ms or self.config['request_timeout_ms'] # -- Routing primitives (used by mixins) ---------------------------------- async def _refresh_controller_id(self, timeout_ms=30000): """Determine the Kafka cluster controller.""" if self._manager.broker_version < (0, 10): raise UnrecognizedBrokerVersion( "Kafka Admin Client controller requests requires broker version >= (0, 10)") request = MetadataRequest() timeout_at = time.monotonic() + timeout_ms / 1000 while time.monotonic() < timeout_at: response = await self._manager.send(request) controller_id = response.controller_id if controller_id == -1: log.warning("Controller ID not available, got -1") await self._net.sleep(1) continue return controller_id else: raise Errors.NodeNotReadyError('controller') async def _find_coordinator_ids(self, keys, key_type=CoordinatorType.GROUP): """Find broker node_ids of the coordinators for a set of keys. ``key_type`` is the CoordinatorType enum (GROUP=0, TRANSACTION=1, SHARE=2). Results are cached in the shared ``ClusterMetadata._coordinators`` map; only keys not already in the cache hit the network. On brokers supporting FindCoordinator v4+ (KIP-699, Apache Kafka 3.0+), all unknown keys are resolved in a single batched request; older brokers fall back to one request per key. Returns a dict mapping key -> node_id. """ key_type = CoordinatorType.build_from(key_type) cluster = self._manager.cluster result = {} unknown = [] for key in keys: cached = cluster.get_coordinator(key, key_type) if cached is not None: result[key] = cached else: unknown.append(key) if not unknown: return result if self._manager.broker_version_data.api_version(FindCoordinatorRequest) >= 4: request = FindCoordinatorRequest(key_type=key_type.value, coordinator_keys=unknown, min_version=4) response = await self._manager.send(request) # v4+ Coordinator rows are duck-compatible with v0-v3 response # shape: both have error_code/error_message/node_id/host/port. for coordinator in response.coordinators: node_id = cluster.add_coordinator( coordinator, key_type, coordinator.key, synthesize_node_id=False) result[coordinator.key] = node_id else: # Broker does not support batch api; fan-out request per-key for key in unknown: request = FindCoordinatorRequest(key=key, key_type=key_type.value, max_version=3) response = await self._manager.send(request) node_id = cluster.add_coordinator( response, key_type, key, synthesize_node_id=False) result[key] = node_id return result async def _find_coordinator_id(self, key, key_type=CoordinatorType.GROUP): """Single-key wrapper for _find_coordinator_ids()""" ids = await self._find_coordinator_ids([key], key_type=key_type) return ids[key] async def _send_request_to_controller(self, request, get_errors_fn=lambda r: (), raise_errors=True, ignore_errors=()): """Send a Kafka protocol message to the cluster controller. Retries once on NotControllerError after refreshing the controller id. """ if self._controller_id is None or self._controller_id == -1: self._controller_id = await self._refresh_controller_id() response = await self._manager.send(request, node_id=self._controller_id) # Refresh controller and retry on NotControllerError if Errors.NotControllerError in get_errors_fn(response): self._controller_id = await self._refresh_controller_id() response = await self._manager.send(request, node_id=self._controller_id) for error_type in get_errors_fn(response): if error_type is Errors.NoError: continue elif error_type is Errors.NotControllerError: raise RuntimeError("Failed to find active controller id!") elif raise_errors and error_type not in ignore_errors: raise error_type( "Request '{}' failed with response '{}'." .format(request, response)) return response