redpanda using docker-compose
This commit is contained in:
@@ -0,0 +1,56 @@
|
||||
# =================================================================
# Initial cluster properties for a Redpanda cluster (bootstrap file).
# Several of these settings target quickstart development and
# evaluation and are not suitable for production environments.
#
# Background on bootstrap files:
# https://docs.redpanda.com/current/deploy/deployment-option/self-hosted/manual/production/production-deployment/#configure-a-bootstrap-file
# =================================================================

# --- Authentication ----------------------------------------------
# Enable SASL authentication for the Kafka and Admin APIs.
# https://docs.redpanda.com/current/reference/properties/cluster-properties/#admin_api_require_auth
# https://docs.redpanda.com/current/reference/properties/cluster-properties/#enable_sasl
admin_api_require_auth: true
enable_sasl: true
# At least one superuser must exist so that other SASL users can be created.
# https://docs.redpanda.com/current/reference/properties/cluster-properties/#superusers
superusers: [superuser]

# --- Topics and transforms ---------------------------------------
# Allow topics to be created on first access.
# https://docs.redpanda.com/current/reference/properties/cluster-properties/#auto_create_topics_enabled
auto_create_topics_enabled: true
# Enable data transforms.
# https://docs.redpanda.com/current/develop/data-transforms/how-transforms-work/
data_transforms_enabled: true

# --- Audit logging (enterprise feature) --------------------------
# https://docs.redpanda.com/current/manage/audit-logging/
audit_enabled: true

# --- Tiered Storage (enterprise feature) -------------------------
# https://docs.redpanda.com/current/manage/tiered-storage/
# Object-store region, credentials, endpoint and bucket. TLS is disabled
# for the connection to the endpoint.
cloud_storage_enabled: true
cloud_storage_region: 'local'
cloud_storage_access_key: 'minio'
cloud_storage_secret_key: 'redpandaTieredStorage7'
cloud_storage_api_endpoint: 'minio'
cloud_storage_api_endpoint_port: 9000
cloud_storage_disable_tls: true
cloud_storage_bucket: 'redpanda'
# Forces segments to be uploaded to Tiered Storage faster for the
# purposes of the quickstart.
# https://docs.redpanda.com/current/reference/properties/object-storage-properties/#cloud_storage_segment_max_upload_interval_sec
cloud_storage_segment_max_upload_interval_sec: 60

# --- Continuous Data Balancing (enterprise feature) --------------
# Continuously monitors node and rack availability and disk usage so the
# cluster can rebalance partitions on its own.
# https://docs.redpanda.com/current/manage/cluster-maintenance/continuous-data-balancing/
partition_autobalancing_mode: 'continuous'

# --- Metrics -----------------------------------------------------
# Consumer group metrics that Redpanda collects.
# https://docs.redpanda.com/current/reference/properties/cluster-properties/#enable_consumer_group_metrics
enable_consumer_group_metrics: ["group", "partition", "consumer_lag"]
# Lag collection interval, lowered for the quickstart.
# https://docs.redpanda.com/current/reference/properties/cluster-properties/#consumer_group_lag_collection_interval_sec
consumer_group_lag_collection_interval_sec: 60
# Enable Redpanda to collect host metrics.
# https://docs.redpanda.com/current/reference/properties/cluster-properties/#enable_host_metrics
enable_host_metrics: true
|
||||
@@ -0,0 +1,403 @@
|
||||
# Compose project name for the three-broker quickstart.
name: redpanda-quickstart-multi-broker

# Named volumes: one data volume per broker plus one for MinIO.
# `null` keeps the default volume settings.
volumes:
  redpanda-0: null
  redpanda-1: null
  redpanda-2: null
  minio: null

# Single bridge network shared by every service in this file.
networks:
  redpanda_network:
    driver: bridge
|
||||
services:
|
||||
##################
|
||||
# Redpanda Brokers #
|
||||
##################
|
||||
redpanda-0:
  image: docker.redpanda.com/redpandadata/redpanda:v25.2.7
  container_name: redpanda-0
  command:
    - redpanda
    - start
    # Kafka API listeners: "internal" on 9092, "external" on 19092.
    - --kafka-addr internal://0.0.0.0:9092,external://0.0.0.0:19092
    # Addresses advertised to Kafka API clients.
    # Clients inside the Docker network use the internal address;
    # clients outside the Docker network use the external address.
    - --advertise-kafka-addr internal://redpanda-0:9092,external://localhost:19092
    # HTTP Proxy listeners and the addresses advertised to its clients.
    - --pandaproxy-addr internal://0.0.0.0:8082,external://0.0.0.0:18082
    - --advertise-pandaproxy-addr internal://redpanda-0:8082,external://localhost:18082
    # Schema Registry listeners.
    - --schema-registry-addr internal://0.0.0.0:8081,external://0.0.0.0:18081
    # RPC API: used by the brokers to talk to each other internally.
    - --rpc-addr redpanda-0:33145
    - --advertise-rpc-addr redpanda-0:33145
    # dev-container mode applies well-known configuration properties
    # for development in containers.
    - --mode dev-container
    # Seastar (the framework underneath Redpanda) is limited to 1 core.
    - --smp 1
    - --default-log-level=info
  # Username and password of the bootstrap SCRAM superuser.
  # See https://docs.redpanda.com/current/deploy/deployment-option/self-hosted/manual/production/production-deployment/#bootstrap-a-user-account
  environment:
    RP_BOOTSTRAP_USER: 'superuser:secretpassword'
  volumes:
    # Broker data directory on the named volume.
    - redpanda-0:/var/lib/redpanda/data
    # Cluster bootstrap properties from the local bootstrap.yml.
    - ./bootstrap.yml:/etc/redpanda/.bootstrap.yaml
  networks:
    - redpanda_network
  ports:
    - "18081:18081"
    - "18082:18082"
    - "19092:19092"
    - "19644:9644"
  # The broker counts as healthy once `rpk cluster info` succeeds with the
  # superuser credentials.
  healthcheck:
    test:
      - CMD
      - rpk
      - cluster
      - info
      - -X
      - user=superuser
      - -X
      - pass=secretpassword
    interval: 10s
    timeout: 15s
    retries: 10
  # Start only after MinIO reports healthy.
  depends_on:
    minio:
      condition: service_healthy
|
||||
redpanda-1:
  image: docker.redpanda.com/redpandadata/redpanda:v25.2.7
  container_name: redpanda-1
  command:
    - redpanda
    - start
    # Kafka API: internal listener on 9092, external listener on 29092.
    - --kafka-addr internal://0.0.0.0:9092,external://0.0.0.0:29092
    - --advertise-kafka-addr internal://redpanda-1:9092,external://localhost:29092
    # HTTP Proxy: internal listener on 8082, external listener on 28082.
    - --pandaproxy-addr internal://0.0.0.0:8082,external://0.0.0.0:28082
    - --advertise-pandaproxy-addr internal://redpanda-1:8082,external://localhost:28082
    # Schema Registry: internal listener on 8081, external listener on 28081.
    - --schema-registry-addr internal://0.0.0.0:8081,external://0.0.0.0:28081
    # RPC address of this broker.
    - --rpc-addr redpanda-1:33145
    - --advertise-rpc-addr redpanda-1:33145
    - --mode dev-container
    - --smp 1
    - --default-log-level=info
    # Seed server: the RPC address of redpanda-0.
    - --seeds redpanda-0:33145
  environment:
    RP_BOOTSTRAP_USER: 'superuser:secretpassword'
  volumes:
    - redpanda-1:/var/lib/redpanda/data
    - ./bootstrap.yml:/etc/redpanda/.bootstrap.yaml
  networks:
    - redpanda_network
  ports:
    - "28081:28081"
    - "28082:28082"
    - "29092:29092"
    - "29644:9644"
  # Short-form dependencies: start order only, no health condition.
  depends_on: [redpanda-0, minio]
|
||||
redpanda-2:
  image: docker.redpanda.com/redpandadata/redpanda:v25.2.7
  container_name: redpanda-2
  command:
    - redpanda
    - start
    # Kafka API: internal listener on 9092, external listener on 39092.
    - --kafka-addr internal://0.0.0.0:9092,external://0.0.0.0:39092
    - --advertise-kafka-addr internal://redpanda-2:9092,external://localhost:39092
    # HTTP Proxy: internal listener on 8082, external listener on 38082.
    - --pandaproxy-addr internal://0.0.0.0:8082,external://0.0.0.0:38082
    - --advertise-pandaproxy-addr internal://redpanda-2:8082,external://localhost:38082
    # Schema Registry: internal listener on 8081, external listener on 38081.
    - --schema-registry-addr internal://0.0.0.0:8081,external://0.0.0.0:38081
    # RPC address of this broker.
    - --rpc-addr redpanda-2:33145
    - --advertise-rpc-addr redpanda-2:33145
    - --mode dev-container
    - --smp 1
    - --default-log-level=info
    # Seed server: the RPC address of redpanda-0.
    - --seeds redpanda-0:33145
  environment:
    RP_BOOTSTRAP_USER: 'superuser:secretpassword'
  volumes:
    - redpanda-2:/var/lib/redpanda/data
    - ./bootstrap.yml:/etc/redpanda/.bootstrap.yaml
  networks:
    - redpanda_network
  ports:
    - "38081:38081"
    - "38082:38082"
    - "39092:39092"
    - "39644:9644"
  # Short-form dependencies: start order only, no health condition.
  depends_on: [redpanda-0, minio]
|
||||
####################
|
||||
# Redpanda Console #
|
||||
####################
|
||||
console:
  image: docker.redpanda.com/redpandadata/console:v3.2.2
  container_name: redpanda-console
  networks:
    - redpanda_network
  ports:
    - "8080:8080"
  # The shell writes the inline configuration (CONSOLE_CONFIG_FILE) to
  # /tmp/config.yml and only then starts the Console binary.
  # `$$` is Compose's escape for a literal `$`.
  entrypoint: /bin/sh
  command:
    - -c
    - echo "$$CONSOLE_CONFIG_FILE" > /tmp/config.yml && /app/console
  volumes:
    - ./config:/tmp/config/
  environment:
    # Config file path; defaults to /tmp/config.yml unless CONFIG_FILEPATH
    # is set in the environment that runs Compose.
    CONFIG_FILEPATH: "${CONFIG_FILEPATH:-/tmp/config.yml}"
    # Inline Redpanda Console configuration, written to disk by `command`.
    CONSOLE_CONFIG_FILE: |
      # Configure a connection to the Redpanda cluster
      # See https://docs.redpanda.com/current/console/config/connect-to-redpanda/
      kafka:
        brokers: ["redpanda-0:9092"]
        sasl:
          enabled: true
          impersonateUser: true
      schemaRegistry:
        enabled: true
        urls: ["http://redpanda-0:8081","http://redpanda-1:8081","http://redpanda-2:8081"]
        authentication:
          impersonateUser: true
      redpanda:
        adminApi:
          enabled: true
          urls: ["http://redpanda-0:9644","http://redpanda-1:9644","http://redpanda-2:9644"]
          authentication:
            basic:
              username: superuser
              password: secretpassword
            impersonateUser: false
      console:
        # Configures Redpanda Console to fetch topic documentation from GitHub and display it in the UI.
        # See https://docs.redpanda.com/current/console/config/topic-documentation/
        topicDocumentation:
          enabled: true
          git:
            enabled: true
            repository:
              url: https://github.com/redpanda-data/docs
              branch: main
              baseDirectory: tests/docker-compose
      authentication:
        jwtSigningKey: vazxnT+ZHtxKslK6QlDGovcYnSjTk/lKMmZ+mHrBVE+YdVDkLgSuP6AszAKe9Gvq
        basic:
          enabled: true
      authorization:
        roleBindings:
          - roleName: admin
            users:
              - loginType: basic
                name: superuser
  # Wait for a healthy broker and for the one-shot setup containers
  # (topic, schema, transform) to finish successfully.
  depends_on:
    redpanda-0:
      condition: service_healthy
    createtopic:
      condition: service_completed_successfully
    registerschema:
      condition: service_completed_successfully
    deploytransform:
      condition: service_completed_successfully
|
||||
####################
|
||||
# Redpanda Connect #
|
||||
####################
|
||||
connect:
  # NOTE(review): this image has no version tag, unlike the other images
  # in this file.
  image: docker.redpanda.com/redpandadata/connect
  container_name: redpanda-connect
  networks:
    - redpanda_network
  depends_on:
    redpanda-0:
      condition: service_healthy
  # The shell writes the inline pipeline (CONNECT_CFG_FILE) to
  # /tmp/connect.yml and then runs Redpanda Connect against that file.
  # `$$` is Compose's escape for a literal `$`.
  entrypoint: /bin/sh
  command:
    - -c
    - echo "$$CONNECT_CFG_FILE" > /tmp/connect.yml; /redpanda-connect -c /tmp/connect.yml
  environment:
    # Redpanda Connect pipeline that creates fake data, processes it,
    # and writes the output to a set of topics.
    #
    # Input:
    #  - The `generate` input produces one fake login record per interval.
    #    See https://docs.redpanda.com/redpanda-connect/components/inputs/generate/
    # Pipeline:
    #  - A Bloblang mapping expands each record into a JSON array; the
    #    first element goes to the 'logins' topic and the remaining
    #    random number (1-3) of elements go to the 'transactions' topic.
    #  - The `unarchive` processor parses the JSON array and extracts
    #    each element into its own message.
    #    See https://docs.redpanda.com/redpanda-connect/guides/bloblang/about/
    # Output:
    #  - The `kafka_franz` output writes the messages to the Redpanda brokers.
    #    See https://docs.redpanda.com/redpanda-connect/components/outputs/kafka_franz/
    CONNECT_CFG_FILE: |
      input:
        generate:
          interval: 1s
          mapping: |
            let first_name = fake("first_name")
            let last_name = fake("last_name")

            root.user_id = counter()
            root.name = $$first_name + " " + $$last_name
            root.email = ($$first_name.slice(0,1) + $$last_name + "@" + fake("domain_name")).lowercase()
            root.ip = fake("ipv4")
            root.login_time = now()
      pipeline:
        processors:
          - mapping: |
              root = range(0, random_int(min:2, max:4)).map_each(cust -> this)
          - unarchive:
              format: "json_array"
          - mapping: |
              if batch_index() == 0 {
                meta topic = "logins"
                root = this
              } else {
                meta topic = "transactions"
                root.user_id = this.user_id
                root.email = this.email
                root.index = batch_index() - 1
                root.product_url = fake("url")
                root.price = fake("amount_with_currency")
                root.timestamp = now()
              }
      output:
        kafka_franz:
          seed_brokers: [ "redpanda-0:9092" ]
          topic: $${! metadata("topic") }
          sasl:
            - mechanism: SCRAM-SHA-256
              password: secretpassword
              username: superuser
|
||||
####################
|
||||
# rpk container to create the edu-filtered-domains topic #
|
||||
# See https://docs.redpanda.com/current/reference/rpk/rpk-topic/rpk-topic-create/
|
||||
####################
|
||||
createtopic:
  image: docker.redpanda.com/redpandadata/redpanda:v25.2.7
  networks:
    - redpanda_network
  # Runs only once redpanda-0 reports healthy.
  depends_on:
    redpanda-0:
      condition: service_healthy
  # Arguments for creating the edu-filtered-domains topic, authenticating as
  # the superuser against the internal Kafka listener of redpanda-0.
  command:
    - topic
    - create
    - edu-filtered-domains
    - '-X user=superuser'
    - '-X pass=secretpassword'
    - '-X brokers=redpanda-0:9092'
|
||||
####################
|
||||
# rpk container to register the schema #
|
||||
# See https://docs.redpanda.com/current/manage/schema-reg/schema-reg-api/
|
||||
####################
|
||||
registerschema:
  image: docker.redpanda.com/redpandadata/redpanda:v25.2.7
  networks:
    - redpanda_network
  # Runs only once redpanda-0 reports healthy.
  depends_on:
    redpanda-0:
      condition: service_healthy
  # Mount the local schema file into the container.
  volumes:
    - ./transactions-schema.json:/etc/redpanda/transactions-schema.json
  # Arguments for registering the mounted schema file under the subject
  # "transactions" on the internal Schema Registry listener of redpanda-0.
  command:
    - registry
    - schema
    - create
    - transactions
    - --schema
    - /etc/redpanda/transactions-schema.json
    - '-X user=superuser'
    - '-X pass=secretpassword'
    - '-X registry.hosts=redpanda-0:8081'
|
||||
####################
|
||||
# rpk container that consumes the transactions topic as a consumer group #
|
||||
# See https://docs.redpanda.com/current/reference/rpk/rpk-topic/rpk-topic-consume/
|
||||
####################
|
||||
consumergroup:
  image: docker.redpanda.com/redpandadata/redpanda:v25.2.7
  networks:
    - redpanda_network
  # Starts after the topic-creation and transform-deployment containers
  # have both completed successfully.
  depends_on:
    createtopic:
      condition: service_completed_successfully
    deploytransform:
      condition: service_completed_successfully
  # Arguments for consuming the "transactions" topic as a member of the
  # "transactions-consumer" group, authenticated as the superuser.
  command:
    - topic
    - consume
    - transactions
    - --group
    - transactions-consumer
    - '-X user=superuser'
    - '-X pass=secretpassword'
    - '-X brokers=redpanda-0:9092'
|
||||
####################
|
||||
# rpk container to deploy the pre-built data transform #
|
||||
# See https://docs.redpanda.com/current/develop/data-transforms/deploy/
|
||||
####################
|
||||
deploytransform:
  image: docker.redpanda.com/redpandadata/redpanda:v25.2.7
  networks:
    - redpanda_network
  # The transform is deployed only after the createtopic container has
  # completed successfully.
  depends_on:
    createtopic:
      condition: service_completed_successfully
  # Pre-built WebAssembly transform mounted from the local ./transform directory.
  volumes:
    - ./transform/regex.wasm:/etc/redpanda/regex.wasm
  # Arguments for deploying the "regex" transform, which reads from `logins`
  # and writes to `edu-filtered-domains`. PATTERN and MATCH_VALUE are passed
  # to the transform as variables. The Admin API of redpanda-0 is the target.
  command:
    - transform
    - deploy
    - '--file=/etc/redpanda/regex.wasm'
    - '--name=regex'
    - '--input-topic=logins'
    - '--output-topic=edu-filtered-domains'
    - '--var=PATTERN="[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.edu"'
    - '--var=MATCH_VALUE=true'
    - '-X user=superuser'
    - '-X pass=secretpassword'
    - '-X admin.hosts=redpanda-0:9644'
|
||||
####################
|
||||
# MinIO for Tiered Storage #
|
||||
# See https://min.io/
|
||||
#
|
||||
# NOTE: MinIO is included in this quickstart for development and evaluation purposes only.
|
||||
# It is not supported for production deployments of Redpanda.
|
||||
#
|
||||
# For production environments, use one of the supported object storage providers:
|
||||
# https://docs.redpanda.com/current/deploy/deployment-option/self-hosted/manual/production/requirements/#object-storage-providers-for-tiered-storage
|
||||
####################
|
||||
minio:
  image: minio/minio:RELEASE.2025-05-24T17-08-30Z
  container_name: minio
  # Serve /data, with the MinIO console listening on :9001.
  command:
    - server
    - --console-address
    - ":9001"
    - /data
  ports:
    - "9000:9000"
    - "9001:9001"
  environment:
    # Root credentials; the same values appear as the cloud_storage_*
    # access and secret keys in the Redpanda bootstrap file.
    MINIO_ROOT_USER: 'minio'
    MINIO_ROOT_PASSWORD: 'redpandaTieredStorage7'
    MINIO_SERVER_URL: 'http://minio:9000'
    MINIO_REGION_NAME: 'local'
    MINIO_DOMAIN: 'minio'
  volumes:
    - minio:/data
  networks:
    redpanda_network:
      # Extra DNS name for this container on the shared network.
      aliases:
        - redpanda.minio
  # Healthy once the readiness endpoint answers successfully.
  healthcheck:
    test:
      - CMD
      - curl
      - -f
      - http://localhost:9000/minio/health/ready
    interval: 10s
    timeout: 5s
    retries: 3
|
||||
mc:
  image: minio/mc:RELEASE.2025-05-21T01-59-54Z
  container_name: mc
  # Start only after MinIO reports healthy.
  depends_on:
    minio:
      condition: service_healthy
  networks:
    - redpanda_network
  environment:
    - AWS_ACCESS_KEY_ID=minio
    - AWS_SECRET_ACCESS_KEY=redpandaTieredStorage7
    - AWS_REGION=local
  # Bucket setup script (folded into a single `sh -c` command line):
  #  1. Retry `mc alias set` every second until it succeeds.
  #  2. Create the `redpanda` bucket. `--ignore-existing` keeps this step
  #     from failing when the container restarts and the bucket already
  #     exists on the persistent `minio` volume.
  #  3. Make the bucket publicly accessible. `mc anonymous set` is the
  #     current name of the former `mc policy set` subcommand, which is
  #     not available in recent mc releases such as the one pinned above.
  #  4. `tail -f /dev/null` keeps the container running afterwards.
  entrypoint: >
    /bin/sh -c "
    until (/usr/bin/mc alias set minio http://minio:9000 minio redpandaTieredStorage7) do echo '...waiting...' && sleep 1; done;
    /usr/bin/mc mb --ignore-existing minio/redpanda;
    /usr/bin/mc anonymous set public minio/redpanda;
    tail -f /dev/null
    "
|
||||
@@ -0,0 +1,77 @@
|
||||
# Redpanda Connect pipeline: generates one fake user profile per second and
# writes it to the 'profiles' topic through the brokers' external listeners.
input:
  # Use the 'generate' input
  # https://docs.redpanda.com/redpanda-connect/components/inputs/generate/
  generate:
    # The interval at which new records are generated.
    interval: 1s
    # The mapping section defines how each generated record is structured.
    # The language used here is called Bloblang.
    # https://docs.redpanda.com/redpanda-connect/guides/bloblang/about/
    mapping: |
      # Generate a fake first name using the 'first_name' faker function.
      let first_name = fake("first_name")

      # Generate a fake last name using the 'last_name' faker function.
      let last_name = fake("last_name")

      # Define possible subscription levels for users.
      let subscription_levels = ["Free", "Basic", "Premium"]

      # Define possible notification channels for user preferences.
      let notifications = ["email", "sms", "push" ]

      # Define supported languages for user preferences (ISO 639-1 codes;
      # Japanese is "ja" - "jp" is the country code, not the language code).
      let languages = ["en", "es", "fr", "de", "zh", "ja"]

      # Assign a unique user ID using a UUID digit generator.
      root.user_id = fake("uuid_digit")

      # Assign the generated first name to the 'first_name' field.
      root.first_name = $first_name

      # Assign the generated last name to the 'last_name' field.
      root.last_name = $last_name

      # Construct the user's email by combining the first initial, last name, and a fake domain name.
      # The email is converted to lowercase for consistency.
      root.email = ($first_name.slice(0,1) + $last_name + "@" + fake("domain_name")).lowercase()

      # Assign a fake registration date using the 'date' faker function.
      root.registration_date = fake("date")

      # Assign the current timestamp as the last login time.
      root.last_login = now()

      # Randomly assign a subscription level by selecting an index from the 'subscription_levels' array.
      root.subscription_level = $subscription_levels.index(random_int(min: 0, max: 2))

      # Randomly assign a language preference by selecting an index from the 'languages' array.
      root.preferences.language = $languages.index(random_int(min: 0, max: 5))

      # Randomly assign a notification preference by selecting an index from the 'notifications' array.
      root.preferences.notifications = $notifications.index(random_int(min: 0, max: 2))
pipeline:
  processors:
    - mapping: |
        # Set the target topic for the generated records to 'profiles'.
        meta topic = "profiles"

        # Assign the entire record (root) to be sent to the specified topic.
        root = this
output:
  # Use the 'kafka_franz' output to send the result back to Redpanda
  # https://docs.redpanda.com/redpanda-connect/components/outputs/kafka_franz/
  kafka_franz:
    # Define the list of seed brokers for the Kafka cluster.
    seed_brokers: [ "localhost:19092", "localhost:29092", "localhost:39092"]
    # Dynamically assign the topic based on the metadata specified in the processors.
    # In this case, it resolves to the 'profiles' topic.
    topic: ${! metadata("topic") }
    # Configure SASL authentication to securely connect to the Kafka brokers.
    sasl:
      - # Specify the SASL mechanism to use for authentication.
        mechanism: SCRAM-SHA-256
        # The password for the SASL authentication.
        password: secretpassword
        # The username for the SASL authentication.
        username: superuser
|
||||
@@ -0,0 +1,56 @@
|
||||
# =================================================================
# Initial cluster properties for a Redpanda cluster (bootstrap file).
# Several of these settings target quickstart development and
# evaluation and are not suitable for production environments.
#
# Background on bootstrap files:
# https://docs.redpanda.com/current/deploy/deployment-option/self-hosted/manual/production/production-deployment/#configure-a-bootstrap-file
# =================================================================

# --- Authentication ----------------------------------------------
# Enable SASL authentication for the Kafka and Admin APIs.
# https://docs.redpanda.com/current/reference/properties/cluster-properties/#admin_api_require_auth
# https://docs.redpanda.com/current/reference/properties/cluster-properties/#enable_sasl
admin_api_require_auth: true
enable_sasl: true
# At least one superuser must exist so that other SASL users can be created.
# https://docs.redpanda.com/current/reference/properties/cluster-properties/#superusers
superusers: [superuser]

# --- Topics and transforms ---------------------------------------
# Allow topics to be created on first access.
# https://docs.redpanda.com/current/reference/properties/cluster-properties/#auto_create_topics_enabled
auto_create_topics_enabled: true
# Enable data transforms.
# https://docs.redpanda.com/current/develop/data-transforms/how-transforms-work/
data_transforms_enabled: true

# --- Audit logging (enterprise feature) --------------------------
# https://docs.redpanda.com/current/manage/audit-logging/
audit_enabled: true

# --- Tiered Storage (enterprise feature) -------------------------
# https://docs.redpanda.com/current/manage/tiered-storage/
# Object-store region, credentials, endpoint and bucket. TLS is disabled
# for the connection to the endpoint.
cloud_storage_enabled: true
cloud_storage_region: 'local'
cloud_storage_access_key: 'minio'
cloud_storage_secret_key: 'redpandaTieredStorage7'
cloud_storage_api_endpoint: 'minio'
cloud_storage_api_endpoint_port: 9000
cloud_storage_disable_tls: true
cloud_storage_bucket: 'redpanda'
# Forces segments to be uploaded to Tiered Storage faster for the
# purposes of the quickstart.
# https://docs.redpanda.com/current/reference/properties/object-storage-properties/#cloud_storage_segment_max_upload_interval_sec
cloud_storage_segment_max_upload_interval_sec: 60

# --- Continuous Data Balancing (enterprise feature) --------------
# Continuously monitors node and rack availability and disk usage so the
# cluster can rebalance partitions on its own.
# https://docs.redpanda.com/current/manage/cluster-maintenance/continuous-data-balancing/
partition_autobalancing_mode: 'continuous'

# --- Metrics -----------------------------------------------------
# Consumer group metrics that Redpanda collects.
# https://docs.redpanda.com/current/reference/properties/cluster-properties/#enable_consumer_group_metrics
enable_consumer_group_metrics: ["group", "partition", "consumer_lag"]
# Lag collection interval, lowered for the quickstart.
# https://docs.redpanda.com/current/reference/properties/cluster-properties/#consumer_group_lag_collection_interval_sec
consumer_group_lag_collection_interval_sec: 60
# Enable Redpanda to collect host metrics.
# https://docs.redpanda.com/current/reference/properties/cluster-properties/#enable_host_metrics
enable_host_metrics: true
|
||||
@@ -0,0 +1,403 @@
|
||||
# Compose project name for the three-broker quickstart.
name: redpanda-quickstart-multi-broker

# Named volumes: one data volume per broker plus one for MinIO.
# `null` keeps the default volume settings.
volumes:
  redpanda-0: null
  redpanda-1: null
  redpanda-2: null
  minio: null

# Single bridge network shared by every service in this file.
networks:
  redpanda_network:
    driver: bridge
|
||||
services:
|
||||
##################
|
||||
# Redpanda Brokers #
|
||||
##################
|
||||
redpanda-0:
  image: docker.redpanda.com/redpandadata/redpanda:v25.2.7
  container_name: redpanda-0
  command:
    - redpanda
    - start
    # Kafka API listeners: "internal" on 9092, "external" on 19092.
    - --kafka-addr internal://0.0.0.0:9092,external://0.0.0.0:19092
    # Addresses advertised to Kafka API clients.
    # Clients inside the Docker network use the internal address;
    # clients outside the Docker network use the external address.
    - --advertise-kafka-addr internal://redpanda-0:9092,external://localhost:19092
    # HTTP Proxy listeners and the addresses advertised to its clients.
    - --pandaproxy-addr internal://0.0.0.0:8082,external://0.0.0.0:18082
    - --advertise-pandaproxy-addr internal://redpanda-0:8082,external://localhost:18082
    # Schema Registry listeners.
    - --schema-registry-addr internal://0.0.0.0:8081,external://0.0.0.0:18081
    # RPC API: used by the brokers to talk to each other internally.
    - --rpc-addr redpanda-0:33145
    - --advertise-rpc-addr redpanda-0:33145
    # dev-container mode applies well-known configuration properties
    # for development in containers.
    - --mode dev-container
    # Seastar (the framework underneath Redpanda) is limited to 1 core.
    - --smp 1
    - --default-log-level=info
  # Username and password of the bootstrap SCRAM superuser.
  # See https://docs.redpanda.com/current/deploy/deployment-option/self-hosted/manual/production/production-deployment/#bootstrap-a-user-account
  environment:
    RP_BOOTSTRAP_USER: 'superuser:secretpassword'
  volumes:
    # Broker data directory on the named volume.
    - redpanda-0:/var/lib/redpanda/data
    # Cluster bootstrap properties from the local bootstrap.yml.
    - ./bootstrap.yml:/etc/redpanda/.bootstrap.yaml
  networks:
    - redpanda_network
  ports:
    - "18081:18081"
    - "18082:18082"
    - "19092:19092"
    - "19644:9644"
  # The broker counts as healthy once `rpk cluster info` succeeds with the
  # superuser credentials.
  healthcheck:
    test:
      - CMD
      - rpk
      - cluster
      - info
      - -X
      - user=superuser
      - -X
      - pass=secretpassword
    interval: 10s
    timeout: 15s
    retries: 10
  # Start only after MinIO reports healthy.
  depends_on:
    minio:
      condition: service_healthy
|
||||
redpanda-1:
  image: docker.redpanda.com/redpandadata/redpanda:v25.2.7
  container_name: redpanda-1
  command:
    - redpanda
    - start
    # Kafka API: internal listener on 9092, external listener on 29092.
    - --kafka-addr internal://0.0.0.0:9092,external://0.0.0.0:29092
    - --advertise-kafka-addr internal://redpanda-1:9092,external://localhost:29092
    # HTTP Proxy: internal listener on 8082, external listener on 28082.
    - --pandaproxy-addr internal://0.0.0.0:8082,external://0.0.0.0:28082
    - --advertise-pandaproxy-addr internal://redpanda-1:8082,external://localhost:28082
    # Schema Registry: internal listener on 8081, external listener on 28081.
    - --schema-registry-addr internal://0.0.0.0:8081,external://0.0.0.0:28081
    # RPC address of this broker.
    - --rpc-addr redpanda-1:33145
    - --advertise-rpc-addr redpanda-1:33145
    - --mode dev-container
    - --smp 1
    - --default-log-level=info
    # Seed server: the RPC address of redpanda-0.
    - --seeds redpanda-0:33145
  environment:
    RP_BOOTSTRAP_USER: 'superuser:secretpassword'
  volumes:
    - redpanda-1:/var/lib/redpanda/data
    - ./bootstrap.yml:/etc/redpanda/.bootstrap.yaml
  networks:
    - redpanda_network
  ports:
    - "28081:28081"
    - "28082:28082"
    - "29092:29092"
    - "29644:9644"
  # Short-form dependencies: start order only, no health condition.
  depends_on: [redpanda-0, minio]
|
||||
redpanda-2:
  image: docker.redpanda.com/redpandadata/redpanda:v25.2.7
  container_name: redpanda-2
  command:
    - redpanda
    - start
    # Kafka API: internal listener on 9092, external listener on 39092.
    - --kafka-addr internal://0.0.0.0:9092,external://0.0.0.0:39092
    - --advertise-kafka-addr internal://redpanda-2:9092,external://localhost:39092
    # HTTP Proxy: internal listener on 8082, external listener on 38082.
    - --pandaproxy-addr internal://0.0.0.0:8082,external://0.0.0.0:38082
    - --advertise-pandaproxy-addr internal://redpanda-2:8082,external://localhost:38082
    # Schema Registry: internal listener on 8081, external listener on 38081.
    - --schema-registry-addr internal://0.0.0.0:8081,external://0.0.0.0:38081
    # RPC address of this broker.
    - --rpc-addr redpanda-2:33145
    - --advertise-rpc-addr redpanda-2:33145
    - --mode dev-container
    - --smp 1
    - --default-log-level=info
    # Seed server: the RPC address of redpanda-0.
    - --seeds redpanda-0:33145
  environment:
    RP_BOOTSTRAP_USER: 'superuser:secretpassword'
  volumes:
    - redpanda-2:/var/lib/redpanda/data
    - ./bootstrap.yml:/etc/redpanda/.bootstrap.yaml
  networks:
    - redpanda_network
  ports:
    - "38081:38081"
    - "38082:38082"
    - "39092:39092"
    - "39644:9644"
  # Short-form dependencies: start order only, no health condition.
  depends_on: [redpanda-0, minio]
|
||||
####################
|
||||
# Redpanda Console #
|
||||
####################
|
||||
console:
  image: docker.redpanda.com/redpandadata/console:v3.2.2
  container_name: redpanda-console
  networks:
    - redpanda_network
  ports:
    - "8080:8080"
  # The shell writes the inline configuration (CONSOLE_CONFIG_FILE) to
  # /tmp/config.yml and only then starts the Console binary.
  # `$$` is Compose's escape for a literal `$`.
  entrypoint: /bin/sh
  command:
    - -c
    - echo "$$CONSOLE_CONFIG_FILE" > /tmp/config.yml && /app/console
  volumes:
    - ./config:/tmp/config/
  environment:
    # Config file path; defaults to /tmp/config.yml unless CONFIG_FILEPATH
    # is set in the environment that runs Compose.
    CONFIG_FILEPATH: "${CONFIG_FILEPATH:-/tmp/config.yml}"
    # Inline Redpanda Console configuration, written to disk by `command`.
    CONSOLE_CONFIG_FILE: |
      # Configure a connection to the Redpanda cluster
      # See https://docs.redpanda.com/current/console/config/connect-to-redpanda/
      kafka:
        brokers: ["redpanda-0:9092"]
        sasl:
          enabled: true
          impersonateUser: true
      schemaRegistry:
        enabled: true
        urls: ["http://redpanda-0:8081","http://redpanda-1:8081","http://redpanda-2:8081"]
        authentication:
          impersonateUser: true
      redpanda:
        adminApi:
          enabled: true
          urls: ["http://redpanda-0:9644","http://redpanda-1:9644","http://redpanda-2:9644"]
          authentication:
            basic:
              username: superuser
              password: secretpassword
            impersonateUser: false
      console:
        # Configures Redpanda Console to fetch topic documentation from GitHub and display it in the UI.
        # See https://docs.redpanda.com/current/console/config/topic-documentation/
        topicDocumentation:
          enabled: true
          git:
            enabled: true
            repository:
              url: https://github.com/redpanda-data/docs
              branch: main
              baseDirectory: tests/docker-compose
      authentication:
        jwtSigningKey: vazxnT+ZHtxKslK6QlDGovcYnSjTk/lKMmZ+mHrBVE+YdVDkLgSuP6AszAKe9Gvq
        basic:
          enabled: true
      authorization:
        roleBindings:
          - roleName: admin
            users:
              - loginType: basic
                name: superuser
  # Wait for a healthy broker and for the one-shot setup containers
  # (topic, schema, transform) to finish successfully.
  depends_on:
    redpanda-0:
      condition: service_healthy
    createtopic:
      condition: service_completed_successfully
    registerschema:
      condition: service_completed_successfully
    deploytransform:
      condition: service_completed_successfully
|
||||
####################
|
||||
# Redpanda Connect #
|
||||
####################
|
||||
connect:
  # NOTE(review): this image has no version tag, unlike the other images
  # in this file.
  image: docker.redpanda.com/redpandadata/connect
  container_name: redpanda-connect
  networks:
    - redpanda_network
  depends_on:
    redpanda-0:
      condition: service_healthy
  # The shell writes the inline pipeline (CONNECT_CFG_FILE) to
  # /tmp/connect.yml and then runs Redpanda Connect against that file.
  # `$$` is Compose's escape for a literal `$`.
  entrypoint: /bin/sh
  command:
    - -c
    - echo "$$CONNECT_CFG_FILE" > /tmp/connect.yml; /redpanda-connect -c /tmp/connect.yml
  environment:
    # Redpanda Connect pipeline that creates fake data, processes it,
    # and writes the output to a set of topics.
    #
    # Input:
    #  - The `generate` input produces one fake login record per interval.
    #    See https://docs.redpanda.com/redpanda-connect/components/inputs/generate/
    # Pipeline:
    #  - A Bloblang mapping expands each record into a JSON array; the
    #    first element goes to the 'logins' topic and the remaining
    #    random number (1-3) of elements go to the 'transactions' topic.
    #  - The `unarchive` processor parses the JSON array and extracts
    #    each element into its own message.
    #    See https://docs.redpanda.com/redpanda-connect/guides/bloblang/about/
    # Output:
    #  - The `kafka_franz` output writes the messages to the Redpanda brokers.
    #    See https://docs.redpanda.com/redpanda-connect/components/outputs/kafka_franz/
    CONNECT_CFG_FILE: |
      input:
        generate:
          interval: 1s
          mapping: |
            let first_name = fake("first_name")
            let last_name = fake("last_name")

            root.user_id = counter()
            root.name = $$first_name + " " + $$last_name
            root.email = ($$first_name.slice(0,1) + $$last_name + "@" + fake("domain_name")).lowercase()
            root.ip = fake("ipv4")
            root.login_time = now()
      pipeline:
        processors:
          - mapping: |
              root = range(0, random_int(min:2, max:4)).map_each(cust -> this)
          - unarchive:
              format: "json_array"
          - mapping: |
              if batch_index() == 0 {
                meta topic = "logins"
                root = this
              } else {
                meta topic = "transactions"
                root.user_id = this.user_id
                root.email = this.email
                root.index = batch_index() - 1
                root.product_url = fake("url")
                root.price = fake("amount_with_currency")
                root.timestamp = now()
              }
      output:
        kafka_franz:
          seed_brokers: [ "redpanda-0:9092" ]
          topic: $${! metadata("topic") }
          sasl:
            - mechanism: SCRAM-SHA-256
              password: secretpassword
              username: superuser
|
||||
# ----------------------------------------------------------------------
# createtopic: rpk container that creates the edu-filtered-domains topic.
# Other services wait for it to complete successfully.
# See https://docs.redpanda.com/current/reference/rpk/rpk-topic/rpk-topic-create/
# ----------------------------------------------------------------------
createtopic:
  image: docker.redpanda.com/redpandadata/redpanda:v25.2.7
  networks:
    - redpanda_network
  # Run only after the first broker reports healthy.
  depends_on:
    redpanda-0:
      condition: service_healthy
  # rpk arguments: `topic create <name>` plus connection/credential overrides.
  command:
    - 'topic'
    - 'create'
    - 'edu-filtered-domains'
    - '-X user=superuser'
    - '-X pass=secretpassword'
    - '-X brokers=redpanda-0:9092'
|
||||
# ----------------------------------------------------------------------
# registerschema: rpk container that registers the `transactions` schema
# from the mounted transactions-schema.json file.
# See https://docs.redpanda.com/current/manage/schema-reg/schema-reg-api/
# ----------------------------------------------------------------------
registerschema:
  image: docker.redpanda.com/redpandadata/redpanda:v25.2.7
  networks:
    - redpanda_network
  # Mount the local schema file into the container at the path passed to
  # `--schema` below.
  volumes:
    - ./transactions-schema.json:/etc/redpanda/transactions-schema.json
  # Run only after the first broker reports healthy.
  depends_on:
    redpanda-0:
      condition: service_healthy
  command:
    - 'registry'
    - 'schema'
    - 'create'
    - 'transactions'
    - '--schema'
    - '/etc/redpanda/transactions-schema.json'
    - '-X user=superuser'
    - '-X pass=secretpassword'
    - '-X registry.hosts=redpanda-0:8081'
|
||||
# ----------------------------------------------------------------------
# consumergroup: rpk container that consumes the `transactions` topic as
# a member of the `transactions-consumer` group.
# See https://docs.redpanda.com/current/reference/rpk/rpk-topic/rpk-topic-consume/
# ----------------------------------------------------------------------
consumergroup:
  image: docker.redpanda.com/redpandadata/redpanda:v25.2.7
  networks:
    - redpanda_network
  # Start once the topic-creation and transform-deployment jobs have
  # completed successfully.
  depends_on:
    createtopic:
      condition: service_completed_successfully
    deploytransform:
      condition: service_completed_successfully
  command:
    - 'topic'
    - 'consume'
    - 'transactions'
    - '--group'
    - 'transactions-consumer'
    - '-X user=superuser'
    - '-X pass=secretpassword'
    - '-X brokers=redpanda-0:9092'
|
||||
# ----------------------------------------------------------------------
# deploytransform: rpk container that deploys the pre-built `regex` data
# transform, reading from `logins` and writing to `edu-filtered-domains`.
# See https://docs.redpanda.com/current/develop/data-transforms/deploy/
# ----------------------------------------------------------------------
deploytransform:
  image: docker.redpanda.com/redpandadata/redpanda:v25.2.7
  networks:
    - redpanda_network
  # The compiled WebAssembly module referenced by `--file` below.
  volumes:
    - ./transform/regex.wasm:/etc/redpanda/regex.wasm
  # The output topic must exist first, so wait for createtopic to finish.
  depends_on:
    createtopic:
      condition: service_completed_successfully
  command:
    - 'transform'
    - 'deploy'
    - '--file=/etc/redpanda/regex.wasm'
    - '--name=regex'
    - '--input-topic=logins'
    - '--output-topic=edu-filtered-domains'
    # The double quotes are part of the argument itself: each list item is
    # passed as-is, without shell processing.
    - '--var=PATTERN="[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.edu"'
    - '--var=MATCH_VALUE=true'
    - '-X user=superuser'
    - '-X pass=secretpassword'
    - '-X admin.hosts=redpanda-0:9644'
|
||||
# ----------------------------------------------------------------------
# MinIO: S3-compatible object store backing Tiered Storage.
# See https://min.io/
#
# NOTE: MinIO ships with this quickstart for development and evaluation
# only. It is not supported for production deployments of Redpanda.
#
# In production, use one of the supported object storage providers:
# https://docs.redpanda.com/current/deploy/deployment-option/self-hosted/manual/production/requirements/#object-storage-providers-for-tiered-storage
# ----------------------------------------------------------------------
minio:
  image: minio/minio:RELEASE.2025-05-24T17-08-30Z
  container_name: minio
  command: 'server --console-address ":9001" /data'
  environment:
    MINIO_ROOT_USER: 'minio'
    MINIO_ROOT_PASSWORD: 'redpandaTieredStorage7'
    MINIO_SERVER_URL: 'http://minio:9000'
    MINIO_REGION_NAME: 'local'
    MINIO_DOMAIN: 'minio'
  volumes:
    - minio:/data
  networks:
    redpanda_network:
      # Additional DNS name for this container on the network.
      aliases:
        - redpanda.minio
  ports:
    - "9000:9000"
    - "9001:9001"
  # Other services wait on this check via `condition: service_healthy`.
  healthcheck:
    test:
      - CMD
      - curl
      - -f
      - http://localhost:9000/minio/health/ready
    interval: 10s
    timeout: 5s
    retries: 3
|
||||
# mc: MinIO client helper. Once MinIO is healthy it registers the `minio`
# alias, creates the `redpanda` bucket, allows anonymous access to it, and
# then idles.
mc:
  depends_on:
    minio:
      condition: service_healthy
  image: minio/mc:RELEASE.2025-05-21T01-59-54Z
  container_name: mc
  networks:
    - redpanda_network
  environment:
    - AWS_ACCESS_KEY_ID=minio
    - AWS_SECRET_ACCESS_KEY=redpandaTieredStorage7
    - AWS_REGION=local
  # Script steps:
  #  1. Retry `mc alias set` every second until MinIO accepts the credentials.
  #  2. Create the bucket; `--ignore-existing` keeps this step from failing
  #     when the bucket is already present on the persisted volume.
  #  3. Make the bucket public. `mc anonymous set` replaces the older
  #     `mc policy set` subcommand.
  #  4. `tail -f /dev/null` blocks forever so the container keeps running.
  entrypoint: >
    /bin/sh -c "
    until (/usr/bin/mc alias set minio http://minio:9000 minio redpandaTieredStorage7) do echo '...waiting...' && sleep 1; done;
    /usr/bin/mc mb --ignore-existing minio/redpanda;
    /usr/bin/mc anonymous set public minio/redpanda;
    tail -f /dev/null
    "
|
||||
@@ -0,0 +1,77 @@
|
||||
# Redpanda Connect pipeline: generates one fake user profile per second and
# writes it to the 'profiles' topic. The brokers are addressed through their
# localhost listeners.
input:
  # Use the 'generate' input
  # https://docs.redpanda.com/redpanda-connect/components/inputs/generate/
  generate:
    # The interval at which new records are generated.
    interval: 1s
    # The mapping section defines how each generated record is structured.
    # The language used here is called Bloblang.
    # https://docs.redpanda.com/redpanda-connect/guides/bloblang/about/
    mapping: |
      # Generate a fake first name using the 'first_name' faker function.
      let first_name = fake("first_name")

      # Generate a fake last name using the 'last_name' faker function.
      let last_name = fake("last_name")

      # Define possible subscription levels for users.
      let subscription_levels = ["Free", "Basic", "Premium"]

      # Define possible notification channels for user preferences.
      let notifications = ["email", "sms", "push" ]

      # Define supported languages for user preferences (ISO 639-1 codes).
      let languages = ["en", "es", "fr", "de", "zh", "ja"]

      # Assign a unique user ID using a UUID digit generator.
      root.user_id = fake("uuid_digit")

      # Assign the generated first name to the 'first_name' field.
      root.first_name = $first_name

      # Assign the generated last name to the 'last_name' field.
      root.last_name = $last_name

      # Construct the user's email by combining the first initial, last name, and a fake domain name.
      # The email is converted to lowercase for consistency.
      root.email = ($first_name.slice(0,1) + $last_name + "@" + fake("domain_name")).lowercase()

      # Assign a fake registration date using the 'date' faker function.
      root.registration_date = fake("date")

      # Assign the current timestamp as the last login time.
      root.last_login = now()

      # Randomly assign a subscription level by selecting an index from the 'subscription_levels' array.
      root.subscription_level = $subscription_levels.index(random_int(min: 0, max: 2))

      # Randomly assign a language preference by selecting an index from the 'languages' array.
      root.preferences.language = $languages.index(random_int(min: 0, max: 5))

      # Randomly assign a notification preference by selecting an index from the 'notifications' array.
      root.preferences.notifications = $notifications.index(random_int(min: 0, max: 2))
pipeline:
  processors:
    - mapping: |
        # Set the target topic for the generated records to 'profiles'.
        meta topic = "profiles"

        # Assign the entire record (root) to be sent to the specified topic.
        root = this
output:
  # Use the 'kafka_franz' output to send the result back to Redpanda
  # https://docs.redpanda.com/redpanda-connect/components/outputs/kafka_franz/
  kafka_franz:
    # Define the list of seed brokers for the Kafka cluster.
    seed_brokers: [ "localhost:19092", "localhost:29092", "localhost:39092"]
    # Dynamically assign the topic based on the metadata specified in the processors.
    # In this case, it resolves to the 'profiles' topic.
    topic: ${! metadata("topic") }
    # Configure SASL authentication to securely connect to the Kafka brokers.
    sasl:
      - # Specify the SASL mechanism to use for authentication.
        mechanism: SCRAM-SHA-256
        # The password for the SASL authentication.
        password: secretpassword
        # The username for the SASL authentication.
        username: superuser
|
||||
@@ -0,0 +1,56 @@
|
||||
# =================================================================
# Initial cluster properties for a Redpanda cluster (bootstrap file).
# Several of the values below are quickstart/evaluation settings and
# are not suitable for production environments.
#
# Bootstrap file documentation:
# https://docs.redpanda.com/current/deploy/deployment-option/self-hosted/manual/production/production-deployment/#configure-a-bootstrap-file
# =================================================================

# ----- Authentication --------------------------------------------
# SASL authentication is enabled for both the Kafka and Admin APIs.
# https://docs.redpanda.com/current/reference/properties/cluster-properties/#admin_api_require_auth
admin_api_require_auth: true
# https://docs.redpanda.com/current/reference/properties/cluster-properties/#enable_sasl
enable_sasl: true
# Without at least one superuser, no other SASL users can be created.
# https://docs.redpanda.com/current/reference/properties/cluster-properties/#superusers
superusers: ['superuser']

# ----- Topics and transforms -------------------------------------
# Create topics automatically the first time they are accessed.
# https://docs.redpanda.com/current/reference/properties/cluster-properties/#auto_create_topics_enabled
auto_create_topics_enabled: true
# Turn on data transforms.
# https://docs.redpanda.com/current/develop/data-transforms/how-transforms-work/
data_transforms_enabled: true

# ----- Enterprise features ---------------------------------------
# Audit logging.
# https://docs.redpanda.com/current/manage/audit-logging/
audit_enabled: true
# Tiered Storage, pointed at the `redpanda` bucket on the `minio` endpoint
# over plain HTTP (TLS disabled).
# https://docs.redpanda.com/current/manage/tiered-storage/
cloud_storage_enabled: true
cloud_storage_region: 'local'
cloud_storage_access_key: 'minio'
cloud_storage_secret_key: 'redpandaTieredStorage7'
cloud_storage_api_endpoint: 'minio'
cloud_storage_api_endpoint_port: 9000
cloud_storage_disable_tls: true
cloud_storage_bucket: 'redpanda'
# Upload segments to Tiered Storage sooner, so results show up quickly in
# the quickstart.
# https://docs.redpanda.com/current/reference/properties/object-storage-properties/#cloud_storage_segment_max_upload_interval_sec
cloud_storage_segment_max_upload_interval_sec: 60
# Continuous Data Balancing continuously monitors node and rack
# availability and disk usage, and rebalances partitions dynamically so the
# cluster can heal itself.
# https://docs.redpanda.com/current/manage/cluster-maintenance/continuous-data-balancing/
partition_autobalancing_mode: 'continuous'

# ----- Metrics ---------------------------------------------------
# Consumer group metrics to collect.
# https://docs.redpanda.com/current/reference/properties/cluster-properties/#enable_consumer_group_metrics
enable_consumer_group_metrics: ['group', 'partition', 'consumer_lag']
# Consumer group lag collection interval used for the quickstart.
# https://docs.redpanda.com/current/reference/properties/cluster-properties/#consumer_group_lag_collection_interval_sec
consumer_group_lag_collection_interval_sec: 60
# Collect host metrics.
# https://docs.redpanda.com/current/reference/properties/cluster-properties/#enable_host_metrics
enable_host_metrics: true
|
||||
@@ -0,0 +1,403 @@
|
||||
# Compose project name.
name: 'redpanda-quickstart-multi-broker'

# Named volumes with default settings: one data volume per broker, plus one
# for MinIO.
volumes:
  redpanda-0: null
  redpanda-1: null
  redpanda-2: null
  minio: null

# Single bridge network shared by every service in this file.
networks:
  redpanda_network:
    driver: 'bridge'
|
||||
services:
|
||||
####################
# Redpanda brokers #
####################
# redpanda-0: first broker. It is the seed that redpanda-1 and redpanda-2
# point at, and the only broker with a healthcheck.
redpanda-0:
  container_name: redpanda-0
  image: docker.redpanda.com/redpandadata/redpanda:v25.2.7
  # Username and password of the bootstrap SCRAM superuser.
  # See https://docs.redpanda.com/current/deploy/deployment-option/self-hosted/manual/production/production-deployment/#bootstrap-a-user-account
  environment:
    RP_BOOTSTRAP_USER: 'superuser:secretpassword'
  command:
    - 'redpanda'
    - 'start'
    - '--kafka-addr internal://0.0.0.0:9092,external://0.0.0.0:19092'
    # Addresses advertised to Kafka API clients:
    #  - internal: for clients inside the same Docker network.
    #  - external: for clients outside the Docker network.
    - '--advertise-kafka-addr internal://redpanda-0:9092,external://localhost:19092'
    - '--pandaproxy-addr internal://0.0.0.0:8082,external://0.0.0.0:18082'
    # Addresses advertised to HTTP Proxy clients.
    - '--advertise-pandaproxy-addr internal://redpanda-0:8082,external://localhost:18082'
    - '--schema-registry-addr internal://0.0.0.0:8081,external://0.0.0.0:18081'
    # The RPC API carries internal broker-to-broker communication.
    - '--rpc-addr redpanda-0:33145'
    - '--advertise-rpc-addr redpanda-0:33145'
    # dev-container mode applies well-known configuration properties for
    # development in containers.
    - '--mode dev-container'
    # Have Seastar (the framework underneath Redpanda) use a single core.
    - '--smp 1'
    - '--default-log-level=info'
  volumes:
    - redpanda-0:/var/lib/redpanda/data
    - ./bootstrap.yml:/etc/redpanda/.bootstrap.yaml
  networks:
    - redpanda_network
  # Host ports for the external Schema Registry, HTTP Proxy and Kafka
  # listeners; host port 19644 maps to container port 9644.
  ports:
    - "18081:18081"
    - "18082:18082"
    - "19092:19092"
    - "19644:9644"
  # Healthy once `rpk cluster info` succeeds with the superuser credentials.
  healthcheck:
    test:
      - CMD
      - rpk
      - cluster
      - info
      - -X
      - user=superuser
      - -X
      - pass=secretpassword
    interval: 10s
    timeout: 15s
    retries: 10
  # MinIO must be healthy before the broker starts.
  depends_on:
    minio:
      condition: service_healthy
|
||||
# redpanda-1: second broker; uses redpanda-0 as its seed server.
redpanda-1:
  container_name: redpanda-1
  image: docker.redpanda.com/redpandadata/redpanda:v25.2.7
  # Same bootstrap SCRAM superuser as the other brokers.
  environment:
    RP_BOOTSTRAP_USER: 'superuser:secretpassword'
  # Same flags as redpanda-0, with this broker's hostname and 2xxxx external
  # ports, plus `--seeds`.
  command:
    - 'redpanda'
    - 'start'
    - '--kafka-addr internal://0.0.0.0:9092,external://0.0.0.0:29092'
    - '--advertise-kafka-addr internal://redpanda-1:9092,external://localhost:29092'
    - '--pandaproxy-addr internal://0.0.0.0:8082,external://0.0.0.0:28082'
    - '--advertise-pandaproxy-addr internal://redpanda-1:8082,external://localhost:28082'
    - '--schema-registry-addr internal://0.0.0.0:8081,external://0.0.0.0:28081'
    - '--rpc-addr redpanda-1:33145'
    - '--advertise-rpc-addr redpanda-1:33145'
    - '--mode dev-container'
    - '--smp 1'
    - '--default-log-level=info'
    - '--seeds redpanda-0:33145'
  volumes:
    - redpanda-1:/var/lib/redpanda/data
    - ./bootstrap.yml:/etc/redpanda/.bootstrap.yaml
  networks:
    - redpanda_network
  ports:
    - "28081:28081"
    - "28082:28082"
    - "29092:29092"
    - "29644:9644"
  # Short-form dependencies: only start order is enforced here, with no
  # health condition.
  depends_on:
    - redpanda-0
    - minio
|
||||
# redpanda-2: third broker; uses redpanda-0 as its seed server.
redpanda-2:
  container_name: redpanda-2
  image: docker.redpanda.com/redpandadata/redpanda:v25.2.7
  # Same bootstrap SCRAM superuser as the other brokers.
  environment:
    RP_BOOTSTRAP_USER: 'superuser:secretpassword'
  # Same flags as redpanda-0, with this broker's hostname and 3xxxx external
  # ports, plus `--seeds`.
  command:
    - 'redpanda'
    - 'start'
    - '--kafka-addr internal://0.0.0.0:9092,external://0.0.0.0:39092'
    - '--advertise-kafka-addr internal://redpanda-2:9092,external://localhost:39092'
    - '--pandaproxy-addr internal://0.0.0.0:8082,external://0.0.0.0:38082'
    - '--advertise-pandaproxy-addr internal://redpanda-2:8082,external://localhost:38082'
    - '--schema-registry-addr internal://0.0.0.0:8081,external://0.0.0.0:38081'
    - '--rpc-addr redpanda-2:33145'
    - '--advertise-rpc-addr redpanda-2:33145'
    - '--mode dev-container'
    - '--smp 1'
    - '--default-log-level=info'
    - '--seeds redpanda-0:33145'
  volumes:
    - redpanda-2:/var/lib/redpanda/data
    - ./bootstrap.yml:/etc/redpanda/.bootstrap.yaml
  networks:
    - redpanda_network
  ports:
    - "38081:38081"
    - "38082:38082"
    - "39092:39092"
    - "39644:9644"
  # Short-form dependencies: only start order is enforced here, with no
  # health condition.
  depends_on:
    - redpanda-0
    - minio
|
||||
# ----------------------------------------------------------------------
# Redpanda Console: web UI, published on host port 8080.
# ----------------------------------------------------------------------
console:
  image: docker.redpanda.com/redpandadata/console:v3.2.2
  container_name: redpanda-console
  networks:
    - redpanda_network
  ports:
    - "8080:8080"
  volumes:
    - ./config:/tmp/config/
  # Start only after the first broker is healthy and the one-off setup jobs
  # have completed successfully.
  depends_on:
    redpanda-0:
      condition: service_healthy
    createtopic:
      condition: service_completed_successfully
    registerschema:
      condition: service_completed_successfully
    deploytransform:
      condition: service_completed_successfully
  # The shell writes the inline configuration below to /tmp/config.yml and
  # launches Console only if that write succeeds. `$$` escapes `$` so that
  # Compose does not interpolate the variable itself.
  entrypoint: /bin/sh
  command: -c 'echo "$$CONSOLE_CONFIG_FILE" > /tmp/config.yml && /app/console'
  environment:
    # Defaults to /tmp/config.yml when CONFIG_FILEPATH is not set on the host.
    CONFIG_FILEPATH: "${CONFIG_FILEPATH:-/tmp/config.yml}"
    # Console configuration; the text below is written to the file verbatim.
    CONSOLE_CONFIG_FILE: |
      # Configure a connection to the Redpanda cluster
      # See https://docs.redpanda.com/current/console/config/connect-to-redpanda/
      kafka:
        brokers: ["redpanda-0:9092"]
        sasl:
          enabled: true
          impersonateUser: true
      schemaRegistry:
        enabled: true
        urls: ["http://redpanda-0:8081","http://redpanda-1:8081","http://redpanda-2:8081"]
        authentication:
          impersonateUser: true
      redpanda:
        adminApi:
          enabled: true
          urls: ["http://redpanda-0:9644","http://redpanda-1:9644","http://redpanda-2:9644"]
          authentication:
            basic:
              username: superuser
              password: secretpassword
            impersonateUser: false
      console:
        # Configures Redpanda Console to fetch topic documentation from GitHub and display it in the UI.
        # See https://docs.redpanda.com/current/console/config/topic-documentation/
        topicDocumentation:
          enabled: true
          git:
            enabled: true
            repository:
              url: https://github.com/redpanda-data/docs
              branch: main
              baseDirectory: tests/docker-compose
      authentication:
        jwtSigningKey: vazxnT+ZHtxKslK6QlDGovcYnSjTk/lKMmZ+mHrBVE+YdVDkLgSuP6AszAKe9Gvq
        basic:
          enabled: true
      authorization:
        roleBindings:
          - roleName: admin
            users:
              - loginType: basic
                name: superuser
|
||||
####################
# Redpanda Connect #
####################
connect:
  container_name: redpanda-connect
  # NOTE: no tag is given, so Docker resolves this image to `latest`.
  image: docker.redpanda.com/redpandadata/connect
  networks:
    - redpanda_network
  entrypoint: /bin/sh
  depends_on:
    redpanda-0:
      condition: service_healthy
  # Write the inline configuration to disk and start Redpanda Connect only if
  # the write succeeded (`&&`, matching the console service). `$$` escapes `$`
  # so that Compose does not interpolate the variable itself.
  command: -c 'echo "$$CONNECT_CFG_FILE" > /tmp/connect.yml && /redpanda-connect -c /tmp/connect.yml'
  environment:
    # This Redpanda Connect configuration creates fake data,
    # processes it, and writes the output to a set of topics.
    #
    # Input:
    #  - Uses Redpanda Connect's generate input to generate fake data.
    #    See https://docs.redpanda.com/redpanda-connect/components/inputs/generate/
    # Pipeline:
    #  - Bloblang mapping that copies each generated record 2-4 times.
    #  - Unarchive processor to parse the JSON array and extract each
    #    element into its own message.
    #  - Bloblang mapping that routes the first message of each batch to the
    #    'logins' topic and the remaining 1-3 messages to the 'transactions'
    #    topic.
    #    See https://docs.redpanda.com/redpanda-connect/guides/bloblang/about/
    # Output:
    #  - kafka_franz output to write the messages to the Redpanda brokers.
    #    See https://docs.redpanda.com/redpanda-connect/components/outputs/kafka_franz/
    CONNECT_CFG_FILE: |
      input:
        generate:
          interval: 1s
          mapping: |
            let first_name = fake("first_name")
            let last_name = fake("last_name")

            root.user_id = counter()
            root.name = $$first_name + " " + $$last_name
            root.email = ($$first_name.slice(0,1) + $$last_name + "@" + fake("domain_name")).lowercase()
            root.ip = fake("ipv4")
            root.login_time = now()
      pipeline:
        processors:
          - mapping: |
              root = range(0, random_int(min:2, max:4)).map_each(cust -> this)
          - unarchive:
              format: "json_array"
          - mapping: |
              if batch_index() == 0 {
                meta topic = "logins"
                root = this
              } else {
                meta topic = "transactions"
                root.user_id = this.user_id
                root.email = this.email
                root.index = batch_index() - 1
                root.product_url = fake("url")
                root.price = fake("amount_with_currency")
                root.timestamp = now()
              }
      output:
        kafka_franz:
          seed_brokers: [ "redpanda-0:9092" ]
          topic: $${! metadata("topic") }
          sasl:
            - mechanism: SCRAM-SHA-256
              password: secretpassword
              username: superuser
|
||||
# ----------------------------------------------------------------------
# createtopic: rpk container that creates the edu-filtered-domains topic.
# Other services wait for it to complete successfully.
# See https://docs.redpanda.com/current/reference/rpk/rpk-topic/rpk-topic-create/
# ----------------------------------------------------------------------
createtopic:
  image: docker.redpanda.com/redpandadata/redpanda:v25.2.7
  networks:
    - redpanda_network
  # Run only after the first broker reports healthy.
  depends_on:
    redpanda-0:
      condition: service_healthy
  # rpk arguments: `topic create <name>` plus connection/credential overrides.
  command:
    - 'topic'
    - 'create'
    - 'edu-filtered-domains'
    - '-X user=superuser'
    - '-X pass=secretpassword'
    - '-X brokers=redpanda-0:9092'
|
||||
# ----------------------------------------------------------------------
# registerschema: rpk container that registers the `transactions` schema
# from the mounted transactions-schema.json file.
# See https://docs.redpanda.com/current/manage/schema-reg/schema-reg-api/
# ----------------------------------------------------------------------
registerschema:
  image: docker.redpanda.com/redpandadata/redpanda:v25.2.7
  networks:
    - redpanda_network
  # Mount the local schema file into the container at the path passed to
  # `--schema` below.
  volumes:
    - ./transactions-schema.json:/etc/redpanda/transactions-schema.json
  # Run only after the first broker reports healthy.
  depends_on:
    redpanda-0:
      condition: service_healthy
  command:
    - 'registry'
    - 'schema'
    - 'create'
    - 'transactions'
    - '--schema'
    - '/etc/redpanda/transactions-schema.json'
    - '-X user=superuser'
    - '-X pass=secretpassword'
    - '-X registry.hosts=redpanda-0:8081'
|
||||
# ----------------------------------------------------------------------
# consumergroup: rpk container that consumes the `transactions` topic as
# a member of the `transactions-consumer` group.
# See https://docs.redpanda.com/current/reference/rpk/rpk-topic/rpk-topic-consume/
# ----------------------------------------------------------------------
consumergroup:
  image: docker.redpanda.com/redpandadata/redpanda:v25.2.7
  networks:
    - redpanda_network
  # Start once the topic-creation and transform-deployment jobs have
  # completed successfully.
  depends_on:
    createtopic:
      condition: service_completed_successfully
    deploytransform:
      condition: service_completed_successfully
  command:
    - 'topic'
    - 'consume'
    - 'transactions'
    - '--group'
    - 'transactions-consumer'
    - '-X user=superuser'
    - '-X pass=secretpassword'
    - '-X brokers=redpanda-0:9092'
|
||||
# ----------------------------------------------------------------------
# deploytransform: rpk container that deploys the pre-built `regex` data
# transform, reading from `logins` and writing to `edu-filtered-domains`.
# See https://docs.redpanda.com/current/develop/data-transforms/deploy/
# ----------------------------------------------------------------------
deploytransform:
  image: docker.redpanda.com/redpandadata/redpanda:v25.2.7
  networks:
    - redpanda_network
  # The compiled WebAssembly module referenced by `--file` below.
  volumes:
    - ./transform/regex.wasm:/etc/redpanda/regex.wasm
  # The output topic must exist first, so wait for createtopic to finish.
  depends_on:
    createtopic:
      condition: service_completed_successfully
  command:
    - 'transform'
    - 'deploy'
    - '--file=/etc/redpanda/regex.wasm'
    - '--name=regex'
    - '--input-topic=logins'
    - '--output-topic=edu-filtered-domains'
    # The double quotes are part of the argument itself: each list item is
    # passed as-is, without shell processing.
    - '--var=PATTERN="[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.edu"'
    - '--var=MATCH_VALUE=true'
    - '-X user=superuser'
    - '-X pass=secretpassword'
    - '-X admin.hosts=redpanda-0:9644'
|
||||
# ----------------------------------------------------------------------
# MinIO: S3-compatible object store backing Tiered Storage.
# See https://min.io/
#
# NOTE: MinIO ships with this quickstart for development and evaluation
# only. It is not supported for production deployments of Redpanda.
#
# In production, use one of the supported object storage providers:
# https://docs.redpanda.com/current/deploy/deployment-option/self-hosted/manual/production/requirements/#object-storage-providers-for-tiered-storage
# ----------------------------------------------------------------------
minio:
  image: minio/minio:RELEASE.2025-05-24T17-08-30Z
  container_name: minio
  command: 'server --console-address ":9001" /data'
  environment:
    MINIO_ROOT_USER: 'minio'
    MINIO_ROOT_PASSWORD: 'redpandaTieredStorage7'
    MINIO_SERVER_URL: 'http://minio:9000'
    MINIO_REGION_NAME: 'local'
    MINIO_DOMAIN: 'minio'
  volumes:
    - minio:/data
  networks:
    redpanda_network:
      # Additional DNS name for this container on the network.
      aliases:
        - redpanda.minio
  ports:
    - "9000:9000"
    - "9001:9001"
  # Other services wait on this check via `condition: service_healthy`.
  healthcheck:
    test:
      - CMD
      - curl
      - -f
      - http://localhost:9000/minio/health/ready
    interval: 10s
    timeout: 5s
    retries: 3
|
||||
# mc: MinIO client helper. Once MinIO is healthy it registers the `minio`
# alias, creates the `redpanda` bucket, allows anonymous access to it, and
# then idles.
mc:
  depends_on:
    minio:
      condition: service_healthy
  image: minio/mc:RELEASE.2025-05-21T01-59-54Z
  container_name: mc
  networks:
    - redpanda_network
  environment:
    - AWS_ACCESS_KEY_ID=minio
    - AWS_SECRET_ACCESS_KEY=redpandaTieredStorage7
    - AWS_REGION=local
  # Script steps:
  #  1. Retry `mc alias set` every second until MinIO accepts the credentials.
  #  2. Create the bucket; `--ignore-existing` keeps this step from failing
  #     when the bucket is already present on the persisted volume.
  #  3. Make the bucket public. `mc anonymous set` replaces the older
  #     `mc policy set` subcommand.
  #  4. `tail -f /dev/null` blocks forever so the container keeps running.
  entrypoint: >
    /bin/sh -c "
    until (/usr/bin/mc alias set minio http://minio:9000 minio redpandaTieredStorage7) do echo '...waiting...' && sleep 1; done;
    /usr/bin/mc mb --ignore-existing minio/redpanda;
    /usr/bin/mc anonymous set public minio/redpanda;
    tail -f /dev/null
    "
|
||||
@@ -0,0 +1,77 @@
|
||||
# Redpanda Connect pipeline: generates one fake user profile per second and
# writes it to the 'profiles' topic. The brokers are addressed through their
# localhost listeners.
input:
  # Use the 'generate' input
  # https://docs.redpanda.com/redpanda-connect/components/inputs/generate/
  generate:
    # The interval at which new records are generated.
    interval: 1s
    # The mapping section defines how each generated record is structured.
    # The language used here is called Bloblang.
    # https://docs.redpanda.com/redpanda-connect/guides/bloblang/about/
    mapping: |
      # Generate a fake first name using the 'first_name' faker function.
      let first_name = fake("first_name")

      # Generate a fake last name using the 'last_name' faker function.
      let last_name = fake("last_name")

      # Define possible subscription levels for users.
      let subscription_levels = ["Free", "Basic", "Premium"]

      # Define possible notification channels for user preferences.
      let notifications = ["email", "sms", "push" ]

      # Define supported languages for user preferences (ISO 639-1 codes).
      let languages = ["en", "es", "fr", "de", "zh", "ja"]

      # Assign a unique user ID using a UUID digit generator.
      root.user_id = fake("uuid_digit")

      # Assign the generated first name to the 'first_name' field.
      root.first_name = $first_name

      # Assign the generated last name to the 'last_name' field.
      root.last_name = $last_name

      # Construct the user's email by combining the first initial, last name, and a fake domain name.
      # The email is converted to lowercase for consistency.
      root.email = ($first_name.slice(0,1) + $last_name + "@" + fake("domain_name")).lowercase()

      # Assign a fake registration date using the 'date' faker function.
      root.registration_date = fake("date")

      # Assign the current timestamp as the last login time.
      root.last_login = now()

      # Randomly assign a subscription level by selecting an index from the 'subscription_levels' array.
      root.subscription_level = $subscription_levels.index(random_int(min: 0, max: 2))

      # Randomly assign a language preference by selecting an index from the 'languages' array.
      root.preferences.language = $languages.index(random_int(min: 0, max: 5))

      # Randomly assign a notification preference by selecting an index from the 'notifications' array.
      root.preferences.notifications = $notifications.index(random_int(min: 0, max: 2))
pipeline:
  processors:
    - mapping: |
        # Set the target topic for the generated records to 'profiles'.
        meta topic = "profiles"

        # Assign the entire record (root) to be sent to the specified topic.
        root = this
output:
  # Use the 'kafka_franz' output to send the result back to Redpanda
  # https://docs.redpanda.com/redpanda-connect/components/outputs/kafka_franz/
  kafka_franz:
    # Define the list of seed brokers for the Kafka cluster.
    seed_brokers: [ "localhost:19092", "localhost:29092", "localhost:39092"]
    # Dynamically assign the topic based on the metadata specified in the processors.
    # In this case, it resolves to the 'profiles' topic.
    topic: ${! metadata("topic") }
    # Configure SASL authentication to securely connect to the Kafka brokers.
    sasl:
      - # Specify the SASL mechanism to use for authentication.
        mechanism: SCRAM-SHA-256
        # The password for the SASL authentication.
        password: secretpassword
        # The username for the SASL authentication.
        username: superuser
|
||||
@@ -0,0 +1,56 @@
|
||||
# =================================================================
|
||||
# This file defines initial cluster properties for a Redpanda cluster.
|
||||
# Some of these settings are intended for quickstart development and evaluation
|
||||
# and are not suitable for production environments.
|
||||
#
|
||||
# For more information on bootstrap files, see:
|
||||
# https://docs.redpanda.com/current/deploy/deployment-option/self-hosted/manual/production/production-deployment/#configure-a-bootstrap-file
|
||||
# =================================================================
|
||||
|
||||
# ---- Authentication ----
# Enable SASL authentication for the Kafka and Admin APIs.
# https://docs.redpanda.com/current/reference/properties/cluster-properties/#admin_api_require_auth
admin_api_require_auth: true
# At least one superuser is required to be able to create other SASL users.
# https://docs.redpanda.com/current/reference/properties/cluster-properties/#superusers
superusers:
  - superuser
# https://docs.redpanda.com/current/reference/properties/cluster-properties/#enable_sasl
enable_sasl: true

# ---- Topics, transforms, auditing ----
# Allow topics to be created on first access.
# https://docs.redpanda.com/current/reference/properties/cluster-properties/#auto_create_topics_enabled
auto_create_topics_enabled: true
# Enable data transforms.
# https://docs.redpanda.com/current/develop/data-transforms/how-transforms-work/
data_transforms_enabled: true
# Enable audit logging (enterprise feature).
# https://docs.redpanda.com/current/manage/audit-logging/
audit_enabled: true

# ---- Tiered Storage (enterprise feature) ----
# https://docs.redpanda.com/current/manage/tiered-storage/
cloud_storage_enabled: true
cloud_storage_region: local
cloud_storage_access_key: minio
cloud_storage_secret_key: redpandaTieredStorage7
cloud_storage_api_endpoint: minio
cloud_storage_api_endpoint_port: 9000
cloud_storage_disable_tls: true
cloud_storage_bucket: redpanda
# Forces segments to be uploaded to Tiered Storage faster for the purposes of the quickstart.
# https://docs.redpanda.com/current/reference/properties/object-storage-properties/#cloud_storage_segment_max_upload_interval_sec
cloud_storage_segment_max_upload_interval_sec: 60

# ---- Partition balancing ----
# Continuous Data Balancing (enterprise feature) continuously monitors your
# node and rack availability and disk usage. This enables self-healing
# clusters that dynamically balance partitions, ensuring smooth operations
# and optimal cluster performance.
# https://docs.redpanda.com/current/manage/cluster-maintenance/continuous-data-balancing/
partition_autobalancing_mode: continuous

# ---- Metrics ----
# Enable Redpanda to collect consumer group metrics.
# https://docs.redpanda.com/current/reference/properties/cluster-properties/#enable_consumer_group_metrics
enable_consumer_group_metrics:
  - group
  - partition
  - consumer_lag
# Lower the interval for the quickstart.
# https://docs.redpanda.com/current/reference/properties/cluster-properties/#consumer_group_lag_collection_interval_sec
consumer_group_lag_collection_interval_sec: 60
# Enable Redpanda to collect host metrics.
# https://docs.redpanda.com/current/reference/properties/cluster-properties/#enable_host_metrics
enable_host_metrics: true
|
||||
@@ -0,0 +1,403 @@
|
||||
# Compose project name.
name: redpanda-quickstart-multi-broker

# Bridge network that every service in this file attaches to.
networks:
  redpanda_network:
    driver: bridge

# Named volumes: one data volume per broker, plus one for MinIO.
volumes:
  redpanda-0: null
  redpanda-1: null
  redpanda-2: null
  minio: null
|
||||
services:
|
||||
##################
|
||||
# Redpanda Brokers #
|
||||
##################
|
||||
# First Redpanda broker. It is the only broker that defines a healthcheck,
# so services that wait on `service_healthy` gate on this container.
redpanda-0:
  command:
    - redpanda
    - start
    - --kafka-addr internal://0.0.0.0:9092,external://0.0.0.0:19092
    # Address the broker advertises to clients that connect to the Kafka API.
    # Use the internal addresses to connect to the Redpanda brokers
    # from inside the same Docker network.
    # Use the external addresses to connect to the Redpanda brokers
    # from outside the Docker network.
    - --advertise-kafka-addr internal://redpanda-0:9092,external://localhost:19092
    - --pandaproxy-addr internal://0.0.0.0:8082,external://0.0.0.0:18082
    # Address the broker advertises to clients that connect to the HTTP Proxy.
    - --advertise-pandaproxy-addr internal://redpanda-0:8082,external://localhost:18082
    - --schema-registry-addr internal://0.0.0.0:8081,external://0.0.0.0:18081
    # Redpanda brokers use the RPC API to communicate with each other internally.
    - --rpc-addr redpanda-0:33145
    - --advertise-rpc-addr redpanda-0:33145
    # Mode dev-container uses well-known configuration properties for development in containers.
    - --mode dev-container
    # Tells Seastar (the framework Redpanda uses under the hood) to use 1 core on the system.
    - --smp 1
    - --default-log-level=info
  image: docker.redpanda.com/redpandadata/redpanda:v25.2.7
  container_name: redpanda-0
  # Sets the username and password of the bootstrap SCRAM superuser
  # See https://docs.redpanda.com/current/deploy/deployment-option/self-hosted/manual/production/production-deployment/#bootstrap-a-user-account
  environment:
    RP_BOOTSTRAP_USER: "superuser:secretpassword"
  volumes:
    - redpanda-0:/var/lib/redpanda/data
    - ./bootstrap.yml:/etc/redpanda/.bootstrap.yaml
  networks:
    - redpanda_network
  # HOST:CONTAINER mappings are quoted so that no YAML parser can interpret
  # them as numbers; Compose recommends always writing them as strings.
  ports:
    - "18081:18081"
    - "18082:18082"
    - "19092:19092"
    - "19644:9644"
  healthcheck:
    test: ["CMD", "rpk", "cluster", "info", "-X", "user=superuser", "-X", "pass=secretpassword"]
    interval: 10s
    timeout: 15s
    retries: 10
  depends_on:
    minio:
      condition: service_healthy
|
||||
# Second Redpanda broker. Joins the cluster through redpanda-0 (--seeds).
redpanda-1:
  command:
    - redpanda
    - start
    - --kafka-addr internal://0.0.0.0:9092,external://0.0.0.0:29092
    - --advertise-kafka-addr internal://redpanda-1:9092,external://localhost:29092
    - --pandaproxy-addr internal://0.0.0.0:8082,external://0.0.0.0:28082
    - --advertise-pandaproxy-addr internal://redpanda-1:8082,external://localhost:28082
    - --schema-registry-addr internal://0.0.0.0:8081,external://0.0.0.0:28081
    - --rpc-addr redpanda-1:33145
    - --advertise-rpc-addr redpanda-1:33145
    - --mode dev-container
    - --smp 1
    - --default-log-level=info
    - --seeds redpanda-0:33145
  image: docker.redpanda.com/redpandadata/redpanda:v25.2.7
  container_name: redpanda-1
  environment:
    RP_BOOTSTRAP_USER: "superuser:secretpassword"
  volumes:
    - redpanda-1:/var/lib/redpanda/data
    - ./bootstrap.yml:/etc/redpanda/.bootstrap.yaml
  networks:
    - redpanda_network
  # HOST:CONTAINER mappings are quoted so that no YAML parser can interpret
  # them as numbers; Compose recommends always writing them as strings.
  ports:
    - "28081:28081"
    - "28082:28082"
    - "29092:29092"
    - "29644:9644"
  depends_on:
    - redpanda-0
    - minio
|
||||
# Third Redpanda broker. Joins the cluster through redpanda-0 (--seeds).
redpanda-2:
  command:
    - redpanda
    - start
    - --kafka-addr internal://0.0.0.0:9092,external://0.0.0.0:39092
    - --advertise-kafka-addr internal://redpanda-2:9092,external://localhost:39092
    - --pandaproxy-addr internal://0.0.0.0:8082,external://0.0.0.0:38082
    - --advertise-pandaproxy-addr internal://redpanda-2:8082,external://localhost:38082
    - --schema-registry-addr internal://0.0.0.0:8081,external://0.0.0.0:38081
    - --rpc-addr redpanda-2:33145
    - --advertise-rpc-addr redpanda-2:33145
    - --mode dev-container
    - --smp 1
    - --default-log-level=info
    - --seeds redpanda-0:33145
  image: docker.redpanda.com/redpandadata/redpanda:v25.2.7
  container_name: redpanda-2
  environment:
    RP_BOOTSTRAP_USER: "superuser:secretpassword"
  volumes:
    - redpanda-2:/var/lib/redpanda/data
    - ./bootstrap.yml:/etc/redpanda/.bootstrap.yaml
  networks:
    - redpanda_network
  # HOST:CONTAINER mappings are quoted so that no YAML parser can interpret
  # them as numbers; Compose recommends always writing them as strings.
  ports:
    - "38081:38081"
    - "38082:38082"
    - "39092:39092"
    - "39644:9644"
  depends_on:
    - redpanda-0
    - minio
|
||||
####################
|
||||
# Redpanda Console #
|
||||
####################
|
||||
# Redpanda Console. At start-up the container writes $CONSOLE_CONFIG_FILE to
# /tmp/config.yml and then launches /app/console.
console:
  container_name: redpanda-console
  image: docker.redpanda.com/redpandadata/console:v3.2.2
  networks:
    - redpanda_network
  entrypoint: /bin/sh
  command: -c 'echo "$$CONSOLE_CONFIG_FILE" > /tmp/config.yml && /app/console'
  volumes:
    - ./config:/tmp/config/
  environment:
    CONFIG_FILEPATH: ${CONFIG_FILEPATH:-/tmp/config.yml}
    CONSOLE_CONFIG_FILE: |
      # Configure a connection to the Redpanda cluster
      # See https://docs.redpanda.com/current/console/config/connect-to-redpanda/
      kafka:
        brokers: ["redpanda-0:9092"]
        sasl:
          enabled: true
          impersonateUser: true
      schemaRegistry:
        enabled: true
        urls: ["http://redpanda-0:8081","http://redpanda-1:8081","http://redpanda-2:8081"]
        authentication:
          impersonateUser: true
      redpanda:
        adminApi:
          enabled: true
          urls: ["http://redpanda-0:9644","http://redpanda-1:9644","http://redpanda-2:9644"]
          authentication:
            basic:
              username: superuser
              password: secretpassword
            impersonateUser: false
      console:
        # Configures Redpanda Console to fetch topic documentation from GitHub and display it in the UI.
        # See https://docs.redpanda.com/current/console/config/topic-documentation/
        topicDocumentation:
          enabled: true
          git:
            enabled: true
            repository:
              url: https://github.com/redpanda-data/docs
              branch: main
              baseDirectory: tests/docker-compose
      authentication:
        jwtSigningKey: vazxnT+ZHtxKslK6QlDGovcYnSjTk/lKMmZ+mHrBVE+YdVDkLgSuP6AszAKe9Gvq
        basic:
          enabled: true
      authorization:
        roleBindings:
          - roleName: admin
            users:
              - loginType: basic
                name: superuser
  # HOST:CONTAINER mapping is quoted so that no YAML parser can interpret it
  # as a number; Compose recommends always writing it as a string.
  ports:
    - "8080:8080"
  # Start only after the broker is healthy and the one-shot setup jobs finished.
  depends_on:
    redpanda-0:
      condition: service_healthy
    createtopic:
      condition: service_completed_successfully
    registerschema:
      condition: service_completed_successfully
    deploytransform:
      condition: service_completed_successfully
|
||||
####################
|
||||
# Redpanda Connect #
|
||||
####################
|
||||
# Redpanda Connect. At start-up the container writes $CONNECT_CFG_FILE to
# /tmp/connect.yml and runs /redpanda-connect with that file.
connect:
  container_name: redpanda-connect
  # NOTE(review): no tag is given here, unlike the other images in this file.
  image: docker.redpanda.com/redpandadata/connect
  networks:
    - redpanda_network
  entrypoint: /bin/sh
  depends_on:
    redpanda-0:
      condition: service_healthy
  command: -c 'echo "$$CONNECT_CFG_FILE" > /tmp/connect.yml; /redpanda-connect -c /tmp/connect.yml'
  environment:
    # This Redpanda Connect configuration creates fake data,
    # processes it, and writes the output to a set of topics.
    #
    # Input:
    #  - Uses Redpanda Connect's generate input to generate fake data.
    #    See https://docs.redpanda.com/redpanda-connect/components/inputs/generate/
    # Pipeline:
    #  - Bloblang mapping that copies each input into an array of 2-4
    #    elements; the first becomes 1 message on the 'logins' topic and the
    #    remaining 1-3 become messages on the 'transactions' topic.
    #  - Unarchive processor to parse the JSON array and extract each
    #    element into its own message.
    #    See https://docs.redpanda.com/redpanda-connect/guides/bloblang/about/
    # Output:
    #  - kafka_franz output to write the messages to the Redpanda brokers.
    #    See https://docs.redpanda.com/redpanda-connect/components/outputs/kafka_franz/
    #
    # '$$' is Compose's escape for a literal '$' inside this value.
    CONNECT_CFG_FILE: |
      input:
        generate:
          interval: 1s
          mapping: |
            let first_name = fake("first_name")
            let last_name = fake("last_name")

            root.user_id = counter()
            root.name = $$first_name + " " + $$last_name
            root.email = ($$first_name.slice(0,1) + $$last_name + "@" + fake("domain_name")).lowercase()
            root.ip = fake("ipv4")
            root.login_time = now()
      pipeline:
        processors:
          - mapping: |
              root = range(0, random_int(min:2, max:4)).map_each(cust -> this)
          - unarchive:
              format: "json_array"
          - mapping: |
              if batch_index() == 0 {
                meta topic = "logins"
                root = this
              } else {
                meta topic = "transactions"
                root.user_id = this.user_id
                root.email = this.email
                root.index = batch_index() - 1
                root.product_url = fake("url")
                root.price = fake("amount_with_currency")
                root.timestamp = now()
              }
      output:
        kafka_franz:
          seed_brokers: [ "redpanda-0:9092" ]
          topic: $${! metadata("topic") }
          sasl:
            - mechanism: SCRAM-SHA-256
              password: secretpassword
              username: superuser
|
||||
####################
|
||||
# rpk container to create the edu-filtered-domains topic #
|
||||
# See https://docs.redpanda.com/current/reference/rpk/rpk-topic/rpk-topic-create/
|
||||
####################
|
||||
# One-shot job: creates the 'edu-filtered-domains' topic once redpanda-0 is
# healthy. The command list is passed as arguments to the image's entrypoint.
createtopic:
  image: docker.redpanda.com/redpandadata/redpanda:v25.2.7
  command:
    - topic
    - create
    - edu-filtered-domains
    - -X user=superuser
    - -X pass=secretpassword
    - -X brokers=redpanda-0:9092
  networks:
    - redpanda_network
  depends_on:
    redpanda-0:
      condition: service_healthy
|
||||
####################
|
||||
# rpk container to register the schema #
|
||||
# See https://docs.redpanda.com/current/manage/schema-reg/schema-reg-api/
|
||||
####################
|
||||
# One-shot job: registers the schema file mounted below under the
# 'transactions' subject once redpanda-0 is healthy.
registerschema:
  image: docker.redpanda.com/redpandadata/redpanda:v25.2.7
  command:
    - registry
    - schema
    - create
    - transactions
    - --schema
    - /etc/redpanda/transactions-schema.json
    - -X user=superuser
    - -X pass=secretpassword
    - -X registry.hosts=redpanda-0:8081
  # Mount the local schema file into the container at the path used above.
  volumes:
    - ./transactions-schema.json:/etc/redpanda/transactions-schema.json
  networks:
    - redpanda_network
  depends_on:
    redpanda-0:
      condition: service_healthy
|
||||
####################
|
||||
# rpk container to deploy a consumer group #
|
||||
# See https://docs.redpanda.com/current/reference/rpk/rpk-topic/rpk-topic-consume/
|
||||
####################
|
||||
# Consumes the 'transactions' topic as group 'transactions-consumer'.
# Starts after the topic-creation and transform-deployment jobs succeed.
consumergroup:
  image: docker.redpanda.com/redpandadata/redpanda:v25.2.7
  command:
    - topic
    - consume
    - transactions
    - --group
    - transactions-consumer
    - -X user=superuser
    - -X pass=secretpassword
    - -X brokers=redpanda-0:9092
  networks:
    - redpanda_network
  depends_on:
    createtopic:
      condition: service_completed_successfully
    deploytransform:
      condition: service_completed_successfully
|
||||
####################
|
||||
# rpk container to deploy the pre-built data transform #
|
||||
# See https://docs.redpanda.com/current/develop/data-transforms/deploy/
|
||||
####################
|
||||
# One-shot job: deploys the pre-built 'regex' transform, which reads from
# 'logins' and writes to 'edu-filtered-domains'. Runs after createtopic.
deploytransform:
  image: docker.redpanda.com/redpandadata/redpanda:v25.2.7
  command:
    - transform
    - deploy
    - --file=/etc/redpanda/regex.wasm
    - --name=regex
    - --input-topic=logins
    - --output-topic=edu-filtered-domains
    # Variables handed to the transform: the pattern and the MATCH_VALUE flag.
    - --var=PATTERN="[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.edu"
    - --var=MATCH_VALUE=true
    - -X user=superuser
    - -X pass=secretpassword
    - -X admin.hosts=redpanda-0:9644
  # Mount the WebAssembly binary at the path given to --file.
  volumes:
    - ./transform/regex.wasm:/etc/redpanda/regex.wasm
  networks:
    - redpanda_network
  depends_on:
    createtopic:
      condition: service_completed_successfully
|
||||
####################
|
||||
# MinIO for Tiered Storage #
|
||||
# See https://min.io/
|
||||
#
|
||||
# NOTE: MinIO is included in this quickstart for development and evaluation purposes only.
|
||||
# It is not supported for production deployments of Redpanda.
|
||||
#
|
||||
# For production environments, use one of the supported object storage providers:
|
||||
# https://docs.redpanda.com/current/deploy/deployment-option/self-hosted/manual/production/requirements/#object-storage-providers-for-tiered-storage
|
||||
####################
|
||||
# MinIO object store used as the Tiered Storage backend in this quickstart.
# API on port 9000, web console on port 9001 (see --console-address).
minio:
  container_name: minio
  image: minio/minio:RELEASE.2025-05-24T17-08-30Z
  command: server --console-address ":9001" /data
  # HOST:CONTAINER mappings are quoted so that no YAML parser can interpret
  # them as numbers; Compose recommends always writing them as strings.
  ports:
    - "9000:9000"
    - "9001:9001"
  environment:
    MINIO_ROOT_USER: minio
    MINIO_ROOT_PASSWORD: redpandaTieredStorage7
    MINIO_SERVER_URL: "http://minio:9000"
    MINIO_REGION_NAME: local
    MINIO_DOMAIN: minio
  volumes:
    - minio:/data
  networks:
    redpanda_network:
      # Extra DNS name for this container on the network.
      aliases:
        - redpanda.minio
  healthcheck:
    test: ["CMD", "curl", "-f", "http://localhost:9000/minio/health/ready"]
    interval: 10s
    timeout: 5s
    retries: 3
|
||||
# MinIO client. Waits until the 'minio' alias can be registered, creates the
# 'redpanda' bucket, opens it for anonymous access, then idles.
mc:
  depends_on:
    minio:
      condition: service_healthy
  image: minio/mc:RELEASE.2025-05-21T01-59-54Z
  container_name: mc
  networks:
    - redpanda_network
  environment:
    - AWS_ACCESS_KEY_ID=minio
    - AWS_SECRET_ACCESS_KEY=redpandaTieredStorage7
    - AWS_REGION=local
  # `mc anonymous set` is the current name of the former `mc policy set`
  # command, which recent mc releases no longer provide.
  entrypoint: >
    /bin/sh -c "
    until (/usr/bin/mc alias set minio http://minio:9000 minio redpandaTieredStorage7) do echo '...waiting...' && sleep 1; done;
    /usr/bin/mc mb minio/redpanda;
    /usr/bin/mc anonymous set public minio/redpanda;
    tail -f /dev/null
    "
|
||||
@@ -0,0 +1,77 @@
|
||||
input:
  # Use the 'generate' input
  # https://docs.redpanda.com/redpanda-connect/components/inputs/generate/
  generate:
    # The interval at which new records are generated.
    interval: 1s
    # The mapping section defines how each generated record is structured.
    # The language used here is called Bloblang.
    # https://docs.redpanda.com/redpanda-connect/guides/bloblang/about/
    mapping: |
      # Generate a fake first name using the 'first_name' faker function.
      let first_name = fake("first_name")

      # Generate a fake last name using the 'last_name' faker function.
      let last_name = fake("last_name")

      # Define possible subscription levels for users.
      let subscription_levels = ["Free", "Basic", "Premium"]

      # Define possible notification channels for user preferences.
      let notifications = ["email", "sms", "push" ]

      # Define supported languages for user preferences (ISO 639-1 codes;
      # Japanese is "ja" - "jp" is the country code, not a language code).
      let languages = ["en", "es", "fr", "de", "zh", "ja"]

      # Assign a unique user ID using a UUID digit generator.
      root.user_id = fake("uuid_digit")

      # Assign the generated first name to the 'first_name' field.
      root.first_name = $first_name

      # Assign the generated last name to the 'last_name' field.
      root.last_name = $last_name

      # Construct the user's email by combining the first initial, last name, and a fake domain name.
      # The email is converted to lowercase for consistency.
      root.email = ($first_name.slice(0,1) + $last_name + "@" + fake("domain_name")).lowercase()

      # Assign a fake registration date using the 'date' faker function.
      root.registration_date = fake("date")

      # Assign the current timestamp as the last login time.
      root.last_login = now()

      # Randomly assign a subscription level by selecting an index from the 'subscription_levels' array.
      root.subscription_level = $subscription_levels.index(random_int(min: 0, max: 2))

      # Randomly assign a language preference by selecting an index from the 'languages' array.
      root.preferences.language = $languages.index(random_int(min: 0, max: 5))

      # Randomly assign a notification preference by selecting an index from the 'notifications' array.
      root.preferences.notifications = $notifications.index(random_int(min: 0, max: 2))
|
||||
# Single processing stage: stamp each generated record with the 'topic'
# metadata key before the record is handed to the output.
pipeline:
  processors:
    - mapping: |
        # Set the target topic for the generated records to 'profiles'.
        meta topic = "profiles"

        # Assign the entire record (root) to be sent to the specified topic.
        root = this
|
||||
output:
  # Publish the records to Redpanda through the 'kafka_franz' output.
  # https://docs.redpanda.com/redpanda-connect/components/outputs/kafka_franz/
  kafka_franz:
    # Seed brokers used to reach the Kafka cluster.
    seed_brokers:
      - "localhost:19092"
      - "localhost:29092"
      - "localhost:39092"
    # The topic is taken from the 'topic' metadata key set by the processors.
    # In this case, it resolves to the 'profiles' topic.
    topic: ${! metadata("topic") }
    # SASL credentials used to authenticate against the Kafka brokers.
    sasl:
      - # SASL mechanism to use for authentication.
        mechanism: SCRAM-SHA-256
        # Password for the SASL authentication.
        password: secretpassword
        # Username for the SASL authentication.
        username: superuser
|
||||
@@ -0,0 +1,56 @@
|
||||
# =================================================================
|
||||
# This file defines initial cluster properties for a Redpanda cluster.
|
||||
# Some of these settings are intended for quickstart development and evaluation
|
||||
# and are not suitable for production environments.
|
||||
#
|
||||
# For more information on bootstrap files, see:
|
||||
# https://docs.redpanda.com/current/deploy/deployment-option/self-hosted/manual/production/production-deployment/#configure-a-bootstrap-file
|
||||
# =================================================================
|
||||
|
||||
# ---- Authentication ----
# Enable SASL authentication for the Kafka and Admin APIs.
# https://docs.redpanda.com/current/reference/properties/cluster-properties/#admin_api_require_auth
admin_api_require_auth: true
# At least one superuser is required to be able to create other SASL users.
# https://docs.redpanda.com/current/reference/properties/cluster-properties/#superusers
superusers:
  - superuser
# https://docs.redpanda.com/current/reference/properties/cluster-properties/#enable_sasl
enable_sasl: true

# ---- Topics, transforms, auditing ----
# Allow topics to be created on first access.
# https://docs.redpanda.com/current/reference/properties/cluster-properties/#auto_create_topics_enabled
auto_create_topics_enabled: true
# Enable data transforms.
# https://docs.redpanda.com/current/develop/data-transforms/how-transforms-work/
data_transforms_enabled: true
# Enable audit logging (enterprise feature).
# https://docs.redpanda.com/current/manage/audit-logging/
audit_enabled: true

# ---- Tiered Storage (enterprise feature) ----
# https://docs.redpanda.com/current/manage/tiered-storage/
cloud_storage_enabled: true
cloud_storage_region: local
cloud_storage_access_key: minio
cloud_storage_secret_key: redpandaTieredStorage7
cloud_storage_api_endpoint: minio
cloud_storage_api_endpoint_port: 9000
cloud_storage_disable_tls: true
cloud_storage_bucket: redpanda
# Forces segments to be uploaded to Tiered Storage faster for the purposes of the quickstart.
# https://docs.redpanda.com/current/reference/properties/object-storage-properties/#cloud_storage_segment_max_upload_interval_sec
cloud_storage_segment_max_upload_interval_sec: 60

# ---- Partition balancing ----
# Continuous Data Balancing (enterprise feature) continuously monitors your
# node and rack availability and disk usage. This enables self-healing
# clusters that dynamically balance partitions, ensuring smooth operations
# and optimal cluster performance.
# https://docs.redpanda.com/current/manage/cluster-maintenance/continuous-data-balancing/
partition_autobalancing_mode: continuous

# ---- Metrics ----
# Enable Redpanda to collect consumer group metrics.
# https://docs.redpanda.com/current/reference/properties/cluster-properties/#enable_consumer_group_metrics
enable_consumer_group_metrics:
  - group
  - partition
  - consumer_lag
# Lower the interval for the quickstart.
# https://docs.redpanda.com/current/reference/properties/cluster-properties/#consumer_group_lag_collection_interval_sec
consumer_group_lag_collection_interval_sec: 60
# Enable Redpanda to collect host metrics.
# https://docs.redpanda.com/current/reference/properties/cluster-properties/#enable_host_metrics
enable_host_metrics: true
|
||||
@@ -0,0 +1,403 @@
|
||||
# Compose project name.
name: redpanda-quickstart-multi-broker

# Bridge network that every service in this file attaches to.
networks:
  redpanda_network:
    driver: bridge

# Named volumes: one data volume per broker, plus one for MinIO.
volumes:
  redpanda-0: null
  redpanda-1: null
  redpanda-2: null
  minio: null
|
||||
services:
|
||||
##################
|
||||
# Redpanda Brokers #
|
||||
##################
|
||||
# First Redpanda broker. It is the only broker that defines a healthcheck,
# so services that wait on `service_healthy` gate on this container.
redpanda-0:
  command:
    - redpanda
    - start
    - --kafka-addr internal://0.0.0.0:9092,external://0.0.0.0:19092
    # Address the broker advertises to clients that connect to the Kafka API.
    # Use the internal addresses to connect to the Redpanda brokers
    # from inside the same Docker network.
    # Use the external addresses to connect to the Redpanda brokers
    # from outside the Docker network.
    - --advertise-kafka-addr internal://redpanda-0:9092,external://localhost:19092
    - --pandaproxy-addr internal://0.0.0.0:8082,external://0.0.0.0:18082
    # Address the broker advertises to clients that connect to the HTTP Proxy.
    - --advertise-pandaproxy-addr internal://redpanda-0:8082,external://localhost:18082
    - --schema-registry-addr internal://0.0.0.0:8081,external://0.0.0.0:18081
    # Redpanda brokers use the RPC API to communicate with each other internally.
    - --rpc-addr redpanda-0:33145
    - --advertise-rpc-addr redpanda-0:33145
    # Mode dev-container uses well-known configuration properties for development in containers.
    - --mode dev-container
    # Tells Seastar (the framework Redpanda uses under the hood) to use 1 core on the system.
    - --smp 1
    - --default-log-level=info
  image: docker.redpanda.com/redpandadata/redpanda:v25.2.7
  container_name: redpanda-0
  # Sets the username and password of the bootstrap SCRAM superuser
  # See https://docs.redpanda.com/current/deploy/deployment-option/self-hosted/manual/production/production-deployment/#bootstrap-a-user-account
  environment:
    RP_BOOTSTRAP_USER: "superuser:secretpassword"
  volumes:
    - redpanda-0:/var/lib/redpanda/data
    - ./bootstrap.yml:/etc/redpanda/.bootstrap.yaml
  networks:
    - redpanda_network
  # HOST:CONTAINER mappings are quoted so that no YAML parser can interpret
  # them as numbers; Compose recommends always writing them as strings.
  ports:
    - "18081:18081"
    - "18082:18082"
    - "19092:19092"
    - "19644:9644"
  healthcheck:
    test: ["CMD", "rpk", "cluster", "info", "-X", "user=superuser", "-X", "pass=secretpassword"]
    interval: 10s
    timeout: 15s
    retries: 10
  depends_on:
    minio:
      condition: service_healthy
|
||||
# Second Redpanda broker. Joins the cluster through redpanda-0 (--seeds).
redpanda-1:
  command:
    - redpanda
    - start
    - --kafka-addr internal://0.0.0.0:9092,external://0.0.0.0:29092
    - --advertise-kafka-addr internal://redpanda-1:9092,external://localhost:29092
    - --pandaproxy-addr internal://0.0.0.0:8082,external://0.0.0.0:28082
    - --advertise-pandaproxy-addr internal://redpanda-1:8082,external://localhost:28082
    - --schema-registry-addr internal://0.0.0.0:8081,external://0.0.0.0:28081
    - --rpc-addr redpanda-1:33145
    - --advertise-rpc-addr redpanda-1:33145
    - --mode dev-container
    - --smp 1
    - --default-log-level=info
    - --seeds redpanda-0:33145
  image: docker.redpanda.com/redpandadata/redpanda:v25.2.7
  container_name: redpanda-1
  environment:
    RP_BOOTSTRAP_USER: "superuser:secretpassword"
  volumes:
    - redpanda-1:/var/lib/redpanda/data
    - ./bootstrap.yml:/etc/redpanda/.bootstrap.yaml
  networks:
    - redpanda_network
  # HOST:CONTAINER mappings are quoted so that no YAML parser can interpret
  # them as numbers; Compose recommends always writing them as strings.
  ports:
    - "28081:28081"
    - "28082:28082"
    - "29092:29092"
    - "29644:9644"
  depends_on:
    - redpanda-0
    - minio
|
||||
# Third Redpanda broker. Joins the cluster through redpanda-0 (--seeds).
redpanda-2:
  command:
    - redpanda
    - start
    - --kafka-addr internal://0.0.0.0:9092,external://0.0.0.0:39092
    - --advertise-kafka-addr internal://redpanda-2:9092,external://localhost:39092
    - --pandaproxy-addr internal://0.0.0.0:8082,external://0.0.0.0:38082
    - --advertise-pandaproxy-addr internal://redpanda-2:8082,external://localhost:38082
    - --schema-registry-addr internal://0.0.0.0:8081,external://0.0.0.0:38081
    - --rpc-addr redpanda-2:33145
    - --advertise-rpc-addr redpanda-2:33145
    - --mode dev-container
    - --smp 1
    - --default-log-level=info
    - --seeds redpanda-0:33145
  image: docker.redpanda.com/redpandadata/redpanda:v25.2.7
  container_name: redpanda-2
  environment:
    RP_BOOTSTRAP_USER: "superuser:secretpassword"
  volumes:
    - redpanda-2:/var/lib/redpanda/data
    - ./bootstrap.yml:/etc/redpanda/.bootstrap.yaml
  networks:
    - redpanda_network
  # HOST:CONTAINER mappings are quoted so that no YAML parser can interpret
  # them as numbers; Compose recommends always writing them as strings.
  ports:
    - "38081:38081"
    - "38082:38082"
    - "39092:39092"
    - "39644:9644"
  depends_on:
    - redpanda-0
    - minio
|
||||
####################
|
||||
# Redpanda Console #
|
||||
####################
|
||||
# Redpanda Console. At start-up the container writes $CONSOLE_CONFIG_FILE to
# /tmp/config.yml and then launches /app/console.
console:
  container_name: redpanda-console
  image: docker.redpanda.com/redpandadata/console:v3.2.2
  networks:
    - redpanda_network
  entrypoint: /bin/sh
  command: -c 'echo "$$CONSOLE_CONFIG_FILE" > /tmp/config.yml && /app/console'
  volumes:
    - ./config:/tmp/config/
  environment:
    CONFIG_FILEPATH: ${CONFIG_FILEPATH:-/tmp/config.yml}
    CONSOLE_CONFIG_FILE: |
      # Configure a connection to the Redpanda cluster
      # See https://docs.redpanda.com/current/console/config/connect-to-redpanda/
      kafka:
        brokers: ["redpanda-0:9092"]
        sasl:
          enabled: true
          impersonateUser: true
      schemaRegistry:
        enabled: true
        urls: ["http://redpanda-0:8081","http://redpanda-1:8081","http://redpanda-2:8081"]
        authentication:
          impersonateUser: true
      redpanda:
        adminApi:
          enabled: true
          urls: ["http://redpanda-0:9644","http://redpanda-1:9644","http://redpanda-2:9644"]
          authentication:
            basic:
              username: superuser
              password: secretpassword
            impersonateUser: false
      console:
        # Configures Redpanda Console to fetch topic documentation from GitHub and display it in the UI.
        # See https://docs.redpanda.com/current/console/config/topic-documentation/
        topicDocumentation:
          enabled: true
          git:
            enabled: true
            repository:
              url: https://github.com/redpanda-data/docs
              branch: main
              baseDirectory: tests/docker-compose
      authentication:
        jwtSigningKey: vazxnT+ZHtxKslK6QlDGovcYnSjTk/lKMmZ+mHrBVE+YdVDkLgSuP6AszAKe9Gvq
        basic:
          enabled: true
      authorization:
        roleBindings:
          - roleName: admin
            users:
              - loginType: basic
                name: superuser
  # HOST:CONTAINER mapping is quoted so that no YAML parser can interpret it
  # as a number; Compose recommends always writing it as a string.
  ports:
    - "8080:8080"
  # Start only after the broker is healthy and the one-shot setup jobs finished.
  depends_on:
    redpanda-0:
      condition: service_healthy
    createtopic:
      condition: service_completed_successfully
    registerschema:
      condition: service_completed_successfully
    deploytransform:
      condition: service_completed_successfully
|
||||
####################
|
||||
# Redpanda Connect #
|
||||
####################
|
||||
# Redpanda Connect. At start-up the container writes $CONNECT_CFG_FILE to
# /tmp/connect.yml and runs /redpanda-connect with that file.
connect:
  container_name: redpanda-connect
  # NOTE(review): no tag is given here, unlike the other images in this file.
  image: docker.redpanda.com/redpandadata/connect
  networks:
    - redpanda_network
  entrypoint: /bin/sh
  depends_on:
    redpanda-0:
      condition: service_healthy
  command: -c 'echo "$$CONNECT_CFG_FILE" > /tmp/connect.yml; /redpanda-connect -c /tmp/connect.yml'
  environment:
    # This Redpanda Connect configuration creates fake data,
    # processes it, and writes the output to a set of topics.
    #
    # Input:
    #  - Uses Redpanda Connect's generate input to generate fake data.
    #    See https://docs.redpanda.com/redpanda-connect/components/inputs/generate/
    # Pipeline:
    #  - Bloblang mapping that copies each input into an array of 2-4
    #    elements; the first becomes 1 message on the 'logins' topic and the
    #    remaining 1-3 become messages on the 'transactions' topic.
    #  - Unarchive processor to parse the JSON array and extract each
    #    element into its own message.
    #    See https://docs.redpanda.com/redpanda-connect/guides/bloblang/about/
    # Output:
    #  - kafka_franz output to write the messages to the Redpanda brokers.
    #    See https://docs.redpanda.com/redpanda-connect/components/outputs/kafka_franz/
    #
    # '$$' is Compose's escape for a literal '$' inside this value.
    CONNECT_CFG_FILE: |
      input:
        generate:
          interval: 1s
          mapping: |
            let first_name = fake("first_name")
            let last_name = fake("last_name")

            root.user_id = counter()
            root.name = $$first_name + " " + $$last_name
            root.email = ($$first_name.slice(0,1) + $$last_name + "@" + fake("domain_name")).lowercase()
            root.ip = fake("ipv4")
            root.login_time = now()
      pipeline:
        processors:
          - mapping: |
              root = range(0, random_int(min:2, max:4)).map_each(cust -> this)
          - unarchive:
              format: "json_array"
          - mapping: |
              if batch_index() == 0 {
                meta topic = "logins"
                root = this
              } else {
                meta topic = "transactions"
                root.user_id = this.user_id
                root.email = this.email
                root.index = batch_index() - 1
                root.product_url = fake("url")
                root.price = fake("amount_with_currency")
                root.timestamp = now()
              }
      output:
        kafka_franz:
          seed_brokers: [ "redpanda-0:9092" ]
          topic: $${! metadata("topic") }
          sasl:
            - mechanism: SCRAM-SHA-256
              password: secretpassword
              username: superuser
|
||||
####################
|
||||
# rpk container to create the edu-filtered-domains topic #
|
||||
# See https://docs.redpanda.com/current/reference/rpk/rpk-topic/rpk-topic-create/
|
||||
####################
|
||||
createtopic:
|
||||
command:
|
||||
- topic
|
||||
- create
|
||||
- edu-filtered-domains
|
||||
- -X user=superuser
|
||||
- -X pass=secretpassword
|
||||
- -X brokers=redpanda-0:9092
|
||||
image: docker.redpanda.com/redpandadata/redpanda:v25.2.7
|
||||
networks:
|
||||
- redpanda_network
|
||||
depends_on:
|
||||
redpanda-0:
|
||||
condition: service_healthy
|
||||
####################
|
||||
# rpk container to register the schema #
|
||||
# See https://docs.redpanda.com/current/manage/schema-reg/schema-reg-api/
|
||||
####################
|
||||
registerschema:
|
||||
command:
|
||||
- registry
|
||||
- schema
|
||||
- create
|
||||
- transactions
|
||||
- --schema
|
||||
- /etc/redpanda/transactions-schema.json
|
||||
- -X user=superuser
|
||||
- -X pass=secretpassword
|
||||
- -X registry.hosts=redpanda-0:8081
|
||||
image: docker.redpanda.com/redpandadata/redpanda:v25.2.7
|
||||
# Mount the local schema file into the container.
|
||||
volumes:
|
||||
- ./transactions-schema.json:/etc/redpanda/transactions-schema.json
|
||||
networks:
|
||||
- redpanda_network
|
||||
depends_on:
|
||||
redpanda-0:
|
||||
condition: service_healthy
|
||||
####################
|
||||
# rpk container to deploy a consumer group #
|
||||
# See https://docs.redpanda.com/current/reference/rpk/rpk-topic/rpk-topic-consume/
|
||||
####################
|
||||
consumergroup:
|
||||
command:
|
||||
- topic
|
||||
- consume
|
||||
- transactions
|
||||
- --group
|
||||
- transactions-consumer
|
||||
- -X user=superuser
|
||||
- -X pass=secretpassword
|
||||
- -X brokers=redpanda-0:9092
|
||||
image: docker.redpanda.com/redpandadata/redpanda:v25.2.7
|
||||
networks:
|
||||
- redpanda_network
|
||||
depends_on:
|
||||
createtopic:
|
||||
condition: service_completed_successfully
|
||||
deploytransform:
|
||||
condition: service_completed_successfully
|
||||
####################
|
||||
# rpk container to deploy the pre-built data transform #
|
||||
# See https://docs.redpanda.com/current/develop/data-transforms/deploy/
|
||||
####################
|
||||
deploytransform:
|
||||
command:
|
||||
- transform
|
||||
- deploy
|
||||
- --file=/etc/redpanda/regex.wasm
|
||||
- --name=regex
|
||||
- --input-topic=logins
|
||||
- --output-topic=edu-filtered-domains
|
||||
- --var=PATTERN="[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.edu"
|
||||
- --var=MATCH_VALUE=true
|
||||
- -X user=superuser
|
||||
- -X pass=secretpassword
|
||||
- -X admin.hosts=redpanda-0:9644
|
||||
image: docker.redpanda.com/redpandadata/redpanda:v25.2.7
|
||||
volumes:
|
||||
- ./transform/regex.wasm:/etc/redpanda/regex.wasm
|
||||
networks:
|
||||
- redpanda_network
|
||||
depends_on:
|
||||
createtopic:
|
||||
condition: service_completed_successfully
|
||||
####################
|
||||
# MinIO for Tiered Storage #
|
||||
# See https://min.io/
|
||||
#
|
||||
# NOTE: MinIO is included in this quickstart for development and evaluation purposes only.
|
||||
# It is not supported for production deployments of Redpanda.
|
||||
#
|
||||
# For production environments, use one of the supported object storage providers:
|
||||
# https://docs.redpanda.com/current/deploy/deployment-option/self-hosted/manual/production/requirements/#object-storage-providers-for-tiered-storage
|
||||
####################
|
||||
minio:
|
||||
container_name: minio
|
||||
image: minio/minio:RELEASE.2025-05-24T17-08-30Z
|
||||
command: server --console-address ":9001" /data
|
||||
ports:
|
||||
- 9000:9000
|
||||
- 9001:9001
|
||||
environment:
|
||||
MINIO_ROOT_USER: minio
|
||||
MINIO_ROOT_PASSWORD: redpandaTieredStorage7
|
||||
MINIO_SERVER_URL: "http://minio:9000"
|
||||
MINIO_REGION_NAME: local
|
||||
MINIO_DOMAIN: minio
|
||||
volumes:
|
||||
- minio:/data
|
||||
networks:
|
||||
redpanda_network:
|
||||
aliases:
|
||||
- redpanda.minio
|
||||
healthcheck:
|
||||
test: ["CMD", "curl", "-f", "http://localhost:9000/minio/health/ready"]
|
||||
interval: 10s
|
||||
timeout: 5s
|
||||
retries: 3
|
||||
mc:
  # MinIO Client helper container. Once MinIO reports healthy it:
  #   1. registers the alias "minio" for http://minio:9000 (retrying every
  #      second until the command succeeds),
  #   2. creates the "redpanda" bucket,
  #   3. makes that bucket publicly accessible,
  #   4. stays alive via `tail -f /dev/null` so the container does not exit.
  depends_on:
    minio:
      condition: service_healthy
  image: minio/mc:RELEASE.2025-05-21T01-59-54Z
  container_name: mc
  networks:
    - redpanda_network
  environment:
    - AWS_ACCESS_KEY_ID=minio
    - AWS_SECRET_ACCESS_KEY=redpandaTieredStorage7
    - AWS_REGION=local
  # The folded scalar (>) joins the lines below into a single `sh -c "..."`
  # invocation, so do not place shell comments inside the quoted script.
  # `mc anonymous set` is used because current mc releases replaced the
  # older `mc policy set` subcommand for anonymous bucket access.
  entrypoint: >
    /bin/sh -c "
    until (/usr/bin/mc alias set minio http://minio:9000 minio redpandaTieredStorage7) do echo '...waiting...' && sleep 1; done;
    /usr/bin/mc mb minio/redpanda;
    /usr/bin/mc anonymous set public minio/redpanda;
    tail -f /dev/null
    "
|
||||
@@ -0,0 +1,77 @@
|
||||
input:
|
||||
# Use the 'generate' input
|
||||
# https://docs.redpanda.com/redpanda-connect/components/inputs/generate/
|
||||
generate:
|
||||
# The interval at which new records are generated.
|
||||
interval: 1s
|
||||
# The mapping section defines how each generated record is structured.
|
||||
# The language used here is called Bloblang.
|
||||
# https://docs.redpanda.com/redpanda-connect/guides/bloblang/about/
|
||||
mapping: |
|
||||
# Generate a fake first name using the 'first_name' faker function.
|
||||
let first_name = fake("first_name")
|
||||
|
||||
# Generate a fake last name using the 'last_name' faker function.
|
||||
let last_name = fake("last_name")
|
||||
|
||||
# Define possible subscription levels for users.
|
||||
let subscription_levels = ["Free", "Basic", "Premium"]
|
||||
|
||||
# Define possible notification channels for user preferences.
|
||||
let notifications = ["email", "sms", "push" ]
|
||||
|
||||
# Define supported languages for user preferences as ISO 639-1 language codes.
# Japanese is "ja"; "jp" is the country code, not a language code.
let languages = ["en", "es", "fr", "de", "zh", "ja"]
|
||||
|
||||
# Assign a unique user ID using a UUID digit generator.
|
||||
root.user_id = fake("uuid_digit")
|
||||
|
||||
# Assign the generated first name to the 'first_name' field.
|
||||
root.first_name = $first_name
|
||||
|
||||
# Assign the generated last name to the 'last_name' field.
|
||||
root.last_name = $last_name
|
||||
|
||||
# Construct the user's email by combining the first initial, last name, and a fake domain name.
|
||||
# The email is converted to lowercase for consistency.
|
||||
root.email = ($first_name.slice(0,1) + $last_name + "@" + fake("domain_name")).lowercase()
|
||||
|
||||
# Assign a fake registration date using the 'date' faker function.
|
||||
root.registration_date = fake("date")
|
||||
|
||||
# Assign the current timestamp as the last login time.
|
||||
root.last_login = now()
|
||||
|
||||
# Randomly assign a subscription level by selecting an index from the 'subscription_levels' array.
|
||||
root.subscription_level = $subscription_levels.index(random_int(min: 0, max: 2))
|
||||
|
||||
# Randomly assign a language preference by selecting an index from the 'languages' array.
|
||||
root.preferences.language = $languages.index(random_int(min: 0, max: 5))
|
||||
|
||||
# Randomly assign a notification preference by selecting an index from the 'notifications' array.
|
||||
root.preferences.notifications = $notifications.index(random_int(min: 0, max: 2))
|
||||
pipeline:
|
||||
processors:
|
||||
- mapping: |
|
||||
# Set the target topic for the generated records to 'profiles'.
|
||||
meta topic = "profiles"
|
||||
|
||||
# Assign the entire record (root) to be sent to the specified topic.
|
||||
root = this
|
||||
output:
|
||||
# Use the 'kafka_franz' output to send the result back to Redpanda
|
||||
# https://docs.redpanda.com/redpanda-connect/components/outputs/kafka_franz/
|
||||
kafka_franz:
|
||||
# Define the list of seed brokers for the Kafka cluster.
|
||||
seed_brokers: [ "localhost:19092", "localhost:29092", "localhost:39092"]
|
||||
# Dynamically assign the topic based on the metadata specified in the processors.
|
||||
# In this case, it resolves to the 'profiles' topic.
|
||||
topic: ${! metadata("topic") }
|
||||
# Configure SASL authentication to securely connect to the Kafka brokers.
|
||||
sasl:
|
||||
- # Specify the SASL mechanism to use for authentication.
|
||||
mechanism: SCRAM-SHA-256
|
||||
# The password for the SASL authentication.
|
||||
password: secretpassword
|
||||
# The username for the SASL authentication.
|
||||
username: superuser
|
||||
@@ -0,0 +1,56 @@
|
||||
# =================================================================
|
||||
# This file defines initial cluster properties for a Redpanda cluster.
|
||||
# Some of these settings are intended for quickstart development and evaluation
|
||||
# and are not suitable for production environments.
|
||||
#
|
||||
# For more information on bootstrap files, see:
|
||||
# https://docs.redpanda.com/current/deploy/deployment-option/self-hosted/manual/production/production-deployment/#configure-a-bootstrap-file
|
||||
# =================================================================
|
||||
|
||||
#
|
||||
# Enable SASL authentication for the Kafka and Admin APIs.
|
||||
# https://docs.redpanda.com/current/reference/properties/cluster-properties/#admin_api_require_auth
|
||||
admin_api_require_auth: true
|
||||
# At least one superuser is required to be able to create other SASL users
|
||||
# https://docs.redpanda.com/current/reference/properties/cluster-properties/#superusers
|
||||
superusers:
|
||||
- superuser
|
||||
# https://docs.redpanda.com/current/reference/properties/cluster-properties/#enable_sasl
|
||||
enable_sasl: true
|
||||
# Allow topics to be created on first access.
|
||||
# https://docs.redpanda.com/current/reference/properties/cluster-properties/#auto_create_topics_enabled
|
||||
auto_create_topics_enabled: true
|
||||
# Enable data transforms.
|
||||
# https://docs.redpanda.com/current/develop/data-transforms/how-transforms-work/
|
||||
data_transforms_enabled: true
|
||||
# Enable audit logging (enterprise feature).
|
||||
# https://docs.redpanda.com/current/manage/audit-logging/
|
||||
audit_enabled: true
|
||||
# Enable Tiered Storage (enterprise feature).
|
||||
# https://docs.redpanda.com/current/manage/tiered-storage/
|
||||
cloud_storage_enabled: true
|
||||
cloud_storage_region: local
|
||||
cloud_storage_access_key: minio
|
||||
cloud_storage_secret_key: redpandaTieredStorage7
|
||||
cloud_storage_api_endpoint: minio
|
||||
cloud_storage_api_endpoint_port: 9000
|
||||
cloud_storage_disable_tls: true
|
||||
cloud_storage_bucket: redpanda
|
||||
# Forces segments to be uploaded to Tiered Storage faster for the purposes of the quickstart
|
||||
# https://docs.redpanda.com/current/reference/properties/object-storage-properties/#cloud_storage_segment_max_upload_interval_sec
|
||||
cloud_storage_segment_max_upload_interval_sec: 60
|
||||
# Continuous Data Balancing (enterprise feature) continuously monitors your node and rack availability and disk usage. This enables self-healing clusters that dynamically balance partitions, ensuring smooth operations and optimal cluster performance.
|
||||
# https://docs.redpanda.com/current/manage/cluster-maintenance/continuous-data-balancing/
|
||||
partition_autobalancing_mode: continuous
|
||||
# Enable Redpanda to collect consumer group metrics.
|
||||
# https://docs.redpanda.com/current/reference/properties/cluster-properties/#enable_consumer_group_metrics
|
||||
enable_consumer_group_metrics:
|
||||
- "group"
|
||||
- "partition"
|
||||
- "consumer_lag"
|
||||
# Lower the interval for the quickstart
|
||||
# https://docs.redpanda.com/current/reference/properties/cluster-properties/#consumer_group_lag_collection_interval_sec
|
||||
consumer_group_lag_collection_interval_sec: 60
|
||||
# Enable Redpanda to collect host metrics.
|
||||
# https://docs.redpanda.com/current/reference/properties/cluster-properties/#enable_host_metrics
|
||||
enable_host_metrics: true
|
||||
@@ -0,0 +1,403 @@
|
||||
name: redpanda-quickstart-multi-broker
|
||||
networks:
|
||||
redpanda_network:
|
||||
driver: bridge
|
||||
volumes:
|
||||
redpanda-0: null
|
||||
redpanda-1: null
|
||||
redpanda-2: null
|
||||
minio: null
|
||||
services:
|
||||
##################
|
||||
# Redpanda Brokers #
|
||||
##################
|
||||
redpanda-0:
|
||||
command:
|
||||
- redpanda
|
||||
- start
|
||||
- --kafka-addr internal://0.0.0.0:9092,external://0.0.0.0:19092
|
||||
# Address the broker advertises to clients that connect to the Kafka API.
|
||||
# Use the internal addresses to connect to the Redpanda brokers
|
||||
# from inside the same Docker network.
|
||||
# Use the external addresses to connect to the Redpanda brokers
|
||||
# from outside the Docker network.
|
||||
- --advertise-kafka-addr internal://redpanda-0:9092,external://localhost:19092
|
||||
- --pandaproxy-addr internal://0.0.0.0:8082,external://0.0.0.0:18082
|
||||
# Address the broker advertises to clients that connect to the HTTP Proxy.
|
||||
- --advertise-pandaproxy-addr internal://redpanda-0:8082,external://localhost:18082
|
||||
- --schema-registry-addr internal://0.0.0.0:8081,external://0.0.0.0:18081
|
||||
# Redpanda brokers use the RPC API to communicate with each other internally.
|
||||
- --rpc-addr redpanda-0:33145
|
||||
- --advertise-rpc-addr redpanda-0:33145
|
||||
# Mode dev-container uses well-known configuration properties for development in containers.
|
||||
- --mode dev-container
|
||||
# Tells Seastar (the framework Redpanda uses under the hood) to use 1 core on the system.
|
||||
- --smp 1
|
||||
- --default-log-level=info
|
||||
image: docker.redpanda.com/redpandadata/redpanda:v25.2.7
|
||||
container_name: redpanda-0
|
||||
# Sets the username and password of the bootstrap SCRAM superuser
|
||||
# See https://docs.redpanda.com/current/deploy/deployment-option/self-hosted/manual/production/production-deployment/#bootstrap-a-user-account
|
||||
environment:
|
||||
RP_BOOTSTRAP_USER: "superuser:secretpassword"
|
||||
volumes:
|
||||
- redpanda-0:/var/lib/redpanda/data
|
||||
- ./bootstrap.yml:/etc/redpanda/.bootstrap.yaml
|
||||
networks:
|
||||
- redpanda_network
|
||||
ports:
|
||||
- 18081:18081
|
||||
- 18082:18082
|
||||
- 19092:19092
|
||||
- 19644:9644
|
||||
healthcheck:
|
||||
test: ["CMD", "rpk", "cluster", "info", "-X", "user=superuser", "-X", "pass=secretpassword"]
|
||||
interval: 10s
|
||||
timeout: 15s
|
||||
retries: 10
|
||||
depends_on:
|
||||
minio:
|
||||
condition: service_healthy
|
||||
redpanda-1:
|
||||
command:
|
||||
- redpanda
|
||||
- start
|
||||
- --kafka-addr internal://0.0.0.0:9092,external://0.0.0.0:29092
|
||||
- --advertise-kafka-addr internal://redpanda-1:9092,external://localhost:29092
|
||||
- --pandaproxy-addr internal://0.0.0.0:8082,external://0.0.0.0:28082
|
||||
- --advertise-pandaproxy-addr internal://redpanda-1:8082,external://localhost:28082
|
||||
- --schema-registry-addr internal://0.0.0.0:8081,external://0.0.0.0:28081
|
||||
- --rpc-addr redpanda-1:33145
|
||||
- --advertise-rpc-addr redpanda-1:33145
|
||||
- --mode dev-container
|
||||
- --smp 1
|
||||
- --default-log-level=info
|
||||
- --seeds redpanda-0:33145
|
||||
image: docker.redpanda.com/redpandadata/redpanda:v25.2.7
|
||||
container_name: redpanda-1
|
||||
environment:
|
||||
RP_BOOTSTRAP_USER: "superuser:secretpassword"
|
||||
volumes:
|
||||
- redpanda-1:/var/lib/redpanda/data
|
||||
- ./bootstrap.yml:/etc/redpanda/.bootstrap.yaml
|
||||
networks:
|
||||
- redpanda_network
|
||||
ports:
|
||||
- 28081:28081
|
||||
- 28082:28082
|
||||
- 29092:29092
|
||||
- 29644:9644
|
||||
depends_on:
|
||||
- redpanda-0
|
||||
- minio
|
||||
redpanda-2:
|
||||
command:
|
||||
- redpanda
|
||||
- start
|
||||
- --kafka-addr internal://0.0.0.0:9092,external://0.0.0.0:39092
|
||||
- --advertise-kafka-addr internal://redpanda-2:9092,external://localhost:39092
|
||||
- --pandaproxy-addr internal://0.0.0.0:8082,external://0.0.0.0:38082
|
||||
- --advertise-pandaproxy-addr internal://redpanda-2:8082,external://localhost:38082
|
||||
- --schema-registry-addr internal://0.0.0.0:8081,external://0.0.0.0:38081
|
||||
- --rpc-addr redpanda-2:33145
|
||||
- --advertise-rpc-addr redpanda-2:33145
|
||||
- --mode dev-container
|
||||
- --smp 1
|
||||
- --default-log-level=info
|
||||
- --seeds redpanda-0:33145
|
||||
image: docker.redpanda.com/redpandadata/redpanda:v25.2.7
|
||||
container_name: redpanda-2
|
||||
environment:
|
||||
RP_BOOTSTRAP_USER: "superuser:secretpassword"
|
||||
volumes:
|
||||
- redpanda-2:/var/lib/redpanda/data
|
||||
- ./bootstrap.yml:/etc/redpanda/.bootstrap.yaml
|
||||
networks:
|
||||
- redpanda_network
|
||||
ports:
|
||||
- 38081:38081
|
||||
- 38082:38082
|
||||
- 39092:39092
|
||||
- 39644:9644
|
||||
depends_on:
|
||||
- redpanda-0
|
||||
- minio
|
||||
####################
|
||||
# Redpanda Console #
|
||||
####################
|
||||
console:
|
||||
container_name: redpanda-console
|
||||
image: docker.redpanda.com/redpandadata/console:v3.2.2
|
||||
networks:
|
||||
- redpanda_network
|
||||
entrypoint: /bin/sh
|
||||
command: -c 'echo "$$CONSOLE_CONFIG_FILE" > /tmp/config.yml && /app/console'
|
||||
volumes:
|
||||
- ./config:/tmp/config/
|
||||
environment:
|
||||
CONFIG_FILEPATH: ${CONFIG_FILEPATH:-/tmp/config.yml}
|
||||
CONSOLE_CONFIG_FILE: |
|
||||
# Configure a connection to the Redpanda cluster
|
||||
# See https://docs.redpanda.com/current/console/config/connect-to-redpanda/
|
||||
kafka:
|
||||
brokers: ["redpanda-0:9092"]
|
||||
sasl:
|
||||
enabled: true
|
||||
impersonateUser: true
|
||||
schemaRegistry:
|
||||
enabled: true
|
||||
urls: ["http://redpanda-0:8081","http://redpanda-1:8081","http://redpanda-2:8081"]
|
||||
authentication:
|
||||
impersonateUser: true
|
||||
redpanda:
|
||||
adminApi:
|
||||
enabled: true
|
||||
urls: ["http://redpanda-0:9644","http://redpanda-1:9644","http://redpanda-2:9644"]
|
||||
authentication:
|
||||
basic:
|
||||
username: superuser
|
||||
password: secretpassword
|
||||
impersonateUser: false
|
||||
console:
|
||||
# Configures Redpanda Console to fetch topic documentation from GitHub and display it in the UI.
|
||||
# See https://docs.redpanda.com/current/console/config/topic-documentation/
|
||||
topicDocumentation:
|
||||
enabled: true
|
||||
git:
|
||||
enabled: true
|
||||
repository:
|
||||
url: https://github.com/redpanda-data/docs
|
||||
branch: main
|
||||
baseDirectory: tests/docker-compose
|
||||
authentication:
|
||||
jwtSigningKey: vazxnT+ZHtxKslK6QlDGovcYnSjTk/lKMmZ+mHrBVE+YdVDkLgSuP6AszAKe9Gvq
|
||||
basic:
|
||||
enabled: true
|
||||
authorization:
|
||||
roleBindings:
|
||||
- roleName: admin
|
||||
users:
|
||||
- loginType: basic
|
||||
name: superuser
|
||||
ports:
|
||||
- 8080:8080
|
||||
depends_on:
|
||||
redpanda-0:
|
||||
condition: service_healthy
|
||||
createtopic:
|
||||
condition: service_completed_successfully
|
||||
registerschema:
|
||||
condition: service_completed_successfully
|
||||
deploytransform:
|
||||
condition: service_completed_successfully
|
||||
####################
|
||||
# Redpanda Connect #
|
||||
####################
|
||||
connect:
|
||||
container_name: redpanda-connect
|
||||
image: docker.redpanda.com/redpandadata/connect
|
||||
networks:
|
||||
- redpanda_network
|
||||
entrypoint: /bin/sh
|
||||
depends_on:
|
||||
redpanda-0:
|
||||
condition: service_healthy
|
||||
command: -c 'echo "$$CONNECT_CFG_FILE" > /tmp/connect.yml; /redpanda-connect -c /tmp/connect.yml'
|
||||
environment:
|
||||
# This Redpanda Connect configuration creates fake data,
|
||||
# processes it, and writes the output to a set of topics.
|
||||
#
|
||||
# Input:
|
||||
# - Uses Redpanda Connect's generate input to generate fake data.
|
||||
# See https://docs.redpanda.com/redpanda-connect/components/inputs/generate/
|
||||
# Pipeline:
|
||||
# - Bloblang mapping to batch each input and map 1 message to 'logins'
|
||||
#   topic, and a random number (1-3) of messages to the 'transactions' topic
|
||||
# - Unarchive processor to parse the JSON array and extract each
|
||||
# element into its own message.
|
||||
# See https://docs.redpanda.com/redpanda-connect/guides/bloblang/about/
|
||||
# Output:
|
||||
# - kafka_franz output to write the messages to the Redpanda brokers.
|
||||
# See https://docs.redpanda.com/redpanda-connect/components/outputs/kafka_franz/
|
||||
CONNECT_CFG_FILE: |
|
||||
input:
|
||||
generate:
|
||||
interval: 1s
|
||||
mapping: |
|
||||
let first_name = fake("first_name")
|
||||
let last_name = fake("last_name")
|
||||
|
||||
root.user_id = counter()
|
||||
root.name = $$first_name + " " + $$last_name
|
||||
root.email = ($$first_name.slice(0,1) + $$last_name + "@" + fake("domain_name")).lowercase()
|
||||
root.ip = fake("ipv4")
|
||||
root.login_time = now()
|
||||
pipeline:
|
||||
processors:
|
||||
- mapping: |
|
||||
root = range(0, random_int(min:2, max:4)).map_each(cust -> this)
|
||||
- unarchive:
|
||||
format: "json_array"
|
||||
- mapping: |
|
||||
if batch_index() == 0 {
|
||||
meta topic = "logins"
|
||||
root = this
|
||||
} else {
|
||||
meta topic = "transactions"
|
||||
root.user_id = this.user_id
|
||||
root.email = this.email
|
||||
root.index = batch_index() - 1
|
||||
root.product_url = fake("url")
|
||||
root.price = fake("amount_with_currency")
|
||||
root.timestamp = now()
|
||||
}
|
||||
output:
|
||||
kafka_franz:
|
||||
seed_brokers: [ "redpanda-0:9092" ]
|
||||
topic: $${! metadata("topic") }
|
||||
sasl:
|
||||
- mechanism: SCRAM-SHA-256
|
||||
password: secretpassword
|
||||
username: superuser
|
||||
####################
|
||||
# rpk container to create the edu-filtered-domains topic #
|
||||
# See https://docs.redpanda.com/current/reference/rpk/rpk-topic/rpk-topic-create/
|
||||
####################
|
||||
createtopic:
|
||||
command:
|
||||
- topic
|
||||
- create
|
||||
- edu-filtered-domains
|
||||
- -X user=superuser
|
||||
- -X pass=secretpassword
|
||||
- -X brokers=redpanda-0:9092
|
||||
image: docker.redpanda.com/redpandadata/redpanda:v25.2.7
|
||||
networks:
|
||||
- redpanda_network
|
||||
depends_on:
|
||||
redpanda-0:
|
||||
condition: service_healthy
|
||||
####################
|
||||
# rpk container to register the schema #
|
||||
# See https://docs.redpanda.com/current/manage/schema-reg/schema-reg-api/
|
||||
####################
|
||||
registerschema:
|
||||
command:
|
||||
- registry
|
||||
- schema
|
||||
- create
|
||||
- transactions
|
||||
- --schema
|
||||
- /etc/redpanda/transactions-schema.json
|
||||
- -X user=superuser
|
||||
- -X pass=secretpassword
|
||||
- -X registry.hosts=redpanda-0:8081
|
||||
image: docker.redpanda.com/redpandadata/redpanda:v25.2.7
|
||||
# Mount the local schema file into the container.
|
||||
volumes:
|
||||
- ./transactions-schema.json:/etc/redpanda/transactions-schema.json
|
||||
networks:
|
||||
- redpanda_network
|
||||
depends_on:
|
||||
redpanda-0:
|
||||
condition: service_healthy
|
||||
####################
|
||||
# rpk container to deploy a consumer group #
|
||||
# See https://docs.redpanda.com/current/reference/rpk/rpk-topic/rpk-topic-consume/
|
||||
####################
|
||||
consumergroup:
|
||||
command:
|
||||
- topic
|
||||
- consume
|
||||
- transactions
|
||||
- --group
|
||||
- transactions-consumer
|
||||
- -X user=superuser
|
||||
- -X pass=secretpassword
|
||||
- -X brokers=redpanda-0:9092
|
||||
image: docker.redpanda.com/redpandadata/redpanda:v25.2.7
|
||||
networks:
|
||||
- redpanda_network
|
||||
depends_on:
|
||||
createtopic:
|
||||
condition: service_completed_successfully
|
||||
deploytransform:
|
||||
condition: service_completed_successfully
|
||||
####################
|
||||
# rpk container to deploy the pre-built data transform #
|
||||
# See https://docs.redpanda.com/current/develop/data-transforms/deploy/
|
||||
####################
|
||||
deploytransform:
|
||||
command:
|
||||
- transform
|
||||
- deploy
|
||||
- --file=/etc/redpanda/regex.wasm
|
||||
- --name=regex
|
||||
- --input-topic=logins
|
||||
- --output-topic=edu-filtered-domains
|
||||
- --var=PATTERN="[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.edu"
|
||||
- --var=MATCH_VALUE=true
|
||||
- -X user=superuser
|
||||
- -X pass=secretpassword
|
||||
- -X admin.hosts=redpanda-0:9644
|
||||
image: docker.redpanda.com/redpandadata/redpanda:v25.2.7
|
||||
volumes:
|
||||
- ./transform/regex.wasm:/etc/redpanda/regex.wasm
|
||||
networks:
|
||||
- redpanda_network
|
||||
depends_on:
|
||||
createtopic:
|
||||
condition: service_completed_successfully
|
||||
####################
|
||||
# MinIO for Tiered Storage #
|
||||
# See https://min.io/
|
||||
#
|
||||
# NOTE: MinIO is included in this quickstart for development and evaluation purposes only.
|
||||
# It is not supported for production deployments of Redpanda.
|
||||
#
|
||||
# For production environments, use one of the supported object storage providers:
|
||||
# https://docs.redpanda.com/current/deploy/deployment-option/self-hosted/manual/production/requirements/#object-storage-providers-for-tiered-storage
|
||||
####################
|
||||
minio:
|
||||
container_name: minio
|
||||
image: minio/minio:RELEASE.2025-05-24T17-08-30Z
|
||||
command: server --console-address ":9001" /data
|
||||
ports:
|
||||
- 9000:9000
|
||||
- 9001:9001
|
||||
environment:
|
||||
MINIO_ROOT_USER: minio
|
||||
MINIO_ROOT_PASSWORD: redpandaTieredStorage7
|
||||
MINIO_SERVER_URL: "http://minio:9000"
|
||||
MINIO_REGION_NAME: local
|
||||
MINIO_DOMAIN: minio
|
||||
volumes:
|
||||
- minio:/data
|
||||
networks:
|
||||
redpanda_network:
|
||||
aliases:
|
||||
- redpanda.minio
|
||||
healthcheck:
|
||||
test: ["CMD", "curl", "-f", "http://localhost:9000/minio/health/ready"]
|
||||
interval: 10s
|
||||
timeout: 5s
|
||||
retries: 3
|
||||
mc:
  # MinIO Client helper container. Once MinIO reports healthy it:
  #   1. registers the alias "minio" for http://minio:9000 (retrying every
  #      second until the command succeeds),
  #   2. creates the "redpanda" bucket,
  #   3. makes that bucket publicly accessible,
  #   4. stays alive via `tail -f /dev/null` so the container does not exit.
  depends_on:
    minio:
      condition: service_healthy
  image: minio/mc:RELEASE.2025-05-21T01-59-54Z
  container_name: mc
  networks:
    - redpanda_network
  environment:
    - AWS_ACCESS_KEY_ID=minio
    - AWS_SECRET_ACCESS_KEY=redpandaTieredStorage7
    - AWS_REGION=local
  # The folded scalar (>) joins the lines below into a single `sh -c "..."`
  # invocation, so do not place shell comments inside the quoted script.
  # `mc anonymous set` is used because current mc releases replaced the
  # older `mc policy set` subcommand for anonymous bucket access.
  entrypoint: >
    /bin/sh -c "
    until (/usr/bin/mc alias set minio http://minio:9000 minio redpandaTieredStorage7) do echo '...waiting...' && sleep 1; done;
    /usr/bin/mc mb minio/redpanda;
    /usr/bin/mc anonymous set public minio/redpanda;
    tail -f /dev/null
    "
|
||||
@@ -0,0 +1,77 @@
|
||||
input:
|
||||
# Use the 'generate' input
|
||||
# https://docs.redpanda.com/redpanda-connect/components/inputs/generate/
|
||||
generate:
|
||||
# The interval at which new records are generated.
|
||||
interval: 1s
|
||||
# The mapping section defines how each generated record is structured.
|
||||
# The language used here is called Bloblang.
|
||||
# https://docs.redpanda.com/redpanda-connect/guides/bloblang/about/
|
||||
mapping: |
|
||||
# Generate a fake first name using the 'first_name' faker function.
|
||||
let first_name = fake("first_name")
|
||||
|
||||
# Generate a fake last name using the 'last_name' faker function.
|
||||
let last_name = fake("last_name")
|
||||
|
||||
# Define possible subscription levels for users.
|
||||
let subscription_levels = ["Free", "Basic", "Premium"]
|
||||
|
||||
# Define possible notification channels for user preferences.
|
||||
let notifications = ["email", "sms", "push" ]
|
||||
|
||||
# Define supported languages for user preferences as ISO 639-1 language codes.
# Japanese is "ja"; "jp" is the country code, not a language code.
let languages = ["en", "es", "fr", "de", "zh", "ja"]
|
||||
|
||||
# Assign a unique user ID using a UUID digit generator.
|
||||
root.user_id = fake("uuid_digit")
|
||||
|
||||
# Assign the generated first name to the 'first_name' field.
|
||||
root.first_name = $first_name
|
||||
|
||||
# Assign the generated last name to the 'last_name' field.
|
||||
root.last_name = $last_name
|
||||
|
||||
# Construct the user's email by combining the first initial, last name, and a fake domain name.
|
||||
# The email is converted to lowercase for consistency.
|
||||
root.email = ($first_name.slice(0,1) + $last_name + "@" + fake("domain_name")).lowercase()
|
||||
|
||||
# Assign a fake registration date using the 'date' faker function.
|
||||
root.registration_date = fake("date")
|
||||
|
||||
# Assign the current timestamp as the last login time.
|
||||
root.last_login = now()
|
||||
|
||||
# Randomly assign a subscription level by selecting an index from the 'subscription_levels' array.
|
||||
root.subscription_level = $subscription_levels.index(random_int(min: 0, max: 2))
|
||||
|
||||
# Randomly assign a language preference by selecting an index from the 'languages' array.
|
||||
root.preferences.language = $languages.index(random_int(min: 0, max: 5))
|
||||
|
||||
# Randomly assign a notification preference by selecting an index from the 'notifications' array.
|
||||
root.preferences.notifications = $notifications.index(random_int(min: 0, max: 2))
|
||||
pipeline:
|
||||
processors:
|
||||
- mapping: |
|
||||
# Set the target topic for the generated records to 'profiles'.
|
||||
meta topic = "profiles"
|
||||
|
||||
# Assign the entire record (root) to be sent to the specified topic.
|
||||
root = this
|
||||
output:
|
||||
# Use the 'kafka_franz' output to send the result back to Redpanda
|
||||
# https://docs.redpanda.com/redpanda-connect/components/outputs/kafka_franz/
|
||||
kafka_franz:
|
||||
# Define the list of seed brokers for the Kafka cluster.
|
||||
seed_brokers: [ "localhost:19092", "localhost:29092", "localhost:39092"]
|
||||
# Dynamically assign the topic based on the metadata specified in the processors.
|
||||
# In this case, it resolves to the 'profiles' topic.
|
||||
topic: ${! metadata("topic") }
|
||||
# Configure SASL authentication to securely connect to the Kafka brokers.
|
||||
sasl:
|
||||
- # Specify the SASL mechanism to use for authentication.
|
||||
mechanism: SCRAM-SHA-256
|
||||
# The password for the SASL authentication.
|
||||
password: secretpassword
|
||||
# The username for the SASL authentication.
|
||||
username: superuser
|
||||
@@ -0,0 +1,56 @@
|
||||
# =================================================================
|
||||
# This file defines initial cluster properties for a Redpanda cluster.
|
||||
# Some of these settings are intended for quickstart development and evaluation
|
||||
# and are not suitable for production environments.
|
||||
#
|
||||
# For more information on bootstrap files, see:
|
||||
# https://docs.redpanda.com/current/deploy/deployment-option/self-hosted/manual/production/production-deployment/#configure-a-bootstrap-file
|
||||
# =================================================================
|
||||
|
||||
#
|
||||
# ---------------------------------------------------------------
# Authentication
# ---------------------------------------------------------------
# Require authentication on the Admin API.
# https://docs.redpanda.com/current/reference/properties/cluster-properties/#admin_api_require_auth
admin_api_require_auth: true
# Enable SASL authentication on the Kafka API.
# https://docs.redpanda.com/current/reference/properties/cluster-properties/#enable_sasl
enable_sasl: true
# At least one superuser must exist so that other SASL users can be created.
# https://docs.redpanda.com/current/reference/properties/cluster-properties/#superusers
superusers:
  - superuser

# ---------------------------------------------------------------
# Topics and data transforms
# ---------------------------------------------------------------
# Create a topic automatically the first time it is accessed.
# https://docs.redpanda.com/current/reference/properties/cluster-properties/#auto_create_topics_enabled
auto_create_topics_enabled: true
# Turn on data transforms.
# https://docs.redpanda.com/current/develop/data-transforms/how-transforms-work/
data_transforms_enabled: true

# ---------------------------------------------------------------
# Enterprise features
# ---------------------------------------------------------------
# Audit logging (enterprise feature).
# https://docs.redpanda.com/current/manage/audit-logging/
audit_enabled: true
# Continuous Data Balancing (enterprise feature) continuously monitors node
# and rack availability and disk usage, and rebalances partitions so the
# cluster heals itself and keeps performing well.
# https://docs.redpanda.com/current/manage/cluster-maintenance/continuous-data-balancing/
partition_autobalancing_mode: continuous

# ---------------------------------------------------------------
# Tiered Storage (enterprise feature), backed by the local MinIO service
# https://docs.redpanda.com/current/manage/tiered-storage/
# ---------------------------------------------------------------
cloud_storage_enabled: true
cloud_storage_bucket: redpanda
cloud_storage_region: local
cloud_storage_api_endpoint: minio
cloud_storage_api_endpoint_port: 9000
cloud_storage_disable_tls: true
cloud_storage_access_key: minio
cloud_storage_secret_key: redpandaTieredStorage7
# Upload segments to Tiered Storage sooner than usual; quickstart only.
# https://docs.redpanda.com/current/reference/properties/object-storage-properties/#cloud_storage_segment_max_upload_interval_sec
cloud_storage_segment_max_upload_interval_sec: 60

# ---------------------------------------------------------------
# Metrics
# ---------------------------------------------------------------
# Consumer group metrics to collect.
# https://docs.redpanda.com/current/reference/properties/cluster-properties/#enable_consumer_group_metrics
enable_consumer_group_metrics:
  - group
  - partition
  - consumer_lag
# Lag collection interval, lowered for the quickstart.
# https://docs.redpanda.com/current/reference/properties/cluster-properties/#consumer_group_lag_collection_interval_sec
consumer_group_lag_collection_interval_sec: 60
# Collect host metrics.
# https://docs.redpanda.com/current/reference/properties/cluster-properties/#enable_host_metrics
enable_host_metrics: true
|
||||
@@ -0,0 +1,403 @@
|
||||
# Compose project name.
name: redpanda-quickstart-multi-broker

# Named volumes: one data volume per broker plus one for MinIO.
# A null value means no extra volume options are set.
volumes:
  redpanda-0: null
  redpanda-1: null
  redpanda-2: null
  minio: null

# Single bridge network shared by every service in this file.
networks:
  redpanda_network:
    driver: bridge
|
||||
services:
|
||||
##################
|
||||
# Redpanda Brokers #
|
||||
##################
|
||||
redpanda-0:
  image: docker.redpanda.com/redpandadata/redpanda:v25.2.7
  container_name: redpanda-0
  command:
    - redpanda
    - start
    # Kafka API listeners: 'internal' for the Docker network, 'external' for the host.
    - --kafka-addr internal://0.0.0.0:9092,external://0.0.0.0:19092
    # Address the broker advertises to clients that connect to the Kafka API.
    # Use the internal addresses to connect to the Redpanda brokers
    # from inside the same Docker network.
    # Use the external addresses to connect to the Redpanda brokers
    # from outside the Docker network.
    - --advertise-kafka-addr internal://redpanda-0:9092,external://localhost:19092
    - --pandaproxy-addr internal://0.0.0.0:8082,external://0.0.0.0:18082
    # Address the broker advertises to clients that connect to the HTTP Proxy.
    - --advertise-pandaproxy-addr internal://redpanda-0:8082,external://localhost:18082
    - --schema-registry-addr internal://0.0.0.0:8081,external://0.0.0.0:18081
    # Redpanda brokers use the RPC API to communicate with each other internally.
    - --rpc-addr redpanda-0:33145
    - --advertise-rpc-addr redpanda-0:33145
    # Mode dev-container uses well-known configuration properties for development in containers.
    - --mode dev-container
    # Tells Seastar (the framework Redpanda uses under the hood) to use 1 core on the system.
    - --smp 1
    - --default-log-level=info
  # Sets the username and password of the bootstrap SCRAM superuser.
  # See https://docs.redpanda.com/current/deploy/deployment-option/self-hosted/manual/production/production-deployment/#bootstrap-a-user-account
  environment:
    RP_BOOTSTRAP_USER: "superuser:secretpassword"
  volumes:
    - redpanda-0:/var/lib/redpanda/data
    # Initial cluster properties for the broker.
    - ./bootstrap.yml:/etc/redpanda/.bootstrap.yaml
  networks:
    - redpanda_network
  # host:container mappings, quoted so YAML always reads them as strings.
  ports:
    - "18081:18081"
    - "18082:18082"
    - "19092:19092"
    - "19644:9644"
  # Other services wait on this check through 'condition: service_healthy'.
  healthcheck:
    test: ["CMD", "rpk", "cluster", "info", "-X", "user=superuser", "-X", "pass=secretpassword"]
    interval: 10s
    timeout: 15s
    retries: 10
  # Start only after the MinIO service reports healthy.
  depends_on:
    minio:
      condition: service_healthy
|
||||
redpanda-1:
  image: docker.redpanda.com/redpandadata/redpanda:v25.2.7
  container_name: redpanda-1
  command:
    - redpanda
    - start
    # Same listener layout as redpanda-0, with external ports in the 28xxx/29xxx range.
    - --kafka-addr internal://0.0.0.0:9092,external://0.0.0.0:29092
    - --advertise-kafka-addr internal://redpanda-1:9092,external://localhost:29092
    - --pandaproxy-addr internal://0.0.0.0:8082,external://0.0.0.0:28082
    - --advertise-pandaproxy-addr internal://redpanda-1:8082,external://localhost:28082
    - --schema-registry-addr internal://0.0.0.0:8081,external://0.0.0.0:28081
    - --rpc-addr redpanda-1:33145
    - --advertise-rpc-addr redpanda-1:33145
    - --mode dev-container
    - --smp 1
    - --default-log-level=info
    # Seed server: the RPC address of redpanda-0.
    - --seeds redpanda-0:33145
  # Same bootstrap SCRAM superuser credentials as the other brokers.
  environment:
    RP_BOOTSTRAP_USER: "superuser:secretpassword"
  volumes:
    - redpanda-1:/var/lib/redpanda/data
    - ./bootstrap.yml:/etc/redpanda/.bootstrap.yaml
  networks:
    - redpanda_network
  # host:container mappings, quoted so YAML always reads them as strings.
  ports:
    - "28081:28081"
    - "28082:28082"
    - "29092:29092"
    - "29644:9644"
  # Short form: controls start order only, does not wait for health.
  depends_on:
    - redpanda-0
    - minio
|
||||
redpanda-2:
  image: docker.redpanda.com/redpandadata/redpanda:v25.2.7
  container_name: redpanda-2
  command:
    - redpanda
    - start
    # Same listener layout as redpanda-0, with external ports in the 38xxx/39xxx range.
    - --kafka-addr internal://0.0.0.0:9092,external://0.0.0.0:39092
    - --advertise-kafka-addr internal://redpanda-2:9092,external://localhost:39092
    - --pandaproxy-addr internal://0.0.0.0:8082,external://0.0.0.0:38082
    - --advertise-pandaproxy-addr internal://redpanda-2:8082,external://localhost:38082
    - --schema-registry-addr internal://0.0.0.0:8081,external://0.0.0.0:38081
    - --rpc-addr redpanda-2:33145
    - --advertise-rpc-addr redpanda-2:33145
    - --mode dev-container
    - --smp 1
    - --default-log-level=info
    # Seed server: the RPC address of redpanda-0.
    - --seeds redpanda-0:33145
  # Same bootstrap SCRAM superuser credentials as the other brokers.
  environment:
    RP_BOOTSTRAP_USER: "superuser:secretpassword"
  volumes:
    - redpanda-2:/var/lib/redpanda/data
    - ./bootstrap.yml:/etc/redpanda/.bootstrap.yaml
  networks:
    - redpanda_network
  # host:container mappings, quoted so YAML always reads them as strings.
  ports:
    - "38081:38081"
    - "38082:38082"
    - "39092:39092"
    - "39644:9644"
  # Short form: controls start order only, does not wait for health.
  depends_on:
    - redpanda-0
    - minio
|
||||
####################
|
||||
# Redpanda Console #
|
||||
####################
|
||||
console:
  image: docker.redpanda.com/redpandadata/console:v3.2.2
  container_name: redpanda-console
  networks:
    - redpanda_network
  # Write the inline configuration below to a file, then start Console.
  # '$$' is Compose's escape for a literal '$', so the shell expands the variable.
  entrypoint: /bin/sh
  command: -c 'echo "$$CONSOLE_CONFIG_FILE" > /tmp/config.yml && /app/console'
  volumes:
    - ./config:/tmp/config/
  environment:
    # Path Console reads its configuration from; can be overridden from the host environment.
    CONFIG_FILEPATH: ${CONFIG_FILEPATH:-/tmp/config.yml}
    CONSOLE_CONFIG_FILE: |
      # Configure a connection to the Redpanda cluster
      # See https://docs.redpanda.com/current/console/config/connect-to-redpanda/
      kafka:
        brokers: ["redpanda-0:9092"]
        sasl:
          enabled: true
          impersonateUser: true
      schemaRegistry:
        enabled: true
        urls: ["http://redpanda-0:8081","http://redpanda-1:8081","http://redpanda-2:8081"]
        authentication:
          impersonateUser: true
      redpanda:
        adminApi:
          enabled: true
          urls: ["http://redpanda-0:9644","http://redpanda-1:9644","http://redpanda-2:9644"]
          authentication:
            basic:
              username: superuser
              password: secretpassword
            impersonateUser: false
      console:
        # Configures Redpanda Console to fetch topic documentation from GitHub and display it in the UI.
        # See https://docs.redpanda.com/current/console/config/topic-documentation/
        topicDocumentation:
          enabled: true
          git:
            enabled: true
            repository:
              url: https://github.com/redpanda-data/docs
              branch: main
              baseDirectory: tests/docker-compose
      authentication:
        # NOTE(review): signing key is hard-coded for the quickstart; replace it outside local evaluation.
        jwtSigningKey: vazxnT+ZHtxKslK6QlDGovcYnSjTk/lKMmZ+mHrBVE+YdVDkLgSuP6AszAKe9Gvq
        basic:
          enabled: true
      authorization:
        roleBindings:
          - roleName: admin
            users:
              - loginType: basic
                name: superuser
  # host:container mapping, quoted so YAML always reads it as a string.
  ports:
    - "8080:8080"
  # Start once the first broker is healthy and the one-shot setup jobs have succeeded.
  depends_on:
    redpanda-0:
      condition: service_healthy
    createtopic:
      condition: service_completed_successfully
    registerschema:
      condition: service_completed_successfully
    deploytransform:
      condition: service_completed_successfully
|
||||
####################
|
||||
# Redpanda Connect #
|
||||
####################
|
||||
connect:
  # NOTE(review): no image tag is given, so this resolves to 'latest'; consider pinning a version.
  image: docker.redpanda.com/redpandadata/connect
  container_name: redpanda-connect
  networks:
    - redpanda_network
  depends_on:
    redpanda-0:
      condition: service_healthy
  # Write the inline pipeline below to a file, then run Redpanda Connect with it.
  # '&&' (as in the console service) prevents Connect from starting if the file
  # could not be written. '$$' is Compose's escape for a literal '$'.
  entrypoint: /bin/sh
  command: -c 'echo "$$CONNECT_CFG_FILE" > /tmp/connect.yml && /redpanda-connect -c /tmp/connect.yml'
  environment:
    # This Redpanda Connect configuration creates fake data,
    # processes it, and writes the output to a set of topics.
    #
    # Input:
    #  - Uses Redpanda Connect's generate input to generate fake data.
    #    See https://docs.redpanda.com/redpanda-connect/components/inputs/generate/
    # Pipeline:
    #  - Bloblang mapping to copy each input into a batch of 2-4 messages:
    #    the first goes to the 'logins' topic and the remaining 1-3 go to
    #    the 'transactions' topic.
    #  - Unarchive processor to parse the JSON array and extract each
    #    element into its own message.
    #    See https://docs.redpanda.com/redpanda-connect/guides/bloblang/about/
    # Output:
    #  - kafka_franz output to write the messages to the Redpanda brokers.
    #    See https://docs.redpanda.com/redpanda-connect/components/outputs/kafka_franz/
    CONNECT_CFG_FILE: |
      input:
        generate:
          interval: 1s
          mapping: |
            let first_name = fake("first_name")
            let last_name = fake("last_name")

            root.user_id = counter()
            root.name = $$first_name + " " + $$last_name
            root.email = ($$first_name.slice(0,1) + $$last_name + "@" + fake("domain_name")).lowercase()
            root.ip = fake("ipv4")
            root.login_time = now()
      pipeline:
        processors:
          - mapping: |
              root = range(0, random_int(min:2, max:4)).map_each(cust -> this)
          - unarchive:
              format: "json_array"
          - mapping: |
              if batch_index() == 0 {
                meta topic = "logins"
                root = this
              } else {
                meta topic = "transactions"
                root.user_id = this.user_id
                root.email = this.email
                root.index = batch_index() - 1
                root.product_url = fake("url")
                root.price = fake("amount_with_currency")
                root.timestamp = now()
              }
      output:
        kafka_franz:
          seed_brokers: [ "redpanda-0:9092" ]
          topic: $${! metadata("topic") }
          sasl:
            - mechanism: SCRAM-SHA-256
              password: secretpassword
              username: superuser
|
||||
####################
|
||||
# rpk container to create the edu-filtered-domains topic #
|
||||
# See https://docs.redpanda.com/current/reference/rpk/rpk-topic/rpk-topic-create/
|
||||
####################
|
||||
createtopic:
  image: docker.redpanda.com/redpandadata/redpanda:v25.2.7
  networks:
    - redpanda_network
  # Run only once the first broker passes its healthcheck.
  depends_on:
    redpanda-0:
      condition: service_healthy
  # rpk arguments: create the 'edu-filtered-domains' topic as the superuser,
  # connecting through redpanda-0's internal Kafka listener.
  command:
    - topic
    - create
    - edu-filtered-domains
    - -X user=superuser
    - -X pass=secretpassword
    - -X brokers=redpanda-0:9092
|
||||
####################
|
||||
# rpk container to register the schema #
|
||||
# See https://docs.redpanda.com/current/manage/schema-reg/schema-reg-api/
|
||||
####################
|
||||
registerschema:
  image: docker.redpanda.com/redpandadata/redpanda:v25.2.7
  networks:
    - redpanda_network
  # Make the local schema file available inside the container.
  volumes:
    - ./transactions-schema.json:/etc/redpanda/transactions-schema.json
  # Run only once the first broker passes its healthcheck.
  depends_on:
    redpanda-0:
      condition: service_healthy
  # rpk arguments: create the 'transactions' schema from the mounted file,
  # using redpanda-0's internal Schema Registry listener.
  command:
    - registry
    - schema
    - create
    - transactions
    - --schema
    - /etc/redpanda/transactions-schema.json
    - -X user=superuser
    - -X pass=secretpassword
    - -X registry.hosts=redpanda-0:8081
|
||||
####################
|
||||
# rpk container to run a consumer in the transactions-consumer group #
|
||||
# See https://docs.redpanda.com/current/reference/rpk/rpk-topic/rpk-topic-consume/
|
||||
####################
|
||||
consumergroup:
  image: docker.redpanda.com/redpandadata/redpanda:v25.2.7
  networks:
    - redpanda_network
  # Start after the topic-creation and transform-deployment jobs have succeeded.
  depends_on:
    createtopic:
      condition: service_completed_successfully
    deploytransform:
      condition: service_completed_successfully
  # rpk arguments: consume the 'transactions' topic as a member of the
  # 'transactions-consumer' group.
  command:
    - topic
    - consume
    - transactions
    - --group
    - transactions-consumer
    - -X user=superuser
    - -X pass=secretpassword
    - -X brokers=redpanda-0:9092
|
||||
####################
|
||||
# rpk container to deploy the pre-built data transform #
|
||||
# See https://docs.redpanda.com/current/develop/data-transforms/deploy/
|
||||
####################
|
||||
deploytransform:
  image: docker.redpanda.com/redpandadata/redpanda:v25.2.7
  networks:
    - redpanda_network
  # Make the pre-built WebAssembly transform available inside the container.
  volumes:
    - ./transform/regex.wasm:/etc/redpanda/regex.wasm
  # Run after the output topic has been created.
  depends_on:
    createtopic:
      condition: service_completed_successfully
  # rpk arguments: deploy the 'regex' transform from 'logins' to
  # 'edu-filtered-domains' through redpanda-0's Admin API. PATTERN and
  # MATCH_VALUE are passed to the transform as variables.
  command:
    - transform
    - deploy
    - --file=/etc/redpanda/regex.wasm
    - --name=regex
    - --input-topic=logins
    - --output-topic=edu-filtered-domains
    - --var=PATTERN="[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.edu"
    - --var=MATCH_VALUE=true
    - -X user=superuser
    - -X pass=secretpassword
    - -X admin.hosts=redpanda-0:9644
|
||||
####################
|
||||
# MinIO for Tiered Storage #
|
||||
# See https://min.io/
|
||||
#
|
||||
# NOTE: MinIO is included in this quickstart for development and evaluation purposes only.
|
||||
# It is not supported for production deployments of Redpanda.
|
||||
#
|
||||
# For production environments, use one of the supported object storage providers:
|
||||
# https://docs.redpanda.com/current/deploy/deployment-option/self-hosted/manual/production/requirements/#object-storage-providers-for-tiered-storage
|
||||
####################
|
||||
minio:
  image: minio/minio:RELEASE.2025-05-24T17-08-30Z
  container_name: minio
  # Serve objects from /data; the MinIO web console listens on port 9001.
  command: server --console-address ":9001" /data
  # host:container mappings, quoted so YAML always reads them as strings.
  ports:
    - "9000:9000"
    - "9001:9001"
  # Root credentials and region match the cloud_storage_* values in the
  # Redpanda bootstrap file of this commit.
  environment:
    MINIO_ROOT_USER: minio
    MINIO_ROOT_PASSWORD: redpandaTieredStorage7
    MINIO_SERVER_URL: "http://minio:9000"
    MINIO_REGION_NAME: local
    MINIO_DOMAIN: minio
  volumes:
    - minio:/data
  networks:
    redpanda_network:
      # Extra DNS name: the bucket name 'redpanda' as a subdomain of 'minio'.
      aliases:
        - redpanda.minio
  # The brokers and the mc service wait for this check to pass.
  healthcheck:
    test: ["CMD", "curl", "-f", "http://localhost:9000/minio/health/ready"]
    interval: 10s
    timeout: 5s
    retries: 3
|
||||
mc:
  image: minio/mc:RELEASE.2025-05-21T01-59-54Z
  container_name: mc
  # Start only after the MinIO server reports healthy.
  depends_on:
    minio:
      condition: service_healthy
  networks:
    - redpanda_network
  environment:
    - AWS_ACCESS_KEY_ID=minio
    - AWS_SECRET_ACCESS_KEY=redpandaTieredStorage7
    - AWS_REGION=local
  # Folded scalar: the lines below are joined into one shell command line.
  #  1. Retry 'mc alias set' until the MinIO server accepts the credentials.
  #  2. Create the 'redpanda' bucket; --ignore-existing makes a restart a no-op.
  #  3. Allow anonymous access to the bucket. 'mc anonymous set' replaces the
  #     'mc policy set' subcommand of older mc releases.
  #  4. Keep the container running.
  entrypoint: >
    /bin/sh -c "
    until (/usr/bin/mc alias set minio http://minio:9000 minio redpandaTieredStorage7) do echo '...waiting...' && sleep 1; done;
    /usr/bin/mc mb --ignore-existing minio/redpanda;
    /usr/bin/mc anonymous set public minio/redpanda;
    tail -f /dev/null
    "
|
||||
@@ -0,0 +1,77 @@
|
||||
input:
  # Use the 'generate' input
  # https://docs.redpanda.com/redpanda-connect/components/inputs/generate/
  generate:
    # The interval at which new records are generated.
    interval: 1s
    # The mapping section defines how each generated record is structured.
    # The language used here is called Bloblang.
    # https://docs.redpanda.com/redpanda-connect/guides/bloblang/about/
    mapping: |
      # Generate a fake first name using the 'first_name' faker function.
      let first_name = fake("first_name")

      # Generate a fake last name using the 'last_name' faker function.
      let last_name = fake("last_name")

      # Define possible subscription levels for users.
      let subscription_levels = ["Free", "Basic", "Premium"]

      # Define possible notification channels for user preferences.
      let notifications = ["email", "sms", "push" ]

      # Define supported languages for user preferences (ISO 639-1 codes).
      let languages = ["en", "es", "fr", "de", "zh", "ja"]

      # Assign a unique user ID using a UUID digit generator.
      root.user_id = fake("uuid_digit")

      # Assign the generated first name to the 'first_name' field.
      root.first_name = $first_name

      # Assign the generated last name to the 'last_name' field.
      root.last_name = $last_name

      # Construct the user's email by combining the first initial, last name, and a fake domain name.
      # The email is converted to lowercase for consistency.
      root.email = ($first_name.slice(0,1) + $last_name + "@" + fake("domain_name")).lowercase()

      # Assign a fake registration date using the 'date' faker function.
      root.registration_date = fake("date")

      # Assign the current timestamp as the last login time.
      root.last_login = now()

      # Pick a subscription level at a random index (0-2) of the 'subscription_levels' array.
      root.subscription_level = $subscription_levels.index(random_int(min: 0, max: 2))

      # Pick a language at a random index (0-5) of the 'languages' array.
      root.preferences.language = $languages.index(random_int(min: 0, max: 5))

      # Pick a notification channel at a random index (0-2) of the 'notifications' array.
      root.preferences.notifications = $notifications.index(random_int(min: 0, max: 2))
pipeline:
  processors:
    - mapping: |
        # Set the target topic for the generated records to 'profiles'.
        meta topic = "profiles"

        # Pass the record through unchanged.
        root = this
output:
  # Use the 'kafka_franz' output to send the result back to Redpanda
  # https://docs.redpanda.com/redpanda-connect/components/outputs/kafka_franz/
  kafka_franz:
    # Define the list of seed brokers for the Kafka cluster.
    # These are the brokers' external listeners, reachable from the host.
    seed_brokers: [ "localhost:19092", "localhost:29092", "localhost:39092"]
    # Dynamically assign the topic based on the metadata specified in the processors.
    # In this case, it resolves to the 'profiles' topic.
    topic: ${! metadata("topic") }
    # Configure SASL authentication to securely connect to the Kafka brokers.
    sasl:
      # Specify the SASL mechanism to use for authentication.
      - mechanism: SCRAM-SHA-256
        # The password for the SASL authentication.
        password: secretpassword
        # The username for the SASL authentication.
        username: superuser
|
||||
@@ -0,0 +1,56 @@
|
||||
# =================================================================
|
||||
# This file defines initial cluster properties for a Redpanda cluster.
|
||||
# Some of these settings are intended for quickstart development and evaluation
|
||||
# and are not suitable for production environments.
|
||||
#
|
||||
# For more information on bootstrap files, see:
|
||||
# https://docs.redpanda.com/current/deploy/deployment-option/self-hosted/manual/production/production-deployment/#configure-a-bootstrap-file
|
||||
# =================================================================
|
||||
|
||||
#
|
||||
# ---------------------------------------------------------------
# Authentication
# ---------------------------------------------------------------
# Require authentication on the Admin API.
# https://docs.redpanda.com/current/reference/properties/cluster-properties/#admin_api_require_auth
admin_api_require_auth: true
# Enable SASL authentication on the Kafka API.
# https://docs.redpanda.com/current/reference/properties/cluster-properties/#enable_sasl
enable_sasl: true
# At least one superuser must exist so that other SASL users can be created.
# https://docs.redpanda.com/current/reference/properties/cluster-properties/#superusers
superusers:
  - superuser

# ---------------------------------------------------------------
# Topics and data transforms
# ---------------------------------------------------------------
# Create a topic automatically the first time it is accessed.
# https://docs.redpanda.com/current/reference/properties/cluster-properties/#auto_create_topics_enabled
auto_create_topics_enabled: true
# Turn on data transforms.
# https://docs.redpanda.com/current/develop/data-transforms/how-transforms-work/
data_transforms_enabled: true

# ---------------------------------------------------------------
# Enterprise features
# ---------------------------------------------------------------
# Audit logging (enterprise feature).
# https://docs.redpanda.com/current/manage/audit-logging/
audit_enabled: true
# Continuous Data Balancing (enterprise feature) continuously monitors node
# and rack availability and disk usage, and rebalances partitions so the
# cluster heals itself and keeps performing well.
# https://docs.redpanda.com/current/manage/cluster-maintenance/continuous-data-balancing/
partition_autobalancing_mode: continuous

# ---------------------------------------------------------------
# Tiered Storage (enterprise feature), backed by the local MinIO service
# https://docs.redpanda.com/current/manage/tiered-storage/
# ---------------------------------------------------------------
cloud_storage_enabled: true
cloud_storage_bucket: redpanda
cloud_storage_region: local
cloud_storage_api_endpoint: minio
cloud_storage_api_endpoint_port: 9000
cloud_storage_disable_tls: true
cloud_storage_access_key: minio
cloud_storage_secret_key: redpandaTieredStorage7
# Upload segments to Tiered Storage sooner than usual; quickstart only.
# https://docs.redpanda.com/current/reference/properties/object-storage-properties/#cloud_storage_segment_max_upload_interval_sec
cloud_storage_segment_max_upload_interval_sec: 60

# ---------------------------------------------------------------
# Metrics
# ---------------------------------------------------------------
# Consumer group metrics to collect.
# https://docs.redpanda.com/current/reference/properties/cluster-properties/#enable_consumer_group_metrics
enable_consumer_group_metrics:
  - group
  - partition
  - consumer_lag
# Lag collection interval, lowered for the quickstart.
# https://docs.redpanda.com/current/reference/properties/cluster-properties/#consumer_group_lag_collection_interval_sec
consumer_group_lag_collection_interval_sec: 60
# Collect host metrics.
# https://docs.redpanda.com/current/reference/properties/cluster-properties/#enable_host_metrics
enable_host_metrics: true
|
||||
@@ -0,0 +1,403 @@
|
||||
# Compose project name.
name: redpanda-quickstart-multi-broker

# Named volumes: one data volume per broker plus one for MinIO.
# A null value means no extra volume options are set.
volumes:
  redpanda-0: null
  redpanda-1: null
  redpanda-2: null
  minio: null

# Single bridge network shared by every service in this file.
networks:
  redpanda_network:
    driver: bridge
|
||||
services:
|
||||
##################
|
||||
# Redpanda Brokers #
|
||||
##################
|
||||
redpanda-0:
  image: docker.redpanda.com/redpandadata/redpanda:v25.2.7
  container_name: redpanda-0
  command:
    - redpanda
    - start
    # Kafka API listeners: 'internal' for the Docker network, 'external' for the host.
    - --kafka-addr internal://0.0.0.0:9092,external://0.0.0.0:19092
    # Address the broker advertises to clients that connect to the Kafka API.
    # Use the internal addresses to connect to the Redpanda brokers
    # from inside the same Docker network.
    # Use the external addresses to connect to the Redpanda brokers
    # from outside the Docker network.
    - --advertise-kafka-addr internal://redpanda-0:9092,external://localhost:19092
    - --pandaproxy-addr internal://0.0.0.0:8082,external://0.0.0.0:18082
    # Address the broker advertises to clients that connect to the HTTP Proxy.
    - --advertise-pandaproxy-addr internal://redpanda-0:8082,external://localhost:18082
    - --schema-registry-addr internal://0.0.0.0:8081,external://0.0.0.0:18081
    # Redpanda brokers use the RPC API to communicate with each other internally.
    - --rpc-addr redpanda-0:33145
    - --advertise-rpc-addr redpanda-0:33145
    # Mode dev-container uses well-known configuration properties for development in containers.
    - --mode dev-container
    # Tells Seastar (the framework Redpanda uses under the hood) to use 1 core on the system.
    - --smp 1
    - --default-log-level=info
  # Sets the username and password of the bootstrap SCRAM superuser.
  # See https://docs.redpanda.com/current/deploy/deployment-option/self-hosted/manual/production/production-deployment/#bootstrap-a-user-account
  environment:
    RP_BOOTSTRAP_USER: "superuser:secretpassword"
  volumes:
    - redpanda-0:/var/lib/redpanda/data
    # Initial cluster properties for the broker.
    - ./bootstrap.yml:/etc/redpanda/.bootstrap.yaml
  networks:
    - redpanda_network
  # host:container mappings, quoted so YAML always reads them as strings.
  ports:
    - "18081:18081"
    - "18082:18082"
    - "19092:19092"
    - "19644:9644"
  # Other services wait on this check through 'condition: service_healthy'.
  healthcheck:
    test: ["CMD", "rpk", "cluster", "info", "-X", "user=superuser", "-X", "pass=secretpassword"]
    interval: 10s
    timeout: 15s
    retries: 10
  # Start only after the MinIO service reports healthy.
  depends_on:
    minio:
      condition: service_healthy
|
||||
redpanda-1:
  image: docker.redpanda.com/redpandadata/redpanda:v25.2.7
  container_name: redpanda-1
  command:
    - redpanda
    - start
    # Same listener layout as redpanda-0, with external ports in the 28xxx/29xxx range.
    - --kafka-addr internal://0.0.0.0:9092,external://0.0.0.0:29092
    - --advertise-kafka-addr internal://redpanda-1:9092,external://localhost:29092
    - --pandaproxy-addr internal://0.0.0.0:8082,external://0.0.0.0:28082
    - --advertise-pandaproxy-addr internal://redpanda-1:8082,external://localhost:28082
    - --schema-registry-addr internal://0.0.0.0:8081,external://0.0.0.0:28081
    - --rpc-addr redpanda-1:33145
    - --advertise-rpc-addr redpanda-1:33145
    - --mode dev-container
    - --smp 1
    - --default-log-level=info
    # Seed server: the RPC address of redpanda-0.
    - --seeds redpanda-0:33145
  # Same bootstrap SCRAM superuser credentials as the other brokers.
  environment:
    RP_BOOTSTRAP_USER: "superuser:secretpassword"
  volumes:
    - redpanda-1:/var/lib/redpanda/data
    - ./bootstrap.yml:/etc/redpanda/.bootstrap.yaml
  networks:
    - redpanda_network
  # host:container mappings, quoted so YAML always reads them as strings.
  ports:
    - "28081:28081"
    - "28082:28082"
    - "29092:29092"
    - "29644:9644"
  # Short form: controls start order only, does not wait for health.
  depends_on:
    - redpanda-0
    - minio
|
||||
redpanda-2:
  image: docker.redpanda.com/redpandadata/redpanda:v25.2.7
  container_name: redpanda-2
  command:
    - redpanda
    - start
    # Same listener layout as redpanda-0, with external ports in the 38xxx/39xxx range.
    - --kafka-addr internal://0.0.0.0:9092,external://0.0.0.0:39092
    - --advertise-kafka-addr internal://redpanda-2:9092,external://localhost:39092
    - --pandaproxy-addr internal://0.0.0.0:8082,external://0.0.0.0:38082
    - --advertise-pandaproxy-addr internal://redpanda-2:8082,external://localhost:38082
    - --schema-registry-addr internal://0.0.0.0:8081,external://0.0.0.0:38081
    - --rpc-addr redpanda-2:33145
    - --advertise-rpc-addr redpanda-2:33145
    - --mode dev-container
    - --smp 1
    - --default-log-level=info
    # Seed server: the RPC address of redpanda-0.
    - --seeds redpanda-0:33145
  # Same bootstrap SCRAM superuser credentials as the other brokers.
  environment:
    RP_BOOTSTRAP_USER: "superuser:secretpassword"
  volumes:
    - redpanda-2:/var/lib/redpanda/data
    - ./bootstrap.yml:/etc/redpanda/.bootstrap.yaml
  networks:
    - redpanda_network
  # host:container mappings, quoted so YAML always reads them as strings.
  ports:
    - "38081:38081"
    - "38082:38082"
    - "39092:39092"
    - "39644:9644"
  # Short form: controls start order only, does not wait for health.
  depends_on:
    - redpanda-0
    - minio
|
||||
####################
|
||||
# Redpanda Console #
|
||||
####################
|
||||
console:
  image: docker.redpanda.com/redpandadata/console:v3.2.2
  container_name: redpanda-console
  networks:
    - redpanda_network
  # Write the inline configuration below to a file, then start Console.
  # '$$' is Compose's escape for a literal '$', so the shell expands the variable.
  entrypoint: /bin/sh
  command: -c 'echo "$$CONSOLE_CONFIG_FILE" > /tmp/config.yml && /app/console'
  volumes:
    - ./config:/tmp/config/
  environment:
    # Path Console reads its configuration from; can be overridden from the host environment.
    CONFIG_FILEPATH: ${CONFIG_FILEPATH:-/tmp/config.yml}
    CONSOLE_CONFIG_FILE: |
      # Configure a connection to the Redpanda cluster
      # See https://docs.redpanda.com/current/console/config/connect-to-redpanda/
      kafka:
        brokers: ["redpanda-0:9092"]
        sasl:
          enabled: true
          impersonateUser: true
      schemaRegistry:
        enabled: true
        urls: ["http://redpanda-0:8081","http://redpanda-1:8081","http://redpanda-2:8081"]
        authentication:
          impersonateUser: true
      redpanda:
        adminApi:
          enabled: true
          urls: ["http://redpanda-0:9644","http://redpanda-1:9644","http://redpanda-2:9644"]
          authentication:
            basic:
              username: superuser
              password: secretpassword
            impersonateUser: false
      console:
        # Configures Redpanda Console to fetch topic documentation from GitHub and display it in the UI.
        # See https://docs.redpanda.com/current/console/config/topic-documentation/
        topicDocumentation:
          enabled: true
          git:
            enabled: true
            repository:
              url: https://github.com/redpanda-data/docs
              branch: main
              baseDirectory: tests/docker-compose
      authentication:
        # NOTE(review): signing key is hard-coded for the quickstart; replace it outside local evaluation.
        jwtSigningKey: vazxnT+ZHtxKslK6QlDGovcYnSjTk/lKMmZ+mHrBVE+YdVDkLgSuP6AszAKe9Gvq
        basic:
          enabled: true
      authorization:
        roleBindings:
          - roleName: admin
            users:
              - loginType: basic
                name: superuser
  # host:container mapping, quoted so YAML always reads it as a string.
  ports:
    - "8080:8080"
  # Start once the first broker is healthy and the one-shot setup jobs have succeeded.
  depends_on:
    redpanda-0:
      condition: service_healthy
    createtopic:
      condition: service_completed_successfully
    registerschema:
      condition: service_completed_successfully
    deploytransform:
      condition: service_completed_successfully
|
||||
####################
|
||||
# Redpanda Connect #
|
||||
####################
|
||||
connect:
  # NOTE(review): no image tag is given, so this resolves to 'latest'; consider pinning a version.
  image: docker.redpanda.com/redpandadata/connect
  container_name: redpanda-connect
  networks:
    - redpanda_network
  depends_on:
    redpanda-0:
      condition: service_healthy
  # Write the inline pipeline below to a file, then run Redpanda Connect with it.
  # '&&' (as in the console service) prevents Connect from starting if the file
  # could not be written. '$$' is Compose's escape for a literal '$'.
  entrypoint: /bin/sh
  command: -c 'echo "$$CONNECT_CFG_FILE" > /tmp/connect.yml && /redpanda-connect -c /tmp/connect.yml'
  environment:
    # This Redpanda Connect configuration creates fake data,
    # processes it, and writes the output to a set of topics.
    #
    # Input:
    #  - Uses Redpanda Connect's generate input to generate fake data.
    #    See https://docs.redpanda.com/redpanda-connect/components/inputs/generate/
    # Pipeline:
    #  - Bloblang mapping to copy each input into a batch of 2-4 messages:
    #    the first goes to the 'logins' topic and the remaining 1-3 go to
    #    the 'transactions' topic.
    #  - Unarchive processor to parse the JSON array and extract each
    #    element into its own message.
    #    See https://docs.redpanda.com/redpanda-connect/guides/bloblang/about/
    # Output:
    #  - kafka_franz output to write the messages to the Redpanda brokers.
    #    See https://docs.redpanda.com/redpanda-connect/components/outputs/kafka_franz/
    CONNECT_CFG_FILE: |
      input:
        generate:
          interval: 1s
          mapping: |
            let first_name = fake("first_name")
            let last_name = fake("last_name")

            root.user_id = counter()
            root.name = $$first_name + " " + $$last_name
            root.email = ($$first_name.slice(0,1) + $$last_name + "@" + fake("domain_name")).lowercase()
            root.ip = fake("ipv4")
            root.login_time = now()
      pipeline:
        processors:
          - mapping: |
              root = range(0, random_int(min:2, max:4)).map_each(cust -> this)
          - unarchive:
              format: "json_array"
          - mapping: |
              if batch_index() == 0 {
                meta topic = "logins"
                root = this
              } else {
                meta topic = "transactions"
                root.user_id = this.user_id
                root.email = this.email
                root.index = batch_index() - 1
                root.product_url = fake("url")
                root.price = fake("amount_with_currency")
                root.timestamp = now()
              }
      output:
        kafka_franz:
          seed_brokers: [ "redpanda-0:9092" ]
          topic: $${! metadata("topic") }
          sasl:
            - mechanism: SCRAM-SHA-256
              password: secretpassword
              username: superuser
|
||||
####################
|
||||
# rpk container to create the edu-filtered-domains topic #
|
||||
# See https://docs.redpanda.com/current/reference/rpk/rpk-topic/rpk-topic-create/
|
||||
####################
|
||||
createtopic:
  image: docker.redpanda.com/redpandadata/redpanda:v25.2.7
  networks:
    - redpanda_network
  # Only start once the broker reports healthy.
  depends_on:
    redpanda-0:
      condition: service_healthy
  # rpk arguments: create the `edu-filtered-domains` topic, authenticating
  # as the superuser against the internal Kafka listener.
  command:
    - topic
    - create
    - edu-filtered-domains
    - '-X user=superuser'
    - '-X pass=secretpassword'
    - '-X brokers=redpanda-0:9092'
|
||||
####################
|
||||
# rpk container to register the schema #
|
||||
# See https://docs.redpanda.com/current/manage/schema-reg/schema-reg-api/
|
||||
####################
|
||||
registerschema:
  image: docker.redpanda.com/redpandadata/redpanda:v25.2.7
  networks:
    - redpanda_network
  # Only start once the broker reports healthy.
  depends_on:
    redpanda-0:
      condition: service_healthy
  # Mount the local schema file into the container at the path that the
  # `--schema` argument below points to.
  volumes:
    - ./transactions-schema.json:/etc/redpanda/transactions-schema.json
  # rpk arguments: register the mounted schema under the `transactions`
  # subject, authenticating as the superuser against redpanda-0:8081.
  command:
    - registry
    - schema
    - create
    - transactions
    - '--schema'
    - /etc/redpanda/transactions-schema.json
    - '-X user=superuser'
    - '-X pass=secretpassword'
    - '-X registry.hosts=redpanda-0:8081'
|
||||
####################
|
||||
# rpk container to deploy a consumer group #
|
||||
# See https://docs.redpanda.com/current/reference/rpk/rpk-topic/rpk-topic-consume/
|
||||
####################
|
||||
consumergroup:
  image: docker.redpanda.com/redpandadata/redpanda:v25.2.7
  networks:
    - redpanda_network
  # Start only after the one-shot setup containers have finished successfully.
  depends_on:
    createtopic:
      condition: service_completed_successfully
    deploytransform:
      condition: service_completed_successfully
  # rpk arguments: consume the `transactions` topic as a member of the
  # `transactions-consumer` group, authenticating as the superuser.
  command:
    - topic
    - consume
    - transactions
    - '--group'
    - transactions-consumer
    - '-X user=superuser'
    - '-X pass=secretpassword'
    - '-X brokers=redpanda-0:9092'
|
||||
####################
|
||||
# rpk container to deploy the pre-built data transform #
|
||||
# See https://docs.redpanda.com/current/develop/data-transforms/deploy/
|
||||
####################
|
||||
deploytransform:
  image: docker.redpanda.com/redpandadata/redpanda:v25.2.7
  networks:
    - redpanda_network
  # The output topic must exist before the transform is deployed.
  depends_on:
    createtopic:
      condition: service_completed_successfully
  # Mount the pre-built Wasm binary at the path passed to `--file` below.
  volumes:
    - ./transform/regex.wasm:/etc/redpanda/regex.wasm
  # rpk arguments: deploy `regex.wasm` as the `regex` transform, reading from
  # `logins` and writing to `edu-filtered-domains`. PATTERN and MATCH_VALUE
  # are passed to the transform as environment variables.
  command:
    - transform
    - deploy
    - '--file=/etc/redpanda/regex.wasm'
    - '--name=regex'
    - '--input-topic=logins'
    - '--output-topic=edu-filtered-domains'
    - '--var=PATTERN="[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.edu"'
    - '--var=MATCH_VALUE=true'
    - '-X user=superuser'
    - '-X pass=secretpassword'
    - '-X admin.hosts=redpanda-0:9644'
|
||||
####################
|
||||
# MinIO for Tiered Storage #
|
||||
# See https://min.io/
|
||||
#
|
||||
# NOTE: MinIO is included in this quickstart for development and evaluation purposes only.
|
||||
# It is not supported for production deployments of Redpanda.
|
||||
#
|
||||
# For production environments, use one of the supported object storage providers:
|
||||
# https://docs.redpanda.com/current/deploy/deployment-option/self-hosted/manual/production/requirements/#object-storage-providers-for-tiered-storage
|
||||
####################
|
||||
minio:
  image: minio/minio:RELEASE.2025-05-24T17-08-30Z
  container_name: minio
  # Serve objects from /data and expose the web console on port 9001.
  command: server --console-address ":9001" /data
  # Port mappings are quoted so they are always read as strings.
  ports:
    - '9000:9000'
    - '9001:9001'
  environment:
    MINIO_ROOT_USER: 'minio'
    MINIO_ROOT_PASSWORD: 'redpandaTieredStorage7'
    MINIO_SERVER_URL: 'http://minio:9000'
    MINIO_REGION_NAME: 'local'
    MINIO_DOMAIN: 'minio'
  volumes:
    - minio:/data
  networks:
    redpanda_network:
      # Extra DNS name under which this container is reachable on the network.
      aliases:
        - redpanda.minio
  # Other services wait on this check through `condition: service_healthy`.
  healthcheck:
    test:
      - CMD
      - curl
      - '-f'
      - http://localhost:9000/minio/health/ready
    interval: 10s
    timeout: 5s
    retries: 3
|
||||
mc:
  image: minio/mc:RELEASE.2025-05-21T01-59-54Z
  container_name: mc
  depends_on:
    minio:
      condition: service_healthy
  networks:
    - redpanda_network
  environment:
    - AWS_ACCESS_KEY_ID=minio
    - AWS_SECRET_ACCESS_KEY=redpandaTieredStorage7
    - AWS_REGION=local
  # One-shot bucket bootstrap:
  #  1. Retry `mc alias set` until MinIO accepts the credentials.
  #  2. Create the `redpanda` bucket; `--ignore-existing` keeps a container
  #     restart from failing when the bucket is already there.
  #  3. Allow anonymous access to the bucket. `mc anonymous` replaces the
  #     `mc policy` command, which current mc releases no longer accept.
  #  4. Keep the container alive with `tail -f /dev/null`.
  entrypoint: >
    /bin/sh -c "
    until (/usr/bin/mc alias set minio http://minio:9000 minio redpandaTieredStorage7) do echo '...waiting...' && sleep 1; done;
    /usr/bin/mc mb --ignore-existing minio/redpanda;
    /usr/bin/mc anonymous set public minio/redpanda;
    tail -f /dev/null
    "
|
||||
@@ -0,0 +1,77 @@
|
||||
input:
  # Use the 'generate' input
  # https://docs.redpanda.com/redpanda-connect/components/inputs/generate/
  generate:
    # The interval at which new records are generated.
    interval: 1s
    # The mapping section defines how each generated record is structured.
    # The language used here is called Bloblang.
    # https://docs.redpanda.com/redpanda-connect/guides/bloblang/about/
    mapping: |
      # Generate a fake first name using the 'first_name' faker function.
      let first_name = fake("first_name")

      # Generate a fake last name using the 'last_name' faker function.
      let last_name = fake("last_name")

      # Define possible subscription levels for users.
      let subscription_levels = ["Free", "Basic", "Premium"]

      # Define possible notification channels for user preferences.
      let notifications = ["email", "sms", "push" ]

      # Define supported languages for user preferences (ISO 639-1 codes;
      # Japanese is "ja" -- "jp" is the country code, not a language code).
      let languages = ["en", "es", "fr", "de", "zh", "ja"]

      # Assign a unique user ID using a UUID digit generator.
      root.user_id = fake("uuid_digit")

      # Assign the generated first name to the 'first_name' field.
      root.first_name = $first_name

      # Assign the generated last name to the 'last_name' field.
      root.last_name = $last_name

      # Construct the user's email by combining the first initial, last name, and a fake domain name.
      # The email is converted to lowercase for consistency.
      root.email = ($first_name.slice(0,1) + $last_name + "@" + fake("domain_name")).lowercase()

      # Assign a fake registration date using the 'date' faker function.
      root.registration_date = fake("date")

      # Assign the current timestamp as the last login time.
      root.last_login = now()

      # Randomly assign a subscription level by selecting an index from the 'subscription_levels' array.
      # Keep 'max' equal to the array length minus one when editing the array.
      root.subscription_level = $subscription_levels.index(random_int(min: 0, max: 2))

      # Randomly assign a language preference by selecting an index from the 'languages' array.
      # Keep 'max' equal to the array length minus one when editing the array.
      root.preferences.language = $languages.index(random_int(min: 0, max: 5))

      # Randomly assign a notification preference by selecting an index from the 'notifications' array.
      # Keep 'max' equal to the array length minus one when editing the array.
      root.preferences.notifications = $notifications.index(random_int(min: 0, max: 2))
pipeline:
  processors:
    - mapping: |
        # Set the target topic for the generated records to 'profiles'.
        meta topic = "profiles"

        # Assign the entire record (root) to be sent to the specified topic.
        root = this
output:
  # Use the 'kafka_franz' output to send the result back to Redpanda
  # https://docs.redpanda.com/redpanda-connect/components/outputs/kafka_franz/
  kafka_franz:
    # Define the list of seed brokers for the Kafka cluster.
    seed_brokers:
      - "localhost:19092"
      - "localhost:29092"
      - "localhost:39092"
    # Dynamically assign the topic based on the metadata specified in the processors.
    # In this case, it resolves to the 'profiles' topic.
    topic: '${! metadata("topic") }'
    # Configure SASL authentication to securely connect to the Kafka brokers.
    sasl:
      # Specify the SASL mechanism to use for authentication.
      - mechanism: SCRAM-SHA-256
        # The password for the SASL authentication.
        password: secretpassword
        # The username for the SASL authentication.
        username: superuser
|
||||
@@ -0,0 +1,24 @@
|
||||
# This file configures `rpk` to connect to a remote Redpanda cluster running in the same local network as `rpk`.
|
||||
|
||||
# Kafka API connection settings.
kafka_api:
  # Kafka listeners of the three brokers, as published on localhost.
  brokers:
    - '127.0.0.1:19092'  # Broker 1
    - '127.0.0.1:29092'  # Broker 2
    - '127.0.0.1:39092'  # Broker 3
  # SASL credentials that rpk presents to the brokers.
  sasl:
    mechanism: scram-sha-256  # SCRAM-SHA-256 password-based authentication
    user: superuser
    password: secretpassword

# Admin API connection settings.
# rpk uses the Admin API for administrative tasks such as managing
# configuration and monitoring the cluster.
admin_api:
  # Admin API listeners of the three brokers, as published on localhost.
  addresses:
    - '127.0.0.1:19644'  # Broker 1
    - '127.0.0.1:29644'  # Broker 2
    - '127.0.0.1:39644'  # Broker 3
|
||||
@@ -0,0 +1,37 @@
|
||||
{
|
||||
"$schema": "http://json-schema.org/draft-07/schema#",
|
||||
"title": "Transactions",
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"email": {
|
||||
"type": "string",
|
||||
"format": "email",
|
||||
"description": "The email address of the user involved in the transaction."
|
||||
},
|
||||
"index": {
|
||||
"type": "integer",
|
||||
"description": "A numeric index associated with the transaction."
|
||||
},
|
||||
"price": {
|
||||
"type": "string",
|
||||
"pattern": "^XXX \\d+\\.\\d{6}$",
|
||||
"description": "A string representing the price of the product, including a currency code followed by the amount."
|
||||
},
|
||||
"product_url": {
|
||||
"type": "string",
|
||||
"format": "uri",
|
||||
"description": "A URL that points to the product involved in the transaction."
|
||||
},
|
||||
"timestamp": {
|
||||
"type": "string",
|
||||
"format": "date-time",
|
||||
"description": "The timestamp of when the transaction occurred, formatted in ISO 8601."
|
||||
},
|
||||
"user_id": {
|
||||
"type": "integer",
|
||||
"description": "A numeric identifier for the user."
|
||||
}
|
||||
},
|
||||
"required": ["email", "index", "price", "product_url", "timestamp", "user_id"],
|
||||
"additionalProperties": false
|
||||
}
|
||||
@@ -0,0 +1,46 @@
|
||||
# Transactions Topic Documentation
|
||||
|
||||
This document provides an overview of the `transactions` topic in the Redpanda cluster. The topic is designed to capture autogenerated transaction events with various attributes.
|
||||
|
||||
## Schema Overview
|
||||
|
||||
Each message in the `transactions` topic is a JSON object with the following fields and types (the full JSON Schema is in `transactions-schema.json`):
|
||||
|
||||
```json
|
||||
{
|
||||
"email": "string",
|
||||
"index": "integer",
|
||||
"price": "string",
|
||||
"product_url": "string",
|
||||
"timestamp": "string",
|
||||
"user_id": "integer"
|
||||
}
|
||||
```
|
||||
|
||||
- **email**: The email address of the user involved in the transaction.
|
||||
- **index**: A numeric index associated with the transaction. This could represent the position or order of the transaction in a sequence.
|
||||
- **price**: A string representing the price of the product. It includes a currency code (e.g., "XXX") followed by the amount.
|
||||
- **product_url**: A URL that points to the product involved in the transaction.
|
||||
- **timestamp**: The timestamp of when the transaction occurred, formatted in ISO 8601.
|
||||
- **user_id**: A numeric identifier for the user. This is typically a unique ID assigned to each user in the system.
|
||||
|
||||
## Example message
|
||||
|
||||
```json
|
||||
{
|
||||
"email": "wzieme@ykczius.edu",
|
||||
"index": 0,
|
||||
"price": "XXX 5651308.100000",
|
||||
"product_url": "http://yjomdta.top/DxvGsCn.php",
|
||||
"timestamp": "2024-08-16T15:51:19.799474084Z",
|
||||
"user_id": 1
|
||||
}
|
||||
```
|
||||
|
||||
## Use cases
|
||||
|
||||
You can use the `transactions` topic for various purposes, including:
|
||||
|
||||
- **Analytics**: Tracking and analyzing user transactions to understand buying behavior, popular products, etc.
|
||||
- **Monitoring**: Observing transaction patterns to detect anomalies, such as unusual spikes or drops in transaction volume.
|
||||
- **Data Processing**: Feeding transaction data into other systems, such as a data warehouse or real-time processing pipelines, for further processing and analysis.
|
||||
@@ -0,0 +1,73 @@
|
||||
= Modify the Wasm Transform in the Quickstart
|
||||
|
||||
This directory contains the Go source code (`transform.go`) for the data transform that is used in the Redpanda Self-Managed quickstart.
|
||||
If you're following the quickstart, you *do not* need to modify or rebuild this code. The Docker Compose configuration automatically deploys a pre-built transform called `regex.wasm`.
|
||||
|
||||
However, if you want to customize the data transform logic, continue reading.
|
||||
|
||||
== Why customize the transform?
|
||||
|
||||
- **Custom filtering**: Filter by a different regex or apply multiple conditions.
|
||||
- **Data manipulation**: Transform records before writing them out. For example, redacting sensitive data or combining fields.
|
||||
- **Extended functionality**: Add advanced logging, error handling, or multi-topic routing.
|
||||
|
||||
== Prerequisites
|
||||
|
||||
You need the following:
|
||||
|
||||
- At least Go 1.20 installed.
|
||||
+
|
||||
[source,bash]
|
||||
----
|
||||
go version
|
||||
----
|
||||
|
||||
- The Redpanda CLI (`rpk`) installed.
|
||||
|
||||
- A running Redpanda cluster. If you're using the local quickstart with Docker Compose, ensure the cluster is up and running. Or, point `rpk` to another Redpanda environment.
|
||||
|
||||
== Modify and deploy your transform
|
||||
|
||||
. Open link:transform.go[transform.go] and make your changes. For example:
|
||||
+
|
||||
--
|
||||
- Change the regex logic to handle different use cases.
|
||||
- Add environment variables to control new features.
|
||||
- Extend the `doRegexFilter()` function to manipulate records.
|
||||
--
|
||||
|
||||
. Compile your Go code into a `.wasm` file:
|
||||
+
|
||||
[source,bash]
|
||||
----
|
||||
rpk transform build
|
||||
----
|
||||
+
|
||||
This command compiles your Go source and produces a `.wasm` file that you can deploy to Redpanda.
|
||||
|
||||
. Deploy the new transform.
|
||||
+
|
||||
If your Docker Compose setup already has a service to deploy the transform, you can restart that service.
|
||||
+
|
||||
Otherwise, you can deploy your updated `.wasm` manually using `rpk transform deploy`.
|
||||
|
||||
. Produce messages into the input topic. For example:
|
||||
+
|
||||
[source,bash]
|
||||
----
|
||||
echo '{"key":"alice@university.edu","value":"test message"}' | rpk topic produce logins
|
||||
----
|
||||
|
||||
. Consume from the output topic. For example:
|
||||
+
|
||||
[source,bash]
|
||||
----
|
||||
rpk topic consume edu-filtered-domains --num 1
|
||||
----
|
||||
|
||||
== Suggested reading
|
||||
|
||||
- link:https://docs.redpanda.com/current/reference/rpk/[Redpanda `rpk` CLI Reference^].
|
||||
- link:https://docs.redpanda.com/current/develop/data-transforms/build/[Develop Data Transforms^].
|
||||
- https://golang.org/ref/mod[Go Modules^] for managing dependencies and builds in Go.
|
||||
- https://docs.docker.com/compose/[Docker Compose^] for customizing your environment.
|
||||
@@ -0,0 +1,5 @@
|
||||
module regex
|
||||
|
||||
go 1.20
|
||||
|
||||
require github.com/redpanda-data/redpanda/src/transform-sdk/go/transform v1.1.0
|
||||
@@ -0,0 +1,2 @@
|
||||
github.com/redpanda-data/redpanda/src/transform-sdk/go/transform v1.1.0 h1:KxgHJZsHsrT3YX7DMpu/vJN4TZN3KFm1jzrCFLyOepA=
|
||||
github.com/redpanda-data/redpanda/src/transform-sdk/go/transform v1.1.0/go.mod h1:QGgiwwf/BIsD1b7EiyQ/Apzw+RLSpasRDdpOCiefQFQ=
|
||||
Binary file not shown.
@@ -0,0 +1,122 @@
|
||||
package main
|
||||
// This data transform filters records based on a customizable regex pattern.
|
||||
// If a record's key or value
|
||||
// (determined by an environment variable) matches the specified regex,
|
||||
// the record is forwarded to the output.
|
||||
// Otherwise, it is dropped.
|
||||
//
|
||||
// Usage:
|
||||
// 1. Provide the following environment variables in your Docker or configuration setup:
|
||||
// - PATTERN : (required) a regular expression that determines what you want to match.
|
||||
// - MATCH_VALUE : (optional) a boolean to decide whether to check the record value. If false,
|
||||
// the record key is checked. Default is false.
|
||||
//
|
||||
// Example environment variables:
|
||||
// PATTERN=".*\\.edu$"
|
||||
// MATCH_VALUE="true"
|
||||
//
|
||||
// Logs:
|
||||
// This transform logs information about each record and whether it matched.
|
||||
// The logs appear in the _redpanda.transform_logs topic, so you can debug how your records are being processed.
|
||||
//
|
||||
// Build instructions:
|
||||
// go mod tidy
|
||||
// rpk transform build
|
||||
//
|
||||
// For more details on building transforms with the Redpanda SDK, see:
|
||||
// https://docs.redpanda.com/current/develop/data-transforms
|
||||
//
|
||||
|
||||
import (
|
||||
"log"
|
||||
"os"
|
||||
"regexp"
|
||||
"strings"
|
||||
|
||||
"github.com/redpanda-data/redpanda/src/transform-sdk/go/transform"
|
||||
)
|
||||
|
||||
var (
|
||||
re *regexp.Regexp
|
||||
checkValue bool
|
||||
)
|
||||
|
||||
// isTrueVar reports whether v is one of the accepted "truthy" spellings
// ("yes", "ok", "1" or "true"), compared case-insensitively.
// Any other input, including the empty string, yields false.
func isTrueVar(v string) bool {
	lowered := strings.ToLower(v)
	for _, truthy := range [...]string{"yes", "ok", "1", "true"} {
		if lowered == truthy {
			return true
		}
	}
	return false
}
|
||||
|
||||
// The main() function runs only once at startup. It performs all initialization steps:
|
||||
// - Reads and compiles the regex pattern.
|
||||
// - Determines whether to match on the key or value.
|
||||
// - Registers the doRegexFilter() function to process records.
|
||||
func main() {
|
||||
// Set logging preferences, including timestamp and UTC time.
|
||||
log.SetPrefix("[regex-transform] ")
|
||||
log.SetFlags(log.Ldate | log.Ltime | log.LUTC | log.Lmicroseconds)
|
||||
|
||||
// Start logging the transformation process
|
||||
log.Println("Starting transform...")
|
||||
|
||||
// Read the PATTERN environment variable to get the regex pattern.
|
||||
pattern, ok := os.LookupEnv("PATTERN")
|
||||
if !ok {
|
||||
log.Fatal("Missing PATTERN environment variable")
|
||||
}
|
||||
// Log the regex pattern being used.
|
||||
log.Printf("Using PATTERN: %q\n", pattern)
|
||||
// Compile the regex pattern for later use.
|
||||
re = regexp.MustCompile(pattern)
|
||||
|
||||
// Read the MATCH_VALUE environment variable to determine whether to check the record's value.
|
||||
mk, ok := os.LookupEnv("MATCH_VALUE")
|
||||
checkValue = ok && isTrueVar(mk)
|
||||
log.Printf("MATCH_VALUE set to: %t\n", checkValue)
|
||||
|
||||
log.Println("Initialization complete, waiting for records...")
|
||||
|
||||
// Listen for records to be written, calling doRegexFilter() for each record.
|
||||
transform.OnRecordWritten(doRegexFilter)
|
||||
}
|
||||
|
||||
// The doRegexFilter() function executes each time a new record is written.
|
||||
// It checks whether the record's key or value (based on MATCH_VALUE) matches the compiled regex.
|
||||
// If it matches, the record is forwarded, if not, it's dropped.
|
||||
func doRegexFilter(e transform.WriteEvent, w transform.RecordWriter) error {
|
||||
// This stores the data to be checked (either the key or value).
|
||||
var dataToCheck []byte
|
||||
|
||||
// Depending on the MATCH_VALUE environment variable, decide whether to check the record's key or value.
|
||||
if checkValue {
|
||||
// Use the value of the record if MATCH_VALUE is true.
|
||||
dataToCheck = e.Record().Value
|
||||
log.Printf("Checking record value: %s\n", string(dataToCheck))
|
||||
} else {
|
||||
// Use the key of the record if MATCH_VALUE is false.
|
||||
dataToCheck = e.Record().Key
|
||||
log.Printf("Checking record key: %s\n", string(dataToCheck))
|
||||
}
|
||||
|
||||
// If there is no key or value to check, log and skip the record.
|
||||
if dataToCheck == nil {
|
||||
log.Println("Record has no key/value to check, skipping.")
|
||||
return nil
|
||||
}
|
||||
|
||||
// Check if the data matches the regex pattern.
|
||||
pass := re.Match(dataToCheck)
|
||||
if pass {
|
||||
// If the record matches the pattern, log and write the record to the output topic.
|
||||
log.Printf("Record matched pattern, passing through. Key: %s, Value: %s\n", string(e.Record().Key), string(e.Record().Value))
|
||||
return w.Write(e.Record())
|
||||
} else {
|
||||
// If the record does not match the pattern, log and drop the record.
|
||||
log.Printf("Record did not match pattern, dropping. Key: %s, Value: %s\n", string(e.Record().Key), string(e.Record().Value))
|
||||
// Do not write the record if it doesn't match the pattern.
|
||||
return nil
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,33 @@
|
||||
# Transform metadata used by the rpk transform build command.
|
||||
# This metadata file tells rpk:
|
||||
# 1) The transform’s display name, which also becomes the base for the .wasm file name.
|
||||
# 2) A brief description of what it does.
|
||||
# 3) Defaults for environment variables.
|
||||
# 4) Input and output topics (if you want to define them here rather than in the deploy command).
|
||||
|
||||
# Human-readable name of the transform. rpk transform build uses this for the generated .wasm file.
name: regex

# Free-form description of what the transform does and which environment
# variables it reads.
description: |
  Filters the input topic to records that only match a regular expression.

  Regular expressions are implemented using Go's regexp library, which uses the syntax of RE2.
  See the RE2 wiki for allowed syntax: https://github.com/google/re2/wiki/Syntax

  Environment variables:
  - PATTERN: The regular expression that will match against records (required).
  - MATCH_VALUE: By default, the regex matches keys, but if set to "true", the regex matches values.

# Indicates the specific TinyGo environment used to compile your transform.
language: tinygo-no-goroutines

# Left empty here; the input topic can be set in the deploy command instead.
input-topic: ''

# Left empty here; the output topic can be set in the deploy command instead.
output-topic: ''

env:
  # The PATTERN variable must be provided at deploy time.
  # Example: --var=PATTERN=".*@example.com"
  PATTERN: "<required>"
|
||||
@@ -0,0 +1,24 @@
|
||||
# This file configures `rpk` to connect to a remote Redpanda cluster running in the same local network as `rpk`.
|
||||
|
||||
# Kafka API connection settings.
kafka_api:
  # Kafka listeners of the three brokers, as published on localhost.
  brokers:
    - '127.0.0.1:19092'  # Broker 1
    - '127.0.0.1:29092'  # Broker 2
    - '127.0.0.1:39092'  # Broker 3
  # SASL credentials that rpk presents to the brokers.
  sasl:
    mechanism: scram-sha-256  # SCRAM-SHA-256 password-based authentication
    user: superuser
    password: secretpassword

# Admin API connection settings.
# rpk uses the Admin API for administrative tasks such as managing
# configuration and monitoring the cluster.
admin_api:
  # Admin API listeners of the three brokers, as published on localhost.
  addresses:
    - '127.0.0.1:19644'  # Broker 1
    - '127.0.0.1:29644'  # Broker 2
    - '127.0.0.1:39644'  # Broker 3
|
||||
@@ -0,0 +1,37 @@
|
||||
{
|
||||
"$schema": "http://json-schema.org/draft-07/schema#",
|
||||
"title": "Transactions",
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"email": {
|
||||
"type": "string",
|
||||
"format": "email",
|
||||
"description": "The email address of the user involved in the transaction."
|
||||
},
|
||||
"index": {
|
||||
"type": "integer",
|
||||
"description": "A numeric index associated with the transaction."
|
||||
},
|
||||
"price": {
|
||||
"type": "string",
|
||||
"pattern": "^XXX \\d+\\.\\d{6}$",
|
||||
"description": "A string representing the price of the product, including a currency code followed by the amount."
|
||||
},
|
||||
"product_url": {
|
||||
"type": "string",
|
||||
"format": "uri",
|
||||
"description": "A URL that points to the product involved in the transaction."
|
||||
},
|
||||
"timestamp": {
|
||||
"type": "string",
|
||||
"format": "date-time",
|
||||
"description": "The timestamp of when the transaction occurred, formatted in ISO 8601."
|
||||
},
|
||||
"user_id": {
|
||||
"type": "integer",
|
||||
"description": "A numeric identifier for the user."
|
||||
}
|
||||
},
|
||||
"required": ["email", "index", "price", "product_url", "timestamp", "user_id"],
|
||||
"additionalProperties": false
|
||||
}
|
||||
@@ -0,0 +1,46 @@
|
||||
# Transactions Topic Documentation
|
||||
|
||||
This document provides an overview of the `transactions` topic in the Redpanda cluster. The topic is designed to capture autogenerated transaction events with various attributes.
|
||||
|
||||
## Schema Overview
|
||||
|
||||
Each message in the `transactions` topic is a JSON object with the following fields and types (the full JSON Schema is in `transactions-schema.json`):
|
||||
|
||||
```json
|
||||
{
|
||||
"email": "string",
|
||||
"index": "integer",
|
||||
"price": "string",
|
||||
"product_url": "string",
|
||||
"timestamp": "string",
|
||||
"user_id": "integer"
|
||||
}
|
||||
```
|
||||
|
||||
- **email**: The email address of the user involved in the transaction.
|
||||
- **index**: A numeric index associated with the transaction. This could represent the position or order of the transaction in a sequence.
|
||||
- **price**: A string representing the price of the product. It includes a currency code (e.g., "XXX") followed by the amount.
|
||||
- **product_url**: A URL that points to the product involved in the transaction.
|
||||
- **timestamp**: The timestamp of when the transaction occurred, formatted in ISO 8601.
|
||||
- **user_id**: A numeric identifier for the user. This is typically a unique ID assigned to each user in the system.
|
||||
|
||||
## Example message
|
||||
|
||||
```json
|
||||
{
|
||||
"email": "wzieme@ykczius.edu",
|
||||
"index": 0,
|
||||
"price": "XXX 5651308.100000",
|
||||
"product_url": "http://yjomdta.top/DxvGsCn.php",
|
||||
"timestamp": "2024-08-16T15:51:19.799474084Z",
|
||||
"user_id": 1
|
||||
}
|
||||
```
|
||||
|
||||
## Use cases
|
||||
|
||||
You can use the `transactions` topic for various purposes, including:
|
||||
|
||||
- **Analytics**: Tracking and analyzing user transactions to understand buying behavior, popular products, etc.
|
||||
- **Monitoring**: Observing transaction patterns to detect anomalies, such as unusual spikes or drops in transaction volume.
|
||||
- **Data Processing**: Feeding transaction data into other systems, such as a data warehouse or real-time processing pipelines, for further processing and analysis.
|
||||
@@ -0,0 +1,73 @@
|
||||
= Modify the Wasm Transform in the Quickstart
|
||||
|
||||
This directory contains the Go source code (`transform.go`) for the data transform that is used in the Redpanda Self-Managed quickstart.
|
||||
If you're following the quickstart, you *do not* need to modify or rebuild this code. The Docker Compose configuration automatically deploys a pre-built transform called `regex.wasm`.
|
||||
|
||||
However, if you want to customize the data transform logic, continue reading.
|
||||
|
||||
== Why customize the transform?
|
||||
|
||||
- **Custom filtering**: Filter by a different regex or apply multiple conditions.
|
||||
- **Data manipulation**: Transform records before writing them out. For example, redacting sensitive data or combining fields.
|
||||
- **Extended functionality**: Add advanced logging, error handling, or multi-topic routing.
|
||||
|
||||
== Prerequisites
|
||||
|
||||
You need the following:
|
||||
|
||||
- At least Go 1.20 installed.
|
||||
+
|
||||
[source,bash]
|
||||
----
|
||||
go version
|
||||
----
|
||||
|
||||
- The Redpanda CLI (`rpk`) installed.
|
||||
|
||||
- A running Redpanda cluster. If you're using the local quickstart with Docker Compose, ensure the cluster is up and running. Or, point `rpk` to another Redpanda environment.
|
||||
|
||||
== Modify and deploy your transform
|
||||
|
||||
. Open link:transform.go[transform.go] and make your changes. For example:
|
||||
+
|
||||
--
|
||||
- Change the regex logic to handle different use cases.
|
||||
- Add environment variables to control new features.
|
||||
- Extend the `doRegexFilter()` function to manipulate records.
|
||||
--
|
||||
|
||||
. Compile your Go code into a `.wasm` file:
|
||||
+
|
||||
[source,bash]
|
||||
----
|
||||
rpk transform build
|
||||
----
|
||||
+
|
||||
This command compiles your Go source and produces a `.wasm` file that you can deploy to Redpanda.
|
||||
|
||||
. Deploy the new transform.
|
||||
+
|
||||
If your Docker Compose setup already has a service to deploy the transform, you can restart that service.
|
||||
+
|
||||
Otherwise, you can deploy your updated `.wasm` manually using `rpk transform deploy`.
|
||||
|
||||
. Produce messages into the input topic. For example:
|
||||
+
|
||||
[source,bash]
|
||||
----
|
||||
echo '{"key":"alice@university.edu","value":"test message"}' | rpk topic produce logins
|
||||
----
|
||||
|
||||
. Consume from the output topic. For example:
|
||||
+
|
||||
[source,bash]
|
||||
----
|
||||
rpk topic consume edu-filtered-domains --num 1
|
||||
----
|
||||
|
||||
== Suggested reading
|
||||
|
||||
- link:https://docs.redpanda.com/current/reference/rpk/[Redpanda `rpk` CLI Reference^].
|
||||
- link:https://docs.redpanda.com/current/develop/data-transforms/build/[Develop Data Transforms^].
|
||||
- https://golang.org/ref/mod[Go Modules^] for managing dependencies and builds in Go.
|
||||
- https://docs.docker.com/compose/[Docker Compose^] for customizing your environment.
|
||||
@@ -0,0 +1,5 @@
|
||||
module regex
|
||||
|
||||
go 1.20
|
||||
|
||||
require github.com/redpanda-data/redpanda/src/transform-sdk/go/transform v1.1.0
|
||||
@@ -0,0 +1,2 @@
|
||||
github.com/redpanda-data/redpanda/src/transform-sdk/go/transform v1.1.0 h1:KxgHJZsHsrT3YX7DMpu/vJN4TZN3KFm1jzrCFLyOepA=
|
||||
github.com/redpanda-data/redpanda/src/transform-sdk/go/transform v1.1.0/go.mod h1:QGgiwwf/BIsD1b7EiyQ/Apzw+RLSpasRDdpOCiefQFQ=
|
||||
Binary file not shown.
@@ -0,0 +1,122 @@
|
||||
package main
|
||||
// This data transform filters records based on a customizable regex pattern.
|
||||
// If a record's key or value
|
||||
// (determined by an environment variable) matches the specified regex,
|
||||
// the record is forwarded to the output.
|
||||
// Otherwise, it is dropped.
|
||||
//
|
||||
// Usage:
|
||||
// 1. Provide the following environment variables in your Docker or configuration setup:
|
||||
// - PATTERN : (required) a regular expression that determines what you want to match.
|
||||
// - MATCH_VALUE : (optional) a boolean to decide whether to check the record value. If false,
|
||||
// the record key is checked. Default is false.
|
||||
//
|
||||
// Example environment variables:
|
||||
// PATTERN=".*\\.edu$"
|
||||
// MATCH_VALUE="true"
|
||||
//
|
||||
// Logs:
|
||||
// This transform logs information about each record and whether it matched.
|
||||
// The logs appear in the _redpanda.transform_logs topic, so you can debug how your records are being processed.
|
||||
//
|
||||
// Build instructions:
|
||||
// go mod tidy
|
||||
// rpk transform build
|
||||
//
|
||||
// For more details on building transforms with the Redpanda SDK, see:
|
||||
// https://docs.redpanda.com/current/develop/data-transforms
|
||||
//
|
||||
|
||||
import (
|
||||
"log"
|
||||
"os"
|
||||
"regexp"
|
||||
"strings"
|
||||
|
||||
"github.com/redpanda-data/redpanda/src/transform-sdk/go/transform"
|
||||
)
|
||||
|
||||
var (
|
||||
re *regexp.Regexp
|
||||
checkValue bool
|
||||
)
|
||||
|
||||
// isTrueVar reports whether v is one of the accepted "enabled" spellings:
// "yes", "ok", "1", or "true". The comparison ignores letter case; any other
// input, including the empty string, yields false.
func isTrueVar(v string) bool {
	lowered := strings.ToLower(v)
	for _, accepted := range [...]string{"yes", "ok", "1", "true"} {
		if lowered == accepted {
			return true
		}
	}
	return false
}
|
||||
|
||||
// The main() function runs only once at startup. It performs all initialization steps:
|
||||
// - Reads and compiles the regex pattern.
|
||||
// - Determines whether to match on the key or value.
|
||||
// - Registers the doRegexFilter() function to process records.
|
||||
func main() {
|
||||
// Set logging preferences, including timestamp and UTC time.
|
||||
log.SetPrefix("[regex-transform] ")
|
||||
log.SetFlags(log.Ldate | log.Ltime | log.LUTC | log.Lmicroseconds)
|
||||
|
||||
// Start logging the transformation process
|
||||
log.Println("Starting transform...")
|
||||
|
||||
// Read the PATTERN environment variable to get the regex pattern.
|
||||
pattern, ok := os.LookupEnv("PATTERN")
|
||||
if !ok {
|
||||
log.Fatal("Missing PATTERN environment variable")
|
||||
}
|
||||
// Log the regex pattern being used.
|
||||
log.Printf("Using PATTERN: %q\n", pattern)
|
||||
// Compile the regex pattern for later use.
|
||||
re = regexp.MustCompile(pattern)
|
||||
|
||||
// Read the MATCH_VALUE environment variable to determine whether to check the record's value.
|
||||
mk, ok := os.LookupEnv("MATCH_VALUE")
|
||||
checkValue = ok && isTrueVar(mk)
|
||||
log.Printf("MATCH_VALUE set to: %t\n", checkValue)
|
||||
|
||||
log.Println("Initialization complete, waiting for records...")
|
||||
|
||||
// Listen for records to be written, calling doRegexFilter() for each record.
|
||||
transform.OnRecordWritten(doRegexFilter)
|
||||
}
|
||||
|
||||
// The doRegexFilter() function executes each time a new record is written.
|
||||
// It checks whether the record's key or value (based on MATCH_VALUE) matches the compiled regex.
|
||||
// If it matches, the record is forwarded, if not, it's dropped.
|
||||
func doRegexFilter(e transform.WriteEvent, w transform.RecordWriter) error {
|
||||
// This stores the data to be checked (either the key or value).
|
||||
var dataToCheck []byte
|
||||
|
||||
// Depending on the MATCH_VALUE environment variable, decide whether to check the record's key or value.
|
||||
if checkValue {
|
||||
// Use the value of the record if MATCH_VALUE is true.
|
||||
dataToCheck = e.Record().Value
|
||||
log.Printf("Checking record value: %s\n", string(dataToCheck))
|
||||
} else {
|
||||
// Use the key of the record if MATCH_VALUE is false.
|
||||
dataToCheck = e.Record().Key
|
||||
log.Printf("Checking record key: %s\n", string(dataToCheck))
|
||||
}
|
||||
|
||||
// If there is no key or value to check, log and skip the record.
|
||||
if dataToCheck == nil {
|
||||
log.Println("Record has no key/value to check, skipping.")
|
||||
return nil
|
||||
}
|
||||
|
||||
// Check if the data matches the regex pattern.
|
||||
pass := re.Match(dataToCheck)
|
||||
if pass {
|
||||
// If the record matches the pattern, log and write the record to the output topic.
|
||||
log.Printf("Record matched pattern, passing through. Key: %s, Value: %s\n", string(e.Record().Key), string(e.Record().Value))
|
||||
return w.Write(e.Record())
|
||||
} else {
|
||||
// If the record does not match the pattern, log and drop the record.
|
||||
log.Printf("Record did not match pattern, dropping. Key: %s, Value: %s\n", string(e.Record().Key), string(e.Record().Value))
|
||||
// Do not write the record if it doesn't match the pattern.
|
||||
return nil
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,33 @@
|
||||
# Transform metadata used by the rpk transform build command.
|
||||
# This metadata file tells rpk:
|
||||
# 1) The transform’s display name, which also becomes the base for the .wasm file name.
|
||||
# 2) A brief description of what it does.
|
||||
# 3) Defaults for environment variables.
|
||||
# 4) Input and output topics (if you want to define them here rather than in the deploy command).
|
||||
|
||||
# Human-readable name of the transform. rpk transform build uses this for the generated .wasm file.
|
||||
name: regex
|
||||
|
||||
description: |
|
||||
Filters the input topic to records that only match a regular expression.
|
||||
|
||||
Regular expressions are implemented using Go's regexp library, which uses the syntax of RE2.
|
||||
See the RE2 wiki for allowed syntax: https://github.com/google/re2/wiki/Syntax
|
||||
|
||||
Environment variables:
|
||||
- PATTERN: The regular expression that will match against records (required).
|
||||
- MATCH_VALUE: By default, the regex matches keys, but if set to "true", the regex matches values.
|
||||
|
||||
# By default, no input topic is set here. (You can set it in your deploy command if preferred.)
|
||||
input-topic: ""
|
||||
|
||||
# By default, no output topic is set here. (You can set it in your deploy command if preferred.)
|
||||
output-topic: ""
|
||||
|
||||
# Indicates the specific TinyGo environment used to compile your transform.
|
||||
language: tinygo-no-goroutines
|
||||
|
||||
env:
|
||||
# The PATTERN variable must be provided at deploy time.
|
||||
# Example: --var=PATTERN=".*@example.com"
|
||||
PATTERN: '<required>'
|
||||
@@ -0,0 +1,24 @@
|
||||
# This file configures `rpk` to connect to a Redpanda cluster whose Kafka and Admin API endpoints are exposed on the local machine (127.0.0.1).
|
||||
|
||||
# Configuration for connecting to the Kafka API of the Redpanda cluster.
|
||||
kafka_api:
|
||||
# SASL (Simple Authentication and Security Layer) settings for authentication.
|
||||
sasl:
|
||||
user: superuser # The username used for authentication
|
||||
password: secretpassword # The password associated with the username
|
||||
mechanism: scram-sha-256 # Authentication mechanism; SCRAM-SHA-256 provides secure password-based authentication
|
||||
# List of Kafka brokers in the Redpanda cluster.
|
||||
# These brokers ensure high availability and fault tolerance for Kafka-based communication.
|
||||
brokers:
|
||||
- 127.0.0.1:19092 # Broker 1: Accessible on localhost, port 19092
|
||||
- 127.0.0.1:29092 # Broker 2: Accessible on localhost, port 29092
|
||||
- 127.0.0.1:39092 # Broker 3: Accessible on localhost, port 39092
|
||||
|
||||
# Configuration for connecting to the Redpanda Admin API.
|
||||
# The Admin API allows you to perform administrative tasks such as managing configurations, monitoring, and scaling.
|
||||
admin_api:
|
||||
# List of Admin API endpoints for managing the cluster.
|
||||
addresses:
|
||||
- 127.0.0.1:19644 # Admin API for Broker 1: Accessible on localhost, port 19644
|
||||
- 127.0.0.1:29644 # Admin API for Broker 2: Accessible on localhost, port 29644
|
||||
- 127.0.0.1:39644 # Admin API for Broker 3: Accessible on localhost, port 39644
|
||||
@@ -0,0 +1,37 @@
|
||||
{
|
||||
"$schema": "http://json-schema.org/draft-07/schema#",
|
||||
"title": "Transactions",
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"email": {
|
||||
"type": "string",
|
||||
"format": "email",
|
||||
"description": "The email address of the user involved in the transaction."
|
||||
},
|
||||
"index": {
|
||||
"type": "integer",
|
||||
"description": "A numeric index associated with the transaction."
|
||||
},
|
||||
"price": {
|
||||
"type": "string",
|
||||
"pattern": "^XXX \\d+\\.\\d{6}$",
|
||||
"description": "A string representing the price of the product, including a currency code followed by the amount."
|
||||
},
|
||||
"product_url": {
|
||||
"type": "string",
|
||||
"format": "uri",
|
||||
"description": "A URL that points to the product involved in the transaction."
|
||||
},
|
||||
"timestamp": {
|
||||
"type": "string",
|
||||
"format": "date-time",
|
||||
"description": "The timestamp of when the transaction occurred, formatted in ISO 8601."
|
||||
},
|
||||
"user_id": {
|
||||
"type": "integer",
|
||||
"description": "A numeric identifier for the user."
|
||||
}
|
||||
},
|
||||
"required": ["email", "index", "price", "product_url", "timestamp", "user_id"],
|
||||
"additionalProperties": false
|
||||
}
|
||||
@@ -0,0 +1,46 @@
|
||||
# Transactions Topic Documentation
|
||||
|
||||
This document provides an overview of the `transactions` topic in the Redpanda cluster. The topic is designed to capture autogenerated transaction events with various attributes.
|
||||
|
||||
## Schema Overview
|
||||
|
||||
Each message in the `transactions` topic is a JSON object with the following fields and types:
|
||||
|
||||
```json
|
||||
{
|
||||
"email": "string",
|
||||
"index": "integer",
|
||||
"price": "string",
|
||||
"product_url": "string",
|
||||
"timestamp": "string",
|
||||
"user_id": "integer"
|
||||
}
|
||||
```
|
||||
|
||||
- **email**: The email address of the user involved in the transaction.
|
||||
- **index**: A numeric index associated with the transaction. This could represent the position or order of the transaction in a sequence.
|
||||
- **price**: A string representing the price of the product. It includes a currency code (e.g., "XXX") followed by the amount.
|
||||
- **product_url**: A URL that points to the product involved in the transaction.
|
||||
- **timestamp**: The timestamp of when the transaction occurred, formatted in ISO 8601.
|
||||
- **user_id**: A numeric identifier for the user. This is typically a unique ID assigned to each user in the system.
|
||||
|
||||
## Example message
|
||||
|
||||
```json
|
||||
{
|
||||
"email": "wzieme@ykczius.edu",
|
||||
"index": 0,
|
||||
"price": "XXX 5651308.100000",
|
||||
"product_url": "http://yjomdta.top/DxvGsCn.php",
|
||||
"timestamp": "2024-08-16T15:51:19.799474084Z",
|
||||
"user_id": 1
|
||||
}
|
||||
```
|
||||
|
||||
## Use cases
|
||||
|
||||
You can use the `transactions` topic for various purposes, including:
|
||||
|
||||
- **Analytics**: Tracking and analyzing user transactions to understand buying behavior, popular products, etc.
|
||||
- **Monitoring**: Observing transaction patterns to detect anomalies, such as unusual spikes or drops in transaction volume.
|
||||
- **Data Processing**: Feeding transaction data into other systems, such as a data warehouse or real-time processing pipelines, for further processing and analysis.
|
||||
@@ -0,0 +1,73 @@
|
||||
= Modify the Wasm Transform in the Quickstart
|
||||
|
||||
This directory contains the Go source code (`transform.go`) for the data transform that is used in the Redpanda Self-Managed quickstart.
|
||||
If you're following the quickstart, you *do not* need to modify or rebuild this code. The Docker Compose configuration automatically deploys a pre-built transform called `regex.wasm`.
|
||||
|
||||
However, if you want to customize the data transform logic, continue reading.
|
||||
|
||||
== Why customize the transform?
|
||||
|
||||
- **Custom filtering**: Filter by a different regex or apply multiple conditions.
|
||||
- **Data manipulation**: Transform records before writing them out. For example, redacting sensitive data or combining fields.
|
||||
- **Extended functionality**: Add advanced logging, error handling, or multi-topic routing.
|
||||
|
||||
== Prerequisites
|
||||
|
||||
You need the following:
|
||||
|
||||
- At least Go 1.20 installed.
|
||||
+
|
||||
[source,bash]
|
||||
----
|
||||
go version
|
||||
----
|
||||
|
||||
- The Redpanda CLI (`rpk`) installed.
|
||||
|
||||
- A running Redpanda cluster. If you're using the local quickstart with Docker Compose, ensure the cluster is up and running. Or, point `rpk` to another Redpanda environment.
|
||||
|
||||
== Modify and deploy your transform
|
||||
|
||||
. Open link:transform.go[transform.go] and make your changes. For example:
|
||||
+
|
||||
--
|
||||
- Change the regex logic to handle different use cases.
|
||||
- Add environment variables to control new features.
|
||||
- Extend the `doRegexFilter()` function to manipulate records.
|
||||
--
|
||||
|
||||
. Compile your Go code into a `.wasm` file:
|
||||
+
|
||||
[source,bash]
|
||||
----
|
||||
rpk transform build
|
||||
----
|
||||
+
|
||||
This command compiles your Go source and produces a `.wasm` file that you can deploy to Redpanda.
|
||||
|
||||
. Deploy the new transform.
|
||||
+
|
||||
If your Docker Compose setup already has a service to deploy the transform, you can restart that service.
|
||||
+
|
||||
Otherwise, you can deploy your updated `.wasm` manually using `rpk transform deploy`.
|
||||
|
||||
. Produce messages into the input topic. For example:
|
||||
+
|
||||
[source,bash]
|
||||
----
|
||||
echo '{"key":"alice@university.edu","value":"test message"}' | rpk topic produce logins
|
||||
----
|
||||
|
||||
. Consume from the output topic. For example:
|
||||
+
|
||||
[source,bash]
|
||||
----
|
||||
rpk topic consume edu-filtered-domains --num 1
|
||||
----
|
||||
|
||||
== Suggested reading
|
||||
|
||||
- link:https://docs.redpanda.com/current/reference/rpk/[Redpanda `rpk` CLI Reference^].
|
||||
- link:https://docs.redpanda.com/current/develop/data-transforms/build/[Develop Data Transforms^].
|
||||
- https://golang.org/ref/mod[Go Modules^] for managing dependencies and builds in Go.
|
||||
- https://docs.docker.com/compose/[Docker Compose^] for customizing your environment.
|
||||
@@ -0,0 +1,5 @@
|
||||
module regex
|
||||
|
||||
go 1.20
|
||||
|
||||
require github.com/redpanda-data/redpanda/src/transform-sdk/go/transform v1.1.0
|
||||
@@ -0,0 +1,2 @@
|
||||
github.com/redpanda-data/redpanda/src/transform-sdk/go/transform v1.1.0 h1:KxgHJZsHsrT3YX7DMpu/vJN4TZN3KFm1jzrCFLyOepA=
|
||||
github.com/redpanda-data/redpanda/src/transform-sdk/go/transform v1.1.0/go.mod h1:QGgiwwf/BIsD1b7EiyQ/Apzw+RLSpasRDdpOCiefQFQ=
|
||||
Binary file not shown.
@@ -0,0 +1,122 @@
|
||||
package main
|
||||
// This data transform filters records based on a customizable regex pattern.
|
||||
// If a record's key or value
|
||||
// (determined by an environment variable) matches the specified regex,
|
||||
// the record is forwarded to the output.
|
||||
// Otherwise, it is dropped.
|
||||
//
|
||||
// Usage:
|
||||
// 1. Provide the following environment variables in your Docker or configuration setup:
|
||||
// - PATTERN : (required) a regular expression that determines what you want to match.
|
||||
// - MATCH_VALUE : (optional) a boolean to decide whether to check the record value. If false,
|
||||
// the record key is checked. Default is false.
|
||||
//
|
||||
// Example environment variables:
|
||||
// PATTERN=".*\\.edu$"
|
||||
// MATCH_VALUE="true"
|
||||
//
|
||||
// Logs:
|
||||
// This transform logs information about each record and whether it matched.
|
||||
// The logs appear in the _redpanda.transform_logs topic, so you can debug how your records are being processed.
|
||||
//
|
||||
// Build instructions:
|
||||
// go mod tidy
|
||||
// rpk transform build
|
||||
//
|
||||
// For more details on building transforms with the Redpanda SDK, see:
|
||||
// https://docs.redpanda.com/current/develop/data-transforms
|
||||
//
|
||||
|
||||
import (
|
||||
"log"
|
||||
"os"
|
||||
"regexp"
|
||||
"strings"
|
||||
|
||||
"github.com/redpanda-data/redpanda/src/transform-sdk/go/transform"
|
||||
)
|
||||
|
||||
var (
|
||||
re *regexp.Regexp
|
||||
checkValue bool
|
||||
)
|
||||
|
||||
// isTrueVar reports whether v is one of the accepted "enabled" spellings:
// "yes", "ok", "1", or "true". The comparison ignores letter case; any other
// input, including the empty string, yields false.
func isTrueVar(v string) bool {
	lowered := strings.ToLower(v)
	for _, accepted := range [...]string{"yes", "ok", "1", "true"} {
		if lowered == accepted {
			return true
		}
	}
	return false
}
|
||||
|
||||
// The main() function runs only once at startup. It performs all initialization steps:
|
||||
// - Reads and compiles the regex pattern.
|
||||
// - Determines whether to match on the key or value.
|
||||
// - Registers the doRegexFilter() function to process records.
|
||||
func main() {
|
||||
// Set logging preferences, including timestamp and UTC time.
|
||||
log.SetPrefix("[regex-transform] ")
|
||||
log.SetFlags(log.Ldate | log.Ltime | log.LUTC | log.Lmicroseconds)
|
||||
|
||||
// Start logging the transformation process
|
||||
log.Println("Starting transform...")
|
||||
|
||||
// Read the PATTERN environment variable to get the regex pattern.
|
||||
pattern, ok := os.LookupEnv("PATTERN")
|
||||
if !ok {
|
||||
log.Fatal("Missing PATTERN environment variable")
|
||||
}
|
||||
// Log the regex pattern being used.
|
||||
log.Printf("Using PATTERN: %q\n", pattern)
|
||||
// Compile the regex pattern for later use.
|
||||
re = regexp.MustCompile(pattern)
|
||||
|
||||
// Read the MATCH_VALUE environment variable to determine whether to check the record's value.
|
||||
mk, ok := os.LookupEnv("MATCH_VALUE")
|
||||
checkValue = ok && isTrueVar(mk)
|
||||
log.Printf("MATCH_VALUE set to: %t\n", checkValue)
|
||||
|
||||
log.Println("Initialization complete, waiting for records...")
|
||||
|
||||
// Listen for records to be written, calling doRegexFilter() for each record.
|
||||
transform.OnRecordWritten(doRegexFilter)
|
||||
}
|
||||
|
||||
// The doRegexFilter() function executes each time a new record is written.
|
||||
// It checks whether the record's key or value (based on MATCH_VALUE) matches the compiled regex.
|
||||
// If it matches, the record is forwarded, if not, it's dropped.
|
||||
func doRegexFilter(e transform.WriteEvent, w transform.RecordWriter) error {
|
||||
// This stores the data to be checked (either the key or value).
|
||||
var dataToCheck []byte
|
||||
|
||||
// Depending on the MATCH_VALUE environment variable, decide whether to check the record's key or value.
|
||||
if checkValue {
|
||||
// Use the value of the record if MATCH_VALUE is true.
|
||||
dataToCheck = e.Record().Value
|
||||
log.Printf("Checking record value: %s\n", string(dataToCheck))
|
||||
} else {
|
||||
// Use the key of the record if MATCH_VALUE is false.
|
||||
dataToCheck = e.Record().Key
|
||||
log.Printf("Checking record key: %s\n", string(dataToCheck))
|
||||
}
|
||||
|
||||
// If there is no key or value to check, log and skip the record.
|
||||
if dataToCheck == nil {
|
||||
log.Println("Record has no key/value to check, skipping.")
|
||||
return nil
|
||||
}
|
||||
|
||||
// Check if the data matches the regex pattern.
|
||||
pass := re.Match(dataToCheck)
|
||||
if pass {
|
||||
// If the record matches the pattern, log and write the record to the output topic.
|
||||
log.Printf("Record matched pattern, passing through. Key: %s, Value: %s\n", string(e.Record().Key), string(e.Record().Value))
|
||||
return w.Write(e.Record())
|
||||
} else {
|
||||
// If the record does not match the pattern, log and drop the record.
|
||||
log.Printf("Record did not match pattern, dropping. Key: %s, Value: %s\n", string(e.Record().Key), string(e.Record().Value))
|
||||
// Do not write the record if it doesn't match the pattern.
|
||||
return nil
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,33 @@
|
||||
# Transform metadata used by the rpk transform build command.
|
||||
# This metadata file tells rpk:
|
||||
# 1) The transform’s display name, which also becomes the base for the .wasm file name.
|
||||
# 2) A brief description of what it does.
|
||||
# 3) Defaults for environment variables.
|
||||
# 4) Input and output topics (if you want to define them here rather than in the deploy command).
|
||||
|
||||
# Human-readable name of the transform. rpk transform build uses this for the generated .wasm file.
|
||||
name: regex
|
||||
|
||||
description: |
|
||||
Filters the input topic to records that only match a regular expression.
|
||||
|
||||
Regular expressions are implemented using Go's regexp library, which uses the syntax of RE2.
|
||||
See the RE2 wiki for allowed syntax: https://github.com/google/re2/wiki/Syntax
|
||||
|
||||
Environment variables:
|
||||
- PATTERN: The regular expression that will match against records (required).
|
||||
- MATCH_VALUE: By default, the regex matches keys, but if set to "true", the regex matches values.
|
||||
|
||||
# By default, no input topic is set here. (You can set it in your deploy command if preferred.)
|
||||
input-topic: ""
|
||||
|
||||
# By default, no output topic is set here. (You can set it in your deploy command if preferred.)
|
||||
output-topic: ""
|
||||
|
||||
# Indicates the specific TinyGo environment used to compile your transform.
|
||||
language: tinygo-no-goroutines
|
||||
|
||||
env:
|
||||
# The PATTERN variable must be provided at deploy time.
|
||||
# Example: --var=PATTERN=".*@example.com"
|
||||
PATTERN: '<required>'
|
||||
@@ -0,0 +1,24 @@
|
||||
# This file configures `rpk` to connect to a Redpanda cluster whose Kafka and Admin API endpoints are exposed on the local machine (127.0.0.1).
|
||||
|
||||
# Configuration for connecting to the Kafka API of the Redpanda cluster.
|
||||
kafka_api:
|
||||
# SASL (Simple Authentication and Security Layer) settings for authentication.
|
||||
sasl:
|
||||
user: superuser # The username used for authentication
|
||||
password: secretpassword # The password associated with the username
|
||||
mechanism: scram-sha-256 # Authentication mechanism; SCRAM-SHA-256 provides secure password-based authentication
|
||||
# List of Kafka brokers in the Redpanda cluster.
|
||||
# These brokers ensure high availability and fault tolerance for Kafka-based communication.
|
||||
brokers:
|
||||
- 127.0.0.1:19092 # Broker 1: Accessible on localhost, port 19092
|
||||
- 127.0.0.1:29092 # Broker 2: Accessible on localhost, port 29092
|
||||
- 127.0.0.1:39092 # Broker 3: Accessible on localhost, port 39092
|
||||
|
||||
# Configuration for connecting to the Redpanda Admin API.
|
||||
# The Admin API allows you to perform administrative tasks such as managing configurations, monitoring, and scaling.
|
||||
admin_api:
|
||||
# List of Admin API endpoints for managing the cluster.
|
||||
addresses:
|
||||
- 127.0.0.1:19644 # Admin API for Broker 1: Accessible on localhost, port 19644
|
||||
- 127.0.0.1:29644 # Admin API for Broker 2: Accessible on localhost, port 29644
|
||||
- 127.0.0.1:39644 # Admin API for Broker 3: Accessible on localhost, port 39644
|
||||
@@ -0,0 +1,37 @@
|
||||
{
|
||||
"$schema": "http://json-schema.org/draft-07/schema#",
|
||||
"title": "Transactions",
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"email": {
|
||||
"type": "string",
|
||||
"format": "email",
|
||||
"description": "The email address of the user involved in the transaction."
|
||||
},
|
||||
"index": {
|
||||
"type": "integer",
|
||||
"description": "A numeric index associated with the transaction."
|
||||
},
|
||||
"price": {
|
||||
"type": "string",
|
||||
"pattern": "^XXX \\d+\\.\\d{6}$",
|
||||
"description": "A string representing the price of the product, including a currency code followed by the amount."
|
||||
},
|
||||
"product_url": {
|
||||
"type": "string",
|
||||
"format": "uri",
|
||||
"description": "A URL that points to the product involved in the transaction."
|
||||
},
|
||||
"timestamp": {
|
||||
"type": "string",
|
||||
"format": "date-time",
|
||||
"description": "The timestamp of when the transaction occurred, formatted in ISO 8601."
|
||||
},
|
||||
"user_id": {
|
||||
"type": "integer",
|
||||
"description": "A numeric identifier for the user."
|
||||
}
|
||||
},
|
||||
"required": ["email", "index", "price", "product_url", "timestamp", "user_id"],
|
||||
"additionalProperties": false
|
||||
}
|
||||
@@ -0,0 +1,46 @@
|
||||
# Transactions Topic Documentation
|
||||
|
||||
This document provides an overview of the `transactions` topic in the Redpanda cluster. The topic is designed to capture autogenerated transaction events with various attributes.
|
||||
|
||||
## Schema Overview
|
||||
|
||||
Each message in the `transactions` topic is a JSON object with the following fields and types:
|
||||
|
||||
```json
|
||||
{
|
||||
"email": "string",
|
||||
"index": "integer",
|
||||
"price": "string",
|
||||
"product_url": "string",
|
||||
"timestamp": "string",
|
||||
"user_id": "integer"
|
||||
}
|
||||
```
|
||||
|
||||
- **email**: The email address of the user involved in the transaction.
|
||||
- **index**: A numeric index associated with the transaction. This could represent the position or order of the transaction in a sequence.
|
||||
- **price**: A string representing the price of the product. It includes a currency code (e.g., "XXX") followed by the amount.
|
||||
- **product_url**: A URL that points to the product involved in the transaction.
|
||||
- **timestamp**: The timestamp of when the transaction occurred, formatted in ISO 8601.
|
||||
- **user_id**: A numeric identifier for the user. This is typically a unique ID assigned to each user in the system.
|
||||
|
||||
## Example message
|
||||
|
||||
```json
|
||||
{
|
||||
"email": "wzieme@ykczius.edu",
|
||||
"index": 0,
|
||||
"price": "XXX 5651308.100000",
|
||||
"product_url": "http://yjomdta.top/DxvGsCn.php",
|
||||
"timestamp": "2024-08-16T15:51:19.799474084Z",
|
||||
"user_id": 1
|
||||
}
|
||||
```
|
||||
|
||||
## Use cases
|
||||
|
||||
You can use the `transactions` topic for various purposes, including:
|
||||
|
||||
- **Analytics**: Tracking and analyzing user transactions to understand buying behavior, popular products, etc.
|
||||
- **Monitoring**: Observing transaction patterns to detect anomalies, such as unusual spikes or drops in transaction volume.
|
||||
- **Data Processing**: Feeding transaction data into other systems, such as a data warehouse or real-time processing pipelines, for further processing and analysis.
|
||||
@@ -0,0 +1,73 @@
|
||||
= Modify the Wasm Transform in the Quickstart
|
||||
|
||||
This directory contains the Go source code (`transform.go`) for the data transform that is used in the Redpanda Self-Managed quickstart.
|
||||
If you're following the quickstart, you *do not* need to modify or rebuild this code. The Docker Compose configuration automatically deploys a pre-built transform called `regex.wasm`.
|
||||
|
||||
However, if you want to customize the data transform logic, continue reading.
|
||||
|
||||
== Why customize the transform?
|
||||
|
||||
- **Custom filtering**: Filter by a different regex or apply multiple conditions.
|
||||
- **Data manipulation**: Transform records before writing them out. For example, redacting sensitive data or combining fields.
|
||||
- **Extended functionality**: Add advanced logging, error handling, or multi-topic routing.
|
||||
|
||||
== Prerequisites
|
||||
|
||||
You need the following:
|
||||
|
||||
- At least Go 1.20 installed.
|
||||
+
|
||||
[source,bash]
|
||||
----
|
||||
go version
|
||||
----
|
||||
|
||||
- The Redpanda CLI (`rpk`) installed.
|
||||
|
||||
- A running Redpanda cluster. If you're using the local quickstart with Docker Compose, ensure the cluster is up and running. Or, point `rpk` to another Redpanda environment.
|
||||
|
||||
== Modify and deploy your transform
|
||||
|
||||
. Open link:transform.go[transform.go] and make your changes. For example:
|
||||
+
|
||||
--
|
||||
- Change the regex logic to handle different use cases.
|
||||
- Add environment variables to control new features.
|
||||
- Extend the `doRegexFilter()` function to manipulate records.
|
||||
--
|
||||
|
||||
. Compile your Go code into a `.wasm` file:
|
||||
+
|
||||
[source,bash]
|
||||
----
|
||||
rpk transform build
|
||||
----
|
||||
+
|
||||
This command compiles your Go source and produces a `.wasm` file that you can deploy to Redpanda.
|
||||
|
||||
. Deploy the new transform.
|
||||
+
|
||||
If your Docker Compose setup already has a service to deploy the transform, you can restart that service.
|
||||
+
|
||||
Otherwise, you can deploy your updated `.wasm` manually using `rpk transform deploy`.
|
||||
|
||||
. Produce messages into the input topic. For example:
|
||||
+
|
||||
[source,bash]
|
||||
----
|
||||
echo '{"key":"alice@university.edu","value":"test message"}' | rpk topic produce logins
|
||||
----
|
||||
|
||||
. Consume from the output topic. For example:
|
||||
+
|
||||
[source,bash]
|
||||
----
|
||||
rpk topic consume edu-filtered-domains --num 1
|
||||
----
|
||||
|
||||
== Suggested reading
|
||||
|
||||
- link:https://docs.redpanda.com/current/reference/rpk/[Redpanda `rpk` CLI Reference^].
|
||||
- link:https://docs.redpanda.com/current/develop/data-transforms/build/[Develop Data Transforms^].
|
||||
- https://golang.org/ref/mod[Go Modules^] for managing dependencies and builds in Go.
|
||||
- https://docs.docker.com/compose/[Docker Compose^] for customizing your environment.
|
||||
@@ -0,0 +1,5 @@
|
||||
module regex
|
||||
|
||||
go 1.20
|
||||
|
||||
require github.com/redpanda-data/redpanda/src/transform-sdk/go/transform v1.1.0
|
||||
@@ -0,0 +1,2 @@
|
||||
github.com/redpanda-data/redpanda/src/transform-sdk/go/transform v1.1.0 h1:KxgHJZsHsrT3YX7DMpu/vJN4TZN3KFm1jzrCFLyOepA=
|
||||
github.com/redpanda-data/redpanda/src/transform-sdk/go/transform v1.1.0/go.mod h1:QGgiwwf/BIsD1b7EiyQ/Apzw+RLSpasRDdpOCiefQFQ=
|
||||
Binary file not shown.
@@ -0,0 +1,122 @@
|
||||
package main
|
||||
// This data transform filters records based on a customizable regex pattern.
|
||||
// If a record's key or value
|
||||
// (determined by an environment variable) matches the specified regex,
|
||||
// the record is forwarded to the output.
|
||||
// Otherwise, it is dropped.
|
||||
//
|
||||
// Usage:
|
||||
// 1. Provide the following environment variables in your Docker or configuration setup:
|
||||
// - PATTERN : (required) a regular expression that determines what you want to match.
|
||||
// - MATCH_VALUE : (optional) a boolean to decide whether to check the record value. If false,
|
||||
// the record key is checked. Default is false.
|
||||
//
|
||||
// Example environment variables:
|
||||
// PATTERN=".*\\.edu$"
|
||||
// MATCH_VALUE="true"
|
||||
//
|
||||
// Logs:
|
||||
// This transform logs information about each record and whether it matched.
|
||||
// The logs appear in the _redpanda.transform_logs topic, so you can debug how your records are being processed.
|
||||
//
|
||||
// Build instructions:
|
||||
// go mod tidy
|
||||
// rpk transform build
|
||||
//
|
||||
// For more details on building transforms with the Redpanda SDK, see:
|
||||
// https://docs.redpanda.com/current/develop/data-transforms
|
||||
//
|
||||
|
||||
import (
|
||||
"log"
|
||||
"os"
|
||||
"regexp"
|
||||
"strings"
|
||||
|
||||
"github.com/redpanda-data/redpanda/src/transform-sdk/go/transform"
|
||||
)
|
||||
|
||||
var (
|
||||
re *regexp.Regexp
|
||||
checkValue bool
|
||||
)
|
||||
|
||||
// isTrueVar reports whether v is one of the accepted "truthy" spellings:
// "yes", "ok", "1", or "true". The comparison ignores letter case and any
// leading or trailing whitespace, so a value such as " TRUE\n" is accepted.
// Every other input, including the empty string, yields false.
func isTrueVar(v string) bool {
	// Trim first: environment values copied from files or shells often
	// carry stray spaces or a trailing newline.
	switch strings.ToLower(strings.TrimSpace(v)) {
	case "yes", "ok", "1", "true":
		return true
	default:
		return false
	}
}
|
||||
|
||||
// The main() function runs only once at startup. It performs all initialization steps:
|
||||
// - Reads and compiles the regex pattern.
|
||||
// - Determines whether to match on the key or value.
|
||||
// - Registers the doRegexFilter() function to process records.
|
||||
func main() {
|
||||
// Set logging preferences, including timestamp and UTC time.
|
||||
log.SetPrefix("[regex-transform] ")
|
||||
log.SetFlags(log.Ldate | log.Ltime | log.LUTC | log.Lmicroseconds)
|
||||
|
||||
// Start logging the transformation process
|
||||
log.Println("Starting transform...")
|
||||
|
||||
// Read the PATTERN environment variable to get the regex pattern.
|
||||
pattern, ok := os.LookupEnv("PATTERN")
|
||||
if !ok {
|
||||
log.Fatal("Missing PATTERN environment variable")
|
||||
}
|
||||
// Log the regex pattern being used.
|
||||
log.Printf("Using PATTERN: %q\n", pattern)
|
||||
// Compile the regex pattern for later use.
|
||||
re = regexp.MustCompile(pattern)
|
||||
|
||||
// Read the MATCH_VALUE environment variable to determine whether to check the record's value.
|
||||
mk, ok := os.LookupEnv("MATCH_VALUE")
|
||||
checkValue = ok && isTrueVar(mk)
|
||||
log.Printf("MATCH_VALUE set to: %t\n", checkValue)
|
||||
|
||||
log.Println("Initialization complete, waiting for records...")
|
||||
|
||||
// Listen for records to be written, calling doRegexFilter() for each record.
|
||||
transform.OnRecordWritten(doRegexFilter)
|
||||
}
|
||||
|
||||
// The doRegexFilter() function executes each time a new record is written.
|
||||
// It checks whether the record's key or value (based on MATCH_VALUE) matches the compiled regex.
|
||||
// If it matches, the record is forwarded, if not, it's dropped.
|
||||
func doRegexFilter(e transform.WriteEvent, w transform.RecordWriter) error {
|
||||
// This stores the data to be checked (either the key or value).
|
||||
var dataToCheck []byte
|
||||
|
||||
// Depending on the MATCH_VALUE environment variable, decide whether to check the record's key or value.
|
||||
if checkValue {
|
||||
// Use the value of the record if MATCH_VALUE is true.
|
||||
dataToCheck = e.Record().Value
|
||||
log.Printf("Checking record value: %s\n", string(dataToCheck))
|
||||
} else {
|
||||
// Use the key of the record if MATCH_VALUE is false.
|
||||
dataToCheck = e.Record().Key
|
||||
log.Printf("Checking record key: %s\n", string(dataToCheck))
|
||||
}
|
||||
|
||||
// If there is no key or value to check, log and skip the record.
|
||||
if dataToCheck == nil {
|
||||
log.Println("Record has no key/value to check, skipping.")
|
||||
return nil
|
||||
}
|
||||
|
||||
// Check if the data matches the regex pattern.
|
||||
pass := re.Match(dataToCheck)
|
||||
if pass {
|
||||
// If the record matches the pattern, log and write the record to the output topic.
|
||||
log.Printf("Record matched pattern, passing through. Key: %s, Value: %s\n", string(e.Record().Key), string(e.Record().Value))
|
||||
return w.Write(e.Record())
|
||||
} else {
|
||||
// If the record does not match the pattern, log and drop the record.
|
||||
log.Printf("Record did not match pattern, dropping. Key: %s, Value: %s\n", string(e.Record().Key), string(e.Record().Value))
|
||||
// Do not write the record if it doesn't match the pattern.
|
||||
return nil
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,33 @@
|
||||
# Transform metadata used by the rpk transform build command.
|
||||
# This metadata file tells rpk:
|
||||
# 1) The transform’s display name, which also becomes the base for the .wasm file name.
|
||||
# 2) A brief description of what it does.
|
||||
# 3) Defaults for environment variables.
|
||||
# 4) Input and output topics (if you want to define them here rather than in the deploy command).
|
||||
|
||||
# Human-readable name of the transform. rpk transform build uses this for the generated .wasm file.
|
||||
name: regex
|
||||
|
||||
description: |
|
||||
Filters the input topic to records that only match a regular expression.
|
||||
|
||||
Regular expressions are implemented using Go's regexp library, which uses the syntax of RE2.
|
||||
See the RE2 wiki for allowed syntax: https://github.com/google/re2/wiki/Syntax
|
||||
|
||||
Environment variables:
|
||||
- PATTERN: The regular expression that will match against records (required).
|
||||
- MATCH_VALUE: By default, the regex matches keys, but if set to "true", the regex matches values.
|
||||
|
||||
# By default, no input topic is set here. (You can set it in your deploy command if preferred.)
|
||||
input-topic: ""
|
||||
|
||||
# By default, no output topic is set here. (You can set it in your deploy command if preferred.)
|
||||
output-topic: ""
|
||||
|
||||
# Indicates the specific TinyGo environment used to compile your transform.
|
||||
language: tinygo-no-goroutines
|
||||
|
||||
env:
|
||||
# The PATTERN variable must be provided at deploy time.
|
||||
# Example: --var=PATTERN=".*@example.com"
|
||||
PATTERN: '<required>'
|
||||
@@ -0,0 +1,24 @@
|
||||
# This file configures `rpk` to connect to a remote Redpanda cluster running in the same local network as `rpk`.
|
||||
|
||||
# Configuration for connecting to the Kafka API of the Redpanda cluster.
|
||||
kafka_api:
|
||||
# SASL (Simple Authentication and Security Layer) settings for authentication.
|
||||
sasl:
|
||||
user: superuser # The username used for authentication
|
||||
password: secretpassword # The password associated with the username
|
||||
mechanism: scram-sha-256 # Authentication mechanism; SCRAM-SHA-256 provides secure password-based authentication
|
||||
# List of Kafka brokers in the Redpanda cluster.
|
||||
# These brokers ensure high availability and fault tolerance for Kafka-based communication.
|
||||
brokers:
|
||||
- 127.0.0.1:19092 # Broker 1: Accessible on localhost, port 19092
|
||||
- 127.0.0.1:29092 # Broker 2: Accessible on localhost, port 29092
|
||||
- 127.0.0.1:39092 # Broker 3: Accessible on localhost, port 39092
|
||||
|
||||
# Configuration for connecting to the Redpanda Admin API.
|
||||
# The Admin API allows you to perform administrative tasks such as managing configurations, monitoring, and scaling.
|
||||
admin_api:
|
||||
# List of Admin API endpoints for managing the cluster.
|
||||
addresses:
|
||||
- 127.0.0.1:19644 # Admin API for Broker 1: Accessible on localhost, port 19644
|
||||
- 127.0.0.1:29644 # Admin API for Broker 2: Accessible on localhost, port 29644
|
||||
- 127.0.0.1:39644 # Admin API for Broker 3: Accessible on localhost, port 39644
|
||||
@@ -0,0 +1,37 @@
|
||||
{
|
||||
"$schema": "http://json-schema.org/draft-07/schema#",
|
||||
"title": "Transactions",
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"email": {
|
||||
"type": "string",
|
||||
"format": "email",
|
||||
"description": "The email address of the user involved in the transaction."
|
||||
},
|
||||
"index": {
|
||||
"type": "integer",
|
||||
"description": "A numeric index associated with the transaction."
|
||||
},
|
||||
"price": {
|
||||
"type": "string",
|
||||
"pattern": "^XXX \\d+\\.\\d{6}$",
|
||||
"description": "A string representing the price of the product, including a currency code followed by the amount."
|
||||
},
|
||||
"product_url": {
|
||||
"type": "string",
|
||||
"format": "uri",
|
||||
"description": "A URL that points to the product involved in the transaction."
|
||||
},
|
||||
"timestamp": {
|
||||
"type": "string",
|
||||
"format": "date-time",
|
||||
"description": "The timestamp of when the transaction occurred, formatted in ISO 8601."
|
||||
},
|
||||
"user_id": {
|
||||
"type": "integer",
|
||||
"description": "A numeric identifier for the user."
|
||||
}
|
||||
},
|
||||
"required": ["email", "index", "price", "product_url", "timestamp", "user_id"],
|
||||
"additionalProperties": false
|
||||
}
|
||||
@@ -0,0 +1,46 @@
|
||||
# Transactions Topic Documentation
|
||||
|
||||
This document provides an overview of the `transactions` topic in the Redpanda cluster. The topic is designed to capture autogenerated transaction events with various attributes.
|
||||
|
||||
## Schema Overview
|
||||
|
||||
Each message in the `transactions` topic is a JSON object with the following fields and types:
|
||||
|
||||
```json
|
||||
{
|
||||
"email": "string",
|
||||
"index": "integer",
|
||||
"price": "string",
|
||||
"product_url": "string",
|
||||
"timestamp": "string",
|
||||
"user_id": "integer"
|
||||
}
|
||||
```
|
||||
|
||||
- **email**: The email address of the user involved in the transaction.
|
||||
- **index**: A numeric index associated with the transaction. This could represent the position or order of the transaction in a sequence.
|
||||
- **price**: A string representing the price of the product. It includes a currency code (e.g., "XXX") followed by the amount.
|
||||
- **product_url**: A URL that points to the product involved in the transaction.
|
||||
- **timestamp**: The timestamp of when the transaction occurred, formatted in ISO 8601.
|
||||
- **user_id**: A numeric identifier for the user. This is typically a unique ID assigned to each user in the system.
|
||||
|
||||
## Example message
|
||||
|
||||
```json
|
||||
{
|
||||
"email": "wzieme@ykczius.edu",
|
||||
"index": 0,
|
||||
"price": "XXX 5651308.100000",
|
||||
"product_url": "http://yjomdta.top/DxvGsCn.php",
|
||||
"timestamp": "2024-08-16T15:51:19.799474084Z",
|
||||
"user_id": 1
|
||||
}
|
||||
```
|
||||
|
||||
## Use cases
|
||||
|
||||
You can use the `transactions` topic for various purposes, including:
|
||||
|
||||
- **Analytics**: Tracking and analyzing user transactions to understand buying behavior, popular products, etc.
|
||||
- **Monitoring**: Observing transaction patterns to detect anomalies, such as unusual spikes or drops in transaction volume.
|
||||
- **Data Processing**: Feeding transaction data into other systems, such as a data warehouse or real-time processing pipelines, for further processing and analysis.
|
||||
@@ -0,0 +1,73 @@
|
||||
= Modify the Wasm Transform in the Quickstart
|
||||
|
||||
This directory contains the Go source code (`transform.go`) for the data transform that is used in the Redpanda Self-Managed quickstart.
|
||||
If you're following the quickstart, you *do not* need to modify or rebuild this code. The Docker Compose configuration automatically deploys a pre-built transform called `regex.wasm`.
|
||||
|
||||
However, if you want to customize the data transform logic, continue reading.
|
||||
|
||||
== Why customize the transform?
|
||||
|
||||
- **Custom filtering**: Filter by a different regex or apply multiple conditions.
|
||||
- **Data manipulation**: Transform records before writing them out. For example, redacting sensitive data or combining fields.
|
||||
- **Extended functionality**: Add advanced logging, error handling, or multi-topic routing.
|
||||
|
||||
== Prerequisites
|
||||
|
||||
You need the following:
|
||||
|
||||
- At least Go 1.20 installed.
|
||||
+
|
||||
[source,bash]
|
||||
----
|
||||
go version
|
||||
----
|
||||
|
||||
- The Redpanda CLI (`rpk`) installed.
|
||||
|
||||
- A running Redpanda cluster. If you're using the local quickstart with Docker Compose, ensure the cluster is up and running. Or, point `rpk` to another Redpanda environment.
|
||||
|
||||
== Modify and deploy your transform
|
||||
|
||||
. Open link:transform.go[transform.go] and make your changes. For example:
|
||||
+
|
||||
--
|
||||
- Change the regex logic to handle different use cases.
|
||||
- Add environment variables to control new features.
|
||||
- Extend the `doRegexFilter()` function to manipulate records.
|
||||
--
|
||||
|
||||
. Compile your Go code into a `.wasm` file:
|
||||
+
|
||||
[source,bash]
|
||||
----
|
||||
rpk transform build
|
||||
----
|
||||
+
|
||||
This command compiles your Go source and produces a `.wasm` file that you can deploy to Redpanda.
|
||||
|
||||
. Deploy the new transform.
|
||||
+
|
||||
If your Docker Compose setup already has a service to deploy the transform, you can restart that service.
|
||||
+
|
||||
Otherwise, you can deploy your updated `.wasm` manually using `rpk transform deploy`.
|
||||
|
||||
. Produce messages into the input topic. For example:
|
||||
+
|
||||
[source,bash]
|
||||
----
|
||||
echo '{"key":"alice@university.edu","value":"test message"}' | rpk topic produce logins
|
||||
----
|
||||
|
||||
. Consume from the output topic. For example:
|
||||
+
|
||||
[source,bash]
|
||||
----
|
||||
rpk topic consume edu-filtered-domains --num 1
|
||||
----
|
||||
|
||||
== Suggested reading
|
||||
|
||||
- link:https://docs.redpanda.com/current/reference/rpk/[Redpanda `rpk` CLI Reference^].
|
||||
- link:https://docs.redpanda.com/current/develop/data-transforms/build/[Develop Data Transforms^].
|
||||
- https://golang.org/ref/mod[Go Modules^] for managing dependencies and builds in Go.
|
||||
- https://docs.docker.com/compose/[Docker Compose^] for customizing your environment.
|
||||
@@ -0,0 +1,5 @@
|
||||
module regex
|
||||
|
||||
go 1.20
|
||||
|
||||
require github.com/redpanda-data/redpanda/src/transform-sdk/go/transform v1.1.0
|
||||
@@ -0,0 +1,2 @@
|
||||
github.com/redpanda-data/redpanda/src/transform-sdk/go/transform v1.1.0 h1:KxgHJZsHsrT3YX7DMpu/vJN4TZN3KFm1jzrCFLyOepA=
|
||||
github.com/redpanda-data/redpanda/src/transform-sdk/go/transform v1.1.0/go.mod h1:QGgiwwf/BIsD1b7EiyQ/Apzw+RLSpasRDdpOCiefQFQ=
|
||||
Binary file not shown.
@@ -0,0 +1,122 @@
|
||||
package main
|
||||
// This data transform filters records based on a customizable regex pattern.
|
||||
// If a record's key or value
|
||||
// (determined by an environment variable) matches the specified regex,
|
||||
// the record is forwarded to the output.
|
||||
// Otherwise, it is dropped.
|
||||
//
|
||||
// Usage:
|
||||
// 1. Provide the following environment variables in your Docker or configuration setup:
|
||||
// - PATTERN : (required) a regular expression that determines what you want to match.
|
||||
// - MATCH_VALUE : (optional) a boolean to decide whether to check the record value. If false,
|
||||
// the record key is checked. Default is false.
|
||||
//
|
||||
// Example environment variables:
|
||||
// PATTERN=".*\\.edu$"
|
||||
// MATCH_VALUE="true"
|
||||
//
|
||||
// Logs:
|
||||
// This transform logs information about each record and whether it matched.
|
||||
// The logs appear in the _redpanda.transform_logs topic, so you can debug how your records are being processed.
|
||||
//
|
||||
// Build instructions:
|
||||
// go mod tidy
|
||||
// rpk transform build
|
||||
//
|
||||
// For more details on building transforms with the Redpanda SDK, see:
|
||||
// https://docs.redpanda.com/current/develop/data-transforms
|
||||
//
|
||||
|
||||
import (
|
||||
"log"
|
||||
"os"
|
||||
"regexp"
|
||||
"strings"
|
||||
|
||||
"github.com/redpanda-data/redpanda/src/transform-sdk/go/transform"
|
||||
)
|
||||
|
||||
var (
|
||||
re *regexp.Regexp
|
||||
checkValue bool
|
||||
)
|
||||
|
||||
// isTrueVar reports whether v is one of the accepted "truthy" spellings:
// "yes", "ok", "1", or "true". The comparison ignores letter case and any
// leading or trailing whitespace, so a value such as " TRUE\n" is accepted.
// Every other input, including the empty string, yields false.
func isTrueVar(v string) bool {
	// Trim first: environment values copied from files or shells often
	// carry stray spaces or a trailing newline.
	switch strings.ToLower(strings.TrimSpace(v)) {
	case "yes", "ok", "1", "true":
		return true
	default:
		return false
	}
}
|
||||
|
||||
// The main() function runs only once at startup. It performs all initialization steps:
|
||||
// - Reads and compiles the regex pattern.
|
||||
// - Determines whether to match on the key or value.
|
||||
// - Registers the doRegexFilter() function to process records.
|
||||
func main() {
|
||||
// Set logging preferences, including timestamp and UTC time.
|
||||
log.SetPrefix("[regex-transform] ")
|
||||
log.SetFlags(log.Ldate | log.Ltime | log.LUTC | log.Lmicroseconds)
|
||||
|
||||
// Start logging the transformation process
|
||||
log.Println("Starting transform...")
|
||||
|
||||
// Read the PATTERN environment variable to get the regex pattern.
|
||||
pattern, ok := os.LookupEnv("PATTERN")
|
||||
if !ok {
|
||||
log.Fatal("Missing PATTERN environment variable")
|
||||
}
|
||||
// Log the regex pattern being used.
|
||||
log.Printf("Using PATTERN: %q\n", pattern)
|
||||
// Compile the regex pattern for later use.
|
||||
re = regexp.MustCompile(pattern)
|
||||
|
||||
// Read the MATCH_VALUE environment variable to determine whether to check the record's value.
|
||||
mk, ok := os.LookupEnv("MATCH_VALUE")
|
||||
checkValue = ok && isTrueVar(mk)
|
||||
log.Printf("MATCH_VALUE set to: %t\n", checkValue)
|
||||
|
||||
log.Println("Initialization complete, waiting for records...")
|
||||
|
||||
// Listen for records to be written, calling doRegexFilter() for each record.
|
||||
transform.OnRecordWritten(doRegexFilter)
|
||||
}
|
||||
|
||||
// The doRegexFilter() function executes each time a new record is written.
|
||||
// It checks whether the record's key or value (based on MATCH_VALUE) matches the compiled regex.
|
||||
// If it matches, the record is forwarded, if not, it's dropped.
|
||||
func doRegexFilter(e transform.WriteEvent, w transform.RecordWriter) error {
|
||||
// This stores the data to be checked (either the key or value).
|
||||
var dataToCheck []byte
|
||||
|
||||
// Depending on the MATCH_VALUE environment variable, decide whether to check the record's key or value.
|
||||
if checkValue {
|
||||
// Use the value of the record if MATCH_VALUE is true.
|
||||
dataToCheck = e.Record().Value
|
||||
log.Printf("Checking record value: %s\n", string(dataToCheck))
|
||||
} else {
|
||||
// Use the key of the record if MATCH_VALUE is false.
|
||||
dataToCheck = e.Record().Key
|
||||
log.Printf("Checking record key: %s\n", string(dataToCheck))
|
||||
}
|
||||
|
||||
// If there is no key or value to check, log and skip the record.
|
||||
if dataToCheck == nil {
|
||||
log.Println("Record has no key/value to check, skipping.")
|
||||
return nil
|
||||
}
|
||||
|
||||
// Check if the data matches the regex pattern.
|
||||
pass := re.Match(dataToCheck)
|
||||
if pass {
|
||||
// If the record matches the pattern, log and write the record to the output topic.
|
||||
log.Printf("Record matched pattern, passing through. Key: %s, Value: %s\n", string(e.Record().Key), string(e.Record().Value))
|
||||
return w.Write(e.Record())
|
||||
} else {
|
||||
// If the record does not match the pattern, log and drop the record.
|
||||
log.Printf("Record did not match pattern, dropping. Key: %s, Value: %s\n", string(e.Record().Key), string(e.Record().Value))
|
||||
// Do not write the record if it doesn't match the pattern.
|
||||
return nil
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,33 @@
|
||||
# Transform metadata used by the rpk transform build command.
|
||||
# This metadata file tells rpk:
|
||||
# 1) The transform’s display name, which also becomes the base for the .wasm file name.
|
||||
# 2) A brief description of what it does.
|
||||
# 3) Defaults for environment variables.
|
||||
# 4) Input and output topics (if you want to define them here rather than in the deploy command).
|
||||
|
||||
# Human-readable name of the transform. rpk transform build uses this for the generated .wasm file.
|
||||
name: regex
|
||||
|
||||
description: |
|
||||
Filters the input topic to records that only match a regular expression.
|
||||
|
||||
Regular expressions are implemented using Go's regexp library, which uses the syntax of RE2.
|
||||
See the RE2 wiki for allowed syntax: https://github.com/google/re2/wiki/Syntax
|
||||
|
||||
Environment variables:
|
||||
- PATTERN: The regular expression that will match against records (required).
|
||||
- MATCH_VALUE: By default, the regex matches keys, but if set to "true", the regex matches values.
|
||||
|
||||
# By default, no input topic is set here. (You can set it in your deploy command if preferred.)
|
||||
input-topic: ""
|
||||
|
||||
# By default, no output topic is set here. (You can set it in your deploy command if preferred.)
|
||||
output-topic: ""
|
||||
|
||||
# Indicates the specific TinyGo environment used to compile your transform.
|
||||
language: tinygo-no-goroutines
|
||||
|
||||
env:
|
||||
# The PATTERN variable must be provided at deploy time.
|
||||
# Example: --var=PATTERN=".*@example.com"
|
||||
PATTERN: '<required>'
|
||||
@@ -0,0 +1,24 @@
|
||||
# This file configures `rpk` to connect to a remote Redpanda cluster running in the same local network as `rpk`.
|
||||
|
||||
# Configuration for connecting to the Kafka API of the Redpanda cluster.
|
||||
kafka_api:
|
||||
# SASL (Simple Authentication and Security Layer) settings for authentication.
|
||||
sasl:
|
||||
user: superuser # The username used for authentication
|
||||
password: secretpassword # The password associated with the username
|
||||
mechanism: scram-sha-256 # Authentication mechanism; SCRAM-SHA-256 provides secure password-based authentication
|
||||
# List of Kafka brokers in the Redpanda cluster.
|
||||
# These brokers ensure high availability and fault tolerance for Kafka-based communication.
|
||||
brokers:
|
||||
- 127.0.0.1:19092 # Broker 1: Accessible on localhost, port 19092
|
||||
- 127.0.0.1:29092 # Broker 2: Accessible on localhost, port 29092
|
||||
- 127.0.0.1:39092 # Broker 3: Accessible on localhost, port 39092
|
||||
|
||||
# Configuration for connecting to the Redpanda Admin API.
|
||||
# The Admin API allows you to perform administrative tasks such as managing configurations, monitoring, and scaling.
|
||||
admin_api:
|
||||
# List of Admin API endpoints for managing the cluster.
|
||||
addresses:
|
||||
- 127.0.0.1:19644 # Admin API for Broker 1: Accessible on localhost, port 19644
|
||||
- 127.0.0.1:29644 # Admin API for Broker 2: Accessible on localhost, port 29644
|
||||
- 127.0.0.1:39644 # Admin API for Broker 3: Accessible on localhost, port 39644
|
||||
@@ -0,0 +1,37 @@
|
||||
{
|
||||
"$schema": "http://json-schema.org/draft-07/schema#",
|
||||
"title": "Transactions",
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"email": {
|
||||
"type": "string",
|
||||
"format": "email",
|
||||
"description": "The email address of the user involved in the transaction."
|
||||
},
|
||||
"index": {
|
||||
"type": "integer",
|
||||
"description": "A numeric index associated with the transaction."
|
||||
},
|
||||
"price": {
|
||||
"type": "string",
|
||||
"pattern": "^XXX \\d+\\.\\d{6}$",
|
||||
"description": "A string representing the price of the product, including a currency code followed by the amount."
|
||||
},
|
||||
"product_url": {
|
||||
"type": "string",
|
||||
"format": "uri",
|
||||
"description": "A URL that points to the product involved in the transaction."
|
||||
},
|
||||
"timestamp": {
|
||||
"type": "string",
|
||||
"format": "date-time",
|
||||
"description": "The timestamp of when the transaction occurred, formatted in ISO 8601."
|
||||
},
|
||||
"user_id": {
|
||||
"type": "integer",
|
||||
"description": "A numeric identifier for the user."
|
||||
}
|
||||
},
|
||||
"required": ["email", "index", "price", "product_url", "timestamp", "user_id"],
|
||||
"additionalProperties": false
|
||||
}
|
||||
@@ -0,0 +1,46 @@
|
||||
# Transactions Topic Documentation
|
||||
|
||||
This document provides an overview of the `transactions` topic in the Redpanda cluster. The topic is designed to capture autogenerated transaction events with various attributes.
|
||||
|
||||
## Schema Overview
|
||||
|
||||
Each message in the `transactions` topic is a JSON object with the following fields and types:
|
||||
|
||||
```json
|
||||
{
|
||||
"email": "string",
|
||||
"index": "integer",
|
||||
"price": "string",
|
||||
"product_url": "string",
|
||||
"timestamp": "string",
|
||||
"user_id": "integer"
|
||||
}
|
||||
```
|
||||
|
||||
- **email**: The email address of the user involved in the transaction.
|
||||
- **index**: A numeric index associated with the transaction. This could represent the position or order of the transaction in a sequence.
|
||||
- **price**: A string representing the price of the product. It includes a currency code (e.g., "XXX") followed by the amount.
|
||||
- **product_url**: A URL that points to the product involved in the transaction.
|
||||
- **timestamp**: The timestamp of when the transaction occurred, formatted in ISO 8601.
|
||||
- **user_id**: A numeric identifier for the user. This is typically a unique ID assigned to each user in the system.
|
||||
|
||||
## Example message
|
||||
|
||||
```json
|
||||
{
|
||||
"email": "wzieme@ykczius.edu",
|
||||
"index": 0,
|
||||
"price": "XXX 5651308.100000",
|
||||
"product_url": "http://yjomdta.top/DxvGsCn.php",
|
||||
"timestamp": "2024-08-16T15:51:19.799474084Z",
|
||||
"user_id": 1
|
||||
}
|
||||
```
|
||||
|
||||
## Use cases
|
||||
|
||||
You can use the `transactions` topic for various purposes, including:
|
||||
|
||||
- **Analytics**: Tracking and analyzing user transactions to understand buying behavior, popular products, etc.
|
||||
- **Monitoring**: Observing transaction patterns to detect anomalies, such as unusual spikes or drops in transaction volume.
|
||||
- **Data Processing**: Feeding transaction data into other systems, such as a data warehouse or real-time processing pipelines, for further processing and analysis.
|
||||
@@ -0,0 +1,73 @@
|
||||
= Modify the Wasm Transform in the Quickstart
|
||||
|
||||
This directory contains the Go source code (`transform.go`) for the data transform that is used in the Redpanda Self-Managed quickstart.
|
||||
If you're following the quickstart, you *do not* need to modify or rebuild this code. The Docker Compose configuration automatically deploys a pre-built transform called `regex.wasm`.
|
||||
|
||||
However, if you want to customize the data transform logic, continue reading.
|
||||
|
||||
== Why customize the transform?
|
||||
|
||||
- **Custom filtering**: Filter by a different regex or apply multiple conditions.
|
||||
- **Data manipulation**: Transform records before writing them out. For example, redacting sensitive data or combining fields.
|
||||
- **Extended functionality**: Add advanced logging, error handling, or multi-topic routing.
|
||||
|
||||
== Prerequisites
|
||||
|
||||
You need the following:
|
||||
|
||||
- At least Go 1.20 installed.
|
||||
+
|
||||
[source,bash]
|
||||
----
|
||||
go version
|
||||
----
|
||||
|
||||
- The Redpanda CLI (`rpk`) installed.
|
||||
|
||||
- A running Redpanda cluster. If you're using the local quickstart with Docker Compose, ensure the cluster is up and running. Or, point `rpk` to another Redpanda environment.
|
||||
|
||||
== Modify and deploy your transform
|
||||
|
||||
. Open link:transform.go[transform.go] and make your changes. For example:
|
||||
+
|
||||
--
|
||||
- Change the regex logic to handle different use cases.
|
||||
- Add environment variables to control new features.
|
||||
- Extend the `doRegexFilter()` function to manipulate records.
|
||||
--
|
||||
|
||||
. Compile your Go code into a `.wasm` file:
|
||||
+
|
||||
[source,bash]
|
||||
----
|
||||
rpk transform build
|
||||
----
|
||||
+
|
||||
This command compiles your Go source and produces a `.wasm` file that you can deploy to Redpanda.
|
||||
|
||||
. Deploy the new transform.
|
||||
+
|
||||
If your Docker Compose setup already has a service to deploy the transform, you can restart that service.
|
||||
+
|
||||
Otherwise, you can deploy your updated `.wasm` manually using `rpk transform deploy`.
|
||||
|
||||
. Produce messages into the input topic. For example:
|
||||
+
|
||||
[source,bash]
|
||||
----
|
||||
echo '{"key":"alice@university.edu","value":"test message"}' | rpk topic produce logins
|
||||
----
|
||||
|
||||
. Consume from the output topic. For example:
|
||||
+
|
||||
[source,bash]
|
||||
----
|
||||
rpk topic consume edu-filtered-domains --num 1
|
||||
----
|
||||
|
||||
== Suggested reading
|
||||
|
||||
- link:https://docs.redpanda.com/current/reference/rpk/[Redpanda `rpk` CLI Reference^].
|
||||
- link:https://docs.redpanda.com/current/develop/data-transforms/build/[Develop Data Transforms^].
|
||||
- https://golang.org/ref/mod[Go Modules^] for managing dependencies and builds in Go.
|
||||
- https://docs.docker.com/compose/[Docker Compose^] for customizing your environment.
|
||||
@@ -0,0 +1,5 @@
|
||||
module regex
|
||||
|
||||
go 1.20
|
||||
|
||||
require github.com/redpanda-data/redpanda/src/transform-sdk/go/transform v1.1.0
|
||||
@@ -0,0 +1,2 @@
|
||||
github.com/redpanda-data/redpanda/src/transform-sdk/go/transform v1.1.0 h1:KxgHJZsHsrT3YX7DMpu/vJN4TZN3KFm1jzrCFLyOepA=
|
||||
github.com/redpanda-data/redpanda/src/transform-sdk/go/transform v1.1.0/go.mod h1:QGgiwwf/BIsD1b7EiyQ/Apzw+RLSpasRDdpOCiefQFQ=
|
||||
Binary file not shown.
@@ -0,0 +1,122 @@
|
||||
package main
|
||||
// This data transform filters records based on a customizable regex pattern.
|
||||
// If a record's key or value
|
||||
// (determined by an environment variable) matches the specified regex,
|
||||
// the record is forwarded to the output.
|
||||
// Otherwise, it is dropped.
|
||||
//
|
||||
// Usage:
|
||||
// 1. Provide the following environment variables in your Docker or configuration setup:
|
||||
// - PATTERN : (required) a regular expression that determines what you want to match.
|
||||
// - MATCH_VALUE : (optional) a boolean to decide whether to check the record value. If false,
|
||||
// the record key is checked. Default is false.
|
||||
//
|
||||
// Example environment variables:
|
||||
// PATTERN=".*\\.edu$"
|
||||
// MATCH_VALUE="true"
|
||||
//
|
||||
// Logs:
|
||||
// This transform logs information about each record and whether it matched.
|
||||
// The logs appear in the _redpanda.transform_logs topic, so you can debug how your records are being processed.
|
||||
//
|
||||
// Build instructions:
|
||||
// go mod tidy
|
||||
// rpk transform build
|
||||
//
|
||||
// For more details on building transforms with the Redpanda SDK, see:
|
||||
// https://docs.redpanda.com/current/develop/data-transforms
|
||||
//
|
||||
|
||||
import (
|
||||
"log"
|
||||
"os"
|
||||
"regexp"
|
||||
"strings"
|
||||
|
||||
"github.com/redpanda-data/redpanda/src/transform-sdk/go/transform"
|
||||
)
|
||||
|
||||
var (
|
||||
re *regexp.Regexp
|
||||
checkValue bool
|
||||
)
|
||||
|
||||
// isTrueVar reports whether v is one of the accepted "truthy" spellings:
// "yes", "ok", "1", or "true". The comparison ignores letter case and any
// leading or trailing whitespace, so a value such as " TRUE\n" is accepted.
// Every other input, including the empty string, yields false.
func isTrueVar(v string) bool {
	// Trim first: environment values copied from files or shells often
	// carry stray spaces or a trailing newline.
	switch strings.ToLower(strings.TrimSpace(v)) {
	case "yes", "ok", "1", "true":
		return true
	default:
		return false
	}
}
|
||||
|
||||
// The main() function runs only once at startup. It performs all initialization steps:
|
||||
// - Reads and compiles the regex pattern.
|
||||
// - Determines whether to match on the key or value.
|
||||
// - Registers the doRegexFilter() function to process records.
|
||||
func main() {
|
||||
// Set logging preferences, including timestamp and UTC time.
|
||||
log.SetPrefix("[regex-transform] ")
|
||||
log.SetFlags(log.Ldate | log.Ltime | log.LUTC | log.Lmicroseconds)
|
||||
|
||||
// Start logging the transformation process
|
||||
log.Println("Starting transform...")
|
||||
|
||||
// Read the PATTERN environment variable to get the regex pattern.
|
||||
pattern, ok := os.LookupEnv("PATTERN")
|
||||
if !ok {
|
||||
log.Fatal("Missing PATTERN environment variable")
|
||||
}
|
||||
// Log the regex pattern being used.
|
||||
log.Printf("Using PATTERN: %q\n", pattern)
|
||||
// Compile the regex pattern for later use.
|
||||
re = regexp.MustCompile(pattern)
|
||||
|
||||
// Read the MATCH_VALUE environment variable to determine whether to check the record's value.
|
||||
mk, ok := os.LookupEnv("MATCH_VALUE")
|
||||
checkValue = ok && isTrueVar(mk)
|
||||
log.Printf("MATCH_VALUE set to: %t\n", checkValue)
|
||||
|
||||
log.Println("Initialization complete, waiting for records...")
|
||||
|
||||
// Listen for records to be written, calling doRegexFilter() for each record.
|
||||
transform.OnRecordWritten(doRegexFilter)
|
||||
}
|
||||
|
||||
// The doRegexFilter() function executes each time a new record is written.
|
||||
// It checks whether the record's key or value (based on MATCH_VALUE) matches the compiled regex.
|
||||
// If it matches, the record is forwarded, if not, it's dropped.
|
||||
func doRegexFilter(e transform.WriteEvent, w transform.RecordWriter) error {
|
||||
// This stores the data to be checked (either the key or value).
|
||||
var dataToCheck []byte
|
||||
|
||||
// Depending on the MATCH_VALUE environment variable, decide whether to check the record's key or value.
|
||||
if checkValue {
|
||||
// Use the value of the record if MATCH_VALUE is true.
|
||||
dataToCheck = e.Record().Value
|
||||
log.Printf("Checking record value: %s\n", string(dataToCheck))
|
||||
} else {
|
||||
// Use the key of the record if MATCH_VALUE is false.
|
||||
dataToCheck = e.Record().Key
|
||||
log.Printf("Checking record key: %s\n", string(dataToCheck))
|
||||
}
|
||||
|
||||
// If there is no key or value to check, log and skip the record.
|
||||
if dataToCheck == nil {
|
||||
log.Println("Record has no key/value to check, skipping.")
|
||||
return nil
|
||||
}
|
||||
|
||||
// Check if the data matches the regex pattern.
|
||||
pass := re.Match(dataToCheck)
|
||||
if pass {
|
||||
// If the record matches the pattern, log and write the record to the output topic.
|
||||
log.Printf("Record matched pattern, passing through. Key: %s, Value: %s\n", string(e.Record().Key), string(e.Record().Value))
|
||||
return w.Write(e.Record())
|
||||
} else {
|
||||
// If the record does not match the pattern, log and drop the record.
|
||||
log.Printf("Record did not match pattern, dropping. Key: %s, Value: %s\n", string(e.Record().Key), string(e.Record().Value))
|
||||
// Do not write the record if it doesn't match the pattern.
|
||||
return nil
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,33 @@
|
||||
# Metadata consumed by the `rpk transform build` command. It declares:
#   - the transform name (also the base name of the generated .wasm file),
#   - a short description,
#   - default environment variables,
#   - optional input and output topics (these can be given in the deploy command instead).

# Transform name. `rpk transform build` names the generated .wasm file after it.
name: regex

description: |
  Filters the input topic to records that only match a regular expression.

  Regular expressions are implemented using Go's regexp library, which uses the syntax of RE2.
  See the RE2 wiki for allowed syntax: https://github.com/google/re2/wiki/Syntax

  Environment variables:
  - PATTERN: The regular expression that will match against records (required).
  - MATCH_VALUE: By default, the regex matches keys, but if set to "true", the regex matches values.

# Input topic. Left empty here; set it in your deploy command if preferred.
input-topic: ""

# Output topic. Left empty here; set it in your deploy command if preferred.
output-topic: ""

# TinyGo environment used to compile the transform.
language: tinygo-no-goroutines

env:
  # PATTERN must be provided at deploy time,
  # for example: --var=PATTERN=".*@example.com"
  PATTERN: '<required>'
|
||||
@@ -0,0 +1,24 @@
|
||||
# Configures `rpk` to connect to a Redpanda cluster whose brokers are
# reachable on localhost (127.0.0.1).

# Kafka API connection settings.
kafka_api:
  # SASL (Simple Authentication and Security Layer) credentials.
  sasl:
    user: superuser  # Username used for authentication
    password: secretpassword  # Password for that username
    mechanism: scram-sha-256  # SASL mechanism (SCRAM-SHA-256)
  # Kafka API address of each broker in the cluster.
  brokers:
    - 127.0.0.1:19092  # Broker 1: localhost, port 19092
    - 127.0.0.1:29092  # Broker 2: localhost, port 29092
    - 127.0.0.1:39092  # Broker 3: localhost, port 39092

# Admin API connection settings. The Admin API is used for administrative
# tasks such as managing configuration and monitoring the cluster.
admin_api:
  # Admin API address of each broker in the cluster.
  addresses:
    - 127.0.0.1:19644  # Broker 1: localhost, port 19644
    - 127.0.0.1:29644  # Broker 2: localhost, port 29644
    - 127.0.0.1:39644  # Broker 3: localhost, port 39644
|
||||
@@ -0,0 +1,37 @@
|
||||
{
|
||||
"$schema": "http://json-schema.org/draft-07/schema#",
|
||||
"title": "Transactions",
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"email": {
|
||||
"type": "string",
|
||||
"format": "email",
|
||||
"description": "The email address of the user involved in the transaction."
|
||||
},
|
||||
"index": {
|
||||
"type": "integer",
|
||||
"description": "A numeric index associated with the transaction."
|
||||
},
|
||||
"price": {
|
||||
"type": "string",
|
||||
"pattern": "^XXX \\d+\\.\\d{6}$",
|
||||
"description": "A string representing the price of the product, including a currency code followed by the amount."
|
||||
},
|
||||
"product_url": {
|
||||
"type": "string",
|
||||
"format": "uri",
|
||||
"description": "A URL that points to the product involved in the transaction."
|
||||
},
|
||||
"timestamp": {
|
||||
"type": "string",
|
||||
"format": "date-time",
|
||||
"description": "The timestamp of when the transaction occurred, formatted in ISO 8601."
|
||||
},
|
||||
"user_id": {
|
||||
"type": "integer",
|
||||
"description": "A numeric identifier for the user."
|
||||
}
|
||||
},
|
||||
"required": ["email", "index", "price", "product_url", "timestamp", "user_id"],
|
||||
"additionalProperties": false
|
||||
}
|
||||
@@ -0,0 +1,46 @@
|
||||
# Transactions Topic Documentation
|
||||
|
||||
This document provides an overview of the `transactions` topic in the Redpanda cluster. The topic is designed to capture autogenerated transaction events with various attributes.
|
||||
|
||||
## Schema Overview
|
||||
|
||||
Each message in the `transactions` topic is a JSON object with the following fields (shown with their value types):
|
||||
|
||||
```json
|
||||
{
|
||||
"email": "string",
|
||||
"index": "integer",
|
||||
"price": "string",
|
||||
"product_url": "string",
|
||||
"timestamp": "string",
|
||||
"user_id": "integer"
|
||||
}
|
||||
```
|
||||
|
||||
- **email**: The email address of the user involved in the transaction.
|
||||
- **index**: A numeric index associated with the transaction. This could represent the position or order of the transaction in a sequence.
|
||||
- **price**: A string representing the price of the product. It includes a currency code (e.g., "XXX") followed by the amount.
|
||||
- **product_url**: A URL that points to the product involved in the transaction.
|
||||
- **timestamp**: The timestamp of when the transaction occurred, formatted in ISO 8601.
|
||||
- **user_id**: A numeric identifier for the user. This is typically a unique ID assigned to each user in the system.
|
||||
|
||||
## Example message
|
||||
|
||||
```json
|
||||
{
|
||||
"email": "wzieme@ykczius.edu",
|
||||
"index": 0,
|
||||
"price": "XXX 5651308.100000",
|
||||
"product_url": "http://yjomdta.top/DxvGsCn.php",
|
||||
"timestamp": "2024-08-16T15:51:19.799474084Z",
|
||||
"user_id": 1
|
||||
}
|
||||
```
|
||||
|
||||
## Use cases
|
||||
|
||||
You can use the `transactions` topic for various purposes, including:
|
||||
|
||||
- **Analytics**: Tracking and analyzing user transactions to understand buying behavior, popular products, etc.
|
||||
- **Monitoring**: Observing transaction patterns to detect anomalies, such as unusual spikes or drops in transaction volume.
|
||||
- **Data Processing**: Feeding transaction data into other systems, such as a data warehouse or real-time processing pipelines, for further processing and analysis.
|
||||
@@ -0,0 +1,73 @@
|
||||
= Modify the Wasm Transform in the Quickstart
|
||||
|
||||
This directory contains the Go source code (`transform.go`) for the data transform that is used in the Redpanda Self-Managed quickstart.
|
||||
If you're following the quickstart, you *do not* need to modify or rebuild this code. The Docker Compose configuration automatically deploys a pre-built transform called `regex.wasm`.
|
||||
|
||||
However, if you want to customize the data transform logic, continue reading.
|
||||
|
||||
== Why customize the transform?
|
||||
|
||||
- **Custom filtering**: Filter by a different regex or apply multiple conditions.
|
||||
- **Data manipulation**: Transform records before writing them out. For example, redacting sensitive data or combining fields.
|
||||
- **Extended functionality**: Add advanced logging, error handling, or multi-topic routing.
|
||||
|
||||
== Prerequisites
|
||||
|
||||
You need the following:
|
||||
|
||||
- At least Go 1.20 installed.
|
||||
+
|
||||
[source,bash]
|
||||
----
|
||||
go version
|
||||
----
|
||||
|
||||
- The Redpanda CLI (`rpk`) installed.
|
||||
|
||||
- A running Redpanda cluster. If you're using the local quickstart with Docker Compose, ensure the cluster is up and running. Or, point `rpk` to another Redpanda environment.
|
||||
|
||||
== Modify and deploy your transform
|
||||
|
||||
. Open link:transform.go[transform.go] and make your changes. For example:
|
||||
+
|
||||
--
|
||||
- Change the regex logic to handle different use cases.
|
||||
- Add environment variables to control new features.
|
||||
- Extend the `doRegexFilter()` function to manipulate records.
|
||||
--
|
||||
|
||||
. Compile your Go code into a `.wasm` file:
|
||||
+
|
||||
[source,bash]
|
||||
----
|
||||
rpk transform build
|
||||
----
|
||||
+
|
||||
This command compiles your Go source and produces a `.wasm` file that you can deploy to Redpanda.
|
||||
|
||||
. Deploy the new transform.
|
||||
+
|
||||
If your Docker Compose setup already has a service to deploy the transform, you can restart that service.
|
||||
+
|
||||
Otherwise, you can deploy your updated `.wasm` manually using `rpk transform deploy`.
|
||||
|
||||
. Produce messages into the input topic. For example:
|
||||
+
|
||||
[source,bash]
|
||||
----
|
||||
echo '{"key":"alice@university.edu","value":"test message"}' | rpk topic produce logins
|
||||
----
|
||||
|
||||
. Consume from the output topic. For example:
|
||||
+
|
||||
[source,bash]
|
||||
----
|
||||
rpk topic consume edu-filtered-domains --num 1
|
||||
----
|
||||
|
||||
== Suggested reading
|
||||
|
||||
- link:https://docs.redpanda.com/current/reference/rpk/[Redpanda `rpk` CLI Reference^].
|
||||
- link:https://docs.redpanda.com/current/develop/data-transforms/build/[Develop Data Transforms^].
|
||||
- https://golang.org/ref/mod[Go Modules^] for managing dependencies and builds in Go.
|
||||
- https://docs.docker.com/compose/[Docker Compose^] for customizing your environment.
|
||||
@@ -0,0 +1,5 @@
|
||||
module regex
|
||||
|
||||
go 1.20
|
||||
|
||||
require github.com/redpanda-data/redpanda/src/transform-sdk/go/transform v1.1.0
|
||||
@@ -0,0 +1,2 @@
|
||||
github.com/redpanda-data/redpanda/src/transform-sdk/go/transform v1.1.0 h1:KxgHJZsHsrT3YX7DMpu/vJN4TZN3KFm1jzrCFLyOepA=
|
||||
github.com/redpanda-data/redpanda/src/transform-sdk/go/transform v1.1.0/go.mod h1:QGgiwwf/BIsD1b7EiyQ/Apzw+RLSpasRDdpOCiefQFQ=
|
||||
Binary file not shown.
@@ -0,0 +1,122 @@
|
||||
package main
|
||||
// This data transform filters records based on a customizable regex pattern.
|
||||
// If a record's key or value
|
||||
// (determined by an environment variable) matches the specified regex,
|
||||
// the record is forwarded to the output.
|
||||
// Otherwise, it is dropped.
|
||||
//
|
||||
// Usage:
|
||||
// 1. Provide the following environment variables in your Docker or configuration setup:
|
||||
// - PATTERN : (required) a regular expression that determines what you want to match.
|
||||
// - MATCH_VALUE : (optional) a boolean to decide whether to check the record value. If false,
|
||||
// the record key is checked. Default is false.
|
||||
//
|
||||
// Example environment variables:
|
||||
// PATTERN=".*\\.edu$"
|
||||
// MATCH_VALUE="true"
|
||||
//
|
||||
// Logs:
|
||||
// This transform logs information about each record and whether it matched.
|
||||
// The logs appear in the _redpanda.transform_logs topic, so you can debug how your records are being processed.
|
||||
//
|
||||
// Build instructions:
|
||||
// go mod tidy
|
||||
// rpk transform build
|
||||
//
|
||||
// For more details on building transforms with the Redpanda SDK, see:
|
||||
// https://docs.redpanda.com/current/develop/data-transforms
|
||||
//
|
||||
|
||||
import (
|
||||
"log"
|
||||
"os"
|
||||
"regexp"
|
||||
"strings"
|
||||
|
||||
"github.com/redpanda-data/redpanda/src/transform-sdk/go/transform"
|
||||
)
|
||||
|
||||
// re is the compiled PATTERN regular expression. main() assigns it before
// any record is processed.
var re *regexp.Regexp

// checkValue selects the field that is matched: the record value when true,
// the record key when false. main() derives it from MATCH_VALUE.
var checkValue bool
|
||||
|
||||
// isTrueVar reports whether v is one of the accepted truthy spellings
// ("yes", "ok", "1", "true"). The comparison ignores letter case.
func isTrueVar(v string) bool {
	lowered := strings.ToLower(v)
	for _, truthy := range []string{"yes", "ok", "1", "true"} {
		if lowered == truthy {
			return true
		}
	}
	return false
}
|
||||
|
||||
// The main() function runs only once at startup. It performs all initialization steps:
|
||||
// - Reads and compiles the regex pattern.
|
||||
// - Determines whether to match on the key or value.
|
||||
// - Registers the doRegexFilter() function to process records.
|
||||
func main() {
|
||||
// Set logging preferences, including timestamp and UTC time.
|
||||
log.SetPrefix("[regex-transform] ")
|
||||
log.SetFlags(log.Ldate | log.Ltime | log.LUTC | log.Lmicroseconds)
|
||||
|
||||
// Start logging the transformation process
|
||||
log.Println("Starting transform...")
|
||||
|
||||
// Read the PATTERN environment variable to get the regex pattern.
|
||||
pattern, ok := os.LookupEnv("PATTERN")
|
||||
if !ok {
|
||||
log.Fatal("Missing PATTERN environment variable")
|
||||
}
|
||||
// Log the regex pattern being used.
|
||||
log.Printf("Using PATTERN: %q\n", pattern)
|
||||
// Compile the regex pattern for later use.
|
||||
re = regexp.MustCompile(pattern)
|
||||
|
||||
// Read the MATCH_VALUE environment variable to determine whether to check the record's value.
|
||||
mk, ok := os.LookupEnv("MATCH_VALUE")
|
||||
checkValue = ok && isTrueVar(mk)
|
||||
log.Printf("MATCH_VALUE set to: %t\n", checkValue)
|
||||
|
||||
log.Println("Initialization complete, waiting for records...")
|
||||
|
||||
// Listen for records to be written, calling doRegexFilter() for each record.
|
||||
transform.OnRecordWritten(doRegexFilter)
|
||||
}
|
||||
|
||||
// The doRegexFilter() function executes each time a new record is written.
|
||||
// It checks whether the record's key or value (based on MATCH_VALUE) matches the compiled regex.
|
||||
// If it matches, the record is forwarded, if not, it's dropped.
|
||||
func doRegexFilter(e transform.WriteEvent, w transform.RecordWriter) error {
|
||||
// This stores the data to be checked (either the key or value).
|
||||
var dataToCheck []byte
|
||||
|
||||
// Depending on the MATCH_VALUE environment variable, decide whether to check the record's key or value.
|
||||
if checkValue {
|
||||
// Use the value of the record if MATCH_VALUE is true.
|
||||
dataToCheck = e.Record().Value
|
||||
log.Printf("Checking record value: %s\n", string(dataToCheck))
|
||||
} else {
|
||||
// Use the key of the record if MATCH_VALUE is false.
|
||||
dataToCheck = e.Record().Key
|
||||
log.Printf("Checking record key: %s\n", string(dataToCheck))
|
||||
}
|
||||
|
||||
// If there is no key or value to check, log and skip the record.
|
||||
if dataToCheck == nil {
|
||||
log.Println("Record has no key/value to check, skipping.")
|
||||
return nil
|
||||
}
|
||||
|
||||
// Check if the data matches the regex pattern.
|
||||
pass := re.Match(dataToCheck)
|
||||
if pass {
|
||||
// If the record matches the pattern, log and write the record to the output topic.
|
||||
log.Printf("Record matched pattern, passing through. Key: %s, Value: %s\n", string(e.Record().Key), string(e.Record().Value))
|
||||
return w.Write(e.Record())
|
||||
} else {
|
||||
// If the record does not match the pattern, log and drop the record.
|
||||
log.Printf("Record did not match pattern, dropping. Key: %s, Value: %s\n", string(e.Record().Key), string(e.Record().Value))
|
||||
// Do not write the record if it doesn't match the pattern.
|
||||
return nil
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,33 @@
|
||||
# Metadata consumed by the `rpk transform build` command. It declares:
#   - the transform name (also the base name of the generated .wasm file),
#   - a short description,
#   - default environment variables,
#   - optional input and output topics (these can be given in the deploy command instead).

# Transform name. `rpk transform build` names the generated .wasm file after it.
name: regex

description: |
  Filters the input topic to records that only match a regular expression.

  Regular expressions are implemented using Go's regexp library, which uses the syntax of RE2.
  See the RE2 wiki for allowed syntax: https://github.com/google/re2/wiki/Syntax

  Environment variables:
  - PATTERN: The regular expression that will match against records (required).
  - MATCH_VALUE: By default, the regex matches keys, but if set to "true", the regex matches values.

# Input topic. Left empty here; set it in your deploy command if preferred.
input-topic: ""

# Output topic. Left empty here; set it in your deploy command if preferred.
output-topic: ""

# TinyGo environment used to compile the transform.
language: tinygo-no-goroutines

env:
  # PATTERN must be provided at deploy time,
  # for example: --var=PATTERN=".*@example.com"
  PATTERN: '<required>'
|
||||
@@ -0,0 +1,24 @@
|
||||
# Configures `rpk` to connect to a Redpanda cluster whose brokers are
# reachable on localhost (127.0.0.1).

# Kafka API connection settings.
kafka_api:
  # SASL (Simple Authentication and Security Layer) credentials.
  sasl:
    user: superuser  # Username used for authentication
    password: secretpassword  # Password for that username
    mechanism: scram-sha-256  # SASL mechanism (SCRAM-SHA-256)
  # Kafka API address of each broker in the cluster.
  brokers:
    - 127.0.0.1:19092  # Broker 1: localhost, port 19092
    - 127.0.0.1:29092  # Broker 2: localhost, port 29092
    - 127.0.0.1:39092  # Broker 3: localhost, port 39092

# Admin API connection settings. The Admin API is used for administrative
# tasks such as managing configuration and monitoring the cluster.
admin_api:
  # Admin API address of each broker in the cluster.
  addresses:
    - 127.0.0.1:19644  # Broker 1: localhost, port 19644
    - 127.0.0.1:29644  # Broker 2: localhost, port 29644
    - 127.0.0.1:39644  # Broker 3: localhost, port 39644
|
||||
@@ -0,0 +1,37 @@
|
||||
{
|
||||
"$schema": "http://json-schema.org/draft-07/schema#",
|
||||
"title": "Transactions",
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"email": {
|
||||
"type": "string",
|
||||
"format": "email",
|
||||
"description": "The email address of the user involved in the transaction."
|
||||
},
|
||||
"index": {
|
||||
"type": "integer",
|
||||
"description": "A numeric index associated with the transaction."
|
||||
},
|
||||
"price": {
|
||||
"type": "string",
|
||||
"pattern": "^XXX \\d+\\.\\d{6}$",
|
||||
"description": "A string representing the price of the product, including a currency code followed by the amount."
|
||||
},
|
||||
"product_url": {
|
||||
"type": "string",
|
||||
"format": "uri",
|
||||
"description": "A URL that points to the product involved in the transaction."
|
||||
},
|
||||
"timestamp": {
|
||||
"type": "string",
|
||||
"format": "date-time",
|
||||
"description": "The timestamp of when the transaction occurred, formatted in ISO 8601."
|
||||
},
|
||||
"user_id": {
|
||||
"type": "integer",
|
||||
"description": "A numeric identifier for the user."
|
||||
}
|
||||
},
|
||||
"required": ["email", "index", "price", "product_url", "timestamp", "user_id"],
|
||||
"additionalProperties": false
|
||||
}
|
||||
@@ -0,0 +1,46 @@
|
||||
# Transactions Topic Documentation
|
||||
|
||||
This document provides an overview of the `transactions` topic in the Redpanda cluster. The topic is designed to capture autogenerated transaction events with various attributes.
|
||||
|
||||
## Schema Overview
|
||||
|
||||
Each message in the `transactions` topic is a JSON object with the following fields (shown with their value types):
|
||||
|
||||
```json
|
||||
{
|
||||
"email": "string",
|
||||
"index": "integer",
|
||||
"price": "string",
|
||||
"product_url": "string",
|
||||
"timestamp": "string",
|
||||
"user_id": "integer"
|
||||
}
|
||||
```
|
||||
|
||||
- **email**: The email address of the user involved in the transaction.
|
||||
- **index**: A numeric index associated with the transaction. This could represent the position or order of the transaction in a sequence.
|
||||
- **price**: A string representing the price of the product. It includes a currency code (e.g., "XXX") followed by the amount.
|
||||
- **product_url**: A URL that points to the product involved in the transaction.
|
||||
- **timestamp**: The timestamp of when the transaction occurred, formatted in ISO 8601.
|
||||
- **user_id**: A numeric identifier for the user. This is typically a unique ID assigned to each user in the system.
|
||||
|
||||
## Example message
|
||||
|
||||
```json
|
||||
{
|
||||
"email": "wzieme@ykczius.edu",
|
||||
"index": 0,
|
||||
"price": "XXX 5651308.100000",
|
||||
"product_url": "http://yjomdta.top/DxvGsCn.php",
|
||||
"timestamp": "2024-08-16T15:51:19.799474084Z",
|
||||
"user_id": 1
|
||||
}
|
||||
```
|
||||
|
||||
## Use cases
|
||||
|
||||
You can use the `transactions` topic for various purposes, including:
|
||||
|
||||
- **Analytics**: Tracking and analyzing user transactions to understand buying behavior, popular products, etc.
|
||||
- **Monitoring**: Observing transaction patterns to detect anomalies, such as unusual spikes or drops in transaction volume.
|
||||
- **Data Processing**: Feeding transaction data into other systems, such as a data warehouse or real-time processing pipelines, for further processing and analysis.
|
||||
@@ -0,0 +1,73 @@
|
||||
= Modify the Wasm Transform in the Quickstart
|
||||
|
||||
This directory contains the Go source code (`transform.go`) for the data transform that is used in the Redpanda Self-Managed quickstart.
|
||||
If you're following the quickstart, you *do not* need to modify or rebuild this code. The Docker Compose configuration automatically deploys a pre-built transform called `regex.wasm`.
|
||||
|
||||
However, if you want to customize the data transform logic, continue reading.
|
||||
|
||||
== Why customize the transform?
|
||||
|
||||
- **Custom filtering**: Filter by a different regex or apply multiple conditions.
|
||||
- **Data manipulation**: Transform records before writing them out. For example, redacting sensitive data or combining fields.
|
||||
- **Extended functionality**: Add advanced logging, error handling, or multi-topic routing.
|
||||
|
||||
== Prerequisites
|
||||
|
||||
You need the following:
|
||||
|
||||
- At least Go 1.20 installed.
|
||||
+
|
||||
[source,bash]
|
||||
----
|
||||
go version
|
||||
----
|
||||
|
||||
- The Redpanda CLI (`rpk`) installed.
|
||||
|
||||
- A running Redpanda cluster. If you're using the local quickstart with Docker Compose, ensure the cluster is up and running. Or, point `rpk` to another Redpanda environment.
|
||||
|
||||
== Modify and deploy your transform
|
||||
|
||||
. Open link:transform.go[transform.go] and make your changes. For example:
|
||||
+
|
||||
--
|
||||
- Change the regex logic to handle different use cases.
|
||||
- Add environment variables to control new features.
|
||||
- Extend the `doRegexFilter()` function to manipulate records.
|
||||
--
|
||||
|
||||
. Compile your Go code into a `.wasm` file:
|
||||
+
|
||||
[source,bash]
|
||||
----
|
||||
rpk transform build
|
||||
----
|
||||
+
|
||||
This command compiles your Go source and produces a `.wasm` file that you can deploy to Redpanda.
|
||||
|
||||
. Deploy the new transform.
|
||||
+
|
||||
If your Docker Compose setup already has a service to deploy the transform, you can restart that service.
|
||||
+
|
||||
Otherwise, you can deploy your updated `.wasm` manually using `rpk transform deploy`.
|
||||
|
||||
. Produce messages into the input topic. For example:
|
||||
+
|
||||
[source,bash]
|
||||
----
|
||||
echo '{"key":"alice@university.edu","value":"test message"}' | rpk topic produce logins
|
||||
----
|
||||
|
||||
. Consume from the output topic. For example:
|
||||
+
|
||||
[source,bash]
|
||||
----
|
||||
rpk topic consume edu-filtered-domains --num 1
|
||||
----
|
||||
|
||||
== Suggested reading
|
||||
|
||||
- link:https://docs.redpanda.com/current/reference/rpk/[Redpanda `rpk` CLI Reference^].
|
||||
- link:https://docs.redpanda.com/current/develop/data-transforms/build/[Develop Data Transforms^].
|
||||
- https://golang.org/ref/mod[Go Modules^] for managing dependencies and builds in Go.
|
||||
- https://docs.docker.com/compose/[Docker Compose^] for customizing your environment.
|
||||
@@ -0,0 +1,5 @@
|
||||
module regex
|
||||
|
||||
go 1.20
|
||||
|
||||
require github.com/redpanda-data/redpanda/src/transform-sdk/go/transform v1.1.0
|
||||
@@ -0,0 +1,2 @@
|
||||
github.com/redpanda-data/redpanda/src/transform-sdk/go/transform v1.1.0 h1:KxgHJZsHsrT3YX7DMpu/vJN4TZN3KFm1jzrCFLyOepA=
|
||||
github.com/redpanda-data/redpanda/src/transform-sdk/go/transform v1.1.0/go.mod h1:QGgiwwf/BIsD1b7EiyQ/Apzw+RLSpasRDdpOCiefQFQ=
|
||||
Binary file not shown.
@@ -0,0 +1,122 @@
|
||||
package main
|
||||
// This data transform filters records based on a customizable regex pattern.
|
||||
// If a record's key or value
|
||||
// (determined by an environment variable) matches the specified regex,
|
||||
// the record is forwarded to the output.
|
||||
// Otherwise, it is dropped.
|
||||
//
|
||||
// Usage:
|
||||
// 1. Provide the following environment variables in your Docker or configuration setup:
|
||||
// - PATTERN : (required) a regular expression that determines what you want to match.
|
||||
// - MATCH_VALUE : (optional) a boolean to decide whether to check the record value. If false,
|
||||
// the record key is checked. Default is false.
|
||||
//
|
||||
// Example environment variables:
|
||||
// PATTERN=".*\\.edu$"
|
||||
// MATCH_VALUE="true"
|
||||
//
|
||||
// Logs:
|
||||
// This transform logs information about each record and whether it matched.
|
||||
// The logs appear in the _redpanda.transform_logs topic, so you can debug how your records are being processed.
|
||||
//
|
||||
// Build instructions:
|
||||
// go mod tidy
|
||||
// rpk transform build
|
||||
//
|
||||
// For more details on building transforms with the Redpanda SDK, see:
|
||||
// https://docs.redpanda.com/current/develop/data-transforms
|
||||
//
|
||||
|
||||
import (
|
||||
"log"
|
||||
"os"
|
||||
"regexp"
|
||||
"strings"
|
||||
|
||||
"github.com/redpanda-data/redpanda/src/transform-sdk/go/transform"
|
||||
)
|
||||
|
||||
// re is the compiled PATTERN regular expression. main() assigns it before
// any record is processed.
var re *regexp.Regexp

// checkValue selects the field that is matched: the record value when true,
// the record key when false. main() derives it from MATCH_VALUE.
var checkValue bool
|
||||
|
||||
// isTrueVar reports whether v is one of the accepted truthy spellings
// ("yes", "ok", "1", "true"). The comparison ignores letter case.
func isTrueVar(v string) bool {
	lowered := strings.ToLower(v)
	for _, truthy := range []string{"yes", "ok", "1", "true"} {
		if lowered == truthy {
			return true
		}
	}
	return false
}
|
||||
|
||||
// The main() function runs only once at startup. It performs all initialization steps:
|
||||
// - Reads and compiles the regex pattern.
|
||||
// - Determines whether to match on the key or value.
|
||||
// - Registers the doRegexFilter() function to process records.
|
||||
func main() {
|
||||
// Set logging preferences, including timestamp and UTC time.
|
||||
log.SetPrefix("[regex-transform] ")
|
||||
log.SetFlags(log.Ldate | log.Ltime | log.LUTC | log.Lmicroseconds)
|
||||
|
||||
// Start logging the transformation process
|
||||
log.Println("Starting transform...")
|
||||
|
||||
// Read the PATTERN environment variable to get the regex pattern.
|
||||
pattern, ok := os.LookupEnv("PATTERN")
|
||||
if !ok {
|
||||
log.Fatal("Missing PATTERN environment variable")
|
||||
}
|
||||
// Log the regex pattern being used.
|
||||
log.Printf("Using PATTERN: %q\n", pattern)
|
||||
// Compile the regex pattern for later use.
|
||||
re = regexp.MustCompile(pattern)
|
||||
|
||||
// Read the MATCH_VALUE environment variable to determine whether to check the record's value.
|
||||
mk, ok := os.LookupEnv("MATCH_VALUE")
|
||||
checkValue = ok && isTrueVar(mk)
|
||||
log.Printf("MATCH_VALUE set to: %t\n", checkValue)
|
||||
|
||||
log.Println("Initialization complete, waiting for records...")
|
||||
|
||||
// Listen for records to be written, calling doRegexFilter() for each record.
|
||||
transform.OnRecordWritten(doRegexFilter)
|
||||
}
|
||||
|
||||
// The doRegexFilter() function executes each time a new record is written.
|
||||
// It checks whether the record's key or value (based on MATCH_VALUE) matches the compiled regex.
|
||||
// If it matches, the record is forwarded, if not, it's dropped.
|
||||
func doRegexFilter(e transform.WriteEvent, w transform.RecordWriter) error {
|
||||
// This stores the data to be checked (either the key or value).
|
||||
var dataToCheck []byte
|
||||
|
||||
// Depending on the MATCH_VALUE environment variable, decide whether to check the record's key or value.
|
||||
if checkValue {
|
||||
// Use the value of the record if MATCH_VALUE is true.
|
||||
dataToCheck = e.Record().Value
|
||||
log.Printf("Checking record value: %s\n", string(dataToCheck))
|
||||
} else {
|
||||
// Use the key of the record if MATCH_VALUE is false.
|
||||
dataToCheck = e.Record().Key
|
||||
log.Printf("Checking record key: %s\n", string(dataToCheck))
|
||||
}
|
||||
|
||||
// If there is no key or value to check, log and skip the record.
|
||||
if dataToCheck == nil {
|
||||
log.Println("Record has no key/value to check, skipping.")
|
||||
return nil
|
||||
}
|
||||
|
||||
// Check if the data matches the regex pattern.
|
||||
pass := re.Match(dataToCheck)
|
||||
if pass {
|
||||
// If the record matches the pattern, log and write the record to the output topic.
|
||||
log.Printf("Record matched pattern, passing through. Key: %s, Value: %s\n", string(e.Record().Key), string(e.Record().Value))
|
||||
return w.Write(e.Record())
|
||||
} else {
|
||||
// If the record does not match the pattern, log and drop the record.
|
||||
log.Printf("Record did not match pattern, dropping. Key: %s, Value: %s\n", string(e.Record().Key), string(e.Record().Value))
|
||||
// Do not write the record if it doesn't match the pattern.
|
||||
return nil
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,33 @@
|
||||
# Transform metadata used by the rpk transform build command.
|
||||
# This metadata file tells rpk:
|
||||
# 1) The transform’s display name, which also becomes the base for the .wasm file name.
|
||||
# 2) A brief description of what it does.
|
||||
# 3) Defaults for environment variables.
|
||||
# 4) Input and output topics (if you want to define them here rather than in the deploy command).
|
||||
|
||||
# Human-readable name of the transform. rpk transform build uses this for the generated .wasm file.
|
||||
name: regex
|
||||
|
||||
description: |
|
||||
Filters the input topic, keeping only the records that match a regular expression.
|
||||
|
||||
Regular expressions are implemented using Go's regexp library, which uses the syntax of RE2.
|
||||
See the RE2 wiki for allowed syntax: https://github.com/google/re2/wiki/Syntax
|
||||
|
||||
Environment variables:
|
||||
- PATTERN: The regular expression that will match against records (required).
|
||||
- MATCH_VALUE: By default, the regex matches keys, but if set to "true", the regex matches values.
|
||||
|
||||
# By default, no input topic is set here. (You can set it in your deploy command if preferred.)
|
||||
input-topic: ""
|
||||
|
||||
# By default, no output topic is set here. (You can set it in your deploy command if preferred.)
|
||||
output-topic: ""
|
||||
|
||||
# Indicates the specific TinyGo environment used to compile your transform.
|
||||
language: tinygo-no-goroutines
|
||||
|
||||
env:
|
||||
# The PATTERN variable must be provided at deploy time.
|
||||
# Example: --var=PATTERN=".*@example\.com" (the dot is escaped so it matches a literal ".")
|
||||
PATTERN: '<required>'
|
||||
Reference in New Issue
Block a user