redpanda using docker-compose

This commit is contained in:
priyanshu111299
2025-10-06 19:52:00 +00:00
parent e003bc130a
commit ff812ab021
108 changed files with 7902 additions and 0 deletions

View File

@@ -0,0 +1,56 @@
# -----------------------------------------------------------------
# Bootstrap (initial) cluster properties for a Redpanda cluster.
# Several of the values below exist only to make the quickstart
# convenient for development and evaluation; do not carry them over
# into a production environment.
#
# Background on bootstrap files:
# https://docs.redpanda.com/current/deploy/deployment-option/self-hosted/manual/production/production-deployment/#configure-a-bootstrap-file
# -----------------------------------------------------------------

# --- Authentication ----------------------------------------------
# Require authentication on the Admin API.
# https://docs.redpanda.com/current/reference/properties/cluster-properties/#admin_api_require_auth
admin_api_require_auth: true
# Require SASL authentication on the Kafka API.
# https://docs.redpanda.com/current/reference/properties/cluster-properties/#enable_sasl
enable_sasl: true
# At least one superuser must exist so that further SASL users can be created.
# https://docs.redpanda.com/current/reference/properties/cluster-properties/#superusers
superusers:
  - superuser

# --- Topics and transforms ---------------------------------------
# Create a topic automatically the first time it is accessed.
# https://docs.redpanda.com/current/reference/properties/cluster-properties/#auto_create_topics_enabled
auto_create_topics_enabled: true
# Turn on data transforms.
# https://docs.redpanda.com/current/develop/data-transforms/how-transforms-work/
data_transforms_enabled: true

# --- Enterprise features -----------------------------------------
# Audit logging (enterprise feature).
# https://docs.redpanda.com/current/manage/audit-logging/
audit_enabled: true
# Tiered Storage (enterprise feature). The endpoint, credentials, region and
# bucket below match the MinIO service defined in the Compose file.
# https://docs.redpanda.com/current/manage/tiered-storage/
cloud_storage_enabled: true
cloud_storage_bucket: redpanda
cloud_storage_region: local
cloud_storage_api_endpoint: minio
cloud_storage_api_endpoint_port: 9000
cloud_storage_disable_tls: true
cloud_storage_access_key: minio
cloud_storage_secret_key: redpandaTieredStorage7
# Upload segments to Tiered Storage sooner than usual so the quickstart shows
# results quickly.
# https://docs.redpanda.com/current/reference/properties/object-storage-properties/#cloud_storage_segment_max_upload_interval_sec
cloud_storage_segment_max_upload_interval_sec: 60
# Continuous Data Balancing (enterprise feature): keeps watching node and rack
# availability and disk usage, and rebalances partitions accordingly.
# https://docs.redpanda.com/current/manage/cluster-maintenance/continuous-data-balancing/
partition_autobalancing_mode: continuous

# --- Metrics -----------------------------------------------------
# Consumer group metrics that Redpanda collects.
# https://docs.redpanda.com/current/reference/properties/cluster-properties/#enable_consumer_group_metrics
enable_consumer_group_metrics:
  - group
  - partition
  - consumer_lag
# Interval, in seconds, between consumer group lag collections.
# https://docs.redpanda.com/current/reference/properties/cluster-properties/#consumer_group_lag_collection_interval_sec
consumer_group_lag_collection_interval_sec: 60
# Collect host metrics as well.
# https://docs.redpanda.com/current/reference/properties/cluster-properties/#enable_host_metrics
enable_host_metrics: true

View File

@@ -0,0 +1,403 @@
# Compose project name.
name: redpanda-quickstart-multi-broker
# Named volumes (default settings): one data volume per broker plus one for
# MinIO.
volumes:
  redpanda-0: null
  redpanda-1: null
  redpanda-2: null
  minio: null
# Single bridge network that every service in this file attaches to.
networks:
  redpanda_network:
    driver: bridge
services:
####################
# Redpanda Brokers #
####################
# Broker 0. The other two brokers point their --seeds flag at this one, and it
# is the only broker with a healthcheck, so dependent services gate on it.
redpanda-0:
  image: docker.redpanda.com/redpandadata/redpanda:v25.2.7
  container_name: redpanda-0
  command:
    - redpanda
    - start
    # Kafka API listeners: "internal" for clients on the Docker network,
    # "external" for clients on the host.
    - --kafka-addr internal://0.0.0.0:9092,external://0.0.0.0:19092
    # Addresses handed back to Kafka clients. Clients inside the Docker
    # network use the internal address; clients outside use the external one.
    - --advertise-kafka-addr internal://redpanda-0:9092,external://localhost:19092
    # HTTP Proxy listeners and the addresses advertised to its clients.
    - --pandaproxy-addr internal://0.0.0.0:8082,external://0.0.0.0:18082
    - --advertise-pandaproxy-addr internal://redpanda-0:8082,external://localhost:18082
    # Schema Registry listeners.
    - --schema-registry-addr internal://0.0.0.0:8081,external://0.0.0.0:18081
    # RPC API: broker-to-broker communication.
    - --rpc-addr redpanda-0:33145
    - --advertise-rpc-addr redpanda-0:33145
    # dev-container mode applies well-known settings for container development.
    - --mode dev-container
    # Limit Seastar (the framework underneath Redpanda) to a single core.
    - --smp 1
    - --default-log-level=info
  # Username and password of the bootstrap SCRAM superuser.
  # See https://docs.redpanda.com/current/deploy/deployment-option/self-hosted/manual/production/production-deployment/#bootstrap-a-user-account
  environment:
    RP_BOOTSTRAP_USER: "superuser:secretpassword"
  volumes:
    - redpanda-0:/var/lib/redpanda/data
    - ./bootstrap.yml:/etc/redpanda/.bootstrap.yaml
  networks:
    - redpanda_network
  # Host:container port mappings, quoted so they are always read as strings.
  # 19644 on the host maps to the Admin API port 9644 in the container.
  ports:
    - "18081:18081"
    - "18082:18082"
    - "19092:19092"
    - "19644:9644"
  # Healthy once `rpk cluster info` succeeds with the superuser credentials.
  healthcheck:
    test:
      - CMD
      - rpk
      - cluster
      - info
      - "-X"
      - user=superuser
      - "-X"
      - pass=secretpassword
    interval: 10s
    timeout: 15s
    retries: 10
  # Start only after MinIO reports healthy.
  depends_on:
    minio:
      condition: service_healthy
# Broker 1. Same layout as redpanda-0, with host-facing ports in the 2xxxx
# range and redpanda-0 as its seed.
redpanda-1:
  image: docker.redpanda.com/redpandadata/redpanda:v25.2.7
  container_name: redpanda-1
  command:
    - redpanda
    - start
    # Kafka API listeners and advertised addresses.
    - --kafka-addr internal://0.0.0.0:9092,external://0.0.0.0:29092
    - --advertise-kafka-addr internal://redpanda-1:9092,external://localhost:29092
    # HTTP Proxy listeners and advertised addresses.
    - --pandaproxy-addr internal://0.0.0.0:8082,external://0.0.0.0:28082
    - --advertise-pandaproxy-addr internal://redpanda-1:8082,external://localhost:28082
    # Schema Registry listeners.
    - --schema-registry-addr internal://0.0.0.0:8081,external://0.0.0.0:28081
    # RPC API used between brokers.
    - --rpc-addr redpanda-1:33145
    - --advertise-rpc-addr redpanda-1:33145
    - --mode dev-container
    - --smp 1
    - --default-log-level=info
    # Seed server: the RPC address of redpanda-0.
    - --seeds redpanda-0:33145
  environment:
    RP_BOOTSTRAP_USER: "superuser:secretpassword"
  volumes:
    - redpanda-1:/var/lib/redpanda/data
    - ./bootstrap.yml:/etc/redpanda/.bootstrap.yaml
  networks:
    - redpanda_network
  # Host:container port mappings, quoted so they are always read as strings.
  ports:
    - "28081:28081"
    - "28082:28082"
    - "29092:29092"
    - "29644:9644"
  # Short-form depends_on: only start order is enforced, not health.
  depends_on:
    - redpanda-0
    - minio
# Broker 2. Same layout as redpanda-0, with host-facing ports in the 3xxxx
# range and redpanda-0 as its seed.
redpanda-2:
  image: docker.redpanda.com/redpandadata/redpanda:v25.2.7
  container_name: redpanda-2
  command:
    - redpanda
    - start
    # Kafka API listeners and advertised addresses.
    - --kafka-addr internal://0.0.0.0:9092,external://0.0.0.0:39092
    - --advertise-kafka-addr internal://redpanda-2:9092,external://localhost:39092
    # HTTP Proxy listeners and advertised addresses.
    - --pandaproxy-addr internal://0.0.0.0:8082,external://0.0.0.0:38082
    - --advertise-pandaproxy-addr internal://redpanda-2:8082,external://localhost:38082
    # Schema Registry listeners.
    - --schema-registry-addr internal://0.0.0.0:8081,external://0.0.0.0:38081
    # RPC API used between brokers.
    - --rpc-addr redpanda-2:33145
    - --advertise-rpc-addr redpanda-2:33145
    - --mode dev-container
    - --smp 1
    - --default-log-level=info
    # Seed server: the RPC address of redpanda-0.
    - --seeds redpanda-0:33145
  environment:
    RP_BOOTSTRAP_USER: "superuser:secretpassword"
  volumes:
    - redpanda-2:/var/lib/redpanda/data
    - ./bootstrap.yml:/etc/redpanda/.bootstrap.yaml
  networks:
    - redpanda_network
  # Host:container port mappings, quoted so they are always read as strings.
  ports:
    - "38081:38081"
    - "38082:38082"
    - "39092:39092"
    - "39644:9644"
  # Short-form depends_on: only start order is enforced, not health.
  depends_on:
    - redpanda-0
    - minio
####################
# Redpanda Console #
####################
# Web UI for the cluster, published on host port 8080. Startup is gated by the
# depends_on entries at the end of this service: redpanda-0 must be healthy and
# the createtopic, registerschema and deploytransform jobs must have completed
# successfully.
console:
container_name: redpanda-console
image: docker.redpanda.com/redpandadata/console:v3.2.2
networks:
- redpanda_network
# The entrypoint is replaced with a shell so that the command below can write
# the CONSOLE_CONFIG_FILE environment variable to /tmp/config.yml and then
# start /app/console. "$$" is Compose's escape for a literal "$", so the
# variable is expanded by the shell inside the container, not by Compose.
entrypoint: /bin/sh
command: -c 'echo "$$CONSOLE_CONFIG_FILE" > /tmp/config.yml && /app/console'
# Bind-mounts the local ./config directory at /tmp/config/ in the container.
volumes:
- ./config:/tmp/config/
environment:
# Defaults to /tmp/config.yml (the file written by the command above) unless
# CONFIG_FILEPATH is set and non-empty in the environment Compose runs in.
# NOTE(review): presumably the path Console loads its config from - confirm.
CONFIG_FILEPATH: ${CONFIG_FILEPATH:-/tmp/config.yml}
# Inline Console configuration. Everything in the literal block below,
# including its "#" lines, is part of the string value written to the file.
CONSOLE_CONFIG_FILE: |
# Configure a connection to the Redpanda cluster
# See https://docs.redpanda.com/current/console/config/connect-to-redpanda/
kafka:
brokers: ["redpanda-0:9092"]
sasl:
enabled: true
impersonateUser: true
schemaRegistry:
enabled: true
urls: ["http://redpanda-0:8081","http://redpanda-1:8081","http://redpanda-2:8081"]
authentication:
impersonateUser: true
redpanda:
adminApi:
enabled: true
urls: ["http://redpanda-0:9644","http://redpanda-1:9644","http://redpanda-2:9644"]
authentication:
basic:
username: superuser
password: secretpassword
impersonateUser: false
console:
# Configures Redpanda Console to fetch topic documentation from GitHub and display it in the UI.
# See https://docs.redpanda.com/current/console/config/topic-documentation/
topicDocumentation:
enabled: true
git:
enabled: true
repository:
url: https://github.com/redpanda-data/docs
branch: main
baseDirectory: tests/docker-compose
authentication:
jwtSigningKey: vazxnT+ZHtxKslK6QlDGovcYnSjTk/lKMmZ+mHrBVE+YdVDkLgSuP6AszAKe9Gvq
basic:
enabled: true
authorization:
roleBindings:
- roleName: admin
users:
- loginType: basic
name: superuser
ports:
- 8080:8080
depends_on:
redpanda-0:
condition: service_healthy
createtopic:
condition: service_completed_successfully
registerschema:
condition: service_completed_successfully
deploytransform:
condition: service_completed_successfully
####################
# Redpanda Connect #
####################
# Runs a Redpanda Connect pipeline whose configuration is supplied inline
# through the CONNECT_CFG_FILE environment variable.
connect:
container_name: redpanda-connect
# NOTE(review): no tag is given, so this resolves to the registry's default
# tag; every other image in this file is pinned to a version.
image: docker.redpanda.com/redpandadata/connect
networks:
- redpanda_network
# The entrypoint is replaced with a shell so that the command below can write
# CONNECT_CFG_FILE to /tmp/connect.yml and then start /redpanda-connect with it.
entrypoint: /bin/sh
# Start only after redpanda-0 reports healthy.
depends_on:
redpanda-0:
condition: service_healthy
command: -c 'echo "$$CONNECT_CFG_FILE" > /tmp/connect.yml; /redpanda-connect -c /tmp/connect.yml'
environment:
# This Redpanda Connect configuration creates fake data,
# processes it, and writes the output to a set of topics.
#
# Every "$$" below is Compose's escape for a literal "$": the container
# receives "$first_name", "${! metadata("topic") }" and so on.
#
# Input:
# - Uses Redpanda Connect's generate input to generate fake data.
# See https://docs.redpanda.com/redpanda-connect/components/inputs/generate/
# Pipeline:
# - Bloblang mapping that turns each input into a batch of 2-4 copies:
# the first message goes to the 'logins' topic and the remaining 1-3
# messages go to the 'transactions' topic.
# - Unarchive processor to parse the JSON array and extract each
# element into its own message.
# See https://docs.redpanda.com/redpanda-connect/guides/bloblang/about/
# Output:
# - kafka_franz output to write the messages to the Redpanda brokers.
# See https://docs.redpanda.com/redpanda-connect/components/outputs/kafka_franz/
CONNECT_CFG_FILE: |
input:
generate:
interval: 1s
mapping: |
let first_name = fake("first_name")
let last_name = fake("last_name")
root.user_id = counter()
root.name = $$first_name + " " + $$last_name
root.email = ($$first_name.slice(0,1) + $$last_name + "@" + fake("domain_name")).lowercase()
root.ip = fake("ipv4")
root.login_time = now()
pipeline:
processors:
- mapping: |
root = range(0, random_int(min:2, max:4)).map_each(cust -> this)
- unarchive:
format: "json_array"
- mapping: |
if batch_index() == 0 {
meta topic = "logins"
root = this
} else {
meta topic = "transactions"
root.user_id = this.user_id
root.email = this.email
root.index = batch_index() - 1
root.product_url = fake("url")
root.price = fake("amount_with_currency")
root.timestamp = now()
}
output:
kafka_franz:
seed_brokers: [ "redpanda-0:9092" ]
topic: $${! metadata("topic") }
sasl:
- mechanism: SCRAM-SHA-256
password: secretpassword
username: superuser
##########################################################
# rpk job: create the edu-filtered-domains topic
# See https://docs.redpanda.com/current/reference/rpk/rpk-topic/rpk-topic-create/
##########################################################
createtopic:
  image: docker.redpanda.com/redpandadata/redpanda:v25.2.7
  networks:
    - redpanda_network
  # Wait until redpanda-0 passes its healthcheck.
  depends_on:
    redpanda-0:
      condition: service_healthy
  # Arguments for `topic create`, authenticating as the superuser against
  # the internal Kafka listener of redpanda-0.
  command:
    - topic
    - create
    - edu-filtered-domains
    - -X user=superuser
    - -X pass=secretpassword
    - -X brokers=redpanda-0:9092
##########################################################
# rpk job: register the transactions schema
# See https://docs.redpanda.com/current/manage/schema-reg/schema-reg-api/
##########################################################
registerschema:
  image: docker.redpanda.com/redpandadata/redpanda:v25.2.7
  networks:
    - redpanda_network
  # Bind-mount the local schema file to the path passed to --schema below.
  volumes:
    - ./transactions-schema.json:/etc/redpanda/transactions-schema.json
  # Wait until redpanda-0 passes its healthcheck.
  depends_on:
    redpanda-0:
      condition: service_healthy
  # Arguments for `registry schema create transactions`, sent to the Schema
  # Registry on redpanda-0 with the superuser credentials.
  command:
    - registry
    - schema
    - create
    - transactions
    - --schema
    - /etc/redpanda/transactions-schema.json
    - -X user=superuser
    - -X pass=secretpassword
    - -X registry.hosts=redpanda-0:8081
##########################################################
# rpk container that consumes as a consumer group
# See https://docs.redpanda.com/current/reference/rpk/rpk-topic/rpk-topic-consume/
##########################################################
consumergroup:
  image: docker.redpanda.com/redpandadata/redpanda:v25.2.7
  networks:
    - redpanda_network
  # Start only after the topic-creation and transform-deployment jobs have
  # both finished successfully.
  depends_on:
    createtopic:
      condition: service_completed_successfully
    deploytransform:
      condition: service_completed_successfully
  # Arguments for `topic consume`: read the transactions topic as consumer
  # group transactions-consumer, authenticating as the superuser.
  command:
    - topic
    - consume
    - transactions
    - --group
    - transactions-consumer
    - -X user=superuser
    - -X pass=secretpassword
    - -X brokers=redpanda-0:9092
##########################################################
# rpk job: deploy the pre-built data transform
# See https://docs.redpanda.com/current/develop/data-transforms/deploy/
##########################################################
deploytransform:
  image: docker.redpanda.com/redpandadata/redpanda:v25.2.7
  networks:
    - redpanda_network
  # Bind-mount the local WebAssembly binary to the path passed to --file below.
  volumes:
    - ./transform/regex.wasm:/etc/redpanda/regex.wasm
  # The output topic must exist first, so wait for createtopic to succeed.
  depends_on:
    createtopic:
      condition: service_completed_successfully
  # Arguments for `transform deploy`: a transform named "regex" that reads
  # from logins and writes to edu-filtered-domains. PATTERN and MATCH_VALUE
  # are passed to the transform through --var; the request goes to the Admin
  # API on redpanda-0.
  command:
    - transform
    - deploy
    - --file=/etc/redpanda/regex.wasm
    - --name=regex
    - --input-topic=logins
    - --output-topic=edu-filtered-domains
    - --var=PATTERN="[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.edu"
    - --var=MATCH_VALUE=true
    - -X user=superuser
    - -X pass=secretpassword
    - -X admin.hosts=redpanda-0:9644
##########################################################
# MinIO: object storage backing Tiered Storage
# See https://min.io/
#
# NOTE: MinIO is part of this quickstart for development and evaluation only.
# It is not supported for production deployments of Redpanda.
#
# In production, use one of the supported object storage providers:
# https://docs.redpanda.com/current/deploy/deployment-option/self-hosted/manual/production/requirements/#object-storage-providers-for-tiered-storage
##########################################################
minio:
  image: minio/minio:RELEASE.2025-05-24T17-08-30Z
  container_name: minio
  # Serve /data and expose the MinIO console on port 9001.
  command: server --console-address ":9001" /data
  # Root credentials and region; the same values appear in the Redpanda
  # bootstrap file's cloud_storage_* settings.
  environment:
    MINIO_ROOT_USER: minio
    MINIO_ROOT_PASSWORD: redpandaTieredStorage7
    MINIO_SERVER_URL: "http://minio:9000"
    MINIO_REGION_NAME: local
    MINIO_DOMAIN: minio
  volumes:
    - minio:/data
  # Also reachable on the network under the alias redpanda.minio.
  networks:
    redpanda_network:
      aliases:
        - redpanda.minio
  # Host:container port mappings, quoted so they are always read as strings.
  ports:
    - "9000:9000"
    - "9001:9001"
  # Healthy once the readiness endpoint answers successfully.
  healthcheck:
    test:
      - CMD
      - curl
      - "-f"
      - http://localhost:9000/minio/health/ready
    interval: 10s
    timeout: 5s
    retries: 3
# MinIO client helper: once MinIO is healthy, registers an alias for it,
# creates the "redpanda" bucket and makes that bucket publicly accessible,
# then idles so the container stays up.
mc:
  image: minio/mc:RELEASE.2025-05-21T01-59-54Z
  container_name: mc
  depends_on:
    minio:
      condition: service_healthy
  networks:
    - redpanda_network
  environment:
    - AWS_ACCESS_KEY_ID=minio
    - AWS_SECRET_ACCESS_KEY=redpandaTieredStorage7
    - AWS_REGION=local
  # The folded scalar below collapses into a single `/bin/sh -c "..."` line.
  # Steps, in order:
  #   1. retry `mc alias set` once per second until MinIO accepts it;
  #   2. create the bucket; --ignore-existing keeps a restart from failing
  #      when the bucket is already there;
  #   3. allow anonymous access with `mc anonymous set`, which replaces the
  #      legacy `mc policy set` subcommand in current mc releases;
  #   4. `tail -f /dev/null` keeps the container running.
  entrypoint: >
    /bin/sh -c "
    until (/usr/bin/mc alias set minio http://minio:9000 minio redpandaTieredStorage7) do echo '...waiting...' && sleep 1; done;
    /usr/bin/mc mb --ignore-existing minio/redpanda;
    /usr/bin/mc anonymous set public minio/redpanda;
    tail -f /dev/null
    "

View File

@@ -0,0 +1,77 @@
# Redpanda Connect pipeline: generates fake user profiles once per second and
# writes them to the 'profiles' topic.
input:
  # 'generate' input
  # https://docs.redpanda.com/redpanda-connect/components/inputs/generate/
  generate:
    # How often a new record is produced.
    interval: 1s
    # Bloblang mapping that shapes each generated record.
    # https://docs.redpanda.com/redpanda-connect/guides/bloblang/about/
    mapping: |
      # Generate a fake first name using the 'first_name' faker function.
      let first_name = fake("first_name")
      # Generate a fake last name using the 'last_name' faker function.
      let last_name = fake("last_name")
      # Define possible subscription levels for users.
      let subscription_levels = ["Free", "Basic", "Premium"]
      # Define possible notification channels for user preferences.
      let notifications = ["email", "sms", "push" ]
      # Define supported languages for user preferences.
      let languages = ["en", "es", "fr", "de", "zh", "jp"]
      # Assign a unique user ID using a UUID digit generator.
      root.user_id = fake("uuid_digit")
      # Assign the generated first name to the 'first_name' field.
      root.first_name = $first_name
      # Assign the generated last name to the 'last_name' field.
      root.last_name = $last_name
      # Construct the user's email by combining the first initial, last name, and a fake domain name.
      # The email is converted to lowercase for consistency.
      root.email = ($first_name.slice(0,1) + $last_name + "@" + fake("domain_name")).lowercase()
      # Assign a fake registration date using the 'date' faker function.
      root.registration_date = fake("date")
      # Assign the current timestamp as the last login time.
      root.last_login = now()
      # Randomly assign a subscription level by selecting an index from the 'subscription_levels' array.
      root.subscription_level = $subscription_levels.index(random_int(min: 0, max: 2))
      # Randomly assign a language preference by selecting an index from the 'languages' array.
      root.preferences.language = $languages.index(random_int(min: 0, max: 5))
      # Randomly assign a notification preference by selecting an index from the 'notifications' array.
      root.preferences.notifications = $notifications.index(random_int(min: 0, max: 2))
pipeline:
  processors:
    - mapping: |
        # Set the target topic for the generated records to 'profiles'.
        meta topic = "profiles"
        # Assign the entire record (root) to be sent to the specified topic.
        root = this
output:
  # 'kafka_franz' output: sends the records to Redpanda.
  # https://docs.redpanda.com/redpanda-connect/components/outputs/kafka_franz/
  kafka_franz:
    # Seed brokers: the external (host-facing) Kafka listeners of the three
    # brokers.
    seed_brokers:
      - "localhost:19092"
      - "localhost:29092"
      - "localhost:39092"
    # Topic taken from the 'topic' metadata set in the pipeline mapping,
    # which is always 'profiles' here.
    topic: '${! metadata("topic") }'
    # SASL credentials used to authenticate against the brokers.
    sasl:
      - mechanism: SCRAM-SHA-256
        username: superuser
        password: secretpassword

View File

@@ -0,0 +1,56 @@
# -----------------------------------------------------------------
# Bootstrap (initial) cluster properties for a Redpanda cluster.
# Several of the values below exist only to make the quickstart
# convenient for development and evaluation; do not carry them over
# into a production environment.
#
# Background on bootstrap files:
# https://docs.redpanda.com/current/deploy/deployment-option/self-hosted/manual/production/production-deployment/#configure-a-bootstrap-file
# -----------------------------------------------------------------

# --- Authentication ----------------------------------------------
# Require authentication on the Admin API.
# https://docs.redpanda.com/current/reference/properties/cluster-properties/#admin_api_require_auth
admin_api_require_auth: true
# Require SASL authentication on the Kafka API.
# https://docs.redpanda.com/current/reference/properties/cluster-properties/#enable_sasl
enable_sasl: true
# At least one superuser must exist so that further SASL users can be created.
# https://docs.redpanda.com/current/reference/properties/cluster-properties/#superusers
superusers:
  - superuser

# --- Topics and transforms ---------------------------------------
# Create a topic automatically the first time it is accessed.
# https://docs.redpanda.com/current/reference/properties/cluster-properties/#auto_create_topics_enabled
auto_create_topics_enabled: true
# Turn on data transforms.
# https://docs.redpanda.com/current/develop/data-transforms/how-transforms-work/
data_transforms_enabled: true

# --- Enterprise features -----------------------------------------
# Audit logging (enterprise feature).
# https://docs.redpanda.com/current/manage/audit-logging/
audit_enabled: true
# Tiered Storage (enterprise feature). The endpoint, credentials, region and
# bucket below match the MinIO service defined in the Compose file.
# https://docs.redpanda.com/current/manage/tiered-storage/
cloud_storage_enabled: true
cloud_storage_bucket: redpanda
cloud_storage_region: local
cloud_storage_api_endpoint: minio
cloud_storage_api_endpoint_port: 9000
cloud_storage_disable_tls: true
cloud_storage_access_key: minio
cloud_storage_secret_key: redpandaTieredStorage7
# Upload segments to Tiered Storage sooner than usual so the quickstart shows
# results quickly.
# https://docs.redpanda.com/current/reference/properties/object-storage-properties/#cloud_storage_segment_max_upload_interval_sec
cloud_storage_segment_max_upload_interval_sec: 60
# Continuous Data Balancing (enterprise feature): keeps watching node and rack
# availability and disk usage, and rebalances partitions accordingly.
# https://docs.redpanda.com/current/manage/cluster-maintenance/continuous-data-balancing/
partition_autobalancing_mode: continuous

# --- Metrics -----------------------------------------------------
# Consumer group metrics that Redpanda collects.
# https://docs.redpanda.com/current/reference/properties/cluster-properties/#enable_consumer_group_metrics
enable_consumer_group_metrics:
  - group
  - partition
  - consumer_lag
# Interval, in seconds, between consumer group lag collections.
# https://docs.redpanda.com/current/reference/properties/cluster-properties/#consumer_group_lag_collection_interval_sec
consumer_group_lag_collection_interval_sec: 60
# Collect host metrics as well.
# https://docs.redpanda.com/current/reference/properties/cluster-properties/#enable_host_metrics
enable_host_metrics: true

View File

@@ -0,0 +1,403 @@
# Compose project name.
name: redpanda-quickstart-multi-broker
# Named volumes (default settings): one data volume per broker plus one for
# MinIO.
volumes:
  redpanda-0: null
  redpanda-1: null
  redpanda-2: null
  minio: null
# Single bridge network that every service in this file attaches to.
networks:
  redpanda_network:
    driver: bridge
services:
####################
# Redpanda Brokers #
####################
# Broker 0. The other two brokers point their --seeds flag at this one, and it
# is the only broker with a healthcheck, so dependent services gate on it.
redpanda-0:
  image: docker.redpanda.com/redpandadata/redpanda:v25.2.7
  container_name: redpanda-0
  command:
    - redpanda
    - start
    # Kafka API listeners: "internal" for clients on the Docker network,
    # "external" for clients on the host.
    - --kafka-addr internal://0.0.0.0:9092,external://0.0.0.0:19092
    # Addresses handed back to Kafka clients. Clients inside the Docker
    # network use the internal address; clients outside use the external one.
    - --advertise-kafka-addr internal://redpanda-0:9092,external://localhost:19092
    # HTTP Proxy listeners and the addresses advertised to its clients.
    - --pandaproxy-addr internal://0.0.0.0:8082,external://0.0.0.0:18082
    - --advertise-pandaproxy-addr internal://redpanda-0:8082,external://localhost:18082
    # Schema Registry listeners.
    - --schema-registry-addr internal://0.0.0.0:8081,external://0.0.0.0:18081
    # RPC API: broker-to-broker communication.
    - --rpc-addr redpanda-0:33145
    - --advertise-rpc-addr redpanda-0:33145
    # dev-container mode applies well-known settings for container development.
    - --mode dev-container
    # Limit Seastar (the framework underneath Redpanda) to a single core.
    - --smp 1
    - --default-log-level=info
  # Username and password of the bootstrap SCRAM superuser.
  # See https://docs.redpanda.com/current/deploy/deployment-option/self-hosted/manual/production/production-deployment/#bootstrap-a-user-account
  environment:
    RP_BOOTSTRAP_USER: "superuser:secretpassword"
  volumes:
    - redpanda-0:/var/lib/redpanda/data
    - ./bootstrap.yml:/etc/redpanda/.bootstrap.yaml
  networks:
    - redpanda_network
  # Host:container port mappings, quoted so they are always read as strings.
  # 19644 on the host maps to the Admin API port 9644 in the container.
  ports:
    - "18081:18081"
    - "18082:18082"
    - "19092:19092"
    - "19644:9644"
  # Healthy once `rpk cluster info` succeeds with the superuser credentials.
  healthcheck:
    test:
      - CMD
      - rpk
      - cluster
      - info
      - "-X"
      - user=superuser
      - "-X"
      - pass=secretpassword
    interval: 10s
    timeout: 15s
    retries: 10
  # Start only after MinIO reports healthy.
  depends_on:
    minio:
      condition: service_healthy
# Broker 1. Same layout as redpanda-0, with host-facing ports in the 2xxxx
# range and redpanda-0 as its seed.
redpanda-1:
  image: docker.redpanda.com/redpandadata/redpanda:v25.2.7
  container_name: redpanda-1
  command:
    - redpanda
    - start
    # Kafka API listeners and advertised addresses.
    - --kafka-addr internal://0.0.0.0:9092,external://0.0.0.0:29092
    - --advertise-kafka-addr internal://redpanda-1:9092,external://localhost:29092
    # HTTP Proxy listeners and advertised addresses.
    - --pandaproxy-addr internal://0.0.0.0:8082,external://0.0.0.0:28082
    - --advertise-pandaproxy-addr internal://redpanda-1:8082,external://localhost:28082
    # Schema Registry listeners.
    - --schema-registry-addr internal://0.0.0.0:8081,external://0.0.0.0:28081
    # RPC API used between brokers.
    - --rpc-addr redpanda-1:33145
    - --advertise-rpc-addr redpanda-1:33145
    - --mode dev-container
    - --smp 1
    - --default-log-level=info
    # Seed server: the RPC address of redpanda-0.
    - --seeds redpanda-0:33145
  environment:
    RP_BOOTSTRAP_USER: "superuser:secretpassword"
  volumes:
    - redpanda-1:/var/lib/redpanda/data
    - ./bootstrap.yml:/etc/redpanda/.bootstrap.yaml
  networks:
    - redpanda_network
  # Host:container port mappings, quoted so they are always read as strings.
  ports:
    - "28081:28081"
    - "28082:28082"
    - "29092:29092"
    - "29644:9644"
  # Short-form depends_on: only start order is enforced, not health.
  depends_on:
    - redpanda-0
    - minio
# Broker 2. Same layout as redpanda-0, with host-facing ports in the 3xxxx
# range and redpanda-0 as its seed.
redpanda-2:
  image: docker.redpanda.com/redpandadata/redpanda:v25.2.7
  container_name: redpanda-2
  command:
    - redpanda
    - start
    # Kafka API listeners and advertised addresses.
    - --kafka-addr internal://0.0.0.0:9092,external://0.0.0.0:39092
    - --advertise-kafka-addr internal://redpanda-2:9092,external://localhost:39092
    # HTTP Proxy listeners and advertised addresses.
    - --pandaproxy-addr internal://0.0.0.0:8082,external://0.0.0.0:38082
    - --advertise-pandaproxy-addr internal://redpanda-2:8082,external://localhost:38082
    # Schema Registry listeners.
    - --schema-registry-addr internal://0.0.0.0:8081,external://0.0.0.0:38081
    # RPC API used between brokers.
    - --rpc-addr redpanda-2:33145
    - --advertise-rpc-addr redpanda-2:33145
    - --mode dev-container
    - --smp 1
    - --default-log-level=info
    # Seed server: the RPC address of redpanda-0.
    - --seeds redpanda-0:33145
  environment:
    RP_BOOTSTRAP_USER: "superuser:secretpassword"
  volumes:
    - redpanda-2:/var/lib/redpanda/data
    - ./bootstrap.yml:/etc/redpanda/.bootstrap.yaml
  networks:
    - redpanda_network
  # Host:container port mappings, quoted so they are always read as strings.
  ports:
    - "38081:38081"
    - "38082:38082"
    - "39092:39092"
    - "39644:9644"
  # Short-form depends_on: only start order is enforced, not health.
  depends_on:
    - redpanda-0
    - minio
####################
# Redpanda Console #
####################
# Web UI for the cluster, published on host port 8080. Startup is gated by the
# depends_on entries at the end of this service: redpanda-0 must be healthy and
# the createtopic, registerschema and deploytransform jobs must have completed
# successfully.
console:
container_name: redpanda-console
image: docker.redpanda.com/redpandadata/console:v3.2.2
networks:
- redpanda_network
# The entrypoint is replaced with a shell so that the command below can write
# the CONSOLE_CONFIG_FILE environment variable to /tmp/config.yml and then
# start /app/console. "$$" is Compose's escape for a literal "$", so the
# variable is expanded by the shell inside the container, not by Compose.
entrypoint: /bin/sh
command: -c 'echo "$$CONSOLE_CONFIG_FILE" > /tmp/config.yml && /app/console'
# Bind-mounts the local ./config directory at /tmp/config/ in the container.
volumes:
- ./config:/tmp/config/
environment:
# Defaults to /tmp/config.yml (the file written by the command above) unless
# CONFIG_FILEPATH is set and non-empty in the environment Compose runs in.
# NOTE(review): presumably the path Console loads its config from - confirm.
CONFIG_FILEPATH: ${CONFIG_FILEPATH:-/tmp/config.yml}
# Inline Console configuration. Everything in the literal block below,
# including its "#" lines, is part of the string value written to the file.
CONSOLE_CONFIG_FILE: |
# Configure a connection to the Redpanda cluster
# See https://docs.redpanda.com/current/console/config/connect-to-redpanda/
kafka:
brokers: ["redpanda-0:9092"]
sasl:
enabled: true
impersonateUser: true
schemaRegistry:
enabled: true
urls: ["http://redpanda-0:8081","http://redpanda-1:8081","http://redpanda-2:8081"]
authentication:
impersonateUser: true
redpanda:
adminApi:
enabled: true
urls: ["http://redpanda-0:9644","http://redpanda-1:9644","http://redpanda-2:9644"]
authentication:
basic:
username: superuser
password: secretpassword
impersonateUser: false
console:
# Configures Redpanda Console to fetch topic documentation from GitHub and display it in the UI.
# See https://docs.redpanda.com/current/console/config/topic-documentation/
topicDocumentation:
enabled: true
git:
enabled: true
repository:
url: https://github.com/redpanda-data/docs
branch: main
baseDirectory: tests/docker-compose
authentication:
jwtSigningKey: vazxnT+ZHtxKslK6QlDGovcYnSjTk/lKMmZ+mHrBVE+YdVDkLgSuP6AszAKe9Gvq
basic:
enabled: true
authorization:
roleBindings:
- roleName: admin
users:
- loginType: basic
name: superuser
ports:
- 8080:8080
depends_on:
redpanda-0:
condition: service_healthy
createtopic:
condition: service_completed_successfully
registerschema:
condition: service_completed_successfully
deploytransform:
condition: service_completed_successfully
####################
# Redpanda Connect #
####################
# Runs a Redpanda Connect pipeline whose configuration is supplied inline
# through the CONNECT_CFG_FILE environment variable.
connect:
container_name: redpanda-connect
# NOTE(review): no tag is given, so this resolves to the registry's default
# tag; every other image in this file is pinned to a version.
image: docker.redpanda.com/redpandadata/connect
networks:
- redpanda_network
# The entrypoint is replaced with a shell so that the command below can write
# CONNECT_CFG_FILE to /tmp/connect.yml and then start /redpanda-connect with it.
entrypoint: /bin/sh
# Start only after redpanda-0 reports healthy.
depends_on:
redpanda-0:
condition: service_healthy
command: -c 'echo "$$CONNECT_CFG_FILE" > /tmp/connect.yml; /redpanda-connect -c /tmp/connect.yml'
environment:
# This Redpanda Connect configuration creates fake data,
# processes it, and writes the output to a set of topics.
#
# Every "$$" below is Compose's escape for a literal "$": the container
# receives "$first_name", "${! metadata("topic") }" and so on.
#
# Input:
# - Uses Redpanda Connect's generate input to generate fake data.
# See https://docs.redpanda.com/redpanda-connect/components/inputs/generate/
# Pipeline:
# - Bloblang mapping that turns each input into a batch of 2-4 copies:
# the first message goes to the 'logins' topic and the remaining 1-3
# messages go to the 'transactions' topic.
# - Unarchive processor to parse the JSON array and extract each
# element into its own message.
# See https://docs.redpanda.com/redpanda-connect/guides/bloblang/about/
# Output:
# - kafka_franz output to write the messages to the Redpanda brokers.
# See https://docs.redpanda.com/redpanda-connect/components/outputs/kafka_franz/
CONNECT_CFG_FILE: |
input:
generate:
interval: 1s
mapping: |
let first_name = fake("first_name")
let last_name = fake("last_name")
root.user_id = counter()
root.name = $$first_name + " " + $$last_name
root.email = ($$first_name.slice(0,1) + $$last_name + "@" + fake("domain_name")).lowercase()
root.ip = fake("ipv4")
root.login_time = now()
pipeline:
processors:
- mapping: |
root = range(0, random_int(min:2, max:4)).map_each(cust -> this)
- unarchive:
format: "json_array"
- mapping: |
if batch_index() == 0 {
meta topic = "logins"
root = this
} else {
meta topic = "transactions"
root.user_id = this.user_id
root.email = this.email
root.index = batch_index() - 1
root.product_url = fake("url")
root.price = fake("amount_with_currency")
root.timestamp = now()
}
output:
kafka_franz:
seed_brokers: [ "redpanda-0:9092" ]
topic: $${! metadata("topic") }
sasl:
- mechanism: SCRAM-SHA-256
password: secretpassword
username: superuser
##########################################################
# rpk job: create the edu-filtered-domains topic
# See https://docs.redpanda.com/current/reference/rpk/rpk-topic/rpk-topic-create/
##########################################################
createtopic:
  image: docker.redpanda.com/redpandadata/redpanda:v25.2.7
  networks:
    - redpanda_network
  # Wait until redpanda-0 passes its healthcheck.
  depends_on:
    redpanda-0:
      condition: service_healthy
  # Arguments for `topic create`, authenticating as the superuser against
  # the internal Kafka listener of redpanda-0.
  command:
    - topic
    - create
    - edu-filtered-domains
    - -X user=superuser
    - -X pass=secretpassword
    - -X brokers=redpanda-0:9092
##########################################################
# rpk job: register the transactions schema
# See https://docs.redpanda.com/current/manage/schema-reg/schema-reg-api/
##########################################################
registerschema:
  image: docker.redpanda.com/redpandadata/redpanda:v25.2.7
  networks:
    - redpanda_network
  # Bind-mount the local schema file to the path passed to --schema below.
  volumes:
    - ./transactions-schema.json:/etc/redpanda/transactions-schema.json
  # Wait until redpanda-0 passes its healthcheck.
  depends_on:
    redpanda-0:
      condition: service_healthy
  # Arguments for `registry schema create transactions`, sent to the Schema
  # Registry on redpanda-0 with the superuser credentials.
  command:
    - registry
    - schema
    - create
    - transactions
    - --schema
    - /etc/redpanda/transactions-schema.json
    - -X user=superuser
    - -X pass=secretpassword
    - -X registry.hosts=redpanda-0:8081
##########################################################
# rpk container that consumes as a consumer group
# See https://docs.redpanda.com/current/reference/rpk/rpk-topic/rpk-topic-consume/
##########################################################
consumergroup:
  image: docker.redpanda.com/redpandadata/redpanda:v25.2.7
  networks:
    - redpanda_network
  # Start only after the topic-creation and transform-deployment jobs have
  # both finished successfully.
  depends_on:
    createtopic:
      condition: service_completed_successfully
    deploytransform:
      condition: service_completed_successfully
  # Arguments for `topic consume`: read the transactions topic as consumer
  # group transactions-consumer, authenticating as the superuser.
  command:
    - topic
    - consume
    - transactions
    - --group
    - transactions-consumer
    - -X user=superuser
    - -X pass=secretpassword
    - -X brokers=redpanda-0:9092
##########################################################
# rpk job: deploy the pre-built data transform
# See https://docs.redpanda.com/current/develop/data-transforms/deploy/
##########################################################
deploytransform:
  image: docker.redpanda.com/redpandadata/redpanda:v25.2.7
  networks:
    - redpanda_network
  # Bind-mount the local WebAssembly binary to the path passed to --file below.
  volumes:
    - ./transform/regex.wasm:/etc/redpanda/regex.wasm
  # The output topic must exist first, so wait for createtopic to succeed.
  depends_on:
    createtopic:
      condition: service_completed_successfully
  # Arguments for `transform deploy`: a transform named "regex" that reads
  # from logins and writes to edu-filtered-domains. PATTERN and MATCH_VALUE
  # are passed to the transform through --var; the request goes to the Admin
  # API on redpanda-0.
  command:
    - transform
    - deploy
    - --file=/etc/redpanda/regex.wasm
    - --name=regex
    - --input-topic=logins
    - --output-topic=edu-filtered-domains
    - --var=PATTERN="[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.edu"
    - --var=MATCH_VALUE=true
    - -X user=superuser
    - -X pass=secretpassword
    - -X admin.hosts=redpanda-0:9644
##########################################################
# MinIO: object storage backing Tiered Storage
# See https://min.io/
#
# NOTE: MinIO is part of this quickstart for development and evaluation only.
# It is not supported for production deployments of Redpanda.
#
# In production, use one of the supported object storage providers:
# https://docs.redpanda.com/current/deploy/deployment-option/self-hosted/manual/production/requirements/#object-storage-providers-for-tiered-storage
##########################################################
minio:
  image: minio/minio:RELEASE.2025-05-24T17-08-30Z
  container_name: minio
  # Serve /data and expose the MinIO console on port 9001.
  command: server --console-address ":9001" /data
  # Root credentials and region; the same values appear in the Redpanda
  # bootstrap file's cloud_storage_* settings.
  environment:
    MINIO_ROOT_USER: minio
    MINIO_ROOT_PASSWORD: redpandaTieredStorage7
    MINIO_SERVER_URL: "http://minio:9000"
    MINIO_REGION_NAME: local
    MINIO_DOMAIN: minio
  volumes:
    - minio:/data
  # Also reachable on the network under the alias redpanda.minio.
  networks:
    redpanda_network:
      aliases:
        - redpanda.minio
  # Host:container port mappings, quoted so they are always read as strings.
  ports:
    - "9000:9000"
    - "9001:9001"
  # Healthy once the readiness endpoint answers successfully.
  healthcheck:
    test:
      - CMD
      - curl
      - "-f"
      - http://localhost:9000/minio/health/ready
    interval: 10s
    timeout: 5s
    retries: 3
# MinIO client helper: once MinIO is healthy, registers an alias for it,
# creates the "redpanda" bucket and makes that bucket publicly accessible,
# then idles so the container stays up.
mc:
  image: minio/mc:RELEASE.2025-05-21T01-59-54Z
  container_name: mc
  depends_on:
    minio:
      condition: service_healthy
  networks:
    - redpanda_network
  environment:
    - AWS_ACCESS_KEY_ID=minio
    - AWS_SECRET_ACCESS_KEY=redpandaTieredStorage7
    - AWS_REGION=local
  # The folded scalar below collapses into a single `/bin/sh -c "..."` line.
  # Steps, in order:
  #   1. retry `mc alias set` once per second until MinIO accepts it;
  #   2. create the bucket; --ignore-existing keeps a restart from failing
  #      when the bucket is already there;
  #   3. allow anonymous access with `mc anonymous set`, which replaces the
  #      legacy `mc policy set` subcommand in current mc releases;
  #   4. `tail -f /dev/null` keeps the container running.
  entrypoint: >
    /bin/sh -c "
    until (/usr/bin/mc alias set minio http://minio:9000 minio redpandaTieredStorage7) do echo '...waiting...' && sleep 1; done;
    /usr/bin/mc mb --ignore-existing minio/redpanda;
    /usr/bin/mc anonymous set public minio/redpanda;
    tail -f /dev/null
    "

View File

@@ -0,0 +1,77 @@
# Redpanda Connect pipeline: generates one fake user-profile record per
# second and writes it to the 'profiles' topic.
input:
# Use the 'generate' input to synthesize records instead of reading them from a source.
# https://docs.redpanda.com/redpanda-connect/components/inputs/generate/
generate:
# The interval at which new records are generated (one record per second here).
interval: 1s
# The mapping section defines how each generated record is structured.
# The language used here is called Bloblang.
# https://docs.redpanda.com/redpanda-connect/guides/bloblang/about/
mapping: |
# Generate a fake first name using the 'first_name' faker function.
let first_name = fake("first_name")
# Generate a fake last name using the 'last_name' faker function.
let last_name = fake("last_name")
# Define possible subscription levels for users.
let subscription_levels = ["Free", "Basic", "Premium"]
# Define possible notification channels for user preferences.
let notifications = ["email", "sms", "push" ]
# Define supported languages for user preferences.
let languages = ["en", "es", "fr", "de", "zh", "jp"]
# Assign a unique user ID using a UUID digit generator.
root.user_id = fake("uuid_digit")
# Assign the generated first name to the 'first_name' field.
root.first_name = $first_name
# Assign the generated last name to the 'last_name' field.
root.last_name = $last_name
# Construct the user's email by combining the first initial, last name, and a fake domain name.
# The email is converted to lowercase for consistency.
root.email = ($first_name.slice(0,1) + $last_name + "@" + fake("domain_name")).lowercase()
# Assign a fake registration date using the 'date' faker function.
root.registration_date = fake("date")
# Assign the current timestamp as the last login time.
root.last_login = now()
# Randomly assign a subscription level by selecting an index from the 'subscription_levels' array.
root.subscription_level = $subscription_levels.index(random_int(min: 0, max: 2))
# Randomly assign a language preference by selecting an index from the 'languages' array.
root.preferences.language = $languages.index(random_int(min: 0, max: 5))
# Randomly assign a notification preference by selecting an index from the 'notifications' array.
root.preferences.notifications = $notifications.index(random_int(min: 0, max: 2))
pipeline:
processors:
- mapping: |
# Set the target topic for the generated records to 'profiles'.
meta topic = "profiles"
# Assign the entire record (root) to be sent to the specified topic.
root = this
output:
# Use the 'kafka_franz' output to write the generated records to Redpanda.
# https://docs.redpanda.com/redpanda-connect/components/outputs/kafka_franz/
kafka_franz:
# Seed brokers for the Kafka cluster. These localhost ports match the
# brokers' external Kafka listeners, so this config is presumably meant
# to run on the Docker host rather than inside the Compose network - TODO confirm.
seed_brokers: [ "localhost:19092", "localhost:29092", "localhost:39092"]
# Dynamically assign the topic based on the metadata specified in the processors.
# In this case, it resolves to the 'profiles' topic.
topic: ${! metadata("topic") }
# Configure SASL authentication to securely connect to the Kafka brokers.
sasl:
- # Specify the SASL mechanism to use for authentication.
mechanism: SCRAM-SHA-256
# The password for the SASL authentication.
password: secretpassword
# The username for the SASL authentication.
username: superuser

View File

@@ -0,0 +1,56 @@
# =================================================================
# Initial (bootstrap) cluster properties for a Redpanda cluster.
# Several of these values target quickstart development and evaluation
# and are not suitable for production environments.
#
# Background on bootstrap files:
# https://docs.redpanda.com/current/deploy/deployment-option/self-hosted/manual/production/production-deployment/#configure-a-bootstrap-file
# =================================================================

# ---- Authentication ----
# Require authentication on the Admin API.
# https://docs.redpanda.com/current/reference/properties/cluster-properties/#admin_api_require_auth
admin_api_require_auth: true
# Require SASL authentication on the Kafka API.
# https://docs.redpanda.com/current/reference/properties/cluster-properties/#enable_sasl
enable_sasl: true
# At least one superuser must exist so that other SASL users can be created.
# https://docs.redpanda.com/current/reference/properties/cluster-properties/#superusers
superusers: [superuser]

# ---- Topics and data transforms ----
# Create topics automatically the first time they are accessed.
# https://docs.redpanda.com/current/reference/properties/cluster-properties/#auto_create_topics_enabled
auto_create_topics_enabled: true
# Turn on data transforms.
# https://docs.redpanda.com/current/develop/data-transforms/how-transforms-work/
data_transforms_enabled: true

# ---- Audit logging (enterprise feature) ----
# https://docs.redpanda.com/current/manage/audit-logging/
audit_enabled: true

# ---- Tiered Storage (enterprise feature) ----
# https://docs.redpanda.com/current/manage/tiered-storage/
cloud_storage_enabled: true
cloud_storage_region: local
cloud_storage_access_key: minio
cloud_storage_secret_key: redpandaTieredStorage7
cloud_storage_api_endpoint: minio
cloud_storage_api_endpoint_port: 9000
cloud_storage_disable_tls: true
cloud_storage_bucket: redpanda
# Upload segments to Tiered Storage sooner than usual so the quickstart shows results quickly.
# https://docs.redpanda.com/current/reference/properties/object-storage-properties/#cloud_storage_segment_max_upload_interval_sec
cloud_storage_segment_max_upload_interval_sec: 60

# ---- Continuous Data Balancing (enterprise feature) ----
# Continuously monitors node and rack availability and disk usage, and
# rebalances partitions dynamically so the cluster can heal itself.
# https://docs.redpanda.com/current/manage/cluster-maintenance/continuous-data-balancing/
partition_autobalancing_mode: continuous

# ---- Metrics ----
# Consumer group metrics that Redpanda collects.
# https://docs.redpanda.com/current/reference/properties/cluster-properties/#enable_consumer_group_metrics
enable_consumer_group_metrics: ["group", "partition", "consumer_lag"]
# Collection interval, lowered for the quickstart.
# https://docs.redpanda.com/current/reference/properties/cluster-properties/#consumer_group_lag_collection_interval_sec
consumer_group_lag_collection_interval_sec: 60
# Collect host metrics as well.
# https://docs.redpanda.com/current/reference/properties/cluster-properties/#enable_host_metrics
enable_host_metrics: true

View File

@@ -0,0 +1,403 @@
# Compose project name.
name: redpanda-quickstart-multi-broker

# Bridge network that the services in this file attach to.
networks:
  redpanda_network:
    driver: bridge

# Named volumes with default settings: one data volume per broker, plus one for MinIO.
volumes:
  redpanda-0: {}
  redpanda-1: {}
  redpanda-2: {}
  minio: {}

services:
# ----------------
# Redpanda brokers
# ----------------
# redpanda-0 is the broker the other two brokers use as their seed server.
redpanda-0:
  image: docker.redpanda.com/redpandadata/redpanda:v25.2.7
  container_name: redpanda-0
  command:
    - "redpanda"
    - "start"
    - "--kafka-addr internal://0.0.0.0:9092,external://0.0.0.0:19092"
    # Addresses advertised to clients of the Kafka API.
    # Clients inside the same Docker network connect through the internal
    # address; clients outside the Docker network use the external one.
    - "--advertise-kafka-addr internal://redpanda-0:9092,external://localhost:19092"
    - "--pandaproxy-addr internal://0.0.0.0:8082,external://0.0.0.0:18082"
    # Addresses advertised to clients of the HTTP Proxy.
    - "--advertise-pandaproxy-addr internal://redpanda-0:8082,external://localhost:18082"
    - "--schema-registry-addr internal://0.0.0.0:8081,external://0.0.0.0:18081"
    # The RPC API carries the brokers' internal communication with each other.
    - "--rpc-addr redpanda-0:33145"
    - "--advertise-rpc-addr redpanda-0:33145"
    # dev-container mode applies well-known configuration properties for development in containers.
    - "--mode dev-container"
    # Have Seastar (the framework underneath Redpanda) use a single core.
    - "--smp 1"
    - "--default-log-level=info"
  environment:
    # Username and password of the bootstrap SCRAM superuser.
    # See https://docs.redpanda.com/current/deploy/deployment-option/self-hosted/manual/production/production-deployment/#bootstrap-a-user-account
    RP_BOOTSTRAP_USER: "superuser:secretpassword"
  volumes:
    - "redpanda-0:/var/lib/redpanda/data"
    - "./bootstrap.yml:/etc/redpanda/.bootstrap.yaml"
  networks:
    - redpanda_network
  ports:
    - "18081:18081"
    - "18082:18082"
    - "19092:19092"
    - "19644:9644"
  healthcheck:
    # Healthy once an authenticated `rpk cluster info` succeeds.
    test:
      - "CMD"
      - "rpk"
      - "cluster"
      - "info"
      - "-X"
      - "user=superuser"
      - "-X"
      - "pass=secretpassword"
    interval: 10s
    timeout: 15s
    retries: 10
  depends_on:
    minio:
      condition: service_healthy
# Second broker. It points at redpanda-0's RPC address as its seed server
# and publishes its external listeners on the 2xxxx host ports.
redpanda-1:
  image: docker.redpanda.com/redpandadata/redpanda:v25.2.7
  container_name: redpanda-1
  command:
    - "redpanda"
    - "start"
    - "--kafka-addr internal://0.0.0.0:9092,external://0.0.0.0:29092"
    - "--advertise-kafka-addr internal://redpanda-1:9092,external://localhost:29092"
    - "--pandaproxy-addr internal://0.0.0.0:8082,external://0.0.0.0:28082"
    - "--advertise-pandaproxy-addr internal://redpanda-1:8082,external://localhost:28082"
    - "--schema-registry-addr internal://0.0.0.0:8081,external://0.0.0.0:28081"
    - "--rpc-addr redpanda-1:33145"
    - "--advertise-rpc-addr redpanda-1:33145"
    - "--mode dev-container"
    - "--smp 1"
    - "--default-log-level=info"
    - "--seeds redpanda-0:33145"
  environment:
    RP_BOOTSTRAP_USER: "superuser:secretpassword"
  volumes:
    - "redpanda-1:/var/lib/redpanda/data"
    - "./bootstrap.yml:/etc/redpanda/.bootstrap.yaml"
  networks:
    - redpanda_network
  ports:
    - "28081:28081"
    - "28082:28082"
    - "29092:29092"
    - "29644:9644"
  # Short form: wait only for these containers to be started.
  depends_on: [redpanda-0, minio]
# Third broker. It points at redpanda-0's RPC address as its seed server
# and publishes its external listeners on the 3xxxx host ports.
redpanda-2:
  image: docker.redpanda.com/redpandadata/redpanda:v25.2.7
  container_name: redpanda-2
  command:
    - "redpanda"
    - "start"
    - "--kafka-addr internal://0.0.0.0:9092,external://0.0.0.0:39092"
    - "--advertise-kafka-addr internal://redpanda-2:9092,external://localhost:39092"
    - "--pandaproxy-addr internal://0.0.0.0:8082,external://0.0.0.0:38082"
    - "--advertise-pandaproxy-addr internal://redpanda-2:8082,external://localhost:38082"
    - "--schema-registry-addr internal://0.0.0.0:8081,external://0.0.0.0:38081"
    - "--rpc-addr redpanda-2:33145"
    - "--advertise-rpc-addr redpanda-2:33145"
    - "--mode dev-container"
    - "--smp 1"
    - "--default-log-level=info"
    - "--seeds redpanda-0:33145"
  environment:
    RP_BOOTSTRAP_USER: "superuser:secretpassword"
  volumes:
    - "redpanda-2:/var/lib/redpanda/data"
    - "./bootstrap.yml:/etc/redpanda/.bootstrap.yaml"
  networks:
    - redpanda_network
  ports:
    - "38081:38081"
    - "38082:38082"
    - "39092:39092"
    - "39644:9644"
  # Short form: wait only for these containers to be started.
  depends_on: [redpanda-0, minio]
####################
# Redpanda Console #
# Web UI for the cluster, published on host port 8080 (see `ports` below).
####################
console:
container_name: redpanda-console
image: docker.redpanda.com/redpandadata/console:v3.2.2
networks:
- redpanda_network
# Run a shell as the entrypoint so the inline configuration can be written
# to a file before the Console binary starts.
entrypoint: /bin/sh
# `$$` is the Compose escape for a literal `$`: the container's shell, not
# Compose, expands CONSOLE_CONFIG_FILE when the command runs.
command: -c 'echo "$$CONSOLE_CONFIG_FILE" > /tmp/config.yml && /app/console'
volumes:
- ./config:/tmp/config/
environment:
# Config file path handed to Console; the default is the file written by `command` above.
CONFIG_FILEPATH: ${CONFIG_FILEPATH:-/tmp/config.yml}
# Inline Console configuration, written to /tmp/config.yml at startup.
CONSOLE_CONFIG_FILE: |
# Configure a connection to the Redpanda cluster
# See https://docs.redpanda.com/current/console/config/connect-to-redpanda/
kafka:
brokers: ["redpanda-0:9092"]
sasl:
enabled: true
impersonateUser: true
schemaRegistry:
enabled: true
urls: ["http://redpanda-0:8081","http://redpanda-1:8081","http://redpanda-2:8081"]
authentication:
impersonateUser: true
redpanda:
adminApi:
enabled: true
urls: ["http://redpanda-0:9644","http://redpanda-1:9644","http://redpanda-2:9644"]
authentication:
basic:
username: superuser
password: secretpassword
impersonateUser: false
console:
# Configures Redpanda Console to fetch topic documentation from GitHub and display it in the UI.
# See https://docs.redpanda.com/current/console/config/topic-documentation/
topicDocumentation:
enabled: true
git:
enabled: true
repository:
url: https://github.com/redpanda-data/docs
branch: main
baseDirectory: tests/docker-compose
authentication:
jwtSigningKey: vazxnT+ZHtxKslK6QlDGovcYnSjTk/lKMmZ+mHrBVE+YdVDkLgSuP6AszAKe9Gvq
basic:
enabled: true
authorization:
roleBindings:
- roleName: admin
users:
- loginType: basic
name: superuser
ports:
- 8080:8080
# Start only after redpanda-0 is healthy and the setup containers
# (topic, schema, transform) have exited successfully.
depends_on:
redpanda-0:
condition: service_healthy
createtopic:
condition: service_completed_successfully
registerschema:
condition: service_completed_successfully
deploytransform:
condition: service_completed_successfully
####################
# Redpanda Connect #
# Runs an inline pipeline that feeds generated demo data into the cluster.
####################
connect:
container_name: redpanda-connect
# NOTE(review): no image tag is given, so this resolves to `latest`, unlike
# the version-pinned images used by the other services; consider pinning.
image: docker.redpanda.com/redpandadata/connect
networks:
- redpanda_network
# Run a shell as the entrypoint so the inline configuration can be written
# to a file before Redpanda Connect starts.
entrypoint: /bin/sh
depends_on:
redpanda-0:
condition: service_healthy
# `$$` is the Compose escape for a literal `$`: the container's shell, not
# Compose, expands CONNECT_CFG_FILE when the command runs.
command: -c 'echo "$$CONNECT_CFG_FILE" > /tmp/connect.yml; /redpanda-connect -c /tmp/connect.yml'
environment:
# This Redpanda Connect configuration creates fake data,
# processes it, and writes the output to a set of topics.
#
# Input:
# - Uses Redpanda Connect's generate input to generate fake data.
# See https://docs.redpanda.com/redpanda-connect/components/inputs/generate/
# Pipeline:
# - Bloblang mapping to batch each input and map 1 message to the 'logins'
# topic, and a random number (1-3) of messages to the 'transactions' topic
# - Unarchive processor to parse the JSON array and extract each
# element into its own message.
# See https://docs.redpanda.com/redpanda-connect/guides/bloblang/about/
# Output:
# - kafka_franz output to write the messages to the Redpanda brokers.
# See https://docs.redpanda.com/redpanda-connect/components/outputs/kafka_franz/
#
# The `$$` sequences inside the configuration are Compose escapes for a literal `$`.
CONNECT_CFG_FILE: |
input:
generate:
interval: 1s
mapping: |
let first_name = fake("first_name")
let last_name = fake("last_name")
root.user_id = counter()
root.name = $$first_name + " " + $$last_name
root.email = ($$first_name.slice(0,1) + $$last_name + "@" + fake("domain_name")).lowercase()
root.ip = fake("ipv4")
root.login_time = now()
pipeline:
processors:
- mapping: |
root = range(0, random_int(min:2, max:4)).map_each(cust -> this)
- unarchive:
format: "json_array"
- mapping: |
if batch_index() == 0 {
meta topic = "logins"
root = this
} else {
meta topic = "transactions"
root.user_id = this.user_id
root.email = this.email
root.index = batch_index() - 1
root.product_url = fake("url")
root.price = fake("amount_with_currency")
root.timestamp = now()
}
output:
kafka_franz:
seed_brokers: [ "redpanda-0:9092" ]
topic: $${! metadata("topic") }
sasl:
- mechanism: SCRAM-SHA-256
password: secretpassword
username: superuser
# ---------------------------------------------------------------
# rpk container that creates the edu-filtered-domains topic.
# See https://docs.redpanda.com/current/reference/rpk/rpk-topic/rpk-topic-create/
# ---------------------------------------------------------------
createtopic:
  image: docker.redpanda.com/redpandadata/redpanda:v25.2.7
  networks:
    - redpanda_network
  # Run only once redpanda-0 reports healthy.
  depends_on:
    redpanda-0:
      condition: service_healthy
  command:
    - "topic"
    - "create"
    - "edu-filtered-domains"
    - "-X user=superuser"
    - "-X pass=secretpassword"
    - "-X brokers=redpanda-0:9092"
# ---------------------------------------------------------------
# rpk container that registers the schema for the `transactions` subject.
# See https://docs.redpanda.com/current/manage/schema-reg/schema-reg-api/
# ---------------------------------------------------------------
registerschema:
  image: docker.redpanda.com/redpandadata/redpanda:v25.2.7
  networks:
    - redpanda_network
  # Run only once redpanda-0 reports healthy.
  depends_on:
    redpanda-0:
      condition: service_healthy
  # Mount the local schema file into the container.
  volumes:
    - "./transactions-schema.json:/etc/redpanda/transactions-schema.json"
  command:
    - "registry"
    - "schema"
    - "create"
    - "transactions"
    - "--schema"
    - "/etc/redpanda/transactions-schema.json"
    - "-X user=superuser"
    - "-X pass=secretpassword"
    - "-X registry.hosts=redpanda-0:8081"
# ---------------------------------------------------------------
# rpk container that consumes the `transactions` topic as the
# `transactions-consumer` consumer group.
# See https://docs.redpanda.com/current/reference/rpk/rpk-topic/rpk-topic-consume/
# ---------------------------------------------------------------
consumergroup:
  image: docker.redpanda.com/redpandadata/redpanda:v25.2.7
  networks:
    - redpanda_network
  # Start after the topic has been created and the transform deployed.
  depends_on:
    createtopic:
      condition: service_completed_successfully
    deploytransform:
      condition: service_completed_successfully
  command:
    - "topic"
    - "consume"
    - "transactions"
    - "--group"
    - "transactions-consumer"
    - "-X user=superuser"
    - "-X pass=secretpassword"
    - "-X brokers=redpanda-0:9092"
# ---------------------------------------------------------------
# rpk container that deploys the pre-built `regex` data transform,
# reading from `logins` and writing to `edu-filtered-domains`.
# See https://docs.redpanda.com/current/develop/data-transforms/deploy/
# ---------------------------------------------------------------
deploytransform:
  image: docker.redpanda.com/redpandadata/redpanda:v25.2.7
  networks:
    - redpanda_network
  # The output topic must exist first, so wait for createtopic to finish.
  depends_on:
    createtopic:
      condition: service_completed_successfully
  # Mount the pre-built WebAssembly binary into the container.
  volumes:
    - "./transform/regex.wasm:/etc/redpanda/regex.wasm"
  command:
    - "transform"
    - "deploy"
    - "--file=/etc/redpanda/regex.wasm"
    - "--name=regex"
    - "--input-topic=logins"
    - "--output-topic=edu-filtered-domains"
    # Single-quoted so the double quotes and backslash stay literal.
    - '--var=PATTERN="[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.edu"'
    - "--var=MATCH_VALUE=true"
    - "-X user=superuser"
    - "-X pass=secretpassword"
    - "-X admin.hosts=redpanda-0:9644"
# ---------------------------------------------------------------
# MinIO: object storage used by this quickstart for Tiered Storage.
# See https://min.io/
#
# NOTE: MinIO ships with this quickstart for development and evaluation
# only. It is not supported for production deployments of Redpanda.
#
# In production, use one of the supported object storage providers:
# https://docs.redpanda.com/current/deploy/deployment-option/self-hosted/manual/production/requirements/#object-storage-providers-for-tiered-storage
# ---------------------------------------------------------------
minio:
  image: minio/minio:RELEASE.2025-05-24T17-08-30Z
  container_name: minio
  # Serve objects from /data and expose the web console on port 9001.
  command: ["server", "--console-address", ":9001", "/data"]
  environment:
    # Root credentials; the mc container and the Redpanda bootstrap
    # properties use this same user/password pair.
    MINIO_ROOT_USER: "minio"
    MINIO_ROOT_PASSWORD: "redpandaTieredStorage7"
    MINIO_SERVER_URL: "http://minio:9000"
    MINIO_REGION_NAME: "local"
    MINIO_DOMAIN: "minio"
  volumes:
    - "minio:/data"
  networks:
    redpanda_network:
      aliases:
        # Additional DNS name for this container on the network.
        # Presumably lets virtual-host-style requests for the `redpanda`
        # bucket resolve - TODO confirm.
        - "redpanda.minio"
  ports:
    - "9000:9000"
    - "9001:9001"
  healthcheck:
    # Healthy once the readiness endpoint answers successfully.
    test:
      - "CMD"
      - "curl"
      - "-f"
      - "http://localhost:9000/minio/health/ready"
    interval: 10s
    timeout: 5s
    retries: 3
# MinIO client container: once MinIO is healthy it registers an alias for
# the server, creates the `redpanda` bucket, and opens it for anonymous
# access. It then idles (`tail -f /dev/null`) so the container keeps running.
mc:
  image: minio/mc:RELEASE.2025-05-21T01-59-54Z
  container_name: mc
  depends_on:
    minio:
      condition: service_healthy
  networks:
    - redpanda_network
  environment:
    - AWS_ACCESS_KEY_ID=minio
    - AWS_SECRET_ACCESS_KEY=redpandaTieredStorage7
    - AWS_REGION=local
  # Notes on the script below:
  #  - `mc alias set` is retried until the MinIO API accepts the credentials.
  #  - `--ignore-existing` keeps `mc mb` from failing when the bucket is
  #    already present (for example after a restart with a persisted volume).
  #  - `mc anonymous set` replaces the deprecated `mc policy set`, which
  #    current mc releases no longer accept.
  entrypoint: >
    /bin/sh -c "
    until (/usr/bin/mc alias set minio http://minio:9000 minio redpandaTieredStorage7) do echo '...waiting...' && sleep 1; done;
    /usr/bin/mc mb --ignore-existing minio/redpanda;
    /usr/bin/mc anonymous set public minio/redpanda;
    tail -f /dev/null
    "

View File

@@ -0,0 +1,77 @@
# Redpanda Connect pipeline: generates one fake user-profile record per
# second and writes it to the 'profiles' topic.
input:
# Use the 'generate' input to synthesize records instead of reading them from a source.
# https://docs.redpanda.com/redpanda-connect/components/inputs/generate/
generate:
# The interval at which new records are generated (one record per second here).
interval: 1s
# The mapping section defines how each generated record is structured.
# The language used here is called Bloblang.
# https://docs.redpanda.com/redpanda-connect/guides/bloblang/about/
mapping: |
# Generate a fake first name using the 'first_name' faker function.
let first_name = fake("first_name")
# Generate a fake last name using the 'last_name' faker function.
let last_name = fake("last_name")
# Define possible subscription levels for users.
let subscription_levels = ["Free", "Basic", "Premium"]
# Define possible notification channels for user preferences.
let notifications = ["email", "sms", "push" ]
# Define supported languages for user preferences.
let languages = ["en", "es", "fr", "de", "zh", "jp"]
# Assign a unique user ID using a UUID digit generator.
root.user_id = fake("uuid_digit")
# Assign the generated first name to the 'first_name' field.
root.first_name = $first_name
# Assign the generated last name to the 'last_name' field.
root.last_name = $last_name
# Construct the user's email by combining the first initial, last name, and a fake domain name.
# The email is converted to lowercase for consistency.
root.email = ($first_name.slice(0,1) + $last_name + "@" + fake("domain_name")).lowercase()
# Assign a fake registration date using the 'date' faker function.
root.registration_date = fake("date")
# Assign the current timestamp as the last login time.
root.last_login = now()
# Randomly assign a subscription level by selecting an index from the 'subscription_levels' array.
root.subscription_level = $subscription_levels.index(random_int(min: 0, max: 2))
# Randomly assign a language preference by selecting an index from the 'languages' array.
root.preferences.language = $languages.index(random_int(min: 0, max: 5))
# Randomly assign a notification preference by selecting an index from the 'notifications' array.
root.preferences.notifications = $notifications.index(random_int(min: 0, max: 2))
pipeline:
processors:
- mapping: |
# Set the target topic for the generated records to 'profiles'.
meta topic = "profiles"
# Assign the entire record (root) to be sent to the specified topic.
root = this
output:
# Use the 'kafka_franz' output to write the generated records to Redpanda.
# https://docs.redpanda.com/redpanda-connect/components/outputs/kafka_franz/
kafka_franz:
# Seed brokers for the Kafka cluster. These localhost ports match the
# brokers' external Kafka listeners, so this config is presumably meant
# to run on the Docker host rather than inside the Compose network - TODO confirm.
seed_brokers: [ "localhost:19092", "localhost:29092", "localhost:39092"]
# Dynamically assign the topic based on the metadata specified in the processors.
# In this case, it resolves to the 'profiles' topic.
topic: ${! metadata("topic") }
# Configure SASL authentication to securely connect to the Kafka brokers.
sasl:
- # Specify the SASL mechanism to use for authentication.
mechanism: SCRAM-SHA-256
# The password for the SASL authentication.
password: secretpassword
# The username for the SASL authentication.
username: superuser

View File

@@ -0,0 +1,56 @@
# =================================================================
# Initial (bootstrap) cluster properties for a Redpanda cluster.
# Several of these values target quickstart development and evaluation
# and are not suitable for production environments.
#
# Background on bootstrap files:
# https://docs.redpanda.com/current/deploy/deployment-option/self-hosted/manual/production/production-deployment/#configure-a-bootstrap-file
# =================================================================

# ---- Authentication ----
# Require authentication on the Admin API.
# https://docs.redpanda.com/current/reference/properties/cluster-properties/#admin_api_require_auth
admin_api_require_auth: true
# Require SASL authentication on the Kafka API.
# https://docs.redpanda.com/current/reference/properties/cluster-properties/#enable_sasl
enable_sasl: true
# At least one superuser must exist so that other SASL users can be created.
# https://docs.redpanda.com/current/reference/properties/cluster-properties/#superusers
superusers: [superuser]

# ---- Topics and data transforms ----
# Create topics automatically the first time they are accessed.
# https://docs.redpanda.com/current/reference/properties/cluster-properties/#auto_create_topics_enabled
auto_create_topics_enabled: true
# Turn on data transforms.
# https://docs.redpanda.com/current/develop/data-transforms/how-transforms-work/
data_transforms_enabled: true

# ---- Audit logging (enterprise feature) ----
# https://docs.redpanda.com/current/manage/audit-logging/
audit_enabled: true

# ---- Tiered Storage (enterprise feature) ----
# https://docs.redpanda.com/current/manage/tiered-storage/
cloud_storage_enabled: true
cloud_storage_region: local
cloud_storage_access_key: minio
cloud_storage_secret_key: redpandaTieredStorage7
cloud_storage_api_endpoint: minio
cloud_storage_api_endpoint_port: 9000
cloud_storage_disable_tls: true
cloud_storage_bucket: redpanda
# Upload segments to Tiered Storage sooner than usual so the quickstart shows results quickly.
# https://docs.redpanda.com/current/reference/properties/object-storage-properties/#cloud_storage_segment_max_upload_interval_sec
cloud_storage_segment_max_upload_interval_sec: 60

# ---- Continuous Data Balancing (enterprise feature) ----
# Continuously monitors node and rack availability and disk usage, and
# rebalances partitions dynamically so the cluster can heal itself.
# https://docs.redpanda.com/current/manage/cluster-maintenance/continuous-data-balancing/
partition_autobalancing_mode: continuous

# ---- Metrics ----
# Consumer group metrics that Redpanda collects.
# https://docs.redpanda.com/current/reference/properties/cluster-properties/#enable_consumer_group_metrics
enable_consumer_group_metrics: ["group", "partition", "consumer_lag"]
# Collection interval, lowered for the quickstart.
# https://docs.redpanda.com/current/reference/properties/cluster-properties/#consumer_group_lag_collection_interval_sec
consumer_group_lag_collection_interval_sec: 60
# Collect host metrics as well.
# https://docs.redpanda.com/current/reference/properties/cluster-properties/#enable_host_metrics
enable_host_metrics: true

View File

@@ -0,0 +1,403 @@
# Compose project name.
name: redpanda-quickstart-multi-broker

# Bridge network that the services in this file attach to.
networks:
  redpanda_network:
    driver: bridge

# Named volumes with default settings: one data volume per broker, plus one for MinIO.
volumes:
  redpanda-0: {}
  redpanda-1: {}
  redpanda-2: {}
  minio: {}

services:
# ----------------
# Redpanda brokers
# ----------------
# redpanda-0 is the broker the other two brokers use as their seed server.
redpanda-0:
  image: docker.redpanda.com/redpandadata/redpanda:v25.2.7
  container_name: redpanda-0
  command:
    - "redpanda"
    - "start"
    - "--kafka-addr internal://0.0.0.0:9092,external://0.0.0.0:19092"
    # Addresses advertised to clients of the Kafka API.
    # Clients inside the same Docker network connect through the internal
    # address; clients outside the Docker network use the external one.
    - "--advertise-kafka-addr internal://redpanda-0:9092,external://localhost:19092"
    - "--pandaproxy-addr internal://0.0.0.0:8082,external://0.0.0.0:18082"
    # Addresses advertised to clients of the HTTP Proxy.
    - "--advertise-pandaproxy-addr internal://redpanda-0:8082,external://localhost:18082"
    - "--schema-registry-addr internal://0.0.0.0:8081,external://0.0.0.0:18081"
    # The RPC API carries the brokers' internal communication with each other.
    - "--rpc-addr redpanda-0:33145"
    - "--advertise-rpc-addr redpanda-0:33145"
    # dev-container mode applies well-known configuration properties for development in containers.
    - "--mode dev-container"
    # Have Seastar (the framework underneath Redpanda) use a single core.
    - "--smp 1"
    - "--default-log-level=info"
  environment:
    # Username and password of the bootstrap SCRAM superuser.
    # See https://docs.redpanda.com/current/deploy/deployment-option/self-hosted/manual/production/production-deployment/#bootstrap-a-user-account
    RP_BOOTSTRAP_USER: "superuser:secretpassword"
  volumes:
    - "redpanda-0:/var/lib/redpanda/data"
    - "./bootstrap.yml:/etc/redpanda/.bootstrap.yaml"
  networks:
    - redpanda_network
  ports:
    - "18081:18081"
    - "18082:18082"
    - "19092:19092"
    - "19644:9644"
  healthcheck:
    # Healthy once an authenticated `rpk cluster info` succeeds.
    test:
      - "CMD"
      - "rpk"
      - "cluster"
      - "info"
      - "-X"
      - "user=superuser"
      - "-X"
      - "pass=secretpassword"
    interval: 10s
    timeout: 15s
    retries: 10
  depends_on:
    minio:
      condition: service_healthy
# Second broker. It points at redpanda-0's RPC address as its seed server
# and publishes its external listeners on the 2xxxx host ports.
redpanda-1:
  image: docker.redpanda.com/redpandadata/redpanda:v25.2.7
  container_name: redpanda-1
  command:
    - "redpanda"
    - "start"
    - "--kafka-addr internal://0.0.0.0:9092,external://0.0.0.0:29092"
    - "--advertise-kafka-addr internal://redpanda-1:9092,external://localhost:29092"
    - "--pandaproxy-addr internal://0.0.0.0:8082,external://0.0.0.0:28082"
    - "--advertise-pandaproxy-addr internal://redpanda-1:8082,external://localhost:28082"
    - "--schema-registry-addr internal://0.0.0.0:8081,external://0.0.0.0:28081"
    - "--rpc-addr redpanda-1:33145"
    - "--advertise-rpc-addr redpanda-1:33145"
    - "--mode dev-container"
    - "--smp 1"
    - "--default-log-level=info"
    - "--seeds redpanda-0:33145"
  environment:
    RP_BOOTSTRAP_USER: "superuser:secretpassword"
  volumes:
    - "redpanda-1:/var/lib/redpanda/data"
    - "./bootstrap.yml:/etc/redpanda/.bootstrap.yaml"
  networks:
    - redpanda_network
  ports:
    - "28081:28081"
    - "28082:28082"
    - "29092:29092"
    - "29644:9644"
  # Short form: wait only for these containers to be started.
  depends_on: [redpanda-0, minio]
# Third broker. It points at redpanda-0's RPC address as its seed server
# and publishes its external listeners on the 3xxxx host ports.
redpanda-2:
  image: docker.redpanda.com/redpandadata/redpanda:v25.2.7
  container_name: redpanda-2
  command:
    - "redpanda"
    - "start"
    - "--kafka-addr internal://0.0.0.0:9092,external://0.0.0.0:39092"
    - "--advertise-kafka-addr internal://redpanda-2:9092,external://localhost:39092"
    - "--pandaproxy-addr internal://0.0.0.0:8082,external://0.0.0.0:38082"
    - "--advertise-pandaproxy-addr internal://redpanda-2:8082,external://localhost:38082"
    - "--schema-registry-addr internal://0.0.0.0:8081,external://0.0.0.0:38081"
    - "--rpc-addr redpanda-2:33145"
    - "--advertise-rpc-addr redpanda-2:33145"
    - "--mode dev-container"
    - "--smp 1"
    - "--default-log-level=info"
    - "--seeds redpanda-0:33145"
  environment:
    RP_BOOTSTRAP_USER: "superuser:secretpassword"
  volumes:
    - "redpanda-2:/var/lib/redpanda/data"
    - "./bootstrap.yml:/etc/redpanda/.bootstrap.yaml"
  networks:
    - redpanda_network
  ports:
    - "38081:38081"
    - "38082:38082"
    - "39092:39092"
    - "39644:9644"
  # Short form: wait only for these containers to be started.
  depends_on: [redpanda-0, minio]
####################
# Redpanda Console #
# Web UI for the cluster, published on host port 8080 (see `ports` below).
####################
console:
container_name: redpanda-console
image: docker.redpanda.com/redpandadata/console:v3.2.2
networks:
- redpanda_network
# Run a shell as the entrypoint so the inline configuration can be written
# to a file before the Console binary starts.
entrypoint: /bin/sh
# `$$` is the Compose escape for a literal `$`: the container's shell, not
# Compose, expands CONSOLE_CONFIG_FILE when the command runs.
command: -c 'echo "$$CONSOLE_CONFIG_FILE" > /tmp/config.yml && /app/console'
volumes:
- ./config:/tmp/config/
environment:
# Config file path handed to Console; the default is the file written by `command` above.
CONFIG_FILEPATH: ${CONFIG_FILEPATH:-/tmp/config.yml}
# Inline Console configuration, written to /tmp/config.yml at startup.
CONSOLE_CONFIG_FILE: |
# Configure a connection to the Redpanda cluster
# See https://docs.redpanda.com/current/console/config/connect-to-redpanda/
kafka:
brokers: ["redpanda-0:9092"]
sasl:
enabled: true
impersonateUser: true
schemaRegistry:
enabled: true
urls: ["http://redpanda-0:8081","http://redpanda-1:8081","http://redpanda-2:8081"]
authentication:
impersonateUser: true
redpanda:
adminApi:
enabled: true
urls: ["http://redpanda-0:9644","http://redpanda-1:9644","http://redpanda-2:9644"]
authentication:
basic:
username: superuser
password: secretpassword
impersonateUser: false
console:
# Configures Redpanda Console to fetch topic documentation from GitHub and display it in the UI.
# See https://docs.redpanda.com/current/console/config/topic-documentation/
topicDocumentation:
enabled: true
git:
enabled: true
repository:
url: https://github.com/redpanda-data/docs
branch: main
baseDirectory: tests/docker-compose
authentication:
jwtSigningKey: vazxnT+ZHtxKslK6QlDGovcYnSjTk/lKMmZ+mHrBVE+YdVDkLgSuP6AszAKe9Gvq
basic:
enabled: true
authorization:
roleBindings:
- roleName: admin
users:
- loginType: basic
name: superuser
ports:
- 8080:8080
# Start only after redpanda-0 is healthy and the setup containers
# (topic, schema, transform) have exited successfully.
depends_on:
redpanda-0:
condition: service_healthy
createtopic:
condition: service_completed_successfully
registerschema:
condition: service_completed_successfully
deploytransform:
condition: service_completed_successfully
####################
# Redpanda Connect #
# Runs an inline pipeline that feeds generated demo data into the cluster.
####################
connect:
container_name: redpanda-connect
# NOTE(review): no image tag is given, so this resolves to `latest`, unlike
# the version-pinned images used by the other services; consider pinning.
image: docker.redpanda.com/redpandadata/connect
networks:
- redpanda_network
# Run a shell as the entrypoint so the inline configuration can be written
# to a file before Redpanda Connect starts.
entrypoint: /bin/sh
depends_on:
redpanda-0:
condition: service_healthy
# `$$` is the Compose escape for a literal `$`: the container's shell, not
# Compose, expands CONNECT_CFG_FILE when the command runs.
command: -c 'echo "$$CONNECT_CFG_FILE" > /tmp/connect.yml; /redpanda-connect -c /tmp/connect.yml'
environment:
# This Redpanda Connect configuration creates fake data,
# processes it, and writes the output to a set of topics.
#
# Input:
# - Uses Redpanda Connect's generate input to generate fake data.
# See https://docs.redpanda.com/redpanda-connect/components/inputs/generate/
# Pipeline:
# - Bloblang mapping to batch each input and map 1 message to the 'logins'
# topic, and a random number (1-3) of messages to the 'transactions' topic
# - Unarchive processor to parse the JSON array and extract each
# element into its own message.
# See https://docs.redpanda.com/redpanda-connect/guides/bloblang/about/
# Output:
# - kafka_franz output to write the messages to the Redpanda brokers.
# See https://docs.redpanda.com/redpanda-connect/components/outputs/kafka_franz/
#
# The `$$` sequences inside the configuration are Compose escapes for a literal `$`.
CONNECT_CFG_FILE: |
input:
generate:
interval: 1s
mapping: |
let first_name = fake("first_name")
let last_name = fake("last_name")
root.user_id = counter()
root.name = $$first_name + " " + $$last_name
root.email = ($$first_name.slice(0,1) + $$last_name + "@" + fake("domain_name")).lowercase()
root.ip = fake("ipv4")
root.login_time = now()
pipeline:
processors:
- mapping: |
root = range(0, random_int(min:2, max:4)).map_each(cust -> this)
- unarchive:
format: "json_array"
- mapping: |
if batch_index() == 0 {
meta topic = "logins"
root = this
} else {
meta topic = "transactions"
root.user_id = this.user_id
root.email = this.email
root.index = batch_index() - 1
root.product_url = fake("url")
root.price = fake("amount_with_currency")
root.timestamp = now()
}
output:
kafka_franz:
seed_brokers: [ "redpanda-0:9092" ]
topic: $${! metadata("topic") }
sasl:
- mechanism: SCRAM-SHA-256
password: secretpassword
username: superuser
####################
# rpk container to create the edu-filtered-domains topic #
# See https://docs.redpanda.com/current/reference/rpk/rpk-topic/rpk-topic-create/
####################
createtopic:
  image: docker.redpanda.com/redpandadata/redpanda:v25.2.7
  networks:
    - redpanda_network
  # Run only after the seed broker passes its health check.
  depends_on:
    redpanda-0:
      condition: service_healthy
  # One-shot job: create the 'edu-filtered-domains' topic as the superuser.
  command:
    - topic
    - create
    - edu-filtered-domains
    - '-X user=superuser'
    - '-X pass=secretpassword'
    - '-X brokers=redpanda-0:9092'
####################
# rpk container to register the schema #
# See https://docs.redpanda.com/current/manage/schema-reg/schema-reg-api/
####################
registerschema:
  image: docker.redpanda.com/redpandadata/redpanda:v25.2.7
  networks:
    - redpanda_network
  # Mount the local schema file into the container at the path given to --schema.
  volumes:
    - ./transactions-schema.json:/etc/redpanda/transactions-schema.json
  # Run only after the seed broker passes its health check.
  depends_on:
    redpanda-0:
      condition: service_healthy
  # One-shot job: register the schema under the 'transactions' subject
  # against redpanda-0's internal Schema Registry listener (8081).
  command:
    - registry
    - schema
    - create
    - transactions
    - --schema
    - /etc/redpanda/transactions-schema.json
    - '-X user=superuser'
    - '-X pass=secretpassword'
    - '-X registry.hosts=redpanda-0:8081'
####################
# rpk container to deploy a consumer group #
# See https://docs.redpanda.com/current/reference/rpk/rpk-topic/rpk-topic-consume/
####################
consumergroup:
  image: docker.redpanda.com/redpandadata/redpanda:v25.2.7
  networks:
    - redpanda_network
  # Start once both one-shot setup jobs have exited successfully.
  depends_on:
    createtopic:
      condition: service_completed_successfully
    deploytransform:
      condition: service_completed_successfully
  # Consume the 'transactions' topic as consumer group 'transactions-consumer'.
  command:
    - topic
    - consume
    - transactions
    - --group
    - transactions-consumer
    - '-X user=superuser'
    - '-X pass=secretpassword'
    - '-X brokers=redpanda-0:9092'
####################
# rpk container to deploy the pre-built data transform #
# See https://docs.redpanda.com/current/develop/data-transforms/deploy/
####################
deploytransform:
  image: docker.redpanda.com/redpandadata/redpanda:v25.2.7
  networks:
    - redpanda_network
  # Make the pre-built WebAssembly module available at the path given to --file.
  volumes:
    - ./transform/regex.wasm:/etc/redpanda/regex.wasm
  # The output topic must exist first, so wait for the createtopic job to finish.
  depends_on:
    createtopic:
      condition: service_completed_successfully
  # One-shot job: deploy the 'regex' transform, reading from 'logins' and
  # writing to 'edu-filtered-domains', through redpanda-0's Admin API (9644).
  # NOTE(review): this list form is not processed by a shell, so the double
  # quotes around the PATTERN value are passed through literally; confirm intended.
  command:
    - transform
    - deploy
    - --file=/etc/redpanda/regex.wasm
    - --name=regex
    - --input-topic=logins
    - --output-topic=edu-filtered-domains
    - --var=PATTERN="[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.edu"
    - --var=MATCH_VALUE=true
    - '-X user=superuser'
    - '-X pass=secretpassword'
    - '-X admin.hosts=redpanda-0:9644'
####################
# MinIO for Tiered Storage #
# See https://min.io/
#
# NOTE: MinIO is included in this quickstart for development and evaluation purposes only.
# It is not supported for production deployments of Redpanda.
#
# For production environments, use one of the supported object storage providers:
# https://docs.redpanda.com/current/deploy/deployment-option/self-hosted/manual/production/requirements/#object-storage-providers-for-tiered-storage
####################
minio:
  image: minio/minio:RELEASE.2025-05-24T17-08-30Z
  container_name: minio
  # Store objects under /data and serve the web console on port 9001.
  command: server --console-address ":9001" /data
  environment:
    # Root credentials; the mc service in this file logs in with the same pair.
    MINIO_ROOT_USER: minio
    MINIO_ROOT_PASSWORD: redpandaTieredStorage7
    MINIO_SERVER_URL: "http://minio:9000"
    MINIO_REGION_NAME: local
    MINIO_DOMAIN: minio
  ports:
    - "9000:9000"
    - "9001:9001"
  volumes:
    - minio:/data
  networks:
    redpanda_network:
      aliases:
        # Extra DNS name for this container on the network.
        # NOTE(review): presumably the virtual-hosted address of the
        # 'redpanda' bucket under MINIO_DOMAIN; confirm.
        - redpanda.minio
  # Healthy once MinIO's readiness endpoint answers successfully.
  healthcheck:
    test:
      - CMD
      - curl
      - "-f"
      - http://localhost:9000/minio/health/ready
    interval: 10s
    timeout: 5s
    retries: 3
mc:
  image: minio/mc:RELEASE.2025-05-21T01-59-54Z
  container_name: mc
  networks:
    - redpanda_network
  depends_on:
    minio:
      condition: service_healthy
  environment:
    - AWS_ACCESS_KEY_ID=minio
    - AWS_SECRET_ACCESS_KEY=redpandaTieredStorage7
    - AWS_REGION=local
  # Bucket bootstrap script:
  #  1. Retry until the 'minio' alias can be registered with the server.
  #  2. Create the 'redpanda' bucket; --ignore-existing keeps this step from
  #     failing when the container restarts and the bucket is already there.
  #  3. Allow anonymous access to the bucket. `mc anonymous` is the current
  #     name of the subcommand formerly called `mc policy`.
  #  4. Keep the container running afterwards.
  entrypoint: >
    /bin/sh -c "
    until (/usr/bin/mc alias set minio http://minio:9000 minio redpandaTieredStorage7) do echo '...waiting...' && sleep 1; done;
    /usr/bin/mc mb --ignore-existing minio/redpanda;
    /usr/bin/mc anonymous set public minio/redpanda;
    tail -f /dev/null
    "

View File

@@ -0,0 +1,77 @@
# Redpanda Connect pipeline: generate one fake user profile per interval and
# publish it to the 'profiles' topic.
input:
  # 'generate' input:
  # https://docs.redpanda.com/redpanda-connect/components/inputs/generate/
  generate:
    # Emit a new record every second.
    interval: 1s
    # Bloblang mapping that builds each record.
    # https://docs.redpanda.com/redpanda-connect/guides/bloblang/about/
    mapping: |
      # Generate a fake first name using the 'first_name' faker function.
      let first_name = fake("first_name")
      # Generate a fake last name using the 'last_name' faker function.
      let last_name = fake("last_name")
      # Define possible subscription levels for users.
      let subscription_levels = ["Free", "Basic", "Premium"]
      # Define possible notification channels for user preferences.
      let notifications = ["email", "sms", "push" ]
      # Define supported languages for user preferences.
      let languages = ["en", "es", "fr", "de", "zh", "jp"]
      # Assign a unique user ID using a UUID digit generator.
      root.user_id = fake("uuid_digit")
      # Assign the generated first name to the 'first_name' field.
      root.first_name = $first_name
      # Assign the generated last name to the 'last_name' field.
      root.last_name = $last_name
      # Construct the user's email by combining the first initial, last name, and a fake domain name.
      # The email is converted to lowercase for consistency.
      root.email = ($first_name.slice(0,1) + $last_name + "@" + fake("domain_name")).lowercase()
      # Assign a fake registration date using the 'date' faker function.
      root.registration_date = fake("date")
      # Assign the current timestamp as the last login time.
      root.last_login = now()
      # Randomly assign a subscription level by selecting an index from the 'subscription_levels' array.
      root.subscription_level = $subscription_levels.index(random_int(min: 0, max: 2))
      # Randomly assign a language preference by selecting an index from the 'languages' array.
      root.preferences.language = $languages.index(random_int(min: 0, max: 5))
      # Randomly assign a notification preference by selecting an index from the 'notifications' array.
      root.preferences.notifications = $notifications.index(random_int(min: 0, max: 2))
pipeline:
  processors:
    # Tag every record with its destination topic; the payload is unchanged.
    - mapping: |
        # Set the target topic for the generated records to 'profiles'.
        meta topic = "profiles"
        # Assign the entire record (root) to be sent to the specified topic.
        root = this
output:
  # 'kafka_franz' output:
  # https://docs.redpanda.com/redpanda-connect/components/outputs/kafka_franz/
  kafka_franz:
    # Bootstrap addresses: one host-side Kafka port per broker.
    seed_brokers:
      - "localhost:19092"
      - "localhost:29092"
      - "localhost:39092"
    # Resolved per message from the 'topic' metadata set in the pipeline.
    topic: ${! metadata("topic") }
    # SASL/SCRAM credentials used to authenticate to the brokers.
    sasl:
      - mechanism: SCRAM-SHA-256
        username: superuser
        password: secretpassword

View File

@@ -0,0 +1,56 @@
# =================================================================
# Initial (bootstrap) cluster properties for the Redpanda quickstart cluster.
# Several of these values are chosen for local development and evaluation
# and are not suitable for production environments.
#
# Bootstrap file documentation:
# https://docs.redpanda.com/current/deploy/deployment-option/self-hosted/manual/production/production-deployment/#configure-a-bootstrap-file
# =================================================================

# ---- Authentication ----
# Require authentication on the Admin API.
# https://docs.redpanda.com/current/reference/properties/cluster-properties/#admin_api_require_auth
admin_api_require_auth: true
# Superuser principals. At least one is needed to create other SASL users.
# https://docs.redpanda.com/current/reference/properties/cluster-properties/#superusers
superusers:
  - superuser
# Enable SASL authentication on the Kafka API.
# https://docs.redpanda.com/current/reference/properties/cluster-properties/#enable_sasl
enable_sasl: true

# ---- Topics and transforms ----
# Create topics automatically the first time they are accessed.
# https://docs.redpanda.com/current/reference/properties/cluster-properties/#auto_create_topics_enabled
auto_create_topics_enabled: true
# Turn on data transforms.
# https://docs.redpanda.com/current/develop/data-transforms/how-transforms-work/
data_transforms_enabled: true

# ---- Audit logging (enterprise feature) ----
# https://docs.redpanda.com/current/manage/audit-logging/
audit_enabled: true

# ---- Tiered Storage (enterprise feature) ----
# https://docs.redpanda.com/current/manage/tiered-storage/
cloud_storage_enabled: true
cloud_storage_region: local
cloud_storage_access_key: minio
cloud_storage_secret_key: redpandaTieredStorage7
cloud_storage_api_endpoint: minio
cloud_storage_api_endpoint_port: 9000
cloud_storage_disable_tls: true
cloud_storage_bucket: redpanda
# Upload segments to Tiered Storage sooner, so the quickstart shows results quickly.
# https://docs.redpanda.com/current/reference/properties/object-storage-properties/#cloud_storage_segment_max_upload_interval_sec
cloud_storage_segment_max_upload_interval_sec: 60

# ---- Continuous Data Balancing (enterprise feature) ----
# Continuously monitors node and rack availability and disk usage, and
# rebalances partitions accordingly.
# https://docs.redpanda.com/current/manage/cluster-maintenance/continuous-data-balancing/
partition_autobalancing_mode: continuous

# ---- Metrics ----
# Consumer group metrics to collect.
# https://docs.redpanda.com/current/reference/properties/cluster-properties/#enable_consumer_group_metrics
enable_consumer_group_metrics:
  - group
  - partition
  - consumer_lag
# Lag collection interval, lowered for the quickstart.
# https://docs.redpanda.com/current/reference/properties/cluster-properties/#consumer_group_lag_collection_interval_sec
consumer_group_lag_collection_interval_sec: 60
# Collect host metrics.
# https://docs.redpanda.com/current/reference/properties/cluster-properties/#enable_host_metrics
enable_host_metrics: true

View File

@@ -0,0 +1,403 @@
# Compose project name.
name: redpanda-quickstart-multi-broker
# Named volumes: one data volume per broker plus one for MinIO.
volumes:
  redpanda-0: null
  redpanda-1: null
  redpanda-2: null
  minio: null
# Single bridge network shared by every service in this file.
networks:
  redpanda_network:
    driver: bridge
services:
##################
# Redpanda Brokers #
##################
redpanda-0:
  image: docker.redpanda.com/redpandadata/redpanda:v25.2.7
  container_name: redpanda-0
  command:
    - redpanda
    - start
    # Kafka API listeners: 'internal' on 9092 and 'external' on 19092.
    - --kafka-addr internal://0.0.0.0:9092,external://0.0.0.0:19092
    # Addresses advertised to Kafka clients: the internal one is reachable
    # from containers on the same Docker network, the external one from the host.
    - --advertise-kafka-addr internal://redpanda-0:9092,external://localhost:19092
    # HTTP Proxy listeners and the addresses advertised to its clients.
    - --pandaproxy-addr internal://0.0.0.0:8082,external://0.0.0.0:18082
    - --advertise-pandaproxy-addr internal://redpanda-0:8082,external://localhost:18082
    # Schema Registry listeners.
    - --schema-registry-addr internal://0.0.0.0:8081,external://0.0.0.0:18081
    # RPC address used for broker-to-broker communication.
    - --rpc-addr redpanda-0:33145
    - --advertise-rpc-addr redpanda-0:33145
    # dev-container mode applies well-known settings for development in containers.
    - --mode dev-container
    # Limit Seastar (the framework underneath Redpanda) to a single core.
    - --smp 1
    - --default-log-level=info
  # Username and password of the bootstrap SCRAM superuser.
  # See https://docs.redpanda.com/current/deploy/deployment-option/self-hosted/manual/production/production-deployment/#bootstrap-a-user-account
  environment:
    RP_BOOTSTRAP_USER: "superuser:secretpassword"
  volumes:
    - redpanda-0:/var/lib/redpanda/data
    # Initial cluster properties.
    - ./bootstrap.yml:/etc/redpanda/.bootstrap.yaml
  networks:
    - redpanda_network
  # Host ports: external Schema Registry, HTTP Proxy and Kafka listeners,
  # plus container port 9644 published as 19644.
  ports:
    - "18081:18081"
    - "18082:18082"
    - "19092:19092"
    - "19644:9644"
  # Healthy once `rpk cluster info` succeeds with the superuser credentials.
  healthcheck:
    test:
      - CMD
      - rpk
      - cluster
      - info
      - "-X"
      - user=superuser
      - "-X"
      - pass=secretpassword
    interval: 10s
    timeout: 15s
    retries: 10
  # Object storage must be healthy before the broker starts.
  depends_on:
    minio:
      condition: service_healthy
redpanda-1:
  image: docker.redpanda.com/redpandadata/redpanda:v25.2.7
  container_name: redpanda-1
  command:
    - redpanda
    - start
    # Same listener layout as redpanda-0, with external ports in the 2xxxx range.
    - --kafka-addr internal://0.0.0.0:9092,external://0.0.0.0:29092
    - --advertise-kafka-addr internal://redpanda-1:9092,external://localhost:29092
    - --pandaproxy-addr internal://0.0.0.0:8082,external://0.0.0.0:28082
    - --advertise-pandaproxy-addr internal://redpanda-1:8082,external://localhost:28082
    - --schema-registry-addr internal://0.0.0.0:8081,external://0.0.0.0:28081
    - --rpc-addr redpanda-1:33145
    - --advertise-rpc-addr redpanda-1:33145
    - --mode dev-container
    - --smp 1
    - --default-log-level=info
    # Seed server: redpanda-0's RPC address.
    - --seeds redpanda-0:33145
  environment:
    RP_BOOTSTRAP_USER: "superuser:secretpassword"
  volumes:
    - redpanda-1:/var/lib/redpanda/data
    - ./bootstrap.yml:/etc/redpanda/.bootstrap.yaml
  networks:
    - redpanda_network
  ports:
    - "28081:28081"
    - "28082:28082"
    - "29092:29092"
    - "29644:9644"
  # Only start order is enforced here; neither dependency has to be healthy.
  depends_on:
    redpanda-0:
      condition: service_started
    minio:
      condition: service_started
redpanda-2:
  image: docker.redpanda.com/redpandadata/redpanda:v25.2.7
  container_name: redpanda-2
  command:
    - redpanda
    - start
    # Same listener layout as redpanda-0, with external ports in the 3xxxx range.
    - --kafka-addr internal://0.0.0.0:9092,external://0.0.0.0:39092
    - --advertise-kafka-addr internal://redpanda-2:9092,external://localhost:39092
    - --pandaproxy-addr internal://0.0.0.0:8082,external://0.0.0.0:38082
    - --advertise-pandaproxy-addr internal://redpanda-2:8082,external://localhost:38082
    - --schema-registry-addr internal://0.0.0.0:8081,external://0.0.0.0:38081
    - --rpc-addr redpanda-2:33145
    - --advertise-rpc-addr redpanda-2:33145
    - --mode dev-container
    - --smp 1
    - --default-log-level=info
    # Seed server: redpanda-0's RPC address.
    - --seeds redpanda-0:33145
  environment:
    RP_BOOTSTRAP_USER: "superuser:secretpassword"
  volumes:
    - redpanda-2:/var/lib/redpanda/data
    - ./bootstrap.yml:/etc/redpanda/.bootstrap.yaml
  networks:
    - redpanda_network
  ports:
    - "38081:38081"
    - "38082:38082"
    - "39092:39092"
    - "39644:9644"
  # Only start order is enforced here; neither dependency has to be healthy.
  depends_on:
    redpanda-0:
      condition: service_started
    minio:
      condition: service_started
####################
# Redpanda Console #
####################
console:
  image: docker.redpanda.com/redpandadata/console:v3.2.2
  container_name: redpanda-console
  networks:
    - redpanda_network
  ports:
    - "8080:8080"
  volumes:
    - ./config:/tmp/config/
  # Start after the seed broker is healthy and the one-shot setup jobs have finished.
  depends_on:
    redpanda-0:
      condition: service_healthy
    createtopic:
      condition: service_completed_successfully
    registerschema:
      condition: service_completed_successfully
    deploytransform:
      condition: service_completed_successfully
  # Write the inline configuration (CONSOLE_CONFIG_FILE below) to /tmp/config.yml,
  # then launch Console. `$$` is Compose's escape for a literal `$`.
  entrypoint: /bin/sh
  command: -c 'echo "$$CONSOLE_CONFIG_FILE" > /tmp/config.yml && /app/console'
  environment:
    # Defaults to the file written by the command above unless overridden by the caller.
    CONFIG_FILEPATH: ${CONFIG_FILEPATH:-/tmp/config.yml}
    CONSOLE_CONFIG_FILE: |
      # Configure a connection to the Redpanda cluster
      # See https://docs.redpanda.com/current/console/config/connect-to-redpanda/
      kafka:
        brokers: ["redpanda-0:9092"]
        sasl:
          enabled: true
          impersonateUser: true
      schemaRegistry:
        enabled: true
        urls: ["http://redpanda-0:8081","http://redpanda-1:8081","http://redpanda-2:8081"]
        authentication:
          impersonateUser: true
      redpanda:
        adminApi:
          enabled: true
          urls: ["http://redpanda-0:9644","http://redpanda-1:9644","http://redpanda-2:9644"]
          authentication:
            basic:
              username: superuser
              password: secretpassword
            impersonateUser: false
      console:
        # Configures Redpanda Console to fetch topic documentation from GitHub and display it in the UI.
        # See https://docs.redpanda.com/current/console/config/topic-documentation/
        topicDocumentation:
          enabled: true
          git:
            enabled: true
            repository:
              url: https://github.com/redpanda-data/docs
              branch: main
              baseDirectory: tests/docker-compose
      authentication:
        jwtSigningKey: vazxnT+ZHtxKslK6QlDGovcYnSjTk/lKMmZ+mHrBVE+YdVDkLgSuP6AszAKe9Gvq
        basic:
          enabled: true
      authorization:
        roleBindings:
          - roleName: admin
            users:
              - loginType: basic
                name: superuser
####################
# Redpanda Connect #
####################
connect:
  container_name: redpanda-connect
  # NOTE(review): unlike the other images in this file, this one carries no tag,
  # so Docker resolves it to `latest`. Consider pinning a release for reproducibility.
  image: docker.redpanda.com/redpandadata/connect
  networks:
    - redpanda_network
  depends_on:
    redpanda-0:
      condition: service_healthy
  # Write the inline pipeline (CONNECT_CFG_FILE below) to a file, then start
  # Redpanda Connect with it. `$$` is Compose's escape for a literal `$`.
  entrypoint: /bin/sh
  command: -c 'echo "$$CONNECT_CFG_FILE" > /tmp/connect.yml; /redpanda-connect -c /tmp/connect.yml'
  environment:
    # This Redpanda Connect configuration creates fake data,
    # processes it, and writes the output to a set of topics.
    #
    # Input:
    #  - Uses Redpanda Connect's generate input to generate fake data.
    #    See https://docs.redpanda.com/redpanda-connect/components/inputs/generate/
    # Pipeline:
    #  - Bloblang mapping that copies each generated record into an array of
    #    2-4 elements: the first element goes to the 'logins' topic and the
    #    remaining 1-3 elements go to the 'transactions' topic.
    #  - Unarchive processor to parse the JSON array and extract each
    #    element into its own message.
    #    See https://docs.redpanda.com/redpanda-connect/guides/bloblang/about/
    # Output:
    #  - kafka_franz output to write the messages to the Redpanda brokers.
    #    See https://docs.redpanda.com/redpanda-connect/components/outputs/kafka_franz/
    CONNECT_CFG_FILE: |
      input:
        generate:
          interval: 1s
          mapping: |
            let first_name = fake("first_name")
            let last_name = fake("last_name")
            root.user_id = counter()
            root.name = $$first_name + " " + $$last_name
            root.email = ($$first_name.slice(0,1) + $$last_name + "@" + fake("domain_name")).lowercase()
            root.ip = fake("ipv4")
            root.login_time = now()
      pipeline:
        processors:
          - mapping: |
              root = range(0, random_int(min:2, max:4)).map_each(cust -> this)
          - unarchive:
              format: "json_array"
          - mapping: |
              if batch_index() == 0 {
                meta topic = "logins"
                root = this
              } else {
                meta topic = "transactions"
                root.user_id = this.user_id
                root.email = this.email
                root.index = batch_index() - 1
                root.product_url = fake("url")
                root.price = fake("amount_with_currency")
                root.timestamp = now()
              }
      output:
        kafka_franz:
          seed_brokers: [ "redpanda-0:9092" ]
          topic: $${! metadata("topic") }
          sasl:
            - mechanism: SCRAM-SHA-256
              password: secretpassword
              username: superuser
####################
# rpk container to create the edu-filtered-domains topic #
# See https://docs.redpanda.com/current/reference/rpk/rpk-topic/rpk-topic-create/
####################
createtopic:
  image: docker.redpanda.com/redpandadata/redpanda:v25.2.7
  networks:
    - redpanda_network
  # Run only after the seed broker passes its health check.
  depends_on:
    redpanda-0:
      condition: service_healthy
  # One-shot job: create the 'edu-filtered-domains' topic as the superuser.
  command:
    - topic
    - create
    - edu-filtered-domains
    - '-X user=superuser'
    - '-X pass=secretpassword'
    - '-X brokers=redpanda-0:9092'
####################
# rpk container to register the schema #
# See https://docs.redpanda.com/current/manage/schema-reg/schema-reg-api/
####################
registerschema:
  image: docker.redpanda.com/redpandadata/redpanda:v25.2.7
  networks:
    - redpanda_network
  # Mount the local schema file into the container at the path given to --schema.
  volumes:
    - ./transactions-schema.json:/etc/redpanda/transactions-schema.json
  # Run only after the seed broker passes its health check.
  depends_on:
    redpanda-0:
      condition: service_healthy
  # One-shot job: register the schema under the 'transactions' subject
  # against redpanda-0's internal Schema Registry listener (8081).
  command:
    - registry
    - schema
    - create
    - transactions
    - --schema
    - /etc/redpanda/transactions-schema.json
    - '-X user=superuser'
    - '-X pass=secretpassword'
    - '-X registry.hosts=redpanda-0:8081'
####################
# rpk container to deploy a consumer group #
# See https://docs.redpanda.com/current/reference/rpk/rpk-topic/rpk-topic-consume/
####################
consumergroup:
  image: docker.redpanda.com/redpandadata/redpanda:v25.2.7
  networks:
    - redpanda_network
  # Start once both one-shot setup jobs have exited successfully.
  depends_on:
    createtopic:
      condition: service_completed_successfully
    deploytransform:
      condition: service_completed_successfully
  # Consume the 'transactions' topic as consumer group 'transactions-consumer'.
  command:
    - topic
    - consume
    - transactions
    - --group
    - transactions-consumer
    - '-X user=superuser'
    - '-X pass=secretpassword'
    - '-X brokers=redpanda-0:9092'
####################
# rpk container to deploy the pre-built data transform #
# See https://docs.redpanda.com/current/develop/data-transforms/deploy/
####################
deploytransform:
  image: docker.redpanda.com/redpandadata/redpanda:v25.2.7
  networks:
    - redpanda_network
  # Make the pre-built WebAssembly module available at the path given to --file.
  volumes:
    - ./transform/regex.wasm:/etc/redpanda/regex.wasm
  # The output topic must exist first, so wait for the createtopic job to finish.
  depends_on:
    createtopic:
      condition: service_completed_successfully
  # One-shot job: deploy the 'regex' transform, reading from 'logins' and
  # writing to 'edu-filtered-domains', through redpanda-0's Admin API (9644).
  # NOTE(review): this list form is not processed by a shell, so the double
  # quotes around the PATTERN value are passed through literally; confirm intended.
  command:
    - transform
    - deploy
    - --file=/etc/redpanda/regex.wasm
    - --name=regex
    - --input-topic=logins
    - --output-topic=edu-filtered-domains
    - --var=PATTERN="[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.edu"
    - --var=MATCH_VALUE=true
    - '-X user=superuser'
    - '-X pass=secretpassword'
    - '-X admin.hosts=redpanda-0:9644'
####################
# MinIO for Tiered Storage #
# See https://min.io/
#
# NOTE: MinIO is included in this quickstart for development and evaluation purposes only.
# It is not supported for production deployments of Redpanda.
#
# For production environments, use one of the supported object storage providers:
# https://docs.redpanda.com/current/deploy/deployment-option/self-hosted/manual/production/requirements/#object-storage-providers-for-tiered-storage
####################
minio:
  image: minio/minio:RELEASE.2025-05-24T17-08-30Z
  container_name: minio
  # Store objects under /data and serve the web console on port 9001.
  command: server --console-address ":9001" /data
  environment:
    # Root credentials; the mc service in this file logs in with the same pair.
    MINIO_ROOT_USER: minio
    MINIO_ROOT_PASSWORD: redpandaTieredStorage7
    MINIO_SERVER_URL: "http://minio:9000"
    MINIO_REGION_NAME: local
    MINIO_DOMAIN: minio
  ports:
    - "9000:9000"
    - "9001:9001"
  volumes:
    - minio:/data
  networks:
    redpanda_network:
      aliases:
        # Extra DNS name for this container on the network.
        # NOTE(review): presumably the virtual-hosted address of the
        # 'redpanda' bucket under MINIO_DOMAIN; confirm.
        - redpanda.minio
  # Healthy once MinIO's readiness endpoint answers successfully.
  healthcheck:
    test:
      - CMD
      - curl
      - "-f"
      - http://localhost:9000/minio/health/ready
    interval: 10s
    timeout: 5s
    retries: 3
mc:
  image: minio/mc:RELEASE.2025-05-21T01-59-54Z
  container_name: mc
  networks:
    - redpanda_network
  depends_on:
    minio:
      condition: service_healthy
  environment:
    - AWS_ACCESS_KEY_ID=minio
    - AWS_SECRET_ACCESS_KEY=redpandaTieredStorage7
    - AWS_REGION=local
  # Bucket bootstrap script:
  #  1. Retry until the 'minio' alias can be registered with the server.
  #  2. Create the 'redpanda' bucket; --ignore-existing keeps this step from
  #     failing when the container restarts and the bucket is already there.
  #  3. Allow anonymous access to the bucket. `mc anonymous` is the current
  #     name of the subcommand formerly called `mc policy`.
  #  4. Keep the container running afterwards.
  entrypoint: >
    /bin/sh -c "
    until (/usr/bin/mc alias set minio http://minio:9000 minio redpandaTieredStorage7) do echo '...waiting...' && sleep 1; done;
    /usr/bin/mc mb --ignore-existing minio/redpanda;
    /usr/bin/mc anonymous set public minio/redpanda;
    tail -f /dev/null
    "

View File

@@ -0,0 +1,77 @@
# Redpanda Connect pipeline: generate one fake user profile per interval and
# publish it to the 'profiles' topic.
input:
  # 'generate' input:
  # https://docs.redpanda.com/redpanda-connect/components/inputs/generate/
  generate:
    # Emit a new record every second.
    interval: 1s
    # Bloblang mapping that builds each record.
    # https://docs.redpanda.com/redpanda-connect/guides/bloblang/about/
    mapping: |
      # Generate a fake first name using the 'first_name' faker function.
      let first_name = fake("first_name")
      # Generate a fake last name using the 'last_name' faker function.
      let last_name = fake("last_name")
      # Define possible subscription levels for users.
      let subscription_levels = ["Free", "Basic", "Premium"]
      # Define possible notification channels for user preferences.
      let notifications = ["email", "sms", "push" ]
      # Define supported languages for user preferences.
      let languages = ["en", "es", "fr", "de", "zh", "jp"]
      # Assign a unique user ID using a UUID digit generator.
      root.user_id = fake("uuid_digit")
      # Assign the generated first name to the 'first_name' field.
      root.first_name = $first_name
      # Assign the generated last name to the 'last_name' field.
      root.last_name = $last_name
      # Construct the user's email by combining the first initial, last name, and a fake domain name.
      # The email is converted to lowercase for consistency.
      root.email = ($first_name.slice(0,1) + $last_name + "@" + fake("domain_name")).lowercase()
      # Assign a fake registration date using the 'date' faker function.
      root.registration_date = fake("date")
      # Assign the current timestamp as the last login time.
      root.last_login = now()
      # Randomly assign a subscription level by selecting an index from the 'subscription_levels' array.
      root.subscription_level = $subscription_levels.index(random_int(min: 0, max: 2))
      # Randomly assign a language preference by selecting an index from the 'languages' array.
      root.preferences.language = $languages.index(random_int(min: 0, max: 5))
      # Randomly assign a notification preference by selecting an index from the 'notifications' array.
      root.preferences.notifications = $notifications.index(random_int(min: 0, max: 2))
pipeline:
  processors:
    # Tag every record with its destination topic; the payload is unchanged.
    - mapping: |
        # Set the target topic for the generated records to 'profiles'.
        meta topic = "profiles"
        # Assign the entire record (root) to be sent to the specified topic.
        root = this
output:
  # 'kafka_franz' output:
  # https://docs.redpanda.com/redpanda-connect/components/outputs/kafka_franz/
  kafka_franz:
    # Bootstrap addresses: one host-side Kafka port per broker.
    seed_brokers:
      - "localhost:19092"
      - "localhost:29092"
      - "localhost:39092"
    # Resolved per message from the 'topic' metadata set in the pipeline.
    topic: ${! metadata("topic") }
    # SASL/SCRAM credentials used to authenticate to the brokers.
    sasl:
      - mechanism: SCRAM-SHA-256
        username: superuser
        password: secretpassword

View File

@@ -0,0 +1,56 @@
# =================================================================
# Initial (bootstrap) cluster properties for the Redpanda quickstart cluster.
# Several of these values are chosen for local development and evaluation
# and are not suitable for production environments.
#
# Bootstrap file documentation:
# https://docs.redpanda.com/current/deploy/deployment-option/self-hosted/manual/production/production-deployment/#configure-a-bootstrap-file
# =================================================================

# ---- Authentication ----
# Require authentication on the Admin API.
# https://docs.redpanda.com/current/reference/properties/cluster-properties/#admin_api_require_auth
admin_api_require_auth: true
# Superuser principals. At least one is needed to create other SASL users.
# https://docs.redpanda.com/current/reference/properties/cluster-properties/#superusers
superusers:
  - superuser
# Enable SASL authentication on the Kafka API.
# https://docs.redpanda.com/current/reference/properties/cluster-properties/#enable_sasl
enable_sasl: true

# ---- Topics and transforms ----
# Create topics automatically the first time they are accessed.
# https://docs.redpanda.com/current/reference/properties/cluster-properties/#auto_create_topics_enabled
auto_create_topics_enabled: true
# Turn on data transforms.
# https://docs.redpanda.com/current/develop/data-transforms/how-transforms-work/
data_transforms_enabled: true

# ---- Audit logging (enterprise feature) ----
# https://docs.redpanda.com/current/manage/audit-logging/
audit_enabled: true

# ---- Tiered Storage (enterprise feature) ----
# https://docs.redpanda.com/current/manage/tiered-storage/
cloud_storage_enabled: true
cloud_storage_region: local
cloud_storage_access_key: minio
cloud_storage_secret_key: redpandaTieredStorage7
cloud_storage_api_endpoint: minio
cloud_storage_api_endpoint_port: 9000
cloud_storage_disable_tls: true
cloud_storage_bucket: redpanda
# Upload segments to Tiered Storage sooner, so the quickstart shows results quickly.
# https://docs.redpanda.com/current/reference/properties/object-storage-properties/#cloud_storage_segment_max_upload_interval_sec
cloud_storage_segment_max_upload_interval_sec: 60

# ---- Continuous Data Balancing (enterprise feature) ----
# Continuously monitors node and rack availability and disk usage, and
# rebalances partitions accordingly.
# https://docs.redpanda.com/current/manage/cluster-maintenance/continuous-data-balancing/
partition_autobalancing_mode: continuous

# ---- Metrics ----
# Consumer group metrics to collect.
# https://docs.redpanda.com/current/reference/properties/cluster-properties/#enable_consumer_group_metrics
enable_consumer_group_metrics:
  - group
  - partition
  - consumer_lag
# Lag collection interval, lowered for the quickstart.
# https://docs.redpanda.com/current/reference/properties/cluster-properties/#consumer_group_lag_collection_interval_sec
consumer_group_lag_collection_interval_sec: 60
# Collect host metrics.
# https://docs.redpanda.com/current/reference/properties/cluster-properties/#enable_host_metrics
enable_host_metrics: true

View File

@@ -0,0 +1,403 @@
# Redpanda quickstart: three brokers, Redpanda Console, Redpanda Connect,
# one-shot rpk helper containers, and MinIO as the Tiered Storage backend.
# Credentials in this file are quickstart defaults, not production secrets.
name: redpanda-quickstart-multi-broker
networks:
  redpanda_network:
    driver: bridge
volumes:
  redpanda-0: null
  redpanda-1: null
  redpanda-2: null
  minio: null
services:
  ##################
  # Redpanda Brokers #
  ##################
  redpanda-0:
    command:
      - redpanda
      - start
      - --kafka-addr internal://0.0.0.0:9092,external://0.0.0.0:19092
      # Address the broker advertises to clients that connect to the Kafka API.
      # Use the internal addresses to connect to the Redpanda brokers
      # from inside the same Docker network.
      # Use the external addresses to connect to the Redpanda brokers
      # from outside the Docker network.
      - --advertise-kafka-addr internal://redpanda-0:9092,external://localhost:19092
      - --pandaproxy-addr internal://0.0.0.0:8082,external://0.0.0.0:18082
      # Address the broker advertises to clients that connect to the HTTP Proxy.
      - --advertise-pandaproxy-addr internal://redpanda-0:8082,external://localhost:18082
      - --schema-registry-addr internal://0.0.0.0:8081,external://0.0.0.0:18081
      # Redpanda brokers use the RPC API to communicate with each other internally.
      - --rpc-addr redpanda-0:33145
      - --advertise-rpc-addr redpanda-0:33145
      # Mode dev-container uses well-known configuration properties for development in containers.
      - --mode dev-container
      # Tells Seastar (the framework Redpanda uses under the hood) to use 1 core on the system.
      - --smp 1
      - --default-log-level=info
    image: docker.redpanda.com/redpandadata/redpanda:v25.2.7
    container_name: redpanda-0
    # Sets the username and password of the bootstrap SCRAM superuser
    # See https://docs.redpanda.com/current/deploy/deployment-option/self-hosted/manual/production/production-deployment/#bootstrap-a-user-account
    environment:
      RP_BOOTSTRAP_USER: "superuser:secretpassword"
    volumes:
      - redpanda-0:/var/lib/redpanda/data
      - ./bootstrap.yml:/etc/redpanda/.bootstrap.yaml
    networks:
      - redpanda_network
    # Host ports are quoted so every HOST:CONTAINER pair is read as a string.
    ports:
      - "18081:18081"
      - "18082:18082"
      - "19092:19092"
      - "19644:9644"
    # Other services wait on this check (condition: service_healthy).
    healthcheck:
      test: ["CMD", "rpk", "cluster", "info", "-X", "user=superuser", "-X", "pass=secretpassword"]
      interval: 10s
      timeout: 15s
      retries: 10
    depends_on:
      minio:
        condition: service_healthy
  # Second broker; joins the cluster through the redpanda-0 seed.
  redpanda-1:
    command:
      - redpanda
      - start
      - --kafka-addr internal://0.0.0.0:9092,external://0.0.0.0:29092
      - --advertise-kafka-addr internal://redpanda-1:9092,external://localhost:29092
      - --pandaproxy-addr internal://0.0.0.0:8082,external://0.0.0.0:28082
      - --advertise-pandaproxy-addr internal://redpanda-1:8082,external://localhost:28082
      - --schema-registry-addr internal://0.0.0.0:8081,external://0.0.0.0:28081
      - --rpc-addr redpanda-1:33145
      - --advertise-rpc-addr redpanda-1:33145
      - --mode dev-container
      - --smp 1
      - --default-log-level=info
      - --seeds redpanda-0:33145
    image: docker.redpanda.com/redpandadata/redpanda:v25.2.7
    container_name: redpanda-1
    environment:
      RP_BOOTSTRAP_USER: "superuser:secretpassword"
    volumes:
      - redpanda-1:/var/lib/redpanda/data
      - ./bootstrap.yml:/etc/redpanda/.bootstrap.yaml
    networks:
      - redpanda_network
    ports:
      - "28081:28081"
      - "28082:28082"
      - "29092:29092"
      - "29644:9644"
    depends_on:
      - redpanda-0
      - minio
  # Third broker; joins the cluster through the redpanda-0 seed.
  redpanda-2:
    command:
      - redpanda
      - start
      - --kafka-addr internal://0.0.0.0:9092,external://0.0.0.0:39092
      - --advertise-kafka-addr internal://redpanda-2:9092,external://localhost:39092
      - --pandaproxy-addr internal://0.0.0.0:8082,external://0.0.0.0:38082
      - --advertise-pandaproxy-addr internal://redpanda-2:8082,external://localhost:38082
      - --schema-registry-addr internal://0.0.0.0:8081,external://0.0.0.0:38081
      - --rpc-addr redpanda-2:33145
      - --advertise-rpc-addr redpanda-2:33145
      - --mode dev-container
      - --smp 1
      - --default-log-level=info
      - --seeds redpanda-0:33145
    image: docker.redpanda.com/redpandadata/redpanda:v25.2.7
    container_name: redpanda-2
    environment:
      RP_BOOTSTRAP_USER: "superuser:secretpassword"
    volumes:
      - redpanda-2:/var/lib/redpanda/data
      - ./bootstrap.yml:/etc/redpanda/.bootstrap.yaml
    networks:
      - redpanda_network
    ports:
      - "38081:38081"
      - "38082:38082"
      - "39092:39092"
      - "39644:9644"
    depends_on:
      - redpanda-0
      - minio
  ####################
  # Redpanda Console #
  ####################
  console:
    container_name: redpanda-console
    image: docker.redpanda.com/redpandadata/console:v3.2.2
    networks:
      - redpanda_network
    entrypoint: /bin/sh
    # Writes the inline config below to /tmp/config.yml, then starts Console.
    # "$$" is Compose's escape for a literal "$", so the shell expands the variable.
    command: -c 'echo "$$CONSOLE_CONFIG_FILE" > /tmp/config.yml && /app/console'
    volumes:
      - ./config:/tmp/config/
    environment:
      CONFIG_FILEPATH: ${CONFIG_FILEPATH:-/tmp/config.yml}
      CONSOLE_CONFIG_FILE: |
        # Configure a connection to the Redpanda cluster
        # See https://docs.redpanda.com/current/console/config/connect-to-redpanda/
        kafka:
          brokers: ["redpanda-0:9092"]
          sasl:
            enabled: true
            impersonateUser: true
        schemaRegistry:
          enabled: true
          urls: ["http://redpanda-0:8081","http://redpanda-1:8081","http://redpanda-2:8081"]
          authentication:
            impersonateUser: true
        redpanda:
          adminApi:
            enabled: true
            urls: ["http://redpanda-0:9644","http://redpanda-1:9644","http://redpanda-2:9644"]
            authentication:
              basic:
                username: superuser
                password: secretpassword
              impersonateUser: false
        console:
          # Configures Redpanda Console to fetch topic documentation from GitHub and display it in the UI.
          # See https://docs.redpanda.com/current/console/config/topic-documentation/
          topicDocumentation:
            enabled: true
            git:
              enabled: true
              repository:
                url: https://github.com/redpanda-data/docs
                branch: main
                baseDirectory: tests/docker-compose
        authentication:
          jwtSigningKey: vazxnT+ZHtxKslK6QlDGovcYnSjTk/lKMmZ+mHrBVE+YdVDkLgSuP6AszAKe9Gvq
          basic:
            enabled: true
        authorization:
          roleBindings:
            - roleName: admin
              users:
                - loginType: basic
                  name: superuser
    ports:
      - "8080:8080"
    depends_on:
      redpanda-0:
        condition: service_healthy
      createtopic:
        condition: service_completed_successfully
      registerschema:
        condition: service_completed_successfully
      deploytransform:
        condition: service_completed_successfully
  ####################
  # Redpanda Connect #
  ####################
  connect:
    container_name: redpanda-connect
    # NOTE: no tag is given here, so the image is not pinned to a release.
    image: docker.redpanda.com/redpandadata/connect
    networks:
      - redpanda_network
    entrypoint: /bin/sh
    depends_on:
      redpanda-0:
        condition: service_healthy
    command: -c 'echo "$$CONNECT_CFG_FILE" > /tmp/connect.yml; /redpanda-connect -c /tmp/connect.yml'
    environment:
      # This Redpanda Connect configuration creates fake data,
      # processes it, and writes the output to a set of topics.
      #
      # Input:
      #   - Uses Redpanda Connect's generate input to generate fake data.
      #     See https://docs.redpanda.com/redpanda-connect/components/inputs/generate/
      # Pipeline:
      #   - Bloblang mapping to batch each input and map 1 message to 'logins'
      #     topic, and a random number (1-3) of messages to 'transactions' topic
      #   - Unarchive processor to parse the JSON array and extract each
      #     element into its own message.
      #     See https://docs.redpanda.com/redpanda-connect/guides/bloblang/about/
      # Output:
      #   - kafka_franz output to write the messages to the Redpanda brokers.
      #     See https://docs.redpanda.com/redpanda-connect/components/outputs/kafka_franz/
      CONNECT_CFG_FILE: |
        input:
          generate:
            interval: 1s
            mapping: |
              let first_name = fake("first_name")
              let last_name = fake("last_name")
              root.user_id = counter()
              root.name = $$first_name + " " + $$last_name
              root.email = ($$first_name.slice(0,1) + $$last_name + "@" + fake("domain_name")).lowercase()
              root.ip = fake("ipv4")
              root.login_time = now()
        pipeline:
          processors:
            - mapping: |
                root = range(0, random_int(min:2, max:4)).map_each(cust -> this)
            - unarchive:
                format: "json_array"
            - mapping: |
                if batch_index() == 0 {
                  meta topic = "logins"
                  root = this
                } else {
                  meta topic = "transactions"
                  root.user_id = this.user_id
                  root.email = this.email
                  root.index = batch_index() - 1
                  root.product_url = fake("url")
                  root.price = fake("amount_with_currency")
                  root.timestamp = now()
                }
        output:
          kafka_franz:
            seed_brokers: [ "redpanda-0:9092" ]
            topic: $${! metadata("topic") }
            sasl:
              - mechanism: SCRAM-SHA-256
                password: secretpassword
                username: superuser
  ####################
  # rpk container to create the edu-filtered-domains topic #
  # See https://docs.redpanda.com/current/reference/rpk/rpk-topic/rpk-topic-create/
  ####################
  createtopic:
    command:
      - topic
      - create
      - edu-filtered-domains
      - -X user=superuser
      - -X pass=secretpassword
      - -X brokers=redpanda-0:9092
    image: docker.redpanda.com/redpandadata/redpanda:v25.2.7
    networks:
      - redpanda_network
    depends_on:
      redpanda-0:
        condition: service_healthy
  ####################
  # rpk container to register the schema #
  # See https://docs.redpanda.com/current/manage/schema-reg/schema-reg-api/
  ####################
  registerschema:
    command:
      - registry
      - schema
      - create
      - transactions
      - --schema
      - /etc/redpanda/transactions-schema.json
      - -X user=superuser
      - -X pass=secretpassword
      - -X registry.hosts=redpanda-0:8081
    image: docker.redpanda.com/redpandadata/redpanda:v25.2.7
    # Mount the local schema file into the container.
    volumes:
      - ./transactions-schema.json:/etc/redpanda/transactions-schema.json
    networks:
      - redpanda_network
    depends_on:
      redpanda-0:
        condition: service_healthy
  ####################
  # rpk container to deploy a consumer group #
  # See https://docs.redpanda.com/current/reference/rpk/rpk-topic/rpk-topic-consume/
  ####################
  consumergroup:
    command:
      - topic
      - consume
      - transactions
      - --group
      - transactions-consumer
      - -X user=superuser
      - -X pass=secretpassword
      - -X brokers=redpanda-0:9092
    image: docker.redpanda.com/redpandadata/redpanda:v25.2.7
    networks:
      - redpanda_network
    depends_on:
      createtopic:
        condition: service_completed_successfully
      deploytransform:
        condition: service_completed_successfully
  ####################
  # rpk container to deploy the pre-built data transform #
  # See https://docs.redpanda.com/current/develop/data-transforms/deploy/
  ####################
  deploytransform:
    command:
      - transform
      - deploy
      - --file=/etc/redpanda/regex.wasm
      - --name=regex
      - --input-topic=logins
      - --output-topic=edu-filtered-domains
      - --var=PATTERN="[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.edu"
      - --var=MATCH_VALUE=true
      - -X user=superuser
      - -X pass=secretpassword
      - -X admin.hosts=redpanda-0:9644
    image: docker.redpanda.com/redpandadata/redpanda:v25.2.7
    volumes:
      - ./transform/regex.wasm:/etc/redpanda/regex.wasm
    networks:
      - redpanda_network
    depends_on:
      createtopic:
        condition: service_completed_successfully
  ####################
  # MinIO for Tiered Storage #
  # See https://min.io/
  #
  # NOTE: MinIO is included in this quickstart for development and evaluation purposes only.
  # It is not supported for production deployments of Redpanda.
  #
  # For production environments, use one of the supported object storage providers:
  # https://docs.redpanda.com/current/deploy/deployment-option/self-hosted/manual/production/requirements/#object-storage-providers-for-tiered-storage
  ####################
  minio:
    container_name: minio
    image: minio/minio:RELEASE.2025-05-24T17-08-30Z
    command: server --console-address ":9001" /data
    ports:
      - "9000:9000"
      - "9001:9001"
    environment:
      MINIO_ROOT_USER: minio
      MINIO_ROOT_PASSWORD: redpandaTieredStorage7
      MINIO_SERVER_URL: "http://minio:9000"
      MINIO_REGION_NAME: local
      MINIO_DOMAIN: minio
    volumes:
      - minio:/data
    networks:
      redpanda_network:
        aliases:
          - redpanda.minio
    healthcheck:
      test: ["CMD", "curl", "-f", "http://localhost:9000/minio/health/ready"]
      interval: 10s
      timeout: 5s
      retries: 3
  # MinIO client: waits until the MinIO alias can be set, creates the
  # `redpanda` bucket, makes it publicly accessible, then idles so the
  # container stays up.
  mc:
    depends_on:
      minio:
        condition: service_healthy
    image: minio/mc:RELEASE.2025-05-21T01-59-54Z
    container_name: mc
    networks:
      - redpanda_network
    environment:
      - AWS_ACCESS_KEY_ID=minio
      - AWS_SECRET_ACCESS_KEY=redpandaTieredStorage7
      - AWS_REGION=local
    # `mc anonymous set` replaces the older `mc policy set` subcommand.
    entrypoint: >
      /bin/sh -c "
      until (/usr/bin/mc alias set minio http://minio:9000 minio redpandaTieredStorage7) do echo '...waiting...' && sleep 1; done;
      /usr/bin/mc mb minio/redpanda;
      /usr/bin/mc anonymous set public minio/redpanda;
      tail -f /dev/null
      "

View File

@@ -0,0 +1,77 @@
# Redpanda Connect pipeline that generates one fake user profile per second
# and writes it to the 'profiles' topic through the brokers' external listeners.
input:
  # Use the 'generate' input
  # https://docs.redpanda.com/redpanda-connect/components/inputs/generate/
  generate:
    # The interval at which new records are generated.
    interval: 1s
    # The mapping section defines how each generated record is structured.
    # The language used here is called Bloblang.
    # https://docs.redpanda.com/redpanda-connect/guides/bloblang/about/
    mapping: |
      # Generate a fake first name using the 'first_name' faker function.
      let first_name = fake("first_name")
      # Generate a fake last name using the 'last_name' faker function.
      let last_name = fake("last_name")
      # Define possible subscription levels for users.
      let subscription_levels = ["Free", "Basic", "Premium"]
      # Define possible notification channels for user preferences.
      let notifications = ["email", "sms", "push" ]
      # Define supported languages for user preferences (ISO 639-1 codes;
      # Japanese is "ja" - "jp" is the country code, not a language code).
      let languages = ["en", "es", "fr", "de", "zh", "ja"]
      # Assign a unique user ID using a UUID digit generator.
      root.user_id = fake("uuid_digit")
      # Assign the generated first name to the 'first_name' field.
      root.first_name = $first_name
      # Assign the generated last name to the 'last_name' field.
      root.last_name = $last_name
      # Construct the user's email by combining the first initial, last name, and a fake domain name.
      # The email is converted to lowercase for consistency.
      root.email = ($first_name.slice(0,1) + $last_name + "@" + fake("domain_name")).lowercase()
      # Assign a fake registration date using the 'date' faker function.
      root.registration_date = fake("date")
      # Assign the current timestamp as the last login time.
      root.last_login = now()
      # Randomly assign a subscription level by selecting an index from the 'subscription_levels' array.
      root.subscription_level = $subscription_levels.index(random_int(min: 0, max: 2))
      # Randomly assign a language preference by selecting an index from the 'languages' array.
      root.preferences.language = $languages.index(random_int(min: 0, max: 5))
      # Randomly assign a notification preference by selecting an index from the 'notifications' array.
      root.preferences.notifications = $notifications.index(random_int(min: 0, max: 2))
pipeline:
  processors:
    - mapping: |
        # Set the target topic for the generated records to 'profiles'.
        meta topic = "profiles"
        # Assign the entire record (root) to be sent to the specified topic.
        root = this
output:
  # Use the 'kafka_franz' output to send the result back to Redpanda
  # https://docs.redpanda.com/redpanda-connect/components/outputs/kafka_franz/
  kafka_franz:
    # Define the list of seed brokers for the Kafka cluster.
    seed_brokers: ["localhost:19092", "localhost:29092", "localhost:39092"]
    # Dynamically assign the topic based on the metadata specified in the processors.
    # In this case, it resolves to the 'profiles' topic.
    topic: ${! metadata("topic") }
    # Configure SASL authentication to securely connect to the Kafka brokers.
    sasl:
      # Specify the SASL mechanism to use for authentication.
      - mechanism: SCRAM-SHA-256
        # The password for the SASL authentication.
        password: secretpassword
        # The username for the SASL authentication.
        username: superuser

View File

@@ -0,0 +1,56 @@
# =================================================================
# Initial cluster properties for a Redpanda cluster.
# Several of the values below are chosen for quickstart development and
# evaluation and are not suitable for production environments.
#
# More about bootstrap files:
# https://docs.redpanda.com/current/deploy/deployment-option/self-hosted/manual/production/production-deployment/#configure-a-bootstrap-file
# =================================================================

# ---- Authentication ---------------------------------------------
# SASL authentication is enabled for the Kafka and Admin APIs.
# https://docs.redpanda.com/current/reference/properties/cluster-properties/#enable_sasl
enable_sasl: true
# https://docs.redpanda.com/current/reference/properties/cluster-properties/#admin_api_require_auth
admin_api_require_auth: true
# Creating further SASL users requires at least one superuser.
# https://docs.redpanda.com/current/reference/properties/cluster-properties/#superusers
superusers:
  - superuser

# ---- Topics and transforms --------------------------------------
# Topics are created on first access.
# https://docs.redpanda.com/current/reference/properties/cluster-properties/#auto_create_topics_enabled
auto_create_topics_enabled: true
# Data transforms are switched on.
# https://docs.redpanda.com/current/develop/data-transforms/how-transforms-work/
data_transforms_enabled: true

# ---- Audit logging (enterprise feature) -------------------------
# https://docs.redpanda.com/current/manage/audit-logging/
audit_enabled: true

# ---- Tiered Storage (enterprise feature) ------------------------
# https://docs.redpanda.com/current/manage/tiered-storage/
cloud_storage_enabled: true
cloud_storage_bucket: redpanda
cloud_storage_region: local
cloud_storage_api_endpoint: minio
cloud_storage_api_endpoint_port: 9000
cloud_storage_disable_tls: true
cloud_storage_access_key: minio
cloud_storage_secret_key: redpandaTieredStorage7
# For the quickstart, segments are forced up to Tiered Storage sooner.
# https://docs.redpanda.com/current/reference/properties/object-storage-properties/#cloud_storage_segment_max_upload_interval_sec
cloud_storage_segment_max_upload_interval_sec: 60

# ---- Continuous Data Balancing (enterprise feature) -------------
# Continuously monitors node and rack availability and disk usage, and
# rebalances partitions dynamically so the cluster can self-heal.
# https://docs.redpanda.com/current/manage/cluster-maintenance/continuous-data-balancing/
partition_autobalancing_mode: continuous

# ---- Metrics ----------------------------------------------------
# Consumer group metrics that Redpanda collects.
# https://docs.redpanda.com/current/reference/properties/cluster-properties/#enable_consumer_group_metrics
enable_consumer_group_metrics:
  - group
  - partition
  - consumer_lag
# The collection interval is lowered for the quickstart.
# https://docs.redpanda.com/current/reference/properties/cluster-properties/#consumer_group_lag_collection_interval_sec
consumer_group_lag_collection_interval_sec: 60
# Host metrics are collected as well.
# https://docs.redpanda.com/current/reference/properties/cluster-properties/#enable_host_metrics
enable_host_metrics: true

View File

@@ -0,0 +1,403 @@
# Redpanda quickstart: three brokers, Redpanda Console, Redpanda Connect,
# one-shot rpk helper containers, and MinIO as the Tiered Storage backend.
# Credentials in this file are quickstart defaults, not production secrets.
name: redpanda-quickstart-multi-broker
networks:
  redpanda_network:
    driver: bridge
volumes:
  redpanda-0: null
  redpanda-1: null
  redpanda-2: null
  minio: null
services:
  ##################
  # Redpanda Brokers #
  ##################
  redpanda-0:
    command:
      - redpanda
      - start
      - --kafka-addr internal://0.0.0.0:9092,external://0.0.0.0:19092
      # Address the broker advertises to clients that connect to the Kafka API.
      # Use the internal addresses to connect to the Redpanda brokers
      # from inside the same Docker network.
      # Use the external addresses to connect to the Redpanda brokers
      # from outside the Docker network.
      - --advertise-kafka-addr internal://redpanda-0:9092,external://localhost:19092
      - --pandaproxy-addr internal://0.0.0.0:8082,external://0.0.0.0:18082
      # Address the broker advertises to clients that connect to the HTTP Proxy.
      - --advertise-pandaproxy-addr internal://redpanda-0:8082,external://localhost:18082
      - --schema-registry-addr internal://0.0.0.0:8081,external://0.0.0.0:18081
      # Redpanda brokers use the RPC API to communicate with each other internally.
      - --rpc-addr redpanda-0:33145
      - --advertise-rpc-addr redpanda-0:33145
      # Mode dev-container uses well-known configuration properties for development in containers.
      - --mode dev-container
      # Tells Seastar (the framework Redpanda uses under the hood) to use 1 core on the system.
      - --smp 1
      - --default-log-level=info
    image: docker.redpanda.com/redpandadata/redpanda:v25.2.7
    container_name: redpanda-0
    # Sets the username and password of the bootstrap SCRAM superuser
    # See https://docs.redpanda.com/current/deploy/deployment-option/self-hosted/manual/production/production-deployment/#bootstrap-a-user-account
    environment:
      RP_BOOTSTRAP_USER: "superuser:secretpassword"
    volumes:
      - redpanda-0:/var/lib/redpanda/data
      - ./bootstrap.yml:/etc/redpanda/.bootstrap.yaml
    networks:
      - redpanda_network
    # Host ports are quoted so every HOST:CONTAINER pair is read as a string.
    ports:
      - "18081:18081"
      - "18082:18082"
      - "19092:19092"
      - "19644:9644"
    # Other services wait on this check (condition: service_healthy).
    healthcheck:
      test: ["CMD", "rpk", "cluster", "info", "-X", "user=superuser", "-X", "pass=secretpassword"]
      interval: 10s
      timeout: 15s
      retries: 10
    depends_on:
      minio:
        condition: service_healthy
  # Second broker; joins the cluster through the redpanda-0 seed.
  redpanda-1:
    command:
      - redpanda
      - start
      - --kafka-addr internal://0.0.0.0:9092,external://0.0.0.0:29092
      - --advertise-kafka-addr internal://redpanda-1:9092,external://localhost:29092
      - --pandaproxy-addr internal://0.0.0.0:8082,external://0.0.0.0:28082
      - --advertise-pandaproxy-addr internal://redpanda-1:8082,external://localhost:28082
      - --schema-registry-addr internal://0.0.0.0:8081,external://0.0.0.0:28081
      - --rpc-addr redpanda-1:33145
      - --advertise-rpc-addr redpanda-1:33145
      - --mode dev-container
      - --smp 1
      - --default-log-level=info
      - --seeds redpanda-0:33145
    image: docker.redpanda.com/redpandadata/redpanda:v25.2.7
    container_name: redpanda-1
    environment:
      RP_BOOTSTRAP_USER: "superuser:secretpassword"
    volumes:
      - redpanda-1:/var/lib/redpanda/data
      - ./bootstrap.yml:/etc/redpanda/.bootstrap.yaml
    networks:
      - redpanda_network
    ports:
      - "28081:28081"
      - "28082:28082"
      - "29092:29092"
      - "29644:9644"
    depends_on:
      - redpanda-0
      - minio
  # Third broker; joins the cluster through the redpanda-0 seed.
  redpanda-2:
    command:
      - redpanda
      - start
      - --kafka-addr internal://0.0.0.0:9092,external://0.0.0.0:39092
      - --advertise-kafka-addr internal://redpanda-2:9092,external://localhost:39092
      - --pandaproxy-addr internal://0.0.0.0:8082,external://0.0.0.0:38082
      - --advertise-pandaproxy-addr internal://redpanda-2:8082,external://localhost:38082
      - --schema-registry-addr internal://0.0.0.0:8081,external://0.0.0.0:38081
      - --rpc-addr redpanda-2:33145
      - --advertise-rpc-addr redpanda-2:33145
      - --mode dev-container
      - --smp 1
      - --default-log-level=info
      - --seeds redpanda-0:33145
    image: docker.redpanda.com/redpandadata/redpanda:v25.2.7
    container_name: redpanda-2
    environment:
      RP_BOOTSTRAP_USER: "superuser:secretpassword"
    volumes:
      - redpanda-2:/var/lib/redpanda/data
      - ./bootstrap.yml:/etc/redpanda/.bootstrap.yaml
    networks:
      - redpanda_network
    ports:
      - "38081:38081"
      - "38082:38082"
      - "39092:39092"
      - "39644:9644"
    depends_on:
      - redpanda-0
      - minio
  ####################
  # Redpanda Console #
  ####################
  console:
    container_name: redpanda-console
    image: docker.redpanda.com/redpandadata/console:v3.2.2
    networks:
      - redpanda_network
    entrypoint: /bin/sh
    # Writes the inline config below to /tmp/config.yml, then starts Console.
    # "$$" is Compose's escape for a literal "$", so the shell expands the variable.
    command: -c 'echo "$$CONSOLE_CONFIG_FILE" > /tmp/config.yml && /app/console'
    volumes:
      - ./config:/tmp/config/
    environment:
      CONFIG_FILEPATH: ${CONFIG_FILEPATH:-/tmp/config.yml}
      CONSOLE_CONFIG_FILE: |
        # Configure a connection to the Redpanda cluster
        # See https://docs.redpanda.com/current/console/config/connect-to-redpanda/
        kafka:
          brokers: ["redpanda-0:9092"]
          sasl:
            enabled: true
            impersonateUser: true
        schemaRegistry:
          enabled: true
          urls: ["http://redpanda-0:8081","http://redpanda-1:8081","http://redpanda-2:8081"]
          authentication:
            impersonateUser: true
        redpanda:
          adminApi:
            enabled: true
            urls: ["http://redpanda-0:9644","http://redpanda-1:9644","http://redpanda-2:9644"]
            authentication:
              basic:
                username: superuser
                password: secretpassword
              impersonateUser: false
        console:
          # Configures Redpanda Console to fetch topic documentation from GitHub and display it in the UI.
          # See https://docs.redpanda.com/current/console/config/topic-documentation/
          topicDocumentation:
            enabled: true
            git:
              enabled: true
              repository:
                url: https://github.com/redpanda-data/docs
                branch: main
                baseDirectory: tests/docker-compose
        authentication:
          jwtSigningKey: vazxnT+ZHtxKslK6QlDGovcYnSjTk/lKMmZ+mHrBVE+YdVDkLgSuP6AszAKe9Gvq
          basic:
            enabled: true
        authorization:
          roleBindings:
            - roleName: admin
              users:
                - loginType: basic
                  name: superuser
    ports:
      - "8080:8080"
    depends_on:
      redpanda-0:
        condition: service_healthy
      createtopic:
        condition: service_completed_successfully
      registerschema:
        condition: service_completed_successfully
      deploytransform:
        condition: service_completed_successfully
  ####################
  # Redpanda Connect #
  ####################
  connect:
    container_name: redpanda-connect
    # NOTE: no tag is given here, so the image is not pinned to a release.
    image: docker.redpanda.com/redpandadata/connect
    networks:
      - redpanda_network
    entrypoint: /bin/sh
    depends_on:
      redpanda-0:
        condition: service_healthy
    command: -c 'echo "$$CONNECT_CFG_FILE" > /tmp/connect.yml; /redpanda-connect -c /tmp/connect.yml'
    environment:
      # This Redpanda Connect configuration creates fake data,
      # processes it, and writes the output to a set of topics.
      #
      # Input:
      #   - Uses Redpanda Connect's generate input to generate fake data.
      #     See https://docs.redpanda.com/redpanda-connect/components/inputs/generate/
      # Pipeline:
      #   - Bloblang mapping to batch each input and map 1 message to 'logins'
      #     topic, and a random number (1-3) of messages to 'transactions' topic
      #   - Unarchive processor to parse the JSON array and extract each
      #     element into its own message.
      #     See https://docs.redpanda.com/redpanda-connect/guides/bloblang/about/
      # Output:
      #   - kafka_franz output to write the messages to the Redpanda brokers.
      #     See https://docs.redpanda.com/redpanda-connect/components/outputs/kafka_franz/
      CONNECT_CFG_FILE: |
        input:
          generate:
            interval: 1s
            mapping: |
              let first_name = fake("first_name")
              let last_name = fake("last_name")
              root.user_id = counter()
              root.name = $$first_name + " " + $$last_name
              root.email = ($$first_name.slice(0,1) + $$last_name + "@" + fake("domain_name")).lowercase()
              root.ip = fake("ipv4")
              root.login_time = now()
        pipeline:
          processors:
            - mapping: |
                root = range(0, random_int(min:2, max:4)).map_each(cust -> this)
            - unarchive:
                format: "json_array"
            - mapping: |
                if batch_index() == 0 {
                  meta topic = "logins"
                  root = this
                } else {
                  meta topic = "transactions"
                  root.user_id = this.user_id
                  root.email = this.email
                  root.index = batch_index() - 1
                  root.product_url = fake("url")
                  root.price = fake("amount_with_currency")
                  root.timestamp = now()
                }
        output:
          kafka_franz:
            seed_brokers: [ "redpanda-0:9092" ]
            topic: $${! metadata("topic") }
            sasl:
              - mechanism: SCRAM-SHA-256
                password: secretpassword
                username: superuser
  ####################
  # rpk container to create the edu-filtered-domains topic #
  # See https://docs.redpanda.com/current/reference/rpk/rpk-topic/rpk-topic-create/
  ####################
  createtopic:
    command:
      - topic
      - create
      - edu-filtered-domains
      - -X user=superuser
      - -X pass=secretpassword
      - -X brokers=redpanda-0:9092
    image: docker.redpanda.com/redpandadata/redpanda:v25.2.7
    networks:
      - redpanda_network
    depends_on:
      redpanda-0:
        condition: service_healthy
  ####################
  # rpk container to register the schema #
  # See https://docs.redpanda.com/current/manage/schema-reg/schema-reg-api/
  ####################
  registerschema:
    command:
      - registry
      - schema
      - create
      - transactions
      - --schema
      - /etc/redpanda/transactions-schema.json
      - -X user=superuser
      - -X pass=secretpassword
      - -X registry.hosts=redpanda-0:8081
    image: docker.redpanda.com/redpandadata/redpanda:v25.2.7
    # Mount the local schema file into the container.
    volumes:
      - ./transactions-schema.json:/etc/redpanda/transactions-schema.json
    networks:
      - redpanda_network
    depends_on:
      redpanda-0:
        condition: service_healthy
  ####################
  # rpk container to deploy a consumer group #
  # See https://docs.redpanda.com/current/reference/rpk/rpk-topic/rpk-topic-consume/
  ####################
  consumergroup:
    command:
      - topic
      - consume
      - transactions
      - --group
      - transactions-consumer
      - -X user=superuser
      - -X pass=secretpassword
      - -X brokers=redpanda-0:9092
    image: docker.redpanda.com/redpandadata/redpanda:v25.2.7
    networks:
      - redpanda_network
    depends_on:
      createtopic:
        condition: service_completed_successfully
      deploytransform:
        condition: service_completed_successfully
  ####################
  # rpk container to deploy the pre-built data transform #
  # See https://docs.redpanda.com/current/develop/data-transforms/deploy/
  ####################
  deploytransform:
    command:
      - transform
      - deploy
      - --file=/etc/redpanda/regex.wasm
      - --name=regex
      - --input-topic=logins
      - --output-topic=edu-filtered-domains
      - --var=PATTERN="[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.edu"
      - --var=MATCH_VALUE=true
      - -X user=superuser
      - -X pass=secretpassword
      - -X admin.hosts=redpanda-0:9644
    image: docker.redpanda.com/redpandadata/redpanda:v25.2.7
    volumes:
      - ./transform/regex.wasm:/etc/redpanda/regex.wasm
    networks:
      - redpanda_network
    depends_on:
      createtopic:
        condition: service_completed_successfully
  ####################
  # MinIO for Tiered Storage #
  # See https://min.io/
  #
  # NOTE: MinIO is included in this quickstart for development and evaluation purposes only.
  # It is not supported for production deployments of Redpanda.
  #
  # For production environments, use one of the supported object storage providers:
  # https://docs.redpanda.com/current/deploy/deployment-option/self-hosted/manual/production/requirements/#object-storage-providers-for-tiered-storage
  ####################
  minio:
    container_name: minio
    image: minio/minio:RELEASE.2025-05-24T17-08-30Z
    command: server --console-address ":9001" /data
    ports:
      - "9000:9000"
      - "9001:9001"
    environment:
      MINIO_ROOT_USER: minio
      MINIO_ROOT_PASSWORD: redpandaTieredStorage7
      MINIO_SERVER_URL: "http://minio:9000"
      MINIO_REGION_NAME: local
      MINIO_DOMAIN: minio
    volumes:
      - minio:/data
    networks:
      redpanda_network:
        aliases:
          - redpanda.minio
    healthcheck:
      test: ["CMD", "curl", "-f", "http://localhost:9000/minio/health/ready"]
      interval: 10s
      timeout: 5s
      retries: 3
  # MinIO client: waits until the MinIO alias can be set, creates the
  # `redpanda` bucket, makes it publicly accessible, then idles so the
  # container stays up.
  mc:
    depends_on:
      minio:
        condition: service_healthy
    image: minio/mc:RELEASE.2025-05-21T01-59-54Z
    container_name: mc
    networks:
      - redpanda_network
    environment:
      - AWS_ACCESS_KEY_ID=minio
      - AWS_SECRET_ACCESS_KEY=redpandaTieredStorage7
      - AWS_REGION=local
    # `mc anonymous set` replaces the older `mc policy set` subcommand.
    entrypoint: >
      /bin/sh -c "
      until (/usr/bin/mc alias set minio http://minio:9000 minio redpandaTieredStorage7) do echo '...waiting...' && sleep 1; done;
      /usr/bin/mc mb minio/redpanda;
      /usr/bin/mc anonymous set public minio/redpanda;
      tail -f /dev/null
      "

View File

@@ -0,0 +1,77 @@
# Redpanda Connect pipeline that generates one fake user profile per second
# and writes it to the 'profiles' topic through the brokers' external listeners.
input:
  # Use the 'generate' input
  # https://docs.redpanda.com/redpanda-connect/components/inputs/generate/
  generate:
    # The interval at which new records are generated.
    interval: 1s
    # The mapping section defines how each generated record is structured.
    # The language used here is called Bloblang.
    # https://docs.redpanda.com/redpanda-connect/guides/bloblang/about/
    mapping: |
      # Generate a fake first name using the 'first_name' faker function.
      let first_name = fake("first_name")
      # Generate a fake last name using the 'last_name' faker function.
      let last_name = fake("last_name")
      # Define possible subscription levels for users.
      let subscription_levels = ["Free", "Basic", "Premium"]
      # Define possible notification channels for user preferences.
      let notifications = ["email", "sms", "push" ]
      # Define supported languages for user preferences (ISO 639-1 codes;
      # Japanese is "ja" - "jp" is the country code, not a language code).
      let languages = ["en", "es", "fr", "de", "zh", "ja"]
      # Assign a unique user ID using a UUID digit generator.
      root.user_id = fake("uuid_digit")
      # Assign the generated first name to the 'first_name' field.
      root.first_name = $first_name
      # Assign the generated last name to the 'last_name' field.
      root.last_name = $last_name
      # Construct the user's email by combining the first initial, last name, and a fake domain name.
      # The email is converted to lowercase for consistency.
      root.email = ($first_name.slice(0,1) + $last_name + "@" + fake("domain_name")).lowercase()
      # Assign a fake registration date using the 'date' faker function.
      root.registration_date = fake("date")
      # Assign the current timestamp as the last login time.
      root.last_login = now()
      # Randomly assign a subscription level by selecting an index from the 'subscription_levels' array.
      root.subscription_level = $subscription_levels.index(random_int(min: 0, max: 2))
      # Randomly assign a language preference by selecting an index from the 'languages' array.
      root.preferences.language = $languages.index(random_int(min: 0, max: 5))
      # Randomly assign a notification preference by selecting an index from the 'notifications' array.
      root.preferences.notifications = $notifications.index(random_int(min: 0, max: 2))
pipeline:
  processors:
    - mapping: |
        # Set the target topic for the generated records to 'profiles'.
        meta topic = "profiles"
        # Assign the entire record (root) to be sent to the specified topic.
        root = this
output:
  # Use the 'kafka_franz' output to send the result back to Redpanda
  # https://docs.redpanda.com/redpanda-connect/components/outputs/kafka_franz/
  kafka_franz:
    # Define the list of seed brokers for the Kafka cluster.
    seed_brokers: ["localhost:19092", "localhost:29092", "localhost:39092"]
    # Dynamically assign the topic based on the metadata specified in the processors.
    # In this case, it resolves to the 'profiles' topic.
    topic: ${! metadata("topic") }
    # Configure SASL authentication to securely connect to the Kafka brokers.
    sasl:
      # Specify the SASL mechanism to use for authentication.
      - mechanism: SCRAM-SHA-256
        # The password for the SASL authentication.
        password: secretpassword
        # The username for the SASL authentication.
        username: superuser

View File

@@ -0,0 +1,56 @@
# =================================================================
# This file defines initial cluster properties for a Redpanda cluster.
# Some of these settings are intended for quickstart development and evaluation
# and are not suitable for production environments.
#
# For more information on bootstrap files, see:
# https://docs.redpanda.com/current/deploy/deployment-option/self-hosted/manual/production/production-deployment/#configure-a-bootstrap-file
# =================================================================
#
# ----- Authentication -----
# Require authentication on the Admin API.
# https://docs.redpanda.com/current/reference/properties/cluster-properties/#admin_api_require_auth
admin_api_require_auth: true
# Superuser principals. At least one is needed so that other SASL users can be created.
# https://docs.redpanda.com/current/reference/properties/cluster-properties/#superusers
superusers: [superuser]
# Require SASL authentication on the Kafka API.
# https://docs.redpanda.com/current/reference/properties/cluster-properties/#enable_sasl
enable_sasl: true

# ----- Topics and transforms -----
# Create a topic automatically the first time it is accessed.
# https://docs.redpanda.com/current/reference/properties/cluster-properties/#auto_create_topics_enabled
auto_create_topics_enabled: true
# Turn on data transforms.
# https://docs.redpanda.com/current/develop/data-transforms/how-transforms-work/
data_transforms_enabled: true

# ----- Audit logging (enterprise feature) -----
# https://docs.redpanda.com/current/manage/audit-logging/
audit_enabled: true

# ----- Tiered Storage (enterprise feature) -----
# https://docs.redpanda.com/current/manage/tiered-storage/
cloud_storage_enabled: true
cloud_storage_region: local
cloud_storage_access_key: minio
cloud_storage_secret_key: redpandaTieredStorage7
cloud_storage_api_endpoint: minio
cloud_storage_api_endpoint_port: 9000
cloud_storage_disable_tls: true
cloud_storage_bucket: redpanda
# Upload segments to Tiered Storage sooner than usual, for the purposes of the quickstart.
# https://docs.redpanda.com/current/reference/properties/object-storage-properties/#cloud_storage_segment_max_upload_interval_sec
cloud_storage_segment_max_upload_interval_sec: 60

# ----- Continuous Data Balancing (enterprise feature) -----
# Continuously monitors node and rack availability and disk usage, and
# rebalances partitions accordingly.
# https://docs.redpanda.com/current/manage/cluster-maintenance/continuous-data-balancing/
partition_autobalancing_mode: continuous

# ----- Metrics -----
# Consumer group metrics that Redpanda collects.
# https://docs.redpanda.com/current/reference/properties/cluster-properties/#enable_consumer_group_metrics
enable_consumer_group_metrics: ["group", "partition", "consumer_lag"]
# Collection interval for consumer group lag, lowered for the quickstart.
# https://docs.redpanda.com/current/reference/properties/cluster-properties/#consumer_group_lag_collection_interval_sec
consumer_group_lag_collection_interval_sec: 60
# Collect host metrics.
# https://docs.redpanda.com/current/reference/properties/cluster-properties/#enable_host_metrics
enable_host_metrics: true

View File

@@ -0,0 +1,403 @@
# Compose project name.
name: redpanda-quickstart-multi-broker

# Single bridge network shared by every service in this file.
networks:
  redpanda_network:
    driver: bridge

# Named volumes with default settings: one data volume per broker plus
# one for MinIO.
volumes:
  redpanda-0: {}
  redpanda-1: {}
  redpanda-2: {}
  minio: {}
services:
##################
# Redpanda Brokers #
##################
redpanda-0:
  image: docker.redpanda.com/redpandadata/redpanda:v25.2.7
  container_name: redpanda-0
  command:
    - redpanda
    - start
    # Kafka API listeners: 'internal' on 9092 and 'external' on 19092.
    - --kafka-addr internal://0.0.0.0:9092,external://0.0.0.0:19092
    # Addresses advertised to Kafka clients. Clients inside the Docker
    # network use the internal address; clients outside the Docker network
    # use the external address.
    - --advertise-kafka-addr internal://redpanda-0:9092,external://localhost:19092
    # HTTP Proxy listeners and the addresses advertised to its clients.
    - --pandaproxy-addr internal://0.0.0.0:8082,external://0.0.0.0:18082
    - --advertise-pandaproxy-addr internal://redpanda-0:8082,external://localhost:18082
    # Schema Registry listeners.
    - --schema-registry-addr internal://0.0.0.0:8081,external://0.0.0.0:18081
    # RPC API used by the brokers to talk to each other.
    - --rpc-addr redpanda-0:33145
    - --advertise-rpc-addr redpanda-0:33145
    # dev-container mode applies well-known settings for development in containers.
    - --mode dev-container
    # Run Seastar (the framework underneath Redpanda) on a single core.
    - --smp 1
    - --default-log-level=info
  environment:
    # Username and password of the bootstrap SCRAM superuser.
    # See https://docs.redpanda.com/current/deploy/deployment-option/self-hosted/manual/production/production-deployment/#bootstrap-a-user-account
    RP_BOOTSTRAP_USER: "superuser:secretpassword"
  volumes:
    - redpanda-0:/var/lib/redpanda/data
    - ./bootstrap.yml:/etc/redpanda/.bootstrap.yaml
  networks:
    - redpanda_network
  ports:
    - "18081:18081"
    - "18082:18082"
    - "19092:19092"
    - "19644:9644"
  # The broker counts as healthy once an authenticated 'rpk cluster info' succeeds.
  healthcheck:
    test:
      - CMD
      - rpk
      - cluster
      - info
      - -X
      - user=superuser
      - -X
      - pass=secretpassword
    interval: 10s
    timeout: 15s
    retries: 10
  # Start only after MinIO reports healthy.
  depends_on:
    minio:
      condition: service_healthy
redpanda-1:
  image: docker.redpanda.com/redpandadata/redpanda:v25.2.7
  container_name: redpanda-1
  command:
    - redpanda
    - start
    # Kafka API: internal listener on 9092, external listener on 29092.
    - --kafka-addr internal://0.0.0.0:9092,external://0.0.0.0:29092
    - --advertise-kafka-addr internal://redpanda-1:9092,external://localhost:29092
    # HTTP Proxy: internal listener on 8082, external listener on 28082.
    - --pandaproxy-addr internal://0.0.0.0:8082,external://0.0.0.0:28082
    - --advertise-pandaproxy-addr internal://redpanda-1:8082,external://localhost:28082
    # Schema Registry: internal listener on 8081, external listener on 28081.
    - --schema-registry-addr internal://0.0.0.0:8081,external://0.0.0.0:28081
    # Broker-to-broker RPC address.
    - --rpc-addr redpanda-1:33145
    - --advertise-rpc-addr redpanda-1:33145
    - --mode dev-container
    - --smp 1
    - --default-log-level=info
    # Join the cluster through redpanda-0.
    - --seeds redpanda-0:33145
  environment:
    RP_BOOTSTRAP_USER: "superuser:secretpassword"
  volumes:
    - redpanda-1:/var/lib/redpanda/data
    - ./bootstrap.yml:/etc/redpanda/.bootstrap.yaml
  networks:
    - redpanda_network
  ports:
    - "28081:28081"
    - "28082:28082"
    - "29092:29092"
    - "29644:9644"
  # Started after redpanda-0 and minio have been started (no health gate).
  depends_on:
    redpanda-0:
      condition: service_started
    minio:
      condition: service_started
redpanda-2:
  image: docker.redpanda.com/redpandadata/redpanda:v25.2.7
  container_name: redpanda-2
  command:
    - redpanda
    - start
    # Kafka API: internal listener on 9092, external listener on 39092.
    - --kafka-addr internal://0.0.0.0:9092,external://0.0.0.0:39092
    - --advertise-kafka-addr internal://redpanda-2:9092,external://localhost:39092
    # HTTP Proxy: internal listener on 8082, external listener on 38082.
    - --pandaproxy-addr internal://0.0.0.0:8082,external://0.0.0.0:38082
    - --advertise-pandaproxy-addr internal://redpanda-2:8082,external://localhost:38082
    # Schema Registry: internal listener on 8081, external listener on 38081.
    - --schema-registry-addr internal://0.0.0.0:8081,external://0.0.0.0:38081
    # Broker-to-broker RPC address.
    - --rpc-addr redpanda-2:33145
    - --advertise-rpc-addr redpanda-2:33145
    - --mode dev-container
    - --smp 1
    - --default-log-level=info
    # Join the cluster through redpanda-0.
    - --seeds redpanda-0:33145
  environment:
    RP_BOOTSTRAP_USER: "superuser:secretpassword"
  volumes:
    - redpanda-2:/var/lib/redpanda/data
    - ./bootstrap.yml:/etc/redpanda/.bootstrap.yaml
  networks:
    - redpanda_network
  ports:
    - "38081:38081"
    - "38082:38082"
    - "39092:39092"
    - "39644:9644"
  # Started after redpanda-0 and minio have been started (no health gate).
  depends_on:
    redpanda-0:
      condition: service_started
    minio:
      condition: service_started
####################
# Redpanda Console #
####################
console:
  image: docker.redpanda.com/redpandadata/console:v3.2.2
  container_name: redpanda-console
  # The shell writes the inline configuration held in CONSOLE_CONFIG_FILE to
  # /tmp/config.yml and then launches Redpanda Console.
  entrypoint: /bin/sh
  command: -c 'echo "$$CONSOLE_CONFIG_FILE" > /tmp/config.yml && /app/console'
  environment:
    # Path of the configuration file; can be overridden from the host
    # environment and otherwise defaults to /tmp/config.yml.
    CONFIG_FILEPATH: ${CONFIG_FILEPATH:-/tmp/config.yml}
    # Inline Redpanda Console configuration.
    CONSOLE_CONFIG_FILE: |
      # Configure a connection to the Redpanda cluster
      # See https://docs.redpanda.com/current/console/config/connect-to-redpanda/
      kafka:
        brokers: ["redpanda-0:9092"]
        sasl:
          enabled: true
          impersonateUser: true
      schemaRegistry:
        enabled: true
        urls: ["http://redpanda-0:8081","http://redpanda-1:8081","http://redpanda-2:8081"]
        authentication:
          impersonateUser: true
      redpanda:
        adminApi:
          enabled: true
          urls: ["http://redpanda-0:9644","http://redpanda-1:9644","http://redpanda-2:9644"]
          authentication:
            basic:
              username: superuser
              password: secretpassword
            impersonateUser: false
      console:
        # Configures Redpanda Console to fetch topic documentation from GitHub and display it in the UI.
        # See https://docs.redpanda.com/current/console/config/topic-documentation/
        topicDocumentation:
          enabled: true
          git:
            enabled: true
            repository:
              url: https://github.com/redpanda-data/docs
              branch: main
              baseDirectory: tests/docker-compose
      authentication:
        jwtSigningKey: vazxnT+ZHtxKslK6QlDGovcYnSjTk/lKMmZ+mHrBVE+YdVDkLgSuP6AszAKe9Gvq
        basic:
          enabled: true
      authorization:
        roleBindings:
          - roleName: admin
            users:
              - loginType: basic
                name: superuser
  volumes:
    - ./config:/tmp/config/
  networks:
    - redpanda_network
  ports:
    - "8080:8080"
  # Wait for a healthy broker and for the one-shot setup containers to
  # finish successfully before starting the UI.
  depends_on:
    redpanda-0:
      condition: service_healthy
    createtopic:
      condition: service_completed_successfully
    registerschema:
      condition: service_completed_successfully
    deploytransform:
      condition: service_completed_successfully
####################
# Redpanda Connect #
####################
connect:
  # NOTE(review): this image reference carries no tag, unlike the other
  # images in this file; consider pinning a release.
  image: docker.redpanda.com/redpandadata/connect
  container_name: redpanda-connect
  # The shell writes the inline pipeline held in CONNECT_CFG_FILE to
  # /tmp/connect.yml and then runs Redpanda Connect with it.
  entrypoint: /bin/sh
  command: -c 'echo "$$CONNECT_CFG_FILE" > /tmp/connect.yml; /redpanda-connect -c /tmp/connect.yml'
  networks:
    - redpanda_network
  depends_on:
    redpanda-0:
      condition: service_healthy
  environment:
    # Inline Redpanda Connect configuration that fabricates data, processes
    # it, and writes the result to a set of topics.
    #
    # input    - 'generate' input producing one fake login record per second.
    #            See https://docs.redpanda.com/redpanda-connect/components/inputs/generate/
    # pipeline - a Bloblang mapping copies each record 2-4 times into an
    #            array, 'unarchive' splits the array into one message per
    #            element, and a second mapping routes the first message to
    #            the 'logins' topic and the remaining 1-3 messages to the
    #            'transactions' topic.
    #            See https://docs.redpanda.com/redpanda-connect/guides/bloblang/about/
    # output   - 'kafka_franz' output writing to the Redpanda brokers.
    #            See https://docs.redpanda.com/redpanda-connect/components/outputs/kafka_franz/
    #
    # '$$' is the Compose escape for a literal '$' in the values below.
    CONNECT_CFG_FILE: |
      input:
        generate:
          interval: 1s
          mapping: |
            let first_name = fake("first_name")
            let last_name = fake("last_name")
            root.user_id = counter()
            root.name = $$first_name + " " + $$last_name
            root.email = ($$first_name.slice(0,1) + $$last_name + "@" + fake("domain_name")).lowercase()
            root.ip = fake("ipv4")
            root.login_time = now()
      pipeline:
        processors:
          - mapping: |
              root = range(0, random_int(min:2, max:4)).map_each(cust -> this)
          - unarchive:
              format: "json_array"
          - mapping: |
              if batch_index() == 0 {
                meta topic = "logins"
                root = this
              } else {
                meta topic = "transactions"
                root.user_id = this.user_id
                root.email = this.email
                root.index = batch_index() - 1
                root.product_url = fake("url")
                root.price = fake("amount_with_currency")
                root.timestamp = now()
              }
      output:
        kafka_franz:
          seed_brokers: [ "redpanda-0:9092" ]
          topic: $${! metadata("topic") }
          sasl:
            - mechanism: SCRAM-SHA-256
              password: secretpassword
              username: superuser
####################
# rpk container to create the edu-filtered-domains topic #
# See https://docs.redpanda.com/current/reference/rpk/rpk-topic/rpk-topic-create/
####################
createtopic:
  image: docker.redpanda.com/redpandadata/redpanda:v25.2.7
  # One-shot container: runs 'topic create edu-filtered-domains' as the
  # superuser against redpanda-0.
  command:
    - topic
    - create
    - edu-filtered-domains
    - -X user=superuser
    - -X pass=secretpassword
    - -X brokers=redpanda-0:9092
  networks:
    - redpanda_network
  # Run only once the first broker is healthy.
  depends_on:
    redpanda-0:
      condition: service_healthy
####################
# rpk container to register the schema #
# See https://docs.redpanda.com/current/manage/schema-reg/schema-reg-api/
####################
registerschema:
  image: docker.redpanda.com/redpandadata/redpanda:v25.2.7
  # One-shot container: registers the mounted schema file under the
  # 'transactions' subject through the Schema Registry on redpanda-0.
  command:
    - registry
    - schema
    - create
    - transactions
    - --schema
    - /etc/redpanda/transactions-schema.json
    - -X user=superuser
    - -X pass=secretpassword
    - -X registry.hosts=redpanda-0:8081
  # Make the local schema file available inside the container.
  volumes:
    - ./transactions-schema.json:/etc/redpanda/transactions-schema.json
  networks:
    - redpanda_network
  # Run only once the first broker is healthy.
  depends_on:
    redpanda-0:
      condition: service_healthy
####################
# rpk container to deploy a consumer group #
# See https://docs.redpanda.com/current/reference/rpk/rpk-topic/rpk-topic-consume/
####################
consumergroup:
  image: docker.redpanda.com/redpandadata/redpanda:v25.2.7
  # Consumes the 'transactions' topic as consumer group
  # 'transactions-consumer', authenticated as the superuser.
  command:
    - topic
    - consume
    - transactions
    - --group
    - transactions-consumer
    - -X user=superuser
    - -X pass=secretpassword
    - -X brokers=redpanda-0:9092
  networks:
    - redpanda_network
  # Start after the topic-creation and transform-deployment containers
  # have both exited successfully.
  depends_on:
    createtopic:
      condition: service_completed_successfully
    deploytransform:
      condition: service_completed_successfully
####################
# rpk container to deploy the pre-built data transform #
# See https://docs.redpanda.com/current/develop/data-transforms/deploy/
####################
deploytransform:
  image: docker.redpanda.com/redpandadata/redpanda:v25.2.7
  # One-shot container: deploys the pre-built 'regex' transform, reading
  # from 'logins' and writing to 'edu-filtered-domains'.
  command:
    - transform
    - deploy
    - --file=/etc/redpanda/regex.wasm
    - --name=regex
    - --input-topic=logins
    - --output-topic=edu-filtered-domains
    # Single-quoted at the YAML level so the double quotes and the backslash
    # reach the command exactly as written.
    - '--var=PATTERN="[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.edu"'
    - --var=MATCH_VALUE=true
    - -X user=superuser
    - -X pass=secretpassword
    - -X admin.hosts=redpanda-0:9644
  # The pre-built Wasm binary is mounted from the local transform directory.
  volumes:
    - ./transform/regex.wasm:/etc/redpanda/regex.wasm
  networks:
    - redpanda_network
  # The output topic must exist first, so wait for createtopic to succeed.
  depends_on:
    createtopic:
      condition: service_completed_successfully
####################
# MinIO for Tiered Storage #
# See https://min.io/
#
# NOTE: MinIO is included in this quickstart for development and evaluation purposes only.
# It is not supported for production deployments of Redpanda.
#
# For production environments, use one of the supported object storage providers:
# https://docs.redpanda.com/current/deploy/deployment-option/self-hosted/manual/production/requirements/#object-storage-providers-for-tiered-storage
####################
minio:
  image: minio/minio:RELEASE.2025-05-24T17-08-30Z
  container_name: minio
  # Serve /data on the S3 API port and expose the web console on 9001.
  command: server --console-address ":9001" /data
  environment:
    # Root credentials; they match cloud_storage_access_key and
    # cloud_storage_secret_key in the Redpanda bootstrap file.
    MINIO_ROOT_USER: minio
    MINIO_ROOT_PASSWORD: redpandaTieredStorage7
    MINIO_SERVER_URL: "http://minio:9000"
    MINIO_REGION_NAME: local
    MINIO_DOMAIN: minio
  volumes:
    - minio:/data
  networks:
    redpanda_network:
      # Extra DNS name for this container on the network.
      aliases:
        - redpanda.minio
  ports:
    - "9000:9000"
    - "9001:9001"
  # Healthy once the readiness endpoint answers successfully.
  healthcheck:
    test:
      - CMD
      - curl
      - -f
      - http://localhost:9000/minio/health/ready
    interval: 10s
    timeout: 5s
    retries: 3
mc:
  image: minio/mc:RELEASE.2025-05-21T01-59-54Z
  container_name: mc
  networks:
    - redpanda_network
  # Run only after MinIO passes its health check.
  depends_on:
    minio:
      condition: service_healthy
  environment:
    - AWS_ACCESS_KEY_ID=minio
    - AWS_SECRET_ACCESS_KEY=redpandaTieredStorage7
    - AWS_REGION=local
  # Bootstrap script for the Tiered Storage bucket:
  #   1. retry 'mc alias set' until MinIO accepts the credentials;
  #   2. create the 'redpanda' bucket; --ignore-existing keeps this step
  #      from failing when the bucket already exists (for example after a
  #      restart with the persisted 'minio' volume);
  #   3. allow anonymous access to the bucket; 'mc anonymous' is the
  #      current name of the command formerly called 'mc policy';
  #   4. keep the container running.
  entrypoint: >
    /bin/sh -c "
    until (/usr/bin/mc alias set minio http://minio:9000 minio redpandaTieredStorage7) do echo '...waiting...' && sleep 1; done;
    /usr/bin/mc mb --ignore-existing minio/redpanda;
    /usr/bin/mc anonymous set public minio/redpanda;
    tail -f /dev/null
    "

View File

@@ -0,0 +1,77 @@
input:
  # Use the 'generate' input, which synthesizes records from a Bloblang mapping.
  # https://docs.redpanda.com/redpanda-connect/components/inputs/generate/
  generate:
    # The interval at which new records are generated.
    interval: 1s
    # The mapping defines how each generated record is structured.
    # The language used here is called Bloblang.
    # https://docs.redpanda.com/redpanda-connect/guides/bloblang/about/
    mapping: |
      # Generate a fake first name using the 'first_name' faker function.
      let first_name = fake("first_name")
      # Generate a fake last name using the 'last_name' faker function.
      let last_name = fake("last_name")
      # Define possible subscription levels for users.
      let subscription_levels = ["Free", "Basic", "Premium"]
      # Define possible notification channels for user preferences.
      let notifications = ["email", "sms", "push" ]
      # Define supported languages for user preferences (ISO 639-1 codes).
      # Japanese is "ja"; "jp" is a country code, not a language code.
      let languages = ["en", "es", "fr", "de", "zh", "ja"]
      # Assign a unique user ID using a UUID digit generator.
      root.user_id = fake("uuid_digit")
      # Assign the generated first name to the 'first_name' field.
      root.first_name = $first_name
      # Assign the generated last name to the 'last_name' field.
      root.last_name = $last_name
      # Construct the user's email by combining the first initial, last name, and a fake domain name.
      # The email is converted to lowercase for consistency.
      root.email = ($first_name.slice(0,1) + $last_name + "@" + fake("domain_name")).lowercase()
      # Assign a fake registration date using the 'date' faker function.
      root.registration_date = fake("date")
      # Assign the current timestamp as the last login time.
      root.last_login = now()
      # Randomly assign a subscription level; max must stay equal to the last index of 'subscription_levels'.
      root.subscription_level = $subscription_levels.index(random_int(min: 0, max: 2))
      # Randomly assign a language preference; max must stay equal to the last index of 'languages'.
      root.preferences.language = $languages.index(random_int(min: 0, max: 5))
      # Randomly assign a notification preference; max must stay equal to the last index of 'notifications'.
      root.preferences.notifications = $notifications.index(random_int(min: 0, max: 2))
# Processing stage: a single Bloblang mapping that tags every generated
# record with its destination topic and forwards the record unchanged.
pipeline:
  processors:
    - mapping: |
        # Set the target topic for the generated records to 'profiles'.
        meta topic = "profiles"
        # Assign the entire record (root) to be sent to the specified topic.
        root = this
output:
  # Write the processed records to Redpanda through the 'kafka_franz' output.
  # https://docs.redpanda.com/redpanda-connect/components/outputs/kafka_franz/
  kafka_franz:
    # Seed brokers used to discover the cluster.
    seed_brokers:
      - "localhost:19092"
      - "localhost:29092"
      - "localhost:39092"
    # The topic is resolved per message from the 'topic' metadata key that
    # the pipeline mapping sets ('profiles').
    topic: '${! metadata("topic") }'
    # SASL credentials presented to the brokers.
    sasl:
      - mechanism: SCRAM-SHA-256
        username: superuser
        password: secretpassword

View File

@@ -0,0 +1,24 @@
# This file configures `rpk` to connect to a remote Redpanda cluster running in the same local network as `rpk`.
# Kafka API connection settings for the Redpanda cluster.
kafka_api:
  # SASL credentials that rpk presents to the brokers.
  sasl:
    # Username used for authentication.
    user: superuser
    # Password associated with that username.
    password: secretpassword
    # Authentication mechanism (password-based SCRAM-SHA-256).
    mechanism: scram-sha-256
  # Kafka API addresses of the three brokers, all reachable on localhost.
  brokers:
    # Broker 1
    - 127.0.0.1:19092
    # Broker 2
    - 127.0.0.1:29092
    # Broker 3
    - 127.0.0.1:39092
# Admin API connection settings. The Admin API is used for administrative
# tasks such as managing configuration and monitoring the cluster.
admin_api:
  # Admin API addresses of the three brokers, all reachable on localhost.
  addresses:
    # Broker 1
    - 127.0.0.1:19644
    # Broker 2
    - 127.0.0.1:29644
    # Broker 3
    - 127.0.0.1:39644

View File

@@ -0,0 +1,37 @@
{
"$schema": "http://json-schema.org/draft-07/schema#",
"title": "Transactions",
"type": "object",
"properties": {
"email": {
"type": "string",
"format": "email",
"description": "The email address of the user involved in the transaction."
},
"index": {
"type": "integer",
"description": "A numeric index associated with the transaction."
},
"price": {
"type": "string",
"pattern": "^XXX \\d+\\.\\d{6}$",
"description": "A string representing the price of the product, including a currency code followed by the amount."
},
"product_url": {
"type": "string",
"format": "uri",
"description": "A URL that points to the product involved in the transaction."
},
"timestamp": {
"type": "string",
"format": "date-time",
"description": "The timestamp of when the transaction occurred, formatted in ISO 8601."
},
"user_id": {
"type": "integer",
"description": "A numeric identifier for the user."
}
},
"required": ["email", "index", "price", "product_url", "timestamp", "user_id"],
"additionalProperties": false
}

View File

@@ -0,0 +1,46 @@
# Transactions Topic Documentation
This document provides an overview of the `transactions` topic in the Redpanda cluster. The topic is designed to capture autogenerated transaction events with various attributes.
## Schema Overview
Each message in the `transactions` topic is a JSON object with the following fields and types:
```json
{
"email": "string",
"index": "integer",
"price": "string",
"product_url": "string",
"timestamp": "string",
"user_id": "integer"
}
```
- **email**: The email address of the user involved in the transaction.
- **index**: A numeric index associated with the transaction. This could represent the position or order of the transaction in a sequence.
- **price**: A string representing the price of the product. It includes a currency code (e.g., "XXX") followed by the amount.
- **product_url**: A URL that points to the product involved in the transaction.
- **timestamp**: The timestamp of when the transaction occurred, formatted in ISO 8601.
- **user_id**: A numeric identifier for the user. This is typically a unique ID assigned to each user in the system.
## Example message
```json
{
"email": "wzieme@ykczius.edu",
"index": 0,
"price": "XXX 5651308.100000",
"product_url": "http://yjomdta.top/DxvGsCn.php",
"timestamp": "2024-08-16T15:51:19.799474084Z",
"user_id": 1
}
```
## Use cases
You can use the `transactions` topic for various purposes, including:
- **Analytics**: Tracking and analyzing user transactions to understand buying behavior, popular products, etc.
- **Monitoring**: Observing transaction patterns to detect anomalies, such as unusual spikes or drops in transaction volume.
- **Data Processing**: Feeding transaction data into other systems, such as a data warehouse or real-time processing pipelines, for further processing and analysis.

View File

@@ -0,0 +1,73 @@
= Modify the Wasm Transform in the Quickstart
This directory contains the Go source code (`transform.go`) for the data transform that is used in the Redpanda Self-Managed quickstart.
If you're following the quickstart, you *do not* need to modify or rebuild this code. The Docker Compose configuration automatically deploys a pre-built transform called `regex.wasm`.
However, if you want to customize the data transform logic, continue reading.
== Why customize the transform?
- **Custom filtering**: Filter by a different regex or apply multiple conditions.
- **Data manipulation**: Transform records before writing them out. For example, redacting sensitive data or combining fields.
- **Extended functionality**: Add advanced logging, error handling, or multi-topic routing.
== Prerequisites
You need the following:
- At least Go 1.20 installed.
+
[source,bash]
----
go version
----
- The Redpanda CLI (`rpk`) installed.
- A running Redpanda cluster. If you're using the local quickstart with Docker Compose, ensure the cluster is up and running. Or, point `rpk` to another Redpanda environment.
== Modify and deploy your transform
. Open link:transform.go[transform.go] and make your changes. For example:
+
--
- Change the regex logic to handle different use cases.
- Add environment variables to control new features.
- Extend the `doRegexFilter()` function to manipulate records.
--
. Compile your Go code into a `.wasm` file:
+
[source,bash]
----
rpk transform build
----
+
This command compiles your Go source and produces a `.wasm` file that you can deploy to Redpanda.
. Deploy the new transform.
+
If your Docker Compose setup already has a service to deploy the transform, you can restart that service.
+
Otherwise, you can deploy your updated `.wasm` manually using `rpk transform deploy`.
. Produce messages into the input topic. For example:
+
[source,bash]
----
echo '{"key":"alice@university.edu","value":"test message"}' | rpk topic produce logins
----
. Consume from the output topic. For example:
+
[source,bash]
----
rpk topic consume edu-filtered-domains --num 1
----
== Suggested reading
- link:https://docs.redpanda.com/current/reference/rpk/[Redpanda `rpk` CLI Reference^].
- link:https://docs.redpanda.com/current/develop/data-transforms/build/[Develop Data Transforms^].
- https://golang.org/ref/mod[Go Modules^] for managing dependencies and builds in Go.
- https://docs.docker.com/compose/[Docker Compose^] for customizing your environment.

View File

@@ -0,0 +1,2 @@
github.com/redpanda-data/redpanda/src/transform-sdk/go/transform v1.1.0 h1:KxgHJZsHsrT3YX7DMpu/vJN4TZN3KFm1jzrCFLyOepA=
github.com/redpanda-data/redpanda/src/transform-sdk/go/transform v1.1.0/go.mod h1:QGgiwwf/BIsD1b7EiyQ/Apzw+RLSpasRDdpOCiefQFQ=

View File

@@ -0,0 +1,122 @@
package main
// This data transform filters records based on a customizable regex pattern.
// If a record's key or value
// (determined by an environment variable) matches the specified regex,
// the record is forwarded to the output.
// Otherwise, it is dropped.
//
// Usage:
// 1. Provide the following environment variables in your Docker or configuration setup:
// - PATTERN : (required) a regular expression that determines what you want to match.
// - MATCH_VALUE : (optional) a boolean to decide whether to check the record value. If false,
// the record key is checked. Default is false.
//
// Example environment variables:
// PATTERN=".*\\.edu$"
// MATCH_VALUE="true"
//
// Logs:
// This transform logs information about each record and whether it matched.
// The logs appear in the _redpanda.transform_logs topic, so you can debug how your records are being processed.
//
// Build instructions:
// go mod tidy
// rpk transform build
//
// For more details on building transforms with the Redpanda SDK, see:
// https://docs.redpanda.com/current/develop/data-transforms
//
import (
"log"
"os"
"regexp"
"strings"
"github.com/redpanda-data/redpanda/src/transform-sdk/go/transform"
)
var (
re *regexp.Regexp
checkValue bool
)
// isTrueVar reports whether v is one of the accepted "truthy" spellings
// ("yes", "ok", "1" or "true"). The comparison ignores letter case; any
// other input, including the empty string, yields false.
func isTrueVar(v string) bool {
	lowered := strings.ToLower(v)
	for _, truthy := range []string{"yes", "ok", "1", "true"} {
		if lowered == truthy {
			return true
		}
	}
	return false
}
// The main() function runs only once at startup. It performs all initialization steps:
//   - Configures the logger prefix and timestamp format.
//   - Reads and compiles the regex pattern from the PATTERN environment variable.
//   - Reads MATCH_VALUE to determine whether to match on the key or value.
//   - Registers the doRegexFilter() function to process records.
//
// A missing PATTERN, or a PATTERN that is not a valid regular expression,
// is reported through log.Fatal/log.Fatalf so that both configuration
// errors surface the same way, with a readable message.
func main() {
	// Set logging preferences, including timestamp and UTC time.
	log.SetPrefix("[regex-transform] ")
	log.SetFlags(log.Ldate | log.Ltime | log.LUTC | log.Lmicroseconds)
	// Start logging the transformation process
	log.Println("Starting transform...")

	// Read the PATTERN environment variable to get the regex pattern.
	pattern, ok := os.LookupEnv("PATTERN")
	if !ok {
		log.Fatal("Missing PATTERN environment variable")
	}
	// Log the regex pattern being used.
	log.Printf("Using PATTERN: %q\n", pattern)

	// Compile the regex pattern for later use. regexp.Compile is used
	// instead of regexp.MustCompile because the pattern is user-supplied:
	// an invalid pattern is a configuration error, not a programming error,
	// so it is logged rather than raised as a panic.
	compiled, err := regexp.Compile(pattern)
	if err != nil {
		log.Fatalf("Invalid PATTERN %q: %v", pattern, err)
	}
	re = compiled

	// Read the MATCH_VALUE environment variable to determine whether to check the record's value.
	// When it is unset or not a truthy spelling, the record key is checked instead.
	mk, ok := os.LookupEnv("MATCH_VALUE")
	checkValue = ok && isTrueVar(mk)
	log.Printf("MATCH_VALUE set to: %t\n", checkValue)

	log.Println("Initialization complete, waiting for records...")
	// Listen for records to be written, calling doRegexFilter() for each record.
	transform.OnRecordWritten(doRegexFilter)
}
// doRegexFilter is the per-record callback registered in main().
//
// It selects the record's value (when MATCH_VALUE was truthy) or its key
// (otherwise) and tests it against the compiled regex. A matching record is
// written to the output unchanged; a non-matching record, or one whose
// selected field is nil, is dropped by returning nil without writing.
// The only error returned is whatever the writer reports.
func doRegexFilter(e transform.WriteEvent, w transform.RecordWriter) error {
	record := e.Record()

	// Pick the field to test and log it before any further checks.
	var candidate []byte
	if checkValue {
		candidate = record.Value
		log.Printf("Checking record value: %s\n", string(candidate))
	} else {
		candidate = record.Key
		log.Printf("Checking record key: %s\n", string(candidate))
	}

	// Nothing to test against: skip the record.
	if candidate == nil {
		log.Println("Record has no key/value to check, skipping.")
		return nil
	}

	// No match: log and drop the record by not writing it.
	if !re.Match(candidate) {
		log.Printf("Record did not match pattern, dropping. Key: %s, Value: %s\n", string(record.Key), string(record.Value))
		return nil
	}

	// Match: log and forward the record to the output topic.
	log.Printf("Record matched pattern, passing through. Key: %s, Value: %s\n", string(record.Key), string(record.Value))
	return w.Write(record)
}

View File

@@ -0,0 +1,33 @@
# Transform metadata used by the rpk transform build command.
# This metadata file tells rpk:
# 1) The transform's display name, which also becomes the base for the .wasm file name.
# 2) A brief description of what it does.
# 3) Defaults for environment variables.
# 4) Input and output topics (if you want to define them here rather than in the deploy command).
# Display name of the transform; rpk transform build also uses it to name
# the generated .wasm file.
name: regex

# Free-text summary of what the transform does and how it is configured.
description: |
  Filters the input topic to records that only match a regular expression.
  Regular expressions are implemented using Go's regexp library, which uses the syntax of RE2.
  See the RE2 wiki for allowed syntax: https://github.com/google/re2/wiki/Syntax
  Environment variables:
  - PATTERN: The regular expression that will match against records (required).
  - MATCH_VALUE: By default, the regex matches keys, but if set to "true", the regex matches values.

# Input and output topics are left empty here; they can be supplied in the
# deploy command instead.
input-topic: ""
output-topic: ""

# TinyGo environment used to compile the transform.
language: tinygo-no-goroutines

# Environment variable defaults.
env:
  # PATTERN has no usable default and must be provided at deploy time.
  # Example: --var=PATTERN=".*@example.com"
  PATTERN: '<required>'

View File

@@ -0,0 +1,24 @@
# This file configures `rpk` to connect to a remote Redpanda cluster running in the same local network as `rpk`.
# Kafka API connection settings for the Redpanda cluster.
kafka_api:
  # SASL credentials that rpk presents to the brokers.
  sasl:
    # Username used for authentication.
    user: superuser
    # Password associated with that username.
    password: secretpassword
    # Authentication mechanism (password-based SCRAM-SHA-256).
    mechanism: scram-sha-256
  # Kafka API addresses of the three brokers, all reachable on localhost.
  brokers:
    # Broker 1
    - 127.0.0.1:19092
    # Broker 2
    - 127.0.0.1:29092
    # Broker 3
    - 127.0.0.1:39092
# Admin API connection settings. The Admin API is used for administrative
# tasks such as managing configuration and monitoring the cluster.
admin_api:
  # Admin API addresses of the three brokers, all reachable on localhost.
  addresses:
    # Broker 1
    - 127.0.0.1:19644
    # Broker 2
    - 127.0.0.1:29644
    # Broker 3
    - 127.0.0.1:39644

View File

@@ -0,0 +1,37 @@
{
"$schema": "http://json-schema.org/draft-07/schema#",
"title": "Transactions",
"type": "object",
"properties": {
"email": {
"type": "string",
"format": "email",
"description": "The email address of the user involved in the transaction."
},
"index": {
"type": "integer",
"description": "A numeric index associated with the transaction."
},
"price": {
"type": "string",
"pattern": "^XXX \\d+\\.\\d{6}$",
"description": "A string representing the price of the product, including a currency code followed by the amount."
},
"product_url": {
"type": "string",
"format": "uri",
"description": "A URL that points to the product involved in the transaction."
},
"timestamp": {
"type": "string",
"format": "date-time",
"description": "The timestamp of when the transaction occurred, formatted in ISO 8601."
},
"user_id": {
"type": "integer",
"description": "A numeric identifier for the user."
}
},
"required": ["email", "index", "price", "product_url", "timestamp", "user_id"],
"additionalProperties": false
}

View File

@@ -0,0 +1,46 @@
# Transactions Topic Documentation
This document provides an overview of the `transactions` topic in the Redpanda cluster. The topic is designed to capture autogenerated transaction events with various attributes.
## Schema Overview
Each message in the `transactions` topic is a JSON object with the following fields and types:
```json
{
"email": "string",
"index": "integer",
"price": "string",
"product_url": "string",
"timestamp": "string",
"user_id": "integer"
}
```
- **email**: The email address of the user involved in the transaction.
- **index**: A numeric index associated with the transaction. This could represent the position or order of the transaction in a sequence.
- **price**: A string representing the price of the product. It includes a currency code (e.g., "XXX") followed by the amount.
- **product_url**: A URL that points to the product involved in the transaction.
- **timestamp**: The timestamp of when the transaction occurred, formatted in ISO 8601.
- **user_id**: A numeric identifier for the user. This is typically a unique ID assigned to each user in the system.
## Example message
```json
{
"email": "wzieme@ykczius.edu",
"index": 0,
"price": "XXX 5651308.100000",
"product_url": "http://yjomdta.top/DxvGsCn.php",
"timestamp": "2024-08-16T15:51:19.799474084Z",
"user_id": 1
}
```
## Use cases
You can use the `transactions` topic for various purposes, including:
- **Analytics**: Tracking and analyzing user transactions to understand buying behavior, popular products, etc.
- **Monitoring**: Observing transaction patterns to detect anomalies, such as unusual spikes or drops in transaction volume.
- **Data Processing**: Feeding transaction data into other systems, such as a data warehouse or real-time processing pipelines, for further processing and analysis.

View File

@@ -0,0 +1,73 @@
= Modify the Wasm Transform in the Quickstart
This directory contains the Go source code (`transform.go`) for the data transform that is used in the Redpanda Self-Managed quickstart.
If you're following the quickstart, you *do not* need to modify or rebuild this code. The Docker Compose configuration automatically deploys a pre-built transform called `regex.wasm`.
However, if you want to customize the data transform logic, continue reading.
== Why customize the transform?
- **Custom filtering**: Filter by a different regex or apply multiple conditions.
- **Data manipulation**: Transform records before writing them out. For example, redacting sensitive data or combining fields.
- **Extended functionality**: Add advanced logging, error handling, or multi-topic routing.
== Prerequisites
You need the following:
- At least Go 1.20 installed.
+
[source,bash]
----
go version
----
- The Redpanda CLI (`rpk`) installed.
- A running Redpanda cluster. If you're using the local quickstart with Docker Compose, ensure the cluster is up and running. Or, point `rpk` to another Redpanda environment.
== Modify and deploy your transform
. Open link:transform.go[transform.go] and make your changes. For example:
+
--
- Change the regex logic to handle different use cases.
- Add environment variables to control new features.
- Extend the `doRegexFilter()` function to manipulate records.
--
. Compile your Go code into a `.wasm` file:
+
[source,bash]
----
rpk transform build
----
+
This command compiles your Go source and produces a `.wasm` file that you can deploy to Redpanda.
. Deploy the new transform.
+
If your Docker Compose setup already has a service to deploy the transform, you can restart that service.
+
Otherwise, you can deploy your updated `.wasm` manually using `rpk transform deploy`.
. Produce messages into the input topic. For example:
+
[source,bash]
----
echo '{"key":"alice@university.edu","value":"test message"}' | rpk topic produce logins
----
. Consume from the output topic. For example:
+
[source,bash]
----
rpk topic consume edu-filtered-domains --num 1
----
== Suggested reading
- link:https://docs.redpanda.com/current/reference/rpk/[Redpanda `rpk` CLI Reference^].
- link:https://docs.redpanda.com/current/develop/data-transforms/build/[Develop Data Transforms^].
- https://golang.org/ref/mod[Go Modules^] for managing dependencies and builds in Go.
- https://docs.docker.com/compose/[Docker Compose^] for customizing your environment.

View File

@@ -0,0 +1,2 @@
github.com/redpanda-data/redpanda/src/transform-sdk/go/transform v1.1.0 h1:KxgHJZsHsrT3YX7DMpu/vJN4TZN3KFm1jzrCFLyOepA=
github.com/redpanda-data/redpanda/src/transform-sdk/go/transform v1.1.0/go.mod h1:QGgiwwf/BIsD1b7EiyQ/Apzw+RLSpasRDdpOCiefQFQ=

View File

@@ -0,0 +1,122 @@
package main
// This data transform filters records based on a customizable regex pattern.
// If a record's key or value
// (determined by an environment variable) matches the specified regex,
// the record is forwarded to the output.
// Otherwise, it is dropped.
//
// Usage:
// 1. Provide the following environment variables in your Docker or configuration setup:
// - PATTERN : (required) a regular expression that determines what you want to match.
// - MATCH_VALUE : (optional) a boolean to decide whether to check the record value. If false,
// the record key is checked. Default is false.
//
// Example environment variables:
// PATTERN=".*\\.edu$"
// MATCH_VALUE="true"
//
// Logs:
// This transform logs information about each record and whether it matched.
// The logs appear in the _redpanda.transform_logs topic, so you can debug how your records are being processed.
//
// Build instructions:
// go mod tidy
// rpk transform build
//
// For more details on building transforms with the Redpanda SDK, see:
// https://docs.redpanda.com/current/develop/data-transforms
//
import (
"log"
"os"
"regexp"
"strings"
"github.com/redpanda-data/redpanda/src/transform-sdk/go/transform"
)
var (
re *regexp.Regexp
checkValue bool
)
// isTrueVar reports whether v is one of the accepted truthy spellings
// ("yes", "ok", "1" or "true"), compared case-insensitively. Any other
// string, including the empty string, yields false.
func isTrueVar(v string) bool {
	lowered := strings.ToLower(v)
	return lowered == "yes" || lowered == "ok" || lowered == "1" || lowered == "true"
}
// main runs once at startup and performs all initialization:
//   - configures the logger prefix and timestamp flags,
//   - reads the required PATTERN environment variable and compiles it,
//   - reads the optional MATCH_VALUE environment variable to choose between
//     matching on the record value or the record key (the default),
//   - registers doRegexFilter() to process records.
//
// The process exits through log.Fatal/log.Fatalf when PATTERN is unset or is
// not a valid regular expression.
func main() {
	// Set logging preferences, including timestamp and UTC time.
	log.SetPrefix("[regex-transform] ")
	log.SetFlags(log.Ldate | log.Ltime | log.LUTC | log.Lmicroseconds)

	log.Println("Starting transform...")

	// PATTERN is mandatory; without it there is nothing to filter on.
	pattern, ok := os.LookupEnv("PATTERN")
	if !ok {
		log.Fatal("Missing PATTERN environment variable")
	}
	log.Printf("Using PATTERN: %q\n", pattern)

	// PATTERN is user-supplied, so compile it with regexp.Compile and report a
	// readable error through the logger instead of letting regexp.MustCompile
	// panic on an invalid expression.
	compiled, err := regexp.Compile(pattern)
	if err != nil {
		log.Fatalf("Invalid PATTERN %q: %v", pattern, err)
	}
	re = compiled

	// MATCH_VALUE is optional; when unset or not a truthy spelling, the key is checked.
	mk, ok := os.LookupEnv("MATCH_VALUE")
	checkValue = ok && isTrueVar(mk)
	log.Printf("MATCH_VALUE set to: %t\n", checkValue)

	log.Println("Initialization complete, waiting for records...")

	// Listen for records to be written, calling doRegexFilter() for each record.
	transform.OnRecordWritten(doRegexFilter)
}
// doRegexFilter is the per-record callback registered in main().
// It tests either the record's value (when MATCH_VALUE is enabled) or its key
// against the compiled regex. A matching record is written to w; a record that
// does not match, or whose selected field is nil, is not written.
//
// It returns the error from w.Write for matching records and nil otherwise.
func doRegexFilter(e transform.WriteEvent, w transform.RecordWriter) error {
	rec := e.Record()

	// Select the field to test: the key by default, the value when MATCH_VALUE is true.
	var candidate []byte
	if !checkValue {
		candidate = rec.Key
		log.Printf("Checking record key: %s\n", string(candidate))
	} else {
		candidate = rec.Value
		log.Printf("Checking record value: %s\n", string(candidate))
	}

	// Nothing to match against: log and skip the record.
	if candidate == nil {
		log.Println("Record has no key/value to check, skipping.")
		return nil
	}

	// Non-matching records are logged and dropped (not written).
	if !re.Match(candidate) {
		log.Printf("Record did not match pattern, dropping. Key: %s, Value: %s\n", string(rec.Key), string(rec.Value))
		return nil
	}

	// Matching records are logged and forwarded to the output.
	log.Printf("Record matched pattern, passing through. Key: %s, Value: %s\n", string(rec.Key), string(rec.Value))
	return w.Write(rec)
}

View File

@@ -0,0 +1,33 @@
# Transform metadata used by the rpk transform build command.
# This metadata file tells rpk:
# 1) The transform's display name, which also becomes the base for the .wasm file name.
# 2) A brief description of what it does.
# 3) Defaults for environment variables.
# 4) Input and output topics (if you want to define them here rather than in the deploy command).
# Human-readable name of the transform. rpk transform build uses this for the generated .wasm file.
name: regex
description: |
Filters the input topic to records that only match a regular expression.
Regular expressions are implemented using Go's regexp library, which uses the syntax of RE2.
See the RE2 wiki for allowed syntax: https://github.com/google/re2/wiki/Syntax
Environment variables:
- PATTERN: The regular expression that will match against records (required).
- MATCH_VALUE: By default, the regex matches keys, but if set to "true", the regex matches values.
# By default, no input topic is set here. (You can set it in your deploy command if preferred.)
input-topic: ""
# By default, no output topic is set here. (You can set it in your deploy command if preferred.)
output-topic: ""
# Indicates the specific TinyGo environment used to compile your transform.
language: tinygo-no-goroutines
env:
# The PATTERN variable must be provided at deploy time.
# Example: --var=PATTERN=".*@example.com"
PATTERN: '<required>'

View File

@@ -0,0 +1,24 @@
# This file configures `rpk` to connect to a remote Redpanda cluster running in the same local network as `rpk`.
# Configuration for connecting to the Kafka API of the Redpanda cluster.
kafka_api:
# SASL (Simple Authentication and Security Layer) settings for authentication.
sasl:
user: superuser # The username used for authentication
password: secretpassword # The password associated with the username
mechanism: scram-sha-256 # Authentication mechanism; SCRAM-SHA-256 provides secure password-based authentication
# List of Kafka brokers in the Redpanda cluster.
# These brokers ensure high availability and fault tolerance for Kafka-based communication.
brokers:
- 127.0.0.1:19092 # Broker 1: Accessible on localhost, port 19092
- 127.0.0.1:29092 # Broker 2: Accessible on localhost, port 29092
- 127.0.0.1:39092 # Broker 3: Accessible on localhost, port 39092
# Configuration for connecting to the Redpanda Admin API.
# The Admin API allows you to perform administrative tasks such as managing configurations, monitoring, and scaling.
admin_api:
# List of Admin API endpoints for managing the cluster.
addresses:
- 127.0.0.1:19644 # Admin API for Broker 1: Accessible on localhost, port 19644
- 127.0.0.1:29644 # Admin API for Broker 2: Accessible on localhost, port 29644
- 127.0.0.1:39644 # Admin API for Broker 3: Accessible on localhost, port 39644

View File

@@ -0,0 +1,37 @@
{
"$schema": "http://json-schema.org/draft-07/schema#",
"title": "Transactions",
"type": "object",
"properties": {
"email": {
"type": "string",
"format": "email",
"description": "The email address of the user involved in the transaction."
},
"index": {
"type": "integer",
"description": "A numeric index associated with the transaction."
},
"price": {
"type": "string",
"pattern": "^XXX \\d+\\.\\d{6}$",
"description": "A string representing the price of the product, including a currency code followed by the amount."
},
"product_url": {
"type": "string",
"format": "uri",
"description": "A URL that points to the product involved in the transaction."
},
"timestamp": {
"type": "string",
"format": "date-time",
"description": "The timestamp of when the transaction occurred, formatted in ISO 8601."
},
"user_id": {
"type": "integer",
"description": "A numeric identifier for the user."
}
},
"required": ["email", "index", "price", "product_url", "timestamp", "user_id"],
"additionalProperties": false
}

View File

@@ -0,0 +1,46 @@
# Transactions Topic Documentation
This document provides an overview of the `transactions` topic in the Redpanda cluster. The topic is designed to capture autogenerated transaction events with various attributes.
## Schema Overview
Each message in the `transactions` topic is a JSON object with the following fields and types:
```json
{
"email": "string",
"index": "integer",
"price": "string",
"product_url": "string",
"timestamp": "string",
"user_id": "integer"
}
```
- **email**: The email address of the user involved in the transaction.
- **index**: A numeric index associated with the transaction. This could represent the position or order of the transaction in a sequence.
- **price**: A string representing the price of the product. It includes a currency code (e.g., "XXX") followed by the amount.
- **product_url**: A URL that points to the product involved in the transaction.
- **timestamp**: The timestamp of when the transaction occurred, formatted in ISO 8601.
- **user_id**: A numeric identifier for the user. This is typically a unique ID assigned to each user in the system.
## Example message
```json
{
"email": "wzieme@ykczius.edu",
"index": 0,
"price": "XXX 5651308.100000",
"product_url": "http://yjomdta.top/DxvGsCn.php",
"timestamp": "2024-08-16T15:51:19.799474084Z",
"user_id": 1
}
```
## Use cases
You can use the `transactions` topic for various purposes, including:
- **Analytics**: Tracking and analyzing user transactions to understand buying behavior, popular products, etc.
- **Monitoring**: Observing transaction patterns to detect anomalies, such as unusual spikes or drops in transaction volume.
- **Data Processing**: Feeding transaction data into other systems, such as a data warehouse or real-time processing pipelines, for further processing and analysis.

View File

@@ -0,0 +1,73 @@
= Modify the Wasm Transform in the Quickstart
This directory contains the Go source code (`transform.go`) for the data transform that is used in the Redpanda Self-Managed quickstart.
If you're following the quickstart, you *do not* need to modify or rebuild this code. The Docker Compose configuration automatically deploys a pre-built transform called `regex.wasm`.
However, if you want to customize the data transform logic, continue reading.
== Why customize the transform?
- **Custom filtering**: Filter by a different regex or apply multiple conditions.
- **Data manipulation**: Transform records before writing them out. For example, redacting sensitive data or combining fields.
- **Extended functionality**: Add advanced logging, error handling, or multi-topic routing.
== Prerequisites
You need the following:
- At least Go 1.20 installed.
+
[source,bash]
----
go version
----
- The Redpanda CLI (`rpk`) installed.
- A running Redpanda cluster. If you're using the local quickstart with Docker Compose, ensure the cluster is up and running. Or, point `rpk` to another Redpanda environment.
== Modify and deploy your transform
. Open link:transform.go[transform.go] and make your changes. For example:
+
--
- Change the regex logic to handle different use cases.
- Add environment variables to control new features.
- Extend the `doRegexFilter()` function to manipulate records.
--
. Compile your Go code into a `.wasm` file:
+
[source,bash]
----
rpk transform build
----
+
This command compiles your Go source and produces a `.wasm` file that you can deploy to Redpanda.
. Deploy the new transform.
+
If your Docker Compose setup already has a service to deploy the transform, you can restart that service.
+
Otherwise, you can deploy your updated `.wasm` manually using `rpk transform deploy`.
. Produce messages into the input topic. For example:
+
[source,bash]
----
echo '{"key":"alice@university.edu","value":"test message"}' | rpk topic produce logins
----
. Consume from the output topic. For example:
+
[source,bash]
----
rpk topic consume edu-filtered-domains --num 1
----
== Suggested reading
- link:https://docs.redpanda.com/current/reference/rpk/[Redpanda `rpk` CLI Reference^].
- link:https://docs.redpanda.com/current/develop/data-transforms/build/[Develop Data Transforms^].
- https://golang.org/ref/mod[Go Modules^] for managing dependencies and builds in Go.
- https://docs.docker.com/compose/[Docker Compose^] for customizing your environment.

View File

@@ -0,0 +1,2 @@
github.com/redpanda-data/redpanda/src/transform-sdk/go/transform v1.1.0 h1:KxgHJZsHsrT3YX7DMpu/vJN4TZN3KFm1jzrCFLyOepA=
github.com/redpanda-data/redpanda/src/transform-sdk/go/transform v1.1.0/go.mod h1:QGgiwwf/BIsD1b7EiyQ/Apzw+RLSpasRDdpOCiefQFQ=

View File

@@ -0,0 +1,122 @@
package main
// This data transform filters records based on a customizable regex pattern.
// If a record's key or value
// (determined by an environment variable) matches the specified regex,
// the record is forwarded to the output.
// Otherwise, it is dropped.
//
// Usage:
// 1. Provide the following environment variables in your Docker or configuration setup:
// - PATTERN : (required) a regular expression that determines what you want to match.
// - MATCH_VALUE : (optional) a boolean to decide whether to check the record value. If false,
// the record key is checked. Default is false.
//
// Example environment variables:
// PATTERN=".*\\.edu$"
// MATCH_VALUE="true"
//
// Logs:
// This transform logs information about each record and whether it matched.
// The logs appear in the _redpanda.transform_logs topic, so you can debug how your records are being processed.
//
// Build instructions:
// go mod tidy
// rpk transform build
//
// For more details on building transforms with the Redpanda SDK, see:
// https://docs.redpanda.com/current/develop/data-transforms
//
import (
"log"
"os"
"regexp"
"strings"
"github.com/redpanda-data/redpanda/src/transform-sdk/go/transform"
)
var (
re *regexp.Regexp
checkValue bool
)
// isTrueVar reports whether v is one of the accepted truthy spellings
// ("yes", "ok", "1" or "true"), compared case-insensitively. Any other
// string, including the empty string, yields false.
func isTrueVar(v string) bool {
	lowered := strings.ToLower(v)
	return lowered == "yes" || lowered == "ok" || lowered == "1" || lowered == "true"
}
// main runs once at startup and performs all initialization:
//   - configures the logger prefix and timestamp flags,
//   - reads the required PATTERN environment variable and compiles it,
//   - reads the optional MATCH_VALUE environment variable to choose between
//     matching on the record value or the record key (the default),
//   - registers doRegexFilter() to process records.
//
// The process exits through log.Fatal/log.Fatalf when PATTERN is unset or is
// not a valid regular expression.
func main() {
	// Set logging preferences, including timestamp and UTC time.
	log.SetPrefix("[regex-transform] ")
	log.SetFlags(log.Ldate | log.Ltime | log.LUTC | log.Lmicroseconds)

	log.Println("Starting transform...")

	// PATTERN is mandatory; without it there is nothing to filter on.
	pattern, ok := os.LookupEnv("PATTERN")
	if !ok {
		log.Fatal("Missing PATTERN environment variable")
	}
	log.Printf("Using PATTERN: %q\n", pattern)

	// PATTERN is user-supplied, so compile it with regexp.Compile and report a
	// readable error through the logger instead of letting regexp.MustCompile
	// panic on an invalid expression.
	compiled, err := regexp.Compile(pattern)
	if err != nil {
		log.Fatalf("Invalid PATTERN %q: %v", pattern, err)
	}
	re = compiled

	// MATCH_VALUE is optional; when unset or not a truthy spelling, the key is checked.
	mk, ok := os.LookupEnv("MATCH_VALUE")
	checkValue = ok && isTrueVar(mk)
	log.Printf("MATCH_VALUE set to: %t\n", checkValue)

	log.Println("Initialization complete, waiting for records...")

	// Listen for records to be written, calling doRegexFilter() for each record.
	transform.OnRecordWritten(doRegexFilter)
}
// doRegexFilter is the per-record callback registered in main().
// It tests either the record's value (when MATCH_VALUE is enabled) or its key
// against the compiled regex. A matching record is written to w; a record that
// does not match, or whose selected field is nil, is not written.
//
// It returns the error from w.Write for matching records and nil otherwise.
func doRegexFilter(e transform.WriteEvent, w transform.RecordWriter) error {
	rec := e.Record()

	// Select the field to test: the key by default, the value when MATCH_VALUE is true.
	var candidate []byte
	if !checkValue {
		candidate = rec.Key
		log.Printf("Checking record key: %s\n", string(candidate))
	} else {
		candidate = rec.Value
		log.Printf("Checking record value: %s\n", string(candidate))
	}

	// Nothing to match against: log and skip the record.
	if candidate == nil {
		log.Println("Record has no key/value to check, skipping.")
		return nil
	}

	// Non-matching records are logged and dropped (not written).
	if !re.Match(candidate) {
		log.Printf("Record did not match pattern, dropping. Key: %s, Value: %s\n", string(rec.Key), string(rec.Value))
		return nil
	}

	// Matching records are logged and forwarded to the output.
	log.Printf("Record matched pattern, passing through. Key: %s, Value: %s\n", string(rec.Key), string(rec.Value))
	return w.Write(rec)
}

View File

@@ -0,0 +1,33 @@
# Transform metadata used by the rpk transform build command.
# This metadata file tells rpk:
# 1) The transform's display name, which also becomes the base for the .wasm file name.
# 2) A brief description of what it does.
# 3) Defaults for environment variables.
# 4) Input and output topics (if you want to define them here rather than in the deploy command).
# Human-readable name of the transform. rpk transform build uses this for the generated .wasm file.
name: regex
description: |
Filters the input topic to records that only match a regular expression.
Regular expressions are implemented using Go's regexp library, which uses the syntax of RE2.
See the RE2 wiki for allowed syntax: https://github.com/google/re2/wiki/Syntax
Environment variables:
- PATTERN: The regular expression that will match against records (required).
- MATCH_VALUE: By default, the regex matches keys, but if set to "true", the regex matches values.
# By default, no input topic is set here. (You can set it in your deploy command if preferred.)
input-topic: ""
# By default, no output topic is set here. (You can set it in your deploy command if preferred.)
output-topic: ""
# Indicates the specific TinyGo environment used to compile your transform.
language: tinygo-no-goroutines
env:
# The PATTERN variable must be provided at deploy time.
# Example: --var=PATTERN=".*@example.com"
PATTERN: '<required>'

View File

@@ -0,0 +1,24 @@
# This file configures `rpk` to connect to a remote Redpanda cluster running in the same local network as `rpk`.
# Configuration for connecting to the Kafka API of the Redpanda cluster.
kafka_api:
# SASL (Simple Authentication and Security Layer) settings for authentication.
sasl:
user: superuser # The username used for authentication
password: secretpassword # The password associated with the username
mechanism: scram-sha-256 # Authentication mechanism; SCRAM-SHA-256 provides secure password-based authentication
# List of Kafka brokers in the Redpanda cluster.
# These brokers ensure high availability and fault tolerance for Kafka-based communication.
brokers:
- 127.0.0.1:19092 # Broker 1: Accessible on localhost, port 19092
- 127.0.0.1:29092 # Broker 2: Accessible on localhost, port 29092
- 127.0.0.1:39092 # Broker 3: Accessible on localhost, port 39092
# Configuration for connecting to the Redpanda Admin API.
# The Admin API allows you to perform administrative tasks such as managing configurations, monitoring, and scaling.
admin_api:
# List of Admin API endpoints for managing the cluster.
addresses:
- 127.0.0.1:19644 # Admin API for Broker 1: Accessible on localhost, port 19644
- 127.0.0.1:29644 # Admin API for Broker 2: Accessible on localhost, port 29644
- 127.0.0.1:39644 # Admin API for Broker 3: Accessible on localhost, port 39644

View File

@@ -0,0 +1,37 @@
{
"$schema": "http://json-schema.org/draft-07/schema#",
"title": "Transactions",
"type": "object",
"properties": {
"email": {
"type": "string",
"format": "email",
"description": "The email address of the user involved in the transaction."
},
"index": {
"type": "integer",
"description": "A numeric index associated with the transaction."
},
"price": {
"type": "string",
"pattern": "^XXX \\d+\\.\\d{6}$",
"description": "A string representing the price of the product, including a currency code followed by the amount."
},
"product_url": {
"type": "string",
"format": "uri",
"description": "A URL that points to the product involved in the transaction."
},
"timestamp": {
"type": "string",
"format": "date-time",
"description": "The timestamp of when the transaction occurred, formatted in ISO 8601."
},
"user_id": {
"type": "integer",
"description": "A numeric identifier for the user."
}
},
"required": ["email", "index", "price", "product_url", "timestamp", "user_id"],
"additionalProperties": false
}

View File

@@ -0,0 +1,46 @@
# Transactions Topic Documentation
This document provides an overview of the `transactions` topic in the Redpanda cluster. The topic is designed to capture autogenerated transaction events with various attributes.
## Schema Overview
Each message in the `transactions` topic is a JSON object with the following fields and types:
```json
{
"email": "string",
"index": "integer",
"price": "string",
"product_url": "string",
"timestamp": "string",
"user_id": "integer"
}
```
- **email**: The email address of the user involved in the transaction.
- **index**: A numeric index associated with the transaction. This could represent the position or order of the transaction in a sequence.
- **price**: A string representing the price of the product. It includes a currency code (e.g., "XXX") followed by the amount.
- **product_url**: A URL that points to the product involved in the transaction.
- **timestamp**: The timestamp of when the transaction occurred, formatted in ISO 8601.
- **user_id**: A numeric identifier for the user. This is typically a unique ID assigned to each user in the system.
## Example message
```json
{
"email": "wzieme@ykczius.edu",
"index": 0,
"price": "XXX 5651308.100000",
"product_url": "http://yjomdta.top/DxvGsCn.php",
"timestamp": "2024-08-16T15:51:19.799474084Z",
"user_id": 1
}
```
## Use cases
You can use the `transactions` topic for various purposes, including:
- **Analytics**: Tracking and analyzing user transactions to understand buying behavior, popular products, etc.
- **Monitoring**: Observing transaction patterns to detect anomalies, such as unusual spikes or drops in transaction volume.
- **Data Processing**: Feeding transaction data into other systems, such as a data warehouse or real-time processing pipelines, for further processing and analysis.

View File

@@ -0,0 +1,73 @@
= Modify the Wasm Transform in the Quickstart
This directory contains the Go source code (`transform.go`) for the data transform that is used in the Redpanda Self-Managed quickstart.
If you're following the quickstart, you *do not* need to modify or rebuild this code. The Docker Compose configuration automatically deploys a pre-built transform called `regex.wasm`.
However, if you want to customize the data transform logic, continue reading.
== Why customize the transform?
- **Custom filtering**: Filter by a different regex or apply multiple conditions.
- **Data manipulation**: Transform records before writing them out. For example, redacting sensitive data or combining fields.
- **Extended functionality**: Add advanced logging, error handling, or multi-topic routing.
== Prerequisites
You need the following:
- At least Go 1.20 installed.
+
[source,bash]
----
go version
----
- The Redpanda CLI (`rpk`) installed.
- A running Redpanda cluster. If you're using the local quickstart with Docker Compose, ensure the cluster is up and running. Or, point `rpk` to another Redpanda environment.
== Modify and deploy your transform
. Open link:transform.go[transform.go] and make your changes. For example:
+
--
- Change the regex logic to handle different use cases.
- Add environment variables to control new features.
- Extend the `doRegexFilter()` function to manipulate records.
--
. Compile your Go code into a `.wasm` file:
+
[source,bash]
----
rpk transform build
----
+
This command compiles your Go source and produces a `.wasm` file that you can deploy to Redpanda.
. Deploy the new transform.
+
If your Docker Compose setup already has a service to deploy the transform, you can restart that service.
+
Otherwise, you can deploy your updated `.wasm` manually using `rpk transform deploy`.
. Produce messages into the input topic. For example:
+
[source,bash]
----
echo '{"key":"alice@university.edu","value":"test message"}' | rpk topic produce logins
----
. Consume from the output topic. For example:
+
[source,bash]
----
rpk topic consume edu-filtered-domains --num 1
----
== Suggested reading
- link:https://docs.redpanda.com/current/reference/rpk/[Redpanda `rpk` CLI Reference^].
- link:https://docs.redpanda.com/current/develop/data-transforms/build/[Develop Data Transforms^].
- https://golang.org/ref/mod[Go Modules^] for managing dependencies and builds in Go.
- https://docs.docker.com/compose/[Docker Compose^] for customizing your environment.

View File

@@ -0,0 +1,5 @@
module regex
go 1.20
require github.com/redpanda-data/redpanda/src/transform-sdk/go/transform v1.1.0

View File

@@ -0,0 +1,2 @@
github.com/redpanda-data/redpanda/src/transform-sdk/go/transform v1.1.0 h1:KxgHJZsHsrT3YX7DMpu/vJN4TZN3KFm1jzrCFLyOepA=
github.com/redpanda-data/redpanda/src/transform-sdk/go/transform v1.1.0/go.mod h1:QGgiwwf/BIsD1b7EiyQ/Apzw+RLSpasRDdpOCiefQFQ=

View File

@@ -0,0 +1,122 @@
package main
// This data transform filters records based on a customizable regex pattern.
// If a record's key or value
// (determined by an environment variable) matches the specified regex,
// the record is forwarded to the output.
// Otherwise, it is dropped.
//
// Usage:
// 1. Provide the following environment variables in your Docker or configuration setup:
// - PATTERN : (required) a regular expression that determines what you want to match.
// - MATCH_VALUE : (optional) a boolean to decide whether to check the record value. If false,
// the record key is checked. Default is false.
//
// Example environment variables:
// PATTERN=".*\\.edu$"
// MATCH_VALUE="true"
//
// Logs:
// This transform logs information about each record and whether it matched.
// The logs appear in the _redpanda.transform_logs topic, so you can debug how your records are being processed.
//
// Build instructions:
// go mod tidy
// rpk transform build
//
// For more details on building transforms with the Redpanda SDK, see:
// https://docs.redpanda.com/current/develop/data-transforms
//
import (
"log"
"os"
"regexp"
"strings"
"github.com/redpanda-data/redpanda/src/transform-sdk/go/transform"
)
// Package-level state shared between main() and doRegexFilter().
// Both variables are assigned in main() and only read by doRegexFilter().
var (
// re is the compiled form of the PATTERN environment variable.
re *regexp.Regexp
// checkValue is true when the record value is matched against re;
// when false, the record key is matched instead.
checkValue bool
)
// isTrueVar reports whether v spells an affirmative setting.
//
// The comparison is case-insensitive and accepts exactly "yes", "ok", "1",
// and "true". The input is not trimmed, so surrounding whitespace makes the
// value falsy, as does any other string (including the empty string).
func isTrueVar(v string) bool {
	normalized := strings.ToLower(v)
	for _, truthy := range []string{"yes", "ok", "1", "true"} {
		if normalized == truthy {
			return true
		}
	}
	return false
}
// main runs once at startup and performs all initialization:
//   - configures the logger (prefix, UTC timestamps with microseconds),
//   - reads the required PATTERN environment variable and compiles it,
//   - reads the optional MATCH_VALUE environment variable to decide whether
//     the record value (true) or the record key (false, default) is matched,
//   - registers doRegexFilter() as the per-record callback.
//
// Startup is aborted through log.Fatal/log.Fatalf when PATTERN is missing or
// is not a valid regular expression.
func main() {
	// Set logging preferences, including timestamp and UTC time.
	log.SetPrefix("[regex-transform] ")
	log.SetFlags(log.Ldate | log.Ltime | log.LUTC | log.Lmicroseconds)
	log.Println("Starting transform...")

	// PATTERN is mandatory: without it there is nothing to filter on.
	pattern, ok := os.LookupEnv("PATTERN")
	if !ok {
		log.Fatal("Missing PATTERN environment variable")
	}
	log.Printf("Using PATTERN: %q\n", pattern)

	// PATTERN is user-supplied, so compile it with error handling rather than
	// regexp.MustCompile: a bad pattern is reported through the same fatal-log
	// path as a missing one instead of surfacing as a panic.
	compiled, err := regexp.Compile(pattern)
	if err != nil {
		log.Fatalf("Invalid PATTERN %q: %v", pattern, err)
	}
	re = compiled

	// MATCH_VALUE is optional; an unset or non-affirmative value means the
	// record key is checked.
	matchValue, ok := os.LookupEnv("MATCH_VALUE")
	checkValue = ok && isTrueVar(matchValue)
	log.Printf("MATCH_VALUE set to: %t\n", checkValue)

	log.Println("Initialization complete, waiting for records...")
	// Listen for records to be written, calling doRegexFilter() for each record.
	transform.OnRecordWritten(doRegexFilter)
}
// doRegexFilter is the callback registered in main() for written records.
//
// It selects the record's value when checkValue is true and the record's key
// otherwise, then tests that field against the compiled regex re:
//   - a nil field is logged and the record is skipped (nil error returned),
//   - a non-matching field is logged and the record is dropped (nil error),
//   - a matching field is logged and the record is written to w; the result
//     of w.Write is returned to the caller.
func doRegexFilter(e transform.WriteEvent, w transform.RecordWriter) error {
	record := e.Record()

	// Pick the field under test according to MATCH_VALUE.
	var candidate []byte
	if !checkValue {
		candidate = record.Key
		log.Printf("Checking record key: %s\n", string(candidate))
	} else {
		candidate = record.Value
		log.Printf("Checking record value: %s\n", string(candidate))
	}

	// Nothing to test: skip the record without writing it.
	if candidate == nil {
		log.Println("Record has no key/value to check, skipping.")
		return nil
	}

	// No match: drop the record by returning without writing it.
	if !re.Match(candidate) {
		log.Printf("Record did not match pattern, dropping. Key: %s, Value: %s\n", string(record.Key), string(record.Value))
		return nil
	}

	// Match: forward the record unchanged to the output topic.
	log.Printf("Record matched pattern, passing through. Key: %s, Value: %s\n", string(record.Key), string(record.Value))
	return w.Write(record)
}

View File

@@ -0,0 +1,33 @@
# Transform metadata used by the rpk transform build command.
# This metadata file tells rpk:
# 1) The transform's display name, which also becomes the base for the .wasm file name.
# 2) A brief description of what it does.
# 3) Defaults for environment variables.
# 4) Input and output topics (if you want to define them here rather than in the deploy command).
# Human-readable name of the transform. rpk transform build uses this for the generated .wasm file.
name: regex
description: |
Filters the input topic to records that only match a regular expression.
Regular expressions are implemented using Go's regexp library, which uses the syntax of RE2.
See the RE2 wiki for allowed syntax: https://github.com/google/re2/wiki/Syntax
Environment variables:
- PATTERN: The regular expression that will match against records (required).
- MATCH_VALUE: By default, the regex matches keys, but if set to "true", the regex matches values.
# By default, no input topic is set here. (You can set it in your deploy command if preferred.)
input-topic: ""
# By default, no output topic is set here. (You can set it in your deploy command if preferred.)
output-topic: ""
# Indicates the specific TinyGo environment used to compile your transform.
language: tinygo-no-goroutines
env:
# The PATTERN variable must be provided at deploy time.
# Example: --var=PATTERN=".*@example.com"
PATTERN: '<required>'

View File

@@ -0,0 +1,24 @@
# This file configures `rpk` to connect to a remote Redpanda cluster running in the same local network as `rpk`.
# Configuration for connecting to the Kafka API of the Redpanda cluster.
kafka_api:
# SASL (Simple Authentication and Security Layer) settings for authentication.
sasl:
user: superuser # The username used for authentication
password: secretpassword # The password associated with the username
mechanism: scram-sha-256 # Authentication mechanism; SCRAM-SHA-256 provides secure password-based authentication
# List of Kafka brokers in the Redpanda cluster.
# These brokers ensure high availability and fault tolerance for Kafka-based communication.
brokers:
- 127.0.0.1:19092 # Broker 1: Accessible on localhost, port 19092
- 127.0.0.1:29092 # Broker 2: Accessible on localhost, port 29092
- 127.0.0.1:39092 # Broker 3: Accessible on localhost, port 39092
# Configuration for connecting to the Redpanda Admin API.
# The Admin API allows you to perform administrative tasks such as managing configurations, monitoring, and scaling.
admin_api:
# List of Admin API endpoints for managing the cluster.
addresses:
- 127.0.0.1:19644 # Admin API for Broker 1: Accessible on localhost, port 19644
- 127.0.0.1:29644 # Admin API for Broker 2: Accessible on localhost, port 29644
- 127.0.0.1:39644 # Admin API for Broker 3: Accessible on localhost, port 39644

View File

@@ -0,0 +1,37 @@
{
"$schema": "http://json-schema.org/draft-07/schema#",
"title": "Transactions",
"type": "object",
"properties": {
"email": {
"type": "string",
"format": "email",
"description": "The email address of the user involved in the transaction."
},
"index": {
"type": "integer",
"description": "A numeric index associated with the transaction."
},
"price": {
"type": "string",
"pattern": "^XXX \\d+\\.\\d{6}$",
"description": "A string representing the price of the product, including a currency code followed by the amount."
},
"product_url": {
"type": "string",
"format": "uri",
"description": "A URL that points to the product involved in the transaction."
},
"timestamp": {
"type": "string",
"format": "date-time",
"description": "The timestamp of when the transaction occurred, formatted in ISO 8601."
},
"user_id": {
"type": "integer",
"description": "A numeric identifier for the user."
}
},
"required": ["email", "index", "price", "product_url", "timestamp", "user_id"],
"additionalProperties": false
}

View File

@@ -0,0 +1,46 @@
# Transactions Topic Documentation
This document provides an overview of the `transactions` topic in the Redpanda cluster. The topic is designed to capture autogenerated transaction events with various attributes.
## Schema Overview
Each message in the `transactions` topic is a JSON object with the following fields and types:
```json
{
"email": "string",
"index": "integer",
"price": "string",
"product_url": "string",
"timestamp": "string",
"user_id": "integer"
}
```
- **email**: The email address of the user involved in the transaction.
- **index**: A numeric index associated with the transaction. This could represent the position or order of the transaction in a sequence.
- **price**: A string representing the price of the product. It includes a currency code (e.g., "XXX") followed by the amount.
- **product_url**: A URL that points to the product involved in the transaction.
- **timestamp**: The timestamp of when the transaction occurred, formatted in ISO 8601.
- **user_id**: A numeric identifier for the user. This is typically a unique ID assigned to each user in the system.
## Example message
```json
{
"email": "wzieme@ykczius.edu",
"index": 0,
"price": "XXX 5651308.100000",
"product_url": "http://yjomdta.top/DxvGsCn.php",
"timestamp": "2024-08-16T15:51:19.799474084Z",
"user_id": 1
}
```
## Use cases
You can use the `transactions` topic for various purposes, including:
- **Analytics**: Tracking and analyzing user transactions to understand buying behavior, popular products, etc.
- **Monitoring**: Observing transaction patterns to detect anomalies, such as unusual spikes or drops in transaction volume.
- **Data Processing**: Feeding transaction data into other systems, such as a data warehouse or real-time processing pipelines, for further processing and analysis.

View File

@@ -0,0 +1,73 @@
= Modify the Wasm Transform in the Quickstart
This directory contains the Go source code (`transform.go`) for the data transform that is used in the Redpanda Self-Managed quickstart.
If you're following the quickstart, you *do not* need to modify or rebuild this code. The Docker Compose configuration automatically deploys a pre-built transform called `regex.wasm`.
However, if you want to customize the data transform logic, continue reading.
== Why customize the transform?
- **Custom filtering**: Filter by a different regex or apply multiple conditions.
- **Data manipulation**: Transform records before writing them out. For example, redacting sensitive data or combining fields.
- **Extended functionality**: Add advanced logging, error handling, or multi-topic routing.
== Prerequisites
You need the following:
- At least Go 1.20 installed.
+
[source,bash]
----
go version
----
- The Redpanda CLI (`rpk`) installed.
- A running Redpanda cluster. If you're using the local quickstart with Docker Compose, ensure the cluster is up and running. Or, point `rpk` to another Redpanda environment.
== Modify and deploy your transform
. Open link:transform.go[transform.go] and make your changes. For example:
+
--
- Change the regex logic to handle different use cases.
- Add environment variables to control new features.
- Extend the `doRegexFilter()` function to manipulate records.
--
. Compile your Go code into a `.wasm` file:
+
[source,bash]
----
rpk transform build
----
+
This command compiles your Go source and produces a `.wasm` file that you can deploy to Redpanda.
. Deploy the new transform.
+
If your Docker Compose setup already has a service to deploy the transform, you can restart that service.
+
Otherwise, you can deploy your updated `.wasm` manually using `rpk transform deploy`.
. Produce messages into the input topic. For example:
+
[source,bash]
----
echo '{"key":"alice@university.edu","value":"test message"}' | rpk topic produce logins
----
. Consume from the output topic. For example:
+
[source,bash]
----
rpk topic consume edu-filtered-domains --num 1
----
== Suggested reading
- link:https://docs.redpanda.com/current/reference/rpk/[Redpanda `rpk` CLI Reference^].
- link:https://docs.redpanda.com/current/develop/data-transforms/build/[Develop Data Transforms^].
- https://golang.org/ref/mod[Go Modules^] for managing dependencies and builds in Go.
- https://docs.docker.com/compose/[Docker Compose^] for customizing your environment.

View File

@@ -0,0 +1,5 @@
module regex
go 1.20
require github.com/redpanda-data/redpanda/src/transform-sdk/go/transform v1.1.0

View File

@@ -0,0 +1,2 @@
github.com/redpanda-data/redpanda/src/transform-sdk/go/transform v1.1.0 h1:KxgHJZsHsrT3YX7DMpu/vJN4TZN3KFm1jzrCFLyOepA=
github.com/redpanda-data/redpanda/src/transform-sdk/go/transform v1.1.0/go.mod h1:QGgiwwf/BIsD1b7EiyQ/Apzw+RLSpasRDdpOCiefQFQ=

View File

@@ -0,0 +1,122 @@
package main
// This data transform filters records based on a customizable regex pattern.
// If a record's key or value
// (determined by an environment variable) matches the specified regex,
// the record is forwarded to the output.
// Otherwise, it is dropped.
//
// Usage:
// 1. Provide the following environment variables in your Docker or configuration setup:
// - PATTERN : (required) a regular expression that determines what you want to match.
// - MATCH_VALUE : (optional) a boolean to decide whether to check the record value. If false,
// the record key is checked. Default is false.
//
// Example environment variables:
// PATTERN=".*\\.edu$"
// MATCH_VALUE="true"
//
// Logs:
// This transform logs information about each record and whether it matched.
// The logs appear in the _redpanda.transform_logs topic, so you can debug how your records are being processed.
//
// Build instructions:
// go mod tidy
// rpk transform build
//
// For more details on building transforms with the Redpanda SDK, see:
// https://docs.redpanda.com/current/develop/data-transforms
//
import (
"log"
"os"
"regexp"
"strings"
"github.com/redpanda-data/redpanda/src/transform-sdk/go/transform"
)
// Package-level state shared between main() and doRegexFilter().
// Both variables are assigned in main() and only read by doRegexFilter().
var (
// re is the compiled form of the PATTERN environment variable.
re *regexp.Regexp
// checkValue is true when the record value is matched against re;
// when false, the record key is matched instead.
checkValue bool
)
// isTrueVar reports whether v spells an affirmative setting.
//
// The comparison is case-insensitive and accepts exactly "yes", "ok", "1",
// and "true". The input is not trimmed, so surrounding whitespace makes the
// value falsy, as does any other string (including the empty string).
func isTrueVar(v string) bool {
	normalized := strings.ToLower(v)
	for _, truthy := range []string{"yes", "ok", "1", "true"} {
		if normalized == truthy {
			return true
		}
	}
	return false
}
// main runs once at startup and performs all initialization:
//   - configures the logger (prefix, UTC timestamps with microseconds),
//   - reads the required PATTERN environment variable and compiles it,
//   - reads the optional MATCH_VALUE environment variable to decide whether
//     the record value (true) or the record key (false, default) is matched,
//   - registers doRegexFilter() as the per-record callback.
//
// Startup is aborted through log.Fatal/log.Fatalf when PATTERN is missing or
// is not a valid regular expression.
func main() {
	// Set logging preferences, including timestamp and UTC time.
	log.SetPrefix("[regex-transform] ")
	log.SetFlags(log.Ldate | log.Ltime | log.LUTC | log.Lmicroseconds)
	log.Println("Starting transform...")

	// PATTERN is mandatory: without it there is nothing to filter on.
	pattern, ok := os.LookupEnv("PATTERN")
	if !ok {
		log.Fatal("Missing PATTERN environment variable")
	}
	log.Printf("Using PATTERN: %q\n", pattern)

	// PATTERN is user-supplied, so compile it with error handling rather than
	// regexp.MustCompile: a bad pattern is reported through the same fatal-log
	// path as a missing one instead of surfacing as a panic.
	compiled, err := regexp.Compile(pattern)
	if err != nil {
		log.Fatalf("Invalid PATTERN %q: %v", pattern, err)
	}
	re = compiled

	// MATCH_VALUE is optional; an unset or non-affirmative value means the
	// record key is checked.
	matchValue, ok := os.LookupEnv("MATCH_VALUE")
	checkValue = ok && isTrueVar(matchValue)
	log.Printf("MATCH_VALUE set to: %t\n", checkValue)

	log.Println("Initialization complete, waiting for records...")
	// Listen for records to be written, calling doRegexFilter() for each record.
	transform.OnRecordWritten(doRegexFilter)
}
// doRegexFilter is the callback registered in main() for written records.
//
// It selects the record's value when checkValue is true and the record's key
// otherwise, then tests that field against the compiled regex re:
//   - a nil field is logged and the record is skipped (nil error returned),
//   - a non-matching field is logged and the record is dropped (nil error),
//   - a matching field is logged and the record is written to w; the result
//     of w.Write is returned to the caller.
func doRegexFilter(e transform.WriteEvent, w transform.RecordWriter) error {
	record := e.Record()

	// Pick the field under test according to MATCH_VALUE.
	var candidate []byte
	if !checkValue {
		candidate = record.Key
		log.Printf("Checking record key: %s\n", string(candidate))
	} else {
		candidate = record.Value
		log.Printf("Checking record value: %s\n", string(candidate))
	}

	// Nothing to test: skip the record without writing it.
	if candidate == nil {
		log.Println("Record has no key/value to check, skipping.")
		return nil
	}

	// No match: drop the record by returning without writing it.
	if !re.Match(candidate) {
		log.Printf("Record did not match pattern, dropping. Key: %s, Value: %s\n", string(record.Key), string(record.Value))
		return nil
	}

	// Match: forward the record unchanged to the output topic.
	log.Printf("Record matched pattern, passing through. Key: %s, Value: %s\n", string(record.Key), string(record.Value))
	return w.Write(record)
}

View File

@@ -0,0 +1,33 @@
# Transform metadata used by the rpk transform build command.
# This metadata file tells rpk:
# 1) The transform's display name, which also becomes the base for the .wasm file name.
# 2) A brief description of what it does.
# 3) Defaults for environment variables.
# 4) Input and output topics (if you want to define them here rather than in the deploy command).
# Human-readable name of the transform. rpk transform build uses this for the generated .wasm file.
name: regex
description: |
Filters the input topic to records that only match a regular expression.
Regular expressions are implemented using Go's regexp library, which uses the syntax of RE2.
See the RE2 wiki for allowed syntax: https://github.com/google/re2/wiki/Syntax
Environment variables:
- PATTERN: The regular expression that will match against records (required).
- MATCH_VALUE: By default, the regex matches keys, but if set to "true", the regex matches values.
# By default, no input topic is set here. (You can set it in your deploy command if preferred.)
input-topic: ""
# By default, no output topic is set here. (You can set it in your deploy command if preferred.)
output-topic: ""
# Indicates the specific TinyGo environment used to compile your transform.
language: tinygo-no-goroutines
env:
# The PATTERN variable must be provided at deploy time.
# Example: --var=PATTERN=".*@example.com"
PATTERN: '<required>'

View File

@@ -0,0 +1,24 @@
# This file configures `rpk` to connect to a remote Redpanda cluster running in the same local network as `rpk`.
# Configuration for connecting to the Kafka API of the Redpanda cluster.
kafka_api:
# SASL (Simple Authentication and Security Layer) settings for authentication.
sasl:
user: superuser # The username used for authentication
password: secretpassword # The password associated with the username
mechanism: scram-sha-256 # Authentication mechanism; SCRAM-SHA-256 provides secure password-based authentication
# List of Kafka brokers in the Redpanda cluster.
# These brokers ensure high availability and fault tolerance for Kafka-based communication.
brokers:
- 127.0.0.1:19092 # Broker 1: Accessible on localhost, port 19092
- 127.0.0.1:29092 # Broker 2: Accessible on localhost, port 29092
- 127.0.0.1:39092 # Broker 3: Accessible on localhost, port 39092
# Configuration for connecting to the Redpanda Admin API.
# The Admin API allows you to perform administrative tasks such as managing configurations, monitoring, and scaling.
admin_api:
# List of Admin API endpoints for managing the cluster.
addresses:
- 127.0.0.1:19644 # Admin API for Broker 1: Accessible on localhost, port 19644
- 127.0.0.1:29644 # Admin API for Broker 2: Accessible on localhost, port 29644
- 127.0.0.1:39644 # Admin API for Broker 3: Accessible on localhost, port 39644

View File

@@ -0,0 +1,37 @@
{
"$schema": "http://json-schema.org/draft-07/schema#",
"title": "Transactions",
"type": "object",
"properties": {
"email": {
"type": "string",
"format": "email",
"description": "The email address of the user involved in the transaction."
},
"index": {
"type": "integer",
"description": "A numeric index associated with the transaction."
},
"price": {
"type": "string",
"pattern": "^XXX \\d+\\.\\d{6}$",
"description": "A string representing the price of the product, including a currency code followed by the amount."
},
"product_url": {
"type": "string",
"format": "uri",
"description": "A URL that points to the product involved in the transaction."
},
"timestamp": {
"type": "string",
"format": "date-time",
"description": "The timestamp of when the transaction occurred, formatted in ISO 8601."
},
"user_id": {
"type": "integer",
"description": "A numeric identifier for the user."
}
},
"required": ["email", "index", "price", "product_url", "timestamp", "user_id"],
"additionalProperties": false
}

View File

@@ -0,0 +1,46 @@
# Transactions Topic Documentation
This document provides an overview of the `transactions` topic in the Redpanda cluster. The topic is designed to capture autogenerated transaction events with various attributes.
## Schema Overview
Each message in the `transactions` topic is a JSON object with the following fields and types:
```json
{
"email": "string",
"index": "integer",
"price": "string",
"product_url": "string",
"timestamp": "string",
"user_id": "integer"
}
```
- **email**: The email address of the user involved in the transaction.
- **index**: A numeric index associated with the transaction. This could represent the position or order of the transaction in a sequence.
- **price**: A string representing the price of the product. It includes a currency code (e.g., "XXX") followed by the amount.
- **product_url**: A URL that points to the product involved in the transaction.
- **timestamp**: The timestamp of when the transaction occurred, formatted in ISO 8601.
- **user_id**: A numeric identifier for the user. This is typically a unique ID assigned to each user in the system.
## Example message
```json
{
"email": "wzieme@ykczius.edu",
"index": 0,
"price": "XXX 5651308.100000",
"product_url": "http://yjomdta.top/DxvGsCn.php",
"timestamp": "2024-08-16T15:51:19.799474084Z",
"user_id": 1
}
```
## Use cases
You can use the `transactions` topic for various purposes, including:
- **Analytics**: Tracking and analyzing user transactions to understand buying behavior, popular products, etc.
- **Monitoring**: Observing transaction patterns to detect anomalies, such as unusual spikes or drops in transaction volume.
- **Data Processing**: Feeding transaction data into other systems, such as a data warehouse or real-time processing pipelines, for further processing and analysis.

View File

@@ -0,0 +1,73 @@
= Modify the Wasm Transform in the Quickstart
This directory contains the Go source code (`transform.go`) for the data transform that is used in the Redpanda Self-Managed quickstart.
If you're following the quickstart, you *do not* need to modify or rebuild this code. The Docker Compose configuration automatically deploys a pre-built transform called `regex.wasm`.
However, if you want to customize the data transform logic, continue reading.
== Why customize the transform?
- **Custom filtering**: Filter by a different regex or apply multiple conditions.
- **Data manipulation**: Transform records before writing them out. For example, redacting sensitive data or combining fields.
- **Extended functionality**: Add advanced logging, error handling, or multi-topic routing.
== Prerequisites
You need the following:
- At least Go 1.20 installed.
+
[source,bash]
----
go version
----
- The Redpanda CLI (`rpk`) installed.
- A running Redpanda cluster. If you're using the local quickstart with Docker Compose, ensure the cluster is up and running. Or, point `rpk` to another Redpanda environment.
== Modify and deploy your transform
. Open link:transform.go[transform.go] and make your changes. For example:
+
--
- Change the regex logic to handle different use cases.
- Add environment variables to control new features.
- Extend the `doRegexFilter()` function to manipulate records.
--
. Compile your Go code into a `.wasm` file:
+
[source,bash]
----
rpk transform build
----
+
This command compiles your Go source and produces a `.wasm` file that you can deploy to Redpanda.
. Deploy the new transform.
+
If your Docker Compose setup already has a service to deploy the transform, you can restart that service.
+
Otherwise, you can deploy your updated `.wasm` manually using `rpk transform deploy`.
. Produce messages into the input topic. For example:
+
[source,bash]
----
echo '{"key":"alice@university.edu","value":"test message"}' | rpk topic produce logins
----
. Consume from the output topic. For example:
+
[source,bash]
----
rpk topic consume edu-filtered-domains --num 1
----
== Suggested reading
- link:https://docs.redpanda.com/current/reference/rpk/[Redpanda `rpk` CLI Reference^].
- link:https://docs.redpanda.com/current/develop/data-transforms/build/[Develop Data Transforms^].
- https://golang.org/ref/mod[Go Modules^] for managing dependencies and builds in Go.
- https://docs.docker.com/compose/[Docker Compose^] for customizing your environment.

View File

@@ -0,0 +1,5 @@
module regex
go 1.20
require github.com/redpanda-data/redpanda/src/transform-sdk/go/transform v1.1.0

View File

@@ -0,0 +1,2 @@
github.com/redpanda-data/redpanda/src/transform-sdk/go/transform v1.1.0 h1:KxgHJZsHsrT3YX7DMpu/vJN4TZN3KFm1jzrCFLyOepA=
github.com/redpanda-data/redpanda/src/transform-sdk/go/transform v1.1.0/go.mod h1:QGgiwwf/BIsD1b7EiyQ/Apzw+RLSpasRDdpOCiefQFQ=

View File

@@ -0,0 +1,122 @@
package main
// This data transform filters records based on a customizable regex pattern.
// If a record's key or value
// (determined by an environment variable) matches the specified regex,
// the record is forwarded to the output.
// Otherwise, it is dropped.
//
// Usage:
// 1. Provide the following environment variables in your Docker or configuration setup:
// - PATTERN : (required) a regular expression that determines what you want to match.
// - MATCH_VALUE : (optional) a boolean to decide whether to check the record value. If false,
// the record key is checked. Default is false.
//
// Example environment variables:
// PATTERN=".*\\.edu$"
// MATCH_VALUE="true"
//
// Logs:
// This transform logs information about each record and whether it matched.
// The logs appear in the _redpanda.transform_logs topic, so you can debug how your records are being processed.
//
// Build instructions:
// go mod tidy
// rpk transform build
//
// For more details on building transforms with the Redpanda SDK, see:
// https://docs.redpanda.com/current/develop/data-transforms
//
import (
"log"
"os"
"regexp"
"strings"
"github.com/redpanda-data/redpanda/src/transform-sdk/go/transform"
)
// Package-level state shared between main() and doRegexFilter().
// Both variables are assigned in main() and only read by doRegexFilter().
var (
// re is the compiled form of the PATTERN environment variable.
re *regexp.Regexp
// checkValue is true when the record value is matched against re;
// when false, the record key is matched instead.
checkValue bool
)
// isTrueVar reports whether v spells an affirmative setting.
//
// The comparison is case-insensitive and accepts exactly "yes", "ok", "1",
// and "true". The input is not trimmed, so surrounding whitespace makes the
// value falsy, as does any other string (including the empty string).
func isTrueVar(v string) bool {
	normalized := strings.ToLower(v)
	for _, truthy := range []string{"yes", "ok", "1", "true"} {
		if normalized == truthy {
			return true
		}
	}
	return false
}
// main runs once at startup and performs all initialization:
//   - configures the logger (prefix, UTC timestamps with microseconds),
//   - reads the required PATTERN environment variable and compiles it,
//   - reads the optional MATCH_VALUE environment variable to decide whether
//     the record value (true) or the record key (false, default) is matched,
//   - registers doRegexFilter() as the per-record callback.
//
// Startup is aborted through log.Fatal/log.Fatalf when PATTERN is missing or
// is not a valid regular expression.
func main() {
	// Set logging preferences, including timestamp and UTC time.
	log.SetPrefix("[regex-transform] ")
	log.SetFlags(log.Ldate | log.Ltime | log.LUTC | log.Lmicroseconds)
	log.Println("Starting transform...")

	// PATTERN is mandatory: without it there is nothing to filter on.
	pattern, ok := os.LookupEnv("PATTERN")
	if !ok {
		log.Fatal("Missing PATTERN environment variable")
	}
	log.Printf("Using PATTERN: %q\n", pattern)

	// PATTERN is user-supplied, so compile it with error handling rather than
	// regexp.MustCompile: a bad pattern is reported through the same fatal-log
	// path as a missing one instead of surfacing as a panic.
	compiled, err := regexp.Compile(pattern)
	if err != nil {
		log.Fatalf("Invalid PATTERN %q: %v", pattern, err)
	}
	re = compiled

	// MATCH_VALUE is optional; an unset or non-affirmative value means the
	// record key is checked.
	matchValue, ok := os.LookupEnv("MATCH_VALUE")
	checkValue = ok && isTrueVar(matchValue)
	log.Printf("MATCH_VALUE set to: %t\n", checkValue)

	log.Println("Initialization complete, waiting for records...")
	// Listen for records to be written, calling doRegexFilter() for each record.
	transform.OnRecordWritten(doRegexFilter)
}
// doRegexFilter is the callback registered in main() for written records.
//
// It selects the record's value when checkValue is true and the record's key
// otherwise, then tests that field against the compiled regex re:
//   - a nil field is logged and the record is skipped (nil error returned),
//   - a non-matching field is logged and the record is dropped (nil error),
//   - a matching field is logged and the record is written to w; the result
//     of w.Write is returned to the caller.
func doRegexFilter(e transform.WriteEvent, w transform.RecordWriter) error {
	record := e.Record()

	// Pick the field under test according to MATCH_VALUE.
	var candidate []byte
	if !checkValue {
		candidate = record.Key
		log.Printf("Checking record key: %s\n", string(candidate))
	} else {
		candidate = record.Value
		log.Printf("Checking record value: %s\n", string(candidate))
	}

	// Nothing to test: skip the record without writing it.
	if candidate == nil {
		log.Println("Record has no key/value to check, skipping.")
		return nil
	}

	// No match: drop the record by returning without writing it.
	if !re.Match(candidate) {
		log.Printf("Record did not match pattern, dropping. Key: %s, Value: %s\n", string(record.Key), string(record.Value))
		return nil
	}

	// Match: forward the record unchanged to the output topic.
	log.Printf("Record matched pattern, passing through. Key: %s, Value: %s\n", string(record.Key), string(record.Value))
	return w.Write(record)
}

View File

@@ -0,0 +1,33 @@
# Transform metadata used by the rpk transform build command.
# This metadata file tells rpk:
# 1) The transform's display name, which also becomes the base for the .wasm file name.
# 2) A brief description of what it does.
# 3) Defaults for environment variables.
# 4) Input and output topics (if you want to define them here rather than in the deploy command).
# Human-readable name of the transform. rpk transform build uses this for the generated .wasm file.
name: regex
description: |
Filters the input topic to records that only match a regular expression.
Regular expressions are implemented using Go's regexp library, which uses the syntax of RE2.
See the RE2 wiki for allowed syntax: https://github.com/google/re2/wiki/Syntax
Environment variables:
- PATTERN: The regular expression that will match against records (required).
- MATCH_VALUE: By default, the regex matches keys, but if set to "true", the regex matches values.
# By default, no input topic is set here. (You can set it in your deploy command if preferred.)
input-topic: ""
# By default, no output topic is set here. (You can set it in your deploy command if preferred.)
output-topic: ""
# Indicates the specific TinyGo environment used to compile your transform.
language: tinygo-no-goroutines
env:
# The PATTERN variable must be provided at deploy time.
# Example: --var=PATTERN=".*@example.com"
PATTERN: '<required>'

View File

@@ -0,0 +1,24 @@
# This file configures `rpk` to connect to a remote Redpanda cluster running in the same local network as `rpk`.
# Configuration for connecting to the Kafka API of the Redpanda cluster.
kafka_api:
# SASL (Simple Authentication and Security Layer) settings for authentication.
sasl:
user: superuser # The username used for authentication
password: secretpassword # The password associated with the username
mechanism: scram-sha-256 # Authentication mechanism; SCRAM-SHA-256 provides secure password-based authentication
# List of Kafka brokers in the Redpanda cluster.
# These brokers ensure high availability and fault tolerance for Kafka-based communication.
brokers:
- 127.0.0.1:19092 # Broker 1: Accessible on localhost, port 19092
- 127.0.0.1:29092 # Broker 2: Accessible on localhost, port 29092
- 127.0.0.1:39092 # Broker 3: Accessible on localhost, port 39092
# Configuration for connecting to the Redpanda Admin API.
# The Admin API allows you to perform administrative tasks such as managing configurations, monitoring, and scaling.
admin_api:
# List of Admin API endpoints for managing the cluster.
addresses:
- 127.0.0.1:19644 # Admin API for Broker 1: Accessible on localhost, port 19644
- 127.0.0.1:29644 # Admin API for Broker 2: Accessible on localhost, port 29644
- 127.0.0.1:39644 # Admin API for Broker 3: Accessible on localhost, port 39644

View File

@@ -0,0 +1,37 @@
{
"$schema": "http://json-schema.org/draft-07/schema#",
"title": "Transactions",
"type": "object",
"properties": {
"email": {
"type": "string",
"format": "email",
"description": "The email address of the user involved in the transaction."
},
"index": {
"type": "integer",
"description": "A numeric index associated with the transaction."
},
"price": {
"type": "string",
"pattern": "^XXX \\d+\\.\\d{6}$",
"description": "A string representing the price of the product, including a currency code followed by the amount."
},
"product_url": {
"type": "string",
"format": "uri",
"description": "A URL that points to the product involved in the transaction."
},
"timestamp": {
"type": "string",
"format": "date-time",
"description": "The timestamp of when the transaction occurred, formatted in ISO 8601."
},
"user_id": {
"type": "integer",
"description": "A numeric identifier for the user."
}
},
"required": ["email", "index", "price", "product_url", "timestamp", "user_id"],
"additionalProperties": false
}

View File

@@ -0,0 +1,46 @@
# Transactions Topic Documentation
This document provides an overview of the `transactions` topic in the Redpanda cluster. The topic is designed to capture autogenerated transaction events with various attributes.
## Schema Overview
Each message in the `transactions` topic is a JSON object with the following fields and types:
```json
{
"email": "string",
"index": "integer",
"price": "string",
"product_url": "string",
"timestamp": "string",
"user_id": "integer"
}
```
- **email**: The email address of the user involved in the transaction.
- **index**: A numeric index associated with the transaction. This could represent the position or order of the transaction in a sequence.
- **price**: A string representing the price of the product. It includes a currency code (e.g., "XXX") followed by the amount.
- **product_url**: A URL that points to the product involved in the transaction.
- **timestamp**: The timestamp of when the transaction occurred, formatted in ISO 8601.
- **user_id**: A numeric identifier for the user. This is typically a unique ID assigned to each user in the system.
## Example message
```json
{
"email": "wzieme@ykczius.edu",
"index": 0,
"price": "XXX 5651308.100000",
"product_url": "http://yjomdta.top/DxvGsCn.php",
"timestamp": "2024-08-16T15:51:19.799474084Z",
"user_id": 1
}
```
## Use cases
You can use the `transactions` topic for various purposes, including:
- **Analytics**: Tracking and analyzing user transactions to understand buying behavior, popular products, etc.
- **Monitoring**: Observing transaction patterns to detect anomalies, such as unusual spikes or drops in transaction volume.
- **Data Processing**: Feeding transaction data into other systems, such as a data warehouse or real-time processing pipelines, for further processing and analysis.

View File

@@ -0,0 +1,73 @@
= Modify the Wasm Transform in the Quickstart
This directory contains the Go source code (`transform.go`) for the data transform that is used in the Redpanda Self-Managed quickstart.
If you're following the quickstart, you *do not* need to modify or rebuild this code. The Docker Compose configuration automatically deploys a pre-built transform called `regex.wasm`.
However, if you want to customize the data transform logic, continue reading.
== Why customize the transform?
- **Custom filtering**: Filter by a different regex or apply multiple conditions.
- **Data manipulation**: Transform records before writing them out. For example, redacting sensitive data or combining fields.
- **Extended functionality**: Add advanced logging, error handling, or multi-topic routing.
== Prerequisites
You need the following:
- At least Go 1.20 installed.
+
[source,bash]
----
go version
----
- The Redpanda CLI (`rpk`) installed.
- A running Redpanda cluster. If you're using the local quickstart with Docker Compose, ensure the cluster is up and running. Or, point `rpk` to another Redpanda environment.
== Modify and deploy your transform
. Open link:transform.go[transform.go] and make your changes. For example:
+
--
- Change the regex logic to handle different use cases.
- Add environment variables to control new features.
- Extend the `doRegexFilter()` function to manipulate records.
--
. Compile your Go code into a `.wasm` file:
+
[source,bash]
----
rpk transform build
----
+
This command compiles your Go source and produces a `.wasm` file that you can deploy to Redpanda.
. Deploy the new transform.
+
If your Docker Compose setup already has a service to deploy the transform, you can restart that service.
+
Otherwise, you can deploy your updated `.wasm` manually using `rpk transform deploy`.
. Produce messages into the input topic. For example:
+
[source,bash]
----
echo '{"key":"alice@university.edu","value":"test message"}' | rpk topic produce logins
----
. Consume from the output topic. For example:
+
[source,bash]
----
rpk topic consume edu-filtered-domains --num 1
----
== Suggested reading
- link:https://docs.redpanda.com/current/reference/rpk/[Redpanda `rpk` CLI Reference^].
- link:https://docs.redpanda.com/current/develop/data-transforms/build/[Develop Data Transforms^].
- https://golang.org/ref/mod[Go Modules^] for managing dependencies and builds in Go.
- https://docs.docker.com/compose/[Docker Compose^] for customizing your environment.

View File

@@ -0,0 +1,5 @@
module regex
go 1.20
require github.com/redpanda-data/redpanda/src/transform-sdk/go/transform v1.1.0

View File

@@ -0,0 +1,2 @@
github.com/redpanda-data/redpanda/src/transform-sdk/go/transform v1.1.0 h1:KxgHJZsHsrT3YX7DMpu/vJN4TZN3KFm1jzrCFLyOepA=
github.com/redpanda-data/redpanda/src/transform-sdk/go/transform v1.1.0/go.mod h1:QGgiwwf/BIsD1b7EiyQ/Apzw+RLSpasRDdpOCiefQFQ=

View File

@@ -0,0 +1,122 @@
package main
// This data transform filters records based on a customizable regex pattern.
// If a record's key or value
// (determined by an environment variable) matches the specified regex,
// the record is forwarded to the output.
// Otherwise, it is dropped.
//
// Usage:
// 1. Provide the following environment variables in your Docker or configuration setup:
// - PATTERN : (required) a regular expression that determines what you want to match.
// - MATCH_VALUE : (optional) set to "true", "yes", "ok", or "1" (case-insensitive) to check the record value.
// Any other value, or leaving it unset, checks the record key instead.
//
// Example environment variables:
// PATTERN=".*\\.edu$"
// MATCH_VALUE="true"
//
// Logs:
// This transform logs information about each record and whether it matched.
// The logs appear in the _redpanda.transform_logs topic, so you can debug how your records are being processed.
//
// Build instructions:
// go mod tidy
// rpk transform build
//
// For more details on building transforms with the Redpanda SDK, see:
// https://docs.redpanda.com/current/develop/data-transforms
//
import (
"log"
"os"
"regexp"
"strings"
"github.com/redpanda-data/redpanda/src/transform-sdk/go/transform"
)
var (
re *regexp.Regexp
checkValue bool
)
// isTrueVar reports whether v is one of the accepted truthy spellings
// ("yes", "ok", "1", or "true"). The comparison is case-insensitive.
func isTrueVar(v string) bool {
	lowered := strings.ToLower(v)
	return lowered == "yes" || lowered == "ok" || lowered == "1" || lowered == "true"
}
// main runs once when the transform starts and performs all initialization:
//   - configures the logger,
//   - reads and compiles the regex from the PATTERN environment variable,
//   - reads MATCH_VALUE to decide whether the record key or value is matched,
//   - registers doRegexFilter() as the per-record callback.
//
// Startup is aborted with a fatal log line when PATTERN is missing or is not
// a valid regular expression.
func main() {
	// Prefix every log line and include a UTC date/time with microseconds.
	log.SetPrefix("[regex-transform] ")
	log.SetFlags(log.Ldate | log.Ltime | log.LUTC | log.Lmicroseconds)
	log.Println("Starting transform...")

	// PATTERN is required: without it there is nothing to filter on.
	pattern, ok := os.LookupEnv("PATTERN")
	if !ok {
		log.Fatal("Missing PATTERN environment variable")
	}
	log.Printf("Using PATTERN: %q\n", pattern)

	// Use Compile rather than MustCompile: the pattern is user-supplied, so a
	// syntax error should surface as a readable fatal log line, not a panic.
	compiled, err := regexp.Compile(pattern)
	if err != nil {
		log.Fatalf("Invalid PATTERN %q: %v", pattern, err)
	}
	re = compiled

	// MATCH_VALUE is optional; when unset or not a truthy spelling, keys are matched.
	mk, ok := os.LookupEnv("MATCH_VALUE")
	checkValue = ok && isTrueVar(mk)
	log.Printf("MATCH_VALUE set to: %t\n", checkValue)

	log.Println("Initialization complete, waiting for records...")

	// Register doRegexFilter() to be called for each record that is written.
	transform.OnRecordWritten(doRegexFilter)
}
// doRegexFilter is invoked for every record that is written. Depending on
// checkValue (derived from MATCH_VALUE) it tests either the record's value or
// its key against the compiled regex. Matching records are written to the
// output; all others, including records whose checked field is nil, are dropped.
func doRegexFilter(e transform.WriteEvent, w transform.RecordWriter) error {
	rec := e.Record()

	// Select the field to test and log which one was chosen.
	candidate := rec.Key
	if checkValue {
		candidate = rec.Value
		log.Printf("Checking record value: %s\n", string(candidate))
	} else {
		log.Printf("Checking record key: %s\n", string(candidate))
	}

	// Nothing to match against: drop the record without reporting an error.
	if candidate == nil {
		log.Println("Record has no key/value to check, skipping.")
		return nil
	}

	// Non-matching records are dropped by returning without writing them.
	if !re.Match(candidate) {
		log.Printf("Record did not match pattern, dropping. Key: %s, Value: %s\n", string(rec.Key), string(rec.Value))
		return nil
	}

	// The record matched: log it and forward it unchanged.
	log.Printf("Record matched pattern, passing through. Key: %s, Value: %s\n", string(rec.Key), string(rec.Value))
	return w.Write(rec)
}

View File

@@ -0,0 +1,33 @@
# Transform metadata used by the rpk transform build command.
# This metadata file tells rpk:
# 1) The transform's display name, which also becomes the base for the .wasm file name.
# 2) A brief description of what it does.
# 3) Defaults for environment variables.
# 4) Input and output topics (if you want to define them here rather than in the deploy command).
# Human-readable name of the transform. rpk transform build uses this for the generated .wasm file.
name: regex
description: |
  Filters the input topic to records that only match a regular expression.
  Regular expressions are implemented using Go's regexp library, which uses the syntax of RE2.
  See the RE2 wiki for allowed syntax: https://github.com/google/re2/wiki/Syntax
  Environment variables:
  - PATTERN: The regular expression that will match against records (required).
  - MATCH_VALUE: By default, the regex matches keys, but if set to "true", the regex matches values.
# By default, no input topic is set here. (You can set it in your deploy command if preferred.)
input-topic: ""
# By default, no output topic is set here. (You can set it in your deploy command if preferred.)
output-topic: ""
# Indicates the specific TinyGo environment used to compile your transform.
language: tinygo-no-goroutines
env:
  # The PATTERN variable must be provided at deploy time.
  # Example: --var=PATTERN=".*@example.com"
  PATTERN: '<required>'

View File

@@ -0,0 +1,24 @@
# This file configures `rpk` to connect to a remote Redpanda cluster running in the same local network as `rpk`.
# Configuration for connecting to the Kafka API of the Redpanda cluster.
kafka_api:
  # SASL (Simple Authentication and Security Layer) settings for authentication.
  # NOTE: these credentials are stored in plaintext; avoid reusing them outside a local evaluation setup.
  sasl:
    user: superuser  # The username used for authentication
    password: secretpassword  # The password associated with the username
    mechanism: scram-sha-256  # Authentication mechanism; SCRAM-SHA-256 provides secure password-based authentication
  # List of Kafka brokers in the Redpanda cluster.
  # These brokers ensure high availability and fault tolerance for Kafka-based communication.
  brokers:
    - 127.0.0.1:19092  # Broker 1: Accessible on localhost, port 19092
    - 127.0.0.1:29092  # Broker 2: Accessible on localhost, port 29092
    - 127.0.0.1:39092  # Broker 3: Accessible on localhost, port 39092
# Configuration for connecting to the Redpanda Admin API.
# The Admin API allows you to perform administrative tasks such as managing configurations, monitoring, and scaling.
admin_api:
  # List of Admin API endpoints for managing the cluster.
  addresses:
    - 127.0.0.1:19644  # Admin API for Broker 1: Accessible on localhost, port 19644
    - 127.0.0.1:29644  # Admin API for Broker 2: Accessible on localhost, port 29644
    - 127.0.0.1:39644  # Admin API for Broker 3: Accessible on localhost, port 39644

View File

@@ -0,0 +1,37 @@
{
"$schema": "http://json-schema.org/draft-07/schema#",
"title": "Transactions",
"type": "object",
"properties": {
"email": {
"type": "string",
"format": "email",
"description": "The email address of the user involved in the transaction."
},
"index": {
"type": "integer",
"description": "A numeric index associated with the transaction."
},
"price": {
"type": "string",
"pattern": "^XXX \\d+\\.\\d{6}$",
"description": "A string representing the price of the product, including a currency code followed by the amount."
},
"product_url": {
"type": "string",
"format": "uri",
"description": "A URL that points to the product involved in the transaction."
},
"timestamp": {
"type": "string",
"format": "date-time",
"description": "The timestamp of when the transaction occurred, formatted in ISO 8601."
},
"user_id": {
"type": "integer",
"description": "A numeric identifier for the user."
}
},
"required": ["email", "index", "price", "product_url", "timestamp", "user_id"],
"additionalProperties": false
}

View File

@@ -0,0 +1,46 @@
# Transactions Topic Documentation
This document provides an overview of the `transactions` topic in the Redpanda cluster. The topic is designed to capture autogenerated transaction events with various attributes.
## Schema Overview
Each message in the `transactions` topic is a JSON object with the following fields and types:
```json
{
"email": "string",
"index": "integer",
"price": "string",
"product_url": "string",
"timestamp": "string",
"user_id": "integer"
}
```
- **email**: The email address of the user involved in the transaction.
- **index**: A numeric index associated with the transaction. This could represent the position or order of the transaction in a sequence.
- **price**: A string representing the price of the product. It includes a currency code (e.g., "XXX") followed by the amount.
- **product_url**: A URL that points to the product involved in the transaction.
- **timestamp**: The timestamp of when the transaction occurred, formatted in ISO 8601.
- **user_id**: A numeric identifier for the user. This is typically a unique ID assigned to each user in the system.
## Example message
```json
{
"email": "wzieme@ykczius.edu",
"index": 0,
"price": "XXX 5651308.100000",
"product_url": "http://yjomdta.top/DxvGsCn.php",
"timestamp": "2024-08-16T15:51:19.799474084Z",
"user_id": 1
}
```
## Use cases
You can use the `transactions` topic for various purposes, including:
- **Analytics**: Tracking and analyzing user transactions to understand buying behavior, popular products, etc.
- **Monitoring**: Observing transaction patterns to detect anomalies, such as unusual spikes or drops in transaction volume.
- **Data Processing**: Feeding transaction data into other systems, such as a data warehouse or real-time processing pipelines, for further processing and analysis.

View File

@@ -0,0 +1,73 @@
= Modify the Wasm Transform in the Quickstart
This directory contains the Go source code (`transform.go`) for the data transform that is used in the Redpanda Self-Managed quickstart.
If you're following the quickstart, you *do not* need to modify or rebuild this code. The Docker Compose configuration automatically deploys a pre-built transform called `regex.wasm`.
However, if you want to customize the data transform logic, continue reading.
== Why customize the transform?
- **Custom filtering**: Filter by a different regex or apply multiple conditions.
- **Data manipulation**: Transform records before writing them out. For example, redacting sensitive data or combining fields.
- **Extended functionality**: Add advanced logging, error handling, or multi-topic routing.
== Prerequisites
You need the following:
- At least Go 1.20 installed.
+
[source,bash]
----
go version
----
- The Redpanda CLI (`rpk`) installed.
- A running Redpanda cluster. If you're using the local quickstart with Docker Compose, ensure the cluster is up and running. Or, point `rpk` to another Redpanda environment.
== Modify and deploy your transform
. Open link:transform.go[transform.go] and make your changes. For example:
+
--
- Change the regex logic to handle different use cases.
- Add environment variables to control new features.
- Extend the `doRegexFilter()` function to manipulate records.
--
. Compile your Go code into a `.wasm` file:
+
[source,bash]
----
rpk transform build
----
+
This command compiles your Go source and produces a `.wasm` file that you can deploy to Redpanda.
. Deploy the new transform.
+
If your Docker Compose setup already has a service to deploy the transform, you can restart that service.
+
Otherwise, you can deploy your updated `.wasm` manually using `rpk transform deploy`.
. Produce messages into the input topic. For example:
+
[source,bash]
----
echo '{"key":"alice@university.edu","value":"test message"}' | rpk topic produce logins
----
. Consume from the output topic. For example:
+
[source,bash]
----
rpk topic consume edu-filtered-domains --num 1
----
== Suggested reading
- link:https://docs.redpanda.com/current/reference/rpk/[Redpanda `rpk` CLI Reference^].
- link:https://docs.redpanda.com/current/develop/data-transforms/build/[Develop Data Transforms^].
- https://golang.org/ref/mod[Go Modules^] for managing dependencies and builds in Go.
- https://docs.docker.com/compose/[Docker Compose^] for customizing your environment.

View File

@@ -0,0 +1,5 @@
module regex
go 1.20
require github.com/redpanda-data/redpanda/src/transform-sdk/go/transform v1.1.0

View File

@@ -0,0 +1,2 @@
github.com/redpanda-data/redpanda/src/transform-sdk/go/transform v1.1.0 h1:KxgHJZsHsrT3YX7DMpu/vJN4TZN3KFm1jzrCFLyOepA=
github.com/redpanda-data/redpanda/src/transform-sdk/go/transform v1.1.0/go.mod h1:QGgiwwf/BIsD1b7EiyQ/Apzw+RLSpasRDdpOCiefQFQ=

View File

@@ -0,0 +1,122 @@
package main
// This data transform filters records based on a customizable regex pattern.
// If a record's key or value
// (determined by an environment variable) matches the specified regex,
// the record is forwarded to the output.
// Otherwise, it is dropped.
//
// Usage:
// 1. Provide the following environment variables in your Docker or configuration setup:
// - PATTERN : (required) a regular expression that determines what you want to match.
// - MATCH_VALUE : (optional) set to "true", "yes", "ok", or "1" (case-insensitive) to check the record value.
// Any other value, or leaving it unset, checks the record key instead.
//
// Example environment variables:
// PATTERN=".*\\.edu$"
// MATCH_VALUE="true"
//
// Logs:
// This transform logs information about each record and whether it matched.
// The logs appear in the _redpanda.transform_logs topic, so you can debug how your records are being processed.
//
// Build instructions:
// go mod tidy
// rpk transform build
//
// For more details on building transforms with the Redpanda SDK, see:
// https://docs.redpanda.com/current/develop/data-transforms
//
import (
"log"
"os"
"regexp"
"strings"
"github.com/redpanda-data/redpanda/src/transform-sdk/go/transform"
)
var (
re *regexp.Regexp
checkValue bool
)
// isTrueVar reports whether v is one of the accepted truthy spellings
// ("yes", "ok", "1", or "true"). The comparison is case-insensitive.
func isTrueVar(v string) bool {
	lowered := strings.ToLower(v)
	return lowered == "yes" || lowered == "ok" || lowered == "1" || lowered == "true"
}
// main runs once when the transform starts and performs all initialization:
//   - configures the logger,
//   - reads and compiles the regex from the PATTERN environment variable,
//   - reads MATCH_VALUE to decide whether the record key or value is matched,
//   - registers doRegexFilter() as the per-record callback.
//
// Startup is aborted with a fatal log line when PATTERN is missing or is not
// a valid regular expression.
func main() {
	// Prefix every log line and include a UTC date/time with microseconds.
	log.SetPrefix("[regex-transform] ")
	log.SetFlags(log.Ldate | log.Ltime | log.LUTC | log.Lmicroseconds)
	log.Println("Starting transform...")

	// PATTERN is required: without it there is nothing to filter on.
	pattern, ok := os.LookupEnv("PATTERN")
	if !ok {
		log.Fatal("Missing PATTERN environment variable")
	}
	log.Printf("Using PATTERN: %q\n", pattern)

	// Use Compile rather than MustCompile: the pattern is user-supplied, so a
	// syntax error should surface as a readable fatal log line, not a panic.
	compiled, err := regexp.Compile(pattern)
	if err != nil {
		log.Fatalf("Invalid PATTERN %q: %v", pattern, err)
	}
	re = compiled

	// MATCH_VALUE is optional; when unset or not a truthy spelling, keys are matched.
	mk, ok := os.LookupEnv("MATCH_VALUE")
	checkValue = ok && isTrueVar(mk)
	log.Printf("MATCH_VALUE set to: %t\n", checkValue)

	log.Println("Initialization complete, waiting for records...")

	// Register doRegexFilter() to be called for each record that is written.
	transform.OnRecordWritten(doRegexFilter)
}
// doRegexFilter is invoked for every record that is written. Depending on
// checkValue (derived from MATCH_VALUE) it tests either the record's value or
// its key against the compiled regex. Matching records are written to the
// output; all others, including records whose checked field is nil, are dropped.
func doRegexFilter(e transform.WriteEvent, w transform.RecordWriter) error {
	rec := e.Record()

	// Select the field to test and log which one was chosen.
	candidate := rec.Key
	if checkValue {
		candidate = rec.Value
		log.Printf("Checking record value: %s\n", string(candidate))
	} else {
		log.Printf("Checking record key: %s\n", string(candidate))
	}

	// Nothing to match against: drop the record without reporting an error.
	if candidate == nil {
		log.Println("Record has no key/value to check, skipping.")
		return nil
	}

	// Non-matching records are dropped by returning without writing them.
	if !re.Match(candidate) {
		log.Printf("Record did not match pattern, dropping. Key: %s, Value: %s\n", string(rec.Key), string(rec.Value))
		return nil
	}

	// The record matched: log it and forward it unchanged.
	log.Printf("Record matched pattern, passing through. Key: %s, Value: %s\n", string(rec.Key), string(rec.Value))
	return w.Write(rec)
}

View File

@@ -0,0 +1,33 @@
# Transform metadata used by the rpk transform build command.
# This metadata file tells rpk:
# 1) The transform's display name, which also becomes the base for the .wasm file name.
# 2) A brief description of what it does.
# 3) Defaults for environment variables.
# 4) Input and output topics (if you want to define them here rather than in the deploy command).
# Human-readable name of the transform. rpk transform build uses this for the generated .wasm file.
name: regex
description: |
  Filters the input topic to records that only match a regular expression.
  Regular expressions are implemented using Go's regexp library, which uses the syntax of RE2.
  See the RE2 wiki for allowed syntax: https://github.com/google/re2/wiki/Syntax
  Environment variables:
  - PATTERN: The regular expression that will match against records (required).
  - MATCH_VALUE: By default, the regex matches keys, but if set to "true", the regex matches values.
# By default, no input topic is set here. (You can set it in your deploy command if preferred.)
input-topic: ""
# By default, no output topic is set here. (You can set it in your deploy command if preferred.)
output-topic: ""
# Indicates the specific TinyGo environment used to compile your transform.
language: tinygo-no-goroutines
env:
  # The PATTERN variable must be provided at deploy time.
  # Example: --var=PATTERN=".*@example.com"
  PATTERN: '<required>'