diff --git a/docker-compose/bootstrap.yml b/docker-compose/bootstrap.yml new file mode 100644 index 0000000..00f3003 --- /dev/null +++ b/docker-compose/bootstrap.yml @@ -0,0 +1,56 @@ +# ================================================================= +# This file defines initial cluster properties for a Redpanda cluster. +# Some of these settings are intended for quickstart development and evaluation +# and are not suitable for production environments. +# +# For more information on bootstrap files, see: +# https://docs.redpanda.com/current/deploy/deployment-option/self-hosted/manual/production/production-deployment/#configure-a-bootstrap-file +# ================================================================= + +# +# Enable SASL authentication for the Kafka and Admin APIs. +# https://docs.redpanda.com/current/reference/properties/cluster-properties/#admin_api_require_auth +admin_api_require_auth: true +# At least one superuser is required to be able to create other SASL users +# https://docs.redpanda.com/current/reference/properties/cluster-properties/#superusers +superusers: + - superuser +# https://docs.redpanda.com/current/reference/properties/cluster-properties/#enable_sasl +enable_sasl: true +# Allow topics to be created on first access. +# https://docs.redpanda.com/current/reference/properties/cluster-properties/#auto_create_topics_enabled +auto_create_topics_enabled: true +# Enable data transforms. +# https://docs.redpanda.com/current/develop/data-transforms/how-transforms-work/ +data_transforms_enabled: true +# Enable audit logging (enterprise feature). +# https://docs.redpanda.com/current/manage/audit-logging/ +audit_enabled: true +# Enable Tiered Storage (enterprise feature). 
+# https://docs.redpanda.com/current/manage/tiered-storage/ +cloud_storage_enabled: true +cloud_storage_region: local +cloud_storage_access_key: minio +cloud_storage_secret_key: redpandaTieredStorage7 +cloud_storage_api_endpoint: minio +cloud_storage_api_endpoint_port: 9000 +cloud_storage_disable_tls: true +cloud_storage_bucket: redpanda +# Forces segments to be uploaded to Tiered Storage faster for the purposes of the quickstart +# https://docs.redpanda.com/current/reference/properties/object-storage-properties/#cloud_storage_segment_max_upload_interval_sec +cloud_storage_segment_max_upload_interval_sec: 60 +# Continuous Data Balancing (enterprise feature) continuously monitors your node and rack availability and disk usage. This enables self-healing clusters that dynamically balance partitions, ensuring smooth operations and optimal cluster performance. +# https://docs.redpanda.com/current/manage/cluster-maintenance/continuous-data-balancing/ +partition_autobalancing_mode: continuous +# Enable Redpanda to collect consumer group metrics. +# https://docs.redpanda.com/current/reference/properties/cluster-properties/#enable_consumer_group_metrics +enable_consumer_group_metrics: + - "group" + - "partition" + - "consumer_lag" +# Lower the interval for the quickstart +# https://docs.redpanda.com/current/reference/properties/cluster-properties/#consumer_group_lag_collection_interval_sec +consumer_group_lag_collection_interval_sec: 60 +# Enable Redpanda to collect host metrics. 
+# https://docs.redpanda.com/current/reference/properties/cluster-properties/#enable_host_metrics +enable_host_metrics: true \ No newline at end of file diff --git a/docker-compose/docker-compose.yml b/docker-compose/docker-compose.yml new file mode 100644 index 0000000..9b636cb --- /dev/null +++ b/docker-compose/docker-compose.yml @@ -0,0 +1,403 @@ +name: redpanda-quickstart-multi-broker +networks: + redpanda_network: + driver: bridge +volumes: + redpanda-0: null + redpanda-1: null + redpanda-2: null + minio: null +services: + ################## + # Redpanda Brokers # + ################## + redpanda-0: + command: + - redpanda + - start + - --kafka-addr internal://0.0.0.0:9092,external://0.0.0.0:19092 + # Address the broker advertises to clients that connect to the Kafka API. + # Use the internal addresses to connect to the Redpanda brokers + # from inside the same Docker network. + # Use the external addresses to connect to the Redpanda brokers + # from outside the Docker network. + - --advertise-kafka-addr internal://redpanda-0:9092,external://localhost:19092 + - --pandaproxy-addr internal://0.0.0.0:8082,external://0.0.0.0:18082 + # Address the broker advertises to clients that connect to the HTTP Proxy. + - --advertise-pandaproxy-addr internal://redpanda-0:8082,external://localhost:18082 + - --schema-registry-addr internal://0.0.0.0:8081,external://0.0.0.0:18081 + # Redpanda brokers use the RPC API to communicate with each other internally. + - --rpc-addr redpanda-0:33145 + - --advertise-rpc-addr redpanda-0:33145 + # Mode dev-container uses well-known configuration properties for development in containers. + - --mode dev-container + # Tells Seastar (the framework Redpanda uses under the hood) to use 1 core on the system. 
+ - --smp 1 + - --default-log-level=info + image: docker.redpanda.com/redpandadata/redpanda:v25.2.7 + container_name: redpanda-0 + # Sets the username and password of the bootstrap SCRAM superuser + # See https://docs.redpanda.com/current/deploy/deployment-option/self-hosted/manual/production/production-deployment/#bootstrap-a-user-account + environment: + RP_BOOTSTRAP_USER: "superuser:secretpassword" + volumes: + - redpanda-0:/var/lib/redpanda/data + - ./bootstrap.yml:/etc/redpanda/.bootstrap.yaml + networks: + - redpanda_network + ports: + - 18081:18081 + - 18082:18082 + - 19092:19092 + - 19644:9644 + healthcheck: + test: ["CMD", "rpk", "cluster", "info", "-X", "user=superuser", "-X", "pass=secretpassword"] + interval: 10s + timeout: 15s + retries: 10 + depends_on: + minio: + condition: service_healthy + redpanda-1: + command: + - redpanda + - start + - --kafka-addr internal://0.0.0.0:9092,external://0.0.0.0:29092 + - --advertise-kafka-addr internal://redpanda-1:9092,external://localhost:29092 + - --pandaproxy-addr internal://0.0.0.0:8082,external://0.0.0.0:28082 + - --advertise-pandaproxy-addr internal://redpanda-1:8082,external://localhost:28082 + - --schema-registry-addr internal://0.0.0.0:8081,external://0.0.0.0:28081 + - --rpc-addr redpanda-1:33145 + - --advertise-rpc-addr redpanda-1:33145 + - --mode dev-container + - --smp 1 + - --default-log-level=info + - --seeds redpanda-0:33145 + image: docker.redpanda.com/redpandadata/redpanda:v25.2.7 + container_name: redpanda-1 + environment: + RP_BOOTSTRAP_USER: "superuser:secretpassword" + volumes: + - redpanda-1:/var/lib/redpanda/data + - ./bootstrap.yml:/etc/redpanda/.bootstrap.yaml + networks: + - redpanda_network + ports: + - 28081:28081 + - 28082:28082 + - 29092:29092 + - 29644:9644 + depends_on: + - redpanda-0 + - minio + redpanda-2: + command: + - redpanda + - start + - --kafka-addr internal://0.0.0.0:9092,external://0.0.0.0:39092 + - --advertise-kafka-addr 
internal://redpanda-2:9092,external://localhost:39092 + - --pandaproxy-addr internal://0.0.0.0:8082,external://0.0.0.0:38082 + - --advertise-pandaproxy-addr internal://redpanda-2:8082,external://localhost:38082 + - --schema-registry-addr internal://0.0.0.0:8081,external://0.0.0.0:38081 + - --rpc-addr redpanda-2:33145 + - --advertise-rpc-addr redpanda-2:33145 + - --mode dev-container + - --smp 1 + - --default-log-level=info + - --seeds redpanda-0:33145 + image: docker.redpanda.com/redpandadata/redpanda:v25.2.7 + container_name: redpanda-2 + environment: + RP_BOOTSTRAP_USER: "superuser:secretpassword" + volumes: + - redpanda-2:/var/lib/redpanda/data + - ./bootstrap.yml:/etc/redpanda/.bootstrap.yaml + networks: + - redpanda_network + ports: + - 38081:38081 + - 38082:38082 + - 39092:39092 + - 39644:9644 + depends_on: + - redpanda-0 + - minio + #################### + # Redpanda Console # + #################### + console: + container_name: redpanda-console + image: docker.redpanda.com/redpandadata/console:v3.2.2 + networks: + - redpanda_network + entrypoint: /bin/sh + command: -c 'echo "$$CONSOLE_CONFIG_FILE" > /tmp/config.yml && /app/console' + volumes: + - ./config:/tmp/config/ + environment: + CONFIG_FILEPATH: ${CONFIG_FILEPATH:-/tmp/config.yml} + CONSOLE_CONFIG_FILE: | + # Configure a connection to the Redpanda cluster + # See https://docs.redpanda.com/current/console/config/connect-to-redpanda/ + kafka: + brokers: ["redpanda-0:9092"] + sasl: + enabled: true + impersonateUser: true + schemaRegistry: + enabled: true + urls: ["http://redpanda-0:8081","http://redpanda-1:8081","http://redpanda-2:8081"] + authentication: + impersonateUser: true + redpanda: + adminApi: + enabled: true + urls: ["http://redpanda-0:9644","http://redpanda-1:9644","http://redpanda-2:9644"] + authentication: + basic: + username: superuser + password: secretpassword + impersonateUser: false + console: + # Configures Redpanda Console to fetch topic documentation from GitHub and display it in the 
UI. + # See https://docs.redpanda.com/current/console/config/topic-documentation/ + topicDocumentation: + enabled: true + git: + enabled: true + repository: + url: https://github.com/redpanda-data/docs + branch: main + baseDirectory: tests/docker-compose + authentication: + jwtSigningKey: vazxnT+ZHtxKslK6QlDGovcYnSjTk/lKMmZ+mHrBVE+YdVDkLgSuP6AszAKe9Gvq + basic: + enabled: true + authorization: + roleBindings: + - roleName: admin + users: + - loginType: basic + name: superuser + ports: + - 8080:8080 + depends_on: + redpanda-0: + condition: service_healthy + createtopic: + condition: service_completed_successfully + registerschema: + condition: service_completed_successfully + deploytransform: + condition: service_completed_successfully + #################### + # Redpanda Connect # + #################### + connect: + container_name: redpanda-connect + image: docker.redpanda.com/redpandadata/connect + networks: + - redpanda_network + entrypoint: /bin/sh + depends_on: + redpanda-0: + condition: service_healthy + command: -c 'echo "$$CONNECT_CFG_FILE" > /tmp/connect.yml; /redpanda-connect -c /tmp/connect.yml' + environment: + # This Redpanda Connect configuration creates fake data, + # processes it, and writes the output to a set of topics. + # + # Input: + # - Uses Redpanda Connect's generate input to generate fake data. + # See https://docs.redpanda.com/redpanda-connect/components/inputs/generate/ + # Pipeline: + # - Bloblang mapping to batch each input and map 1 message to 'logins' + # topic, and a random number (1-3) of messages to 'transactions' topic + # - Unarchive processor to parse the JSON array and extract each + # element into its own message. + # See https://docs.redpanda.com/redpanda-connect/guides/bloblang/about/ + # Output: + # - kafka_franz output to write the messages to the Redpanda brokers. 
+ # See https://docs.redpanda.com/redpanda-connect/components/outputs/kafka_franz/ + CONNECT_CFG_FILE: | + input: + generate: + interval: 1s + mapping: | + let first_name = fake("first_name") + let last_name = fake("last_name") + + root.user_id = counter() + root.name = $$first_name + " " + $$last_name + root.email = ($$first_name.slice(0,1) + $$last_name + "@" + fake("domain_name")).lowercase() + root.ip = fake("ipv4") + root.login_time = now() + pipeline: + processors: + - mapping: | + root = range(0, random_int(min:2, max:4)).map_each(cust -> this) + - unarchive: + format: "json_array" + - mapping: | + if batch_index() == 0 { + meta topic = "logins" + root = this + } else { + meta topic = "transactions" + root.user_id = this.user_id + root.email = this.email + root.index = batch_index() - 1 + root.product_url = fake("url") + root.price = fake("amount_with_currency") + root.timestamp = now() + } + output: + kafka_franz: + seed_brokers: [ "redpanda-0:9092" ] + topic: $${! metadata("topic") } + sasl: + - mechanism: SCRAM-SHA-256 + password: secretpassword + username: superuser + #################### + # rpk container to create the edu-filtered-domains topic # + # See https://docs.redpanda.com/current/reference/rpk/rpk-topic/rpk-topic-create/ + #################### + createtopic: + command: + - topic + - create + - edu-filtered-domains + - -X user=superuser + - -X pass=secretpassword + - -X brokers=redpanda-0:9092 + image: docker.redpanda.com/redpandadata/redpanda:v25.2.7 + networks: + - redpanda_network + depends_on: + redpanda-0: + condition: service_healthy + #################### + # rpk container to register the schema # + # See https://docs.redpanda.com/current/manage/schema-reg/schema-reg-api/ + #################### + registerschema: + command: + - registry + - schema + - create + - transactions + - --schema + - /etc/redpanda/transactions-schema.json + - -X user=superuser + - -X pass=secretpassword + - -X registry.hosts=redpanda-0:8081 + image: 
docker.redpanda.com/redpandadata/redpanda:v25.2.7 + # Mount the local directory that contains your schema to the container. + volumes: + - ./transactions-schema.json:/etc/redpanda/transactions-schema.json + networks: + - redpanda_network + depends_on: + redpanda-0: + condition: service_healthy + #################### + # rpk container to deploy a consumer group # + # See https://docs.redpanda.com/current/reference/rpk/rpk-topic/rpk-topic-consume/ + #################### + consumergroup: + command: + - topic + - consume + - transactions + - --group + - transactions-consumer + - -X user=superuser + - -X pass=secretpassword + - -X brokers=redpanda-0:9092 + image: docker.redpanda.com/redpandadata/redpanda:v25.2.7 + networks: + - redpanda_network + depends_on: + createtopic: + condition: service_completed_successfully + deploytransform: + condition: service_completed_successfully + #################### + # rpk container to deploy the pre-built data transform # + # See https://docs.redpanda.com/current/develop/data-transforms/deploy/ + #################### + deploytransform: + command: + - transform + - deploy + - --file=/etc/redpanda/regex.wasm + - --name=regex + - --input-topic=logins + - --output-topic=edu-filtered-domains + - --var=PATTERN="[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.edu" + - --var=MATCH_VALUE=true + - -X user=superuser + - -X pass=secretpassword + - -X admin.hosts=redpanda-0:9644 + image: docker.redpanda.com/redpandadata/redpanda:v25.2.7 + volumes: + - ./transform/regex.wasm:/etc/redpanda/regex.wasm + networks: + - redpanda_network + depends_on: + createtopic: + condition: service_completed_successfully + #################### + # MinIO for Tiered Storage # + # See https://min.io/ + # + # NOTE: MinIO is included in this quickstart for development and evaluation purposes only. + # It is not supported for production deployments of Redpanda. 
+ # + # For production environments, use one of the supported object storage providers: + # https://docs.redpanda.com/current/deploy/deployment-option/self-hosted/manual/production/requirements/#object-storage-providers-for-tiered-storage + #################### + minio: + container_name: minio + image: minio/minio:RELEASE.2025-05-24T17-08-30Z + command: server --console-address ":9001" /data + ports: + - 9000:9000 + - 9001:9001 + environment: + MINIO_ROOT_USER: minio + MINIO_ROOT_PASSWORD: redpandaTieredStorage7 + MINIO_SERVER_URL: "http://minio:9000" + MINIO_REGION_NAME: local + MINIO_DOMAIN: minio + volumes: + - minio:/data + networks: + redpanda_network: + aliases: + - redpanda.minio + healthcheck: + test: ["CMD", "curl", "-f", "http://localhost:9000/minio/health/ready"] + interval: 10s + timeout: 5s + retries: 3 + mc: + depends_on: + minio: + condition: service_healthy + image: minio/mc:RELEASE.2025-05-21T01-59-54Z + container_name: mc + networks: + - redpanda_network + environment: + - AWS_ACCESS_KEY_ID=minio + - AWS_SECRET_ACCESS_KEY=redpandaTieredStorage7 + - AWS_REGION=local + entrypoint: > + /bin/sh -c " + until (/usr/bin/mc alias set minio http://minio:9000 minio redpandaTieredStorage7) do echo '...waiting...' && sleep 1; done; + /usr/bin/mc mb minio/redpanda; + /usr/bin/mc anonymous set public minio/redpanda; + tail -f /dev/null + " diff --git a/docker-compose/generate-profiles.yaml b/docker-compose/generate-profiles.yaml new file mode 100644 index 0000000..baf9a6f --- /dev/null +++ b/docker-compose/generate-profiles.yaml @@ -0,0 +1,77 @@ +input: + # Use the 'generate' input + # https://docs.redpanda.com/redpanda-connect/components/inputs/generate/ + generate: + # The interval at which new records are generated. + interval: 1s + # The mapping section defines how each generated record is structured. + # The language used here is called Bloblang. 
+ # https://docs.redpanda.com/redpanda-connect/guides/bloblang/about/ + mapping: | + # Generate a fake first name using the 'first_name' faker function. + let first_name = fake("first_name") + + # Generate a fake last name using the 'last_name' faker function. + let last_name = fake("last_name") + + # Define possible subscription levels for users. + let subscription_levels = ["Free", "Basic", "Premium"] + + # Define possible notification channels for user preferences. + let notifications = ["email", "sms", "push" ] + + # Define supported languages for user preferences. + let languages = ["en", "es", "fr", "de", "zh", "jp"] + + # Assign a unique user ID using a UUID digit generator. + root.user_id = fake("uuid_digit") + + # Assign the generated first name to the 'first_name' field. + root.first_name = $first_name + + # Assign the generated last name to the 'last_name' field. + root.last_name = $last_name + + # Construct the user's email by combining the first initial, last name, and a fake domain name. + # The email is converted to lowercase for consistency. + root.email = ($first_name.slice(0,1) + $last_name + "@" + fake("domain_name")).lowercase() + + # Assign a fake registration date using the 'date' faker function. + root.registration_date = fake("date") + + # Assign the current timestamp as the last login time. + root.last_login = now() + + # Randomly assign a subscription level by selecting an index from the 'subscription_levels' array. + root.subscription_level = $subscription_levels.index(random_int(min: 0, max: 2)) + + # Randomly assign a language preference by selecting an index from the 'languages' array. + root.preferences.language = $languages.index(random_int(min: 0, max: 5)) + + # Randomly assign a notification preference by selecting an index from the 'notifications' array. 
+ root.preferences.notifications = $notifications.index(random_int(min: 0, max: 2)) +pipeline: + processors: + - mapping: | + # Set the target topic for the generated records to 'profiles'. + meta topic = "profiles" + + # Assign the entire record (root) to be sent to the specified topic. + root = this +output: + # Use the 'kafka_franz' output to send the result back to Redpanda + # https://docs.redpanda.com/redpanda-connect/components/outputs/kafka_franz/ + kafka_franz: + # Define the list of seed brokers for the Kafka cluster. + seed_brokers: [ "localhost:19092", "localhost:29092", "localhost:39092"] + # Dynamically assign the topic based on the metadata specified in the processors. + # In this case, it resolves to the 'profiles' topic. + topic: ${! metadata("topic") } + # Configure SASL authentication to securely connect to the Kafka brokers. + sasl: + - # Specify the SASL mechanism to use for authentication. + mechanism: SCRAM-SHA-256 + # The password for the SASL authentication. + password: secretpassword + # The username for the SASL authentication. + username: superuser diff --git a/docker-compose/redpanda-quickstart/docker-compose/bootstrap.yml b/docker-compose/redpanda-quickstart/docker-compose/bootstrap.yml new file mode 100644 index 0000000..00f3003 --- /dev/null +++ b/docker-compose/redpanda-quickstart/docker-compose/bootstrap.yml @@ -0,0 +1,56 @@ +# ================================================================= +# This file defines initial cluster properties for a Redpanda cluster. +# Some of these settings are intended for quickstart development and evaluation +# and are not suitable for production environments. +# +# For more information on bootstrap files, see: +# https://docs.redpanda.com/current/deploy/deployment-option/self-hosted/manual/production/production-deployment/#configure-a-bootstrap-file +# ================================================================= + +# +# Enable SASL authentication for the Kafka and Admin APIs. 
+# https://docs.redpanda.com/current/reference/properties/cluster-properties/#admin_api_require_auth +admin_api_require_auth: true +# At least one superuser is required to be able to create other SASL users +# https://docs.redpanda.com/current/reference/properties/cluster-properties/#superusers +superusers: + - superuser +# https://docs.redpanda.com/current/reference/properties/cluster-properties/#enable_sasl +enable_sasl: true +# Allow topics to be created on first access. +# https://docs.redpanda.com/current/reference/properties/cluster-properties/#auto_create_topics_enabled +auto_create_topics_enabled: true +# Enable data transforms. +# https://docs.redpanda.com/current/develop/data-transforms/how-transforms-work/ +data_transforms_enabled: true +# Enable audit logging (enterprise feature). +# https://docs.redpanda.com/current/manage/audit-logging/ +audit_enabled: true +# Enable Tiered Storage (enterprise feature). +# https://docs.redpanda.com/current/manage/tiered-storage/ +cloud_storage_enabled: true +cloud_storage_region: local +cloud_storage_access_key: minio +cloud_storage_secret_key: redpandaTieredStorage7 +cloud_storage_api_endpoint: minio +cloud_storage_api_endpoint_port: 9000 +cloud_storage_disable_tls: true +cloud_storage_bucket: redpanda +# Forces segments to be uploaded to Tiered Storage faster for the purposes of the quickstart +# https://docs.redpanda.com/current/reference/properties/object-storage-properties/#cloud_storage_segment_max_upload_interval_sec +cloud_storage_segment_max_upload_interval_sec: 60 +# Continuous Data Balancing (enterprise feature) continuously monitors your node and rack availability and disk usage. This enables self-healing clusters that dynamically balance partitions, ensuring smooth operations and optimal cluster performance. +# https://docs.redpanda.com/current/manage/cluster-maintenance/continuous-data-balancing/ +partition_autobalancing_mode: continuous +# Enable Redpanda to collect consumer group metrics. 
+# https://docs.redpanda.com/current/reference/properties/cluster-properties/#enable_consumer_group_metrics +enable_consumer_group_metrics: + - "group" + - "partition" + - "consumer_lag" +# Lower the interval for the quickstart +# https://docs.redpanda.com/current/reference/properties/cluster-properties/#consumer_group_lag_collection_interval_sec +consumer_group_lag_collection_interval_sec: 60 +# Enable Redpanda to collect host metrics. +# https://docs.redpanda.com/current/reference/properties/cluster-properties/#enable_host_metrics +enable_host_metrics: true \ No newline at end of file diff --git a/docker-compose/redpanda-quickstart/docker-compose/docker-compose.yml b/docker-compose/redpanda-quickstart/docker-compose/docker-compose.yml new file mode 100644 index 0000000..9b636cb --- /dev/null +++ b/docker-compose/redpanda-quickstart/docker-compose/docker-compose.yml @@ -0,0 +1,403 @@ +name: redpanda-quickstart-multi-broker +networks: + redpanda_network: + driver: bridge +volumes: + redpanda-0: null + redpanda-1: null + redpanda-2: null + minio: null +services: + ################## + # Redpanda Brokers # + ################## + redpanda-0: + command: + - redpanda + - start + - --kafka-addr internal://0.0.0.0:9092,external://0.0.0.0:19092 + # Address the broker advertises to clients that connect to the Kafka API. + # Use the internal addresses to connect to the Redpanda brokers + # from inside the same Docker network. + # Use the external addresses to connect to the Redpanda brokers + # from outside the Docker network. + - --advertise-kafka-addr internal://redpanda-0:9092,external://localhost:19092 + - --pandaproxy-addr internal://0.0.0.0:8082,external://0.0.0.0:18082 + # Address the broker advertises to clients that connect to the HTTP Proxy. 
+ - --advertise-pandaproxy-addr internal://redpanda-0:8082,external://localhost:18082 + - --schema-registry-addr internal://0.0.0.0:8081,external://0.0.0.0:18081 + # Redpanda brokers use the RPC API to communicate with each other internally. + - --rpc-addr redpanda-0:33145 + - --advertise-rpc-addr redpanda-0:33145 + # Mode dev-container uses well-known configuration properties for development in containers. + - --mode dev-container + # Tells Seastar (the framework Redpanda uses under the hood) to use 1 core on the system. + - --smp 1 + - --default-log-level=info + image: docker.redpanda.com/redpandadata/redpanda:v25.2.7 + container_name: redpanda-0 + # Sets the username and password of the bootstrap SCRAM superuser + # See https://docs.redpanda.com/current/deploy/deployment-option/self-hosted/manual/production/production-deployment/#bootstrap-a-user-account + environment: + RP_BOOTSTRAP_USER: "superuser:secretpassword" + volumes: + - redpanda-0:/var/lib/redpanda/data + - ./bootstrap.yml:/etc/redpanda/.bootstrap.yaml + networks: + - redpanda_network + ports: + - 18081:18081 + - 18082:18082 + - 19092:19092 + - 19644:9644 + healthcheck: + test: ["CMD", "rpk", "cluster", "info", "-X", "user=superuser", "-X", "pass=secretpassword"] + interval: 10s + timeout: 15s + retries: 10 + depends_on: + minio: + condition: service_healthy + redpanda-1: + command: + - redpanda + - start + - --kafka-addr internal://0.0.0.0:9092,external://0.0.0.0:29092 + - --advertise-kafka-addr internal://redpanda-1:9092,external://localhost:29092 + - --pandaproxy-addr internal://0.0.0.0:8082,external://0.0.0.0:28082 + - --advertise-pandaproxy-addr internal://redpanda-1:8082,external://localhost:28082 + - --schema-registry-addr internal://0.0.0.0:8081,external://0.0.0.0:28081 + - --rpc-addr redpanda-1:33145 + - --advertise-rpc-addr redpanda-1:33145 + - --mode dev-container + - --smp 1 + - --default-log-level=info + - --seeds redpanda-0:33145 + image: docker.redpanda.com/redpandadata/redpanda:v25.2.7 
+ container_name: redpanda-1 + environment: + RP_BOOTSTRAP_USER: "superuser:secretpassword" + volumes: + - redpanda-1:/var/lib/redpanda/data + - ./bootstrap.yml:/etc/redpanda/.bootstrap.yaml + networks: + - redpanda_network + ports: + - 28081:28081 + - 28082:28082 + - 29092:29092 + - 29644:9644 + depends_on: + - redpanda-0 + - minio + redpanda-2: + command: + - redpanda + - start + - --kafka-addr internal://0.0.0.0:9092,external://0.0.0.0:39092 + - --advertise-kafka-addr internal://redpanda-2:9092,external://localhost:39092 + - --pandaproxy-addr internal://0.0.0.0:8082,external://0.0.0.0:38082 + - --advertise-pandaproxy-addr internal://redpanda-2:8082,external://localhost:38082 + - --schema-registry-addr internal://0.0.0.0:8081,external://0.0.0.0:38081 + - --rpc-addr redpanda-2:33145 + - --advertise-rpc-addr redpanda-2:33145 + - --mode dev-container + - --smp 1 + - --default-log-level=info + - --seeds redpanda-0:33145 + image: docker.redpanda.com/redpandadata/redpanda:v25.2.7 + container_name: redpanda-2 + environment: + RP_BOOTSTRAP_USER: "superuser:secretpassword" + volumes: + - redpanda-2:/var/lib/redpanda/data + - ./bootstrap.yml:/etc/redpanda/.bootstrap.yaml + networks: + - redpanda_network + ports: + - 38081:38081 + - 38082:38082 + - 39092:39092 + - 39644:9644 + depends_on: + - redpanda-0 + - minio + #################### + # Redpanda Console # + #################### + console: + container_name: redpanda-console + image: docker.redpanda.com/redpandadata/console:v3.2.2 + networks: + - redpanda_network + entrypoint: /bin/sh + command: -c 'echo "$$CONSOLE_CONFIG_FILE" > /tmp/config.yml && /app/console' + volumes: + - ./config:/tmp/config/ + environment: + CONFIG_FILEPATH: ${CONFIG_FILEPATH:-/tmp/config.yml} + CONSOLE_CONFIG_FILE: | + # Configure a connection to the Redpanda cluster + # See https://docs.redpanda.com/current/console/config/connect-to-redpanda/ + kafka: + brokers: ["redpanda-0:9092"] + sasl: + enabled: true + impersonateUser: true + schemaRegistry: 
+ enabled: true + urls: ["http://redpanda-0:8081","http://redpanda-1:8081","http://redpanda-2:8081"] + authentication: + impersonateUser: true + redpanda: + adminApi: + enabled: true + urls: ["http://redpanda-0:9644","http://redpanda-1:9644","http://redpanda-2:9644"] + authentication: + basic: + username: superuser + password: secretpassword + impersonateUser: false + console: + # Configures Redpanda Console to fetch topic documentation from GitHub and display it in the UI. + # See https://docs.redpanda.com/current/console/config/topic-documentation/ + topicDocumentation: + enabled: true + git: + enabled: true + repository: + url: https://github.com/redpanda-data/docs + branch: main + baseDirectory: tests/docker-compose + authentication: + jwtSigningKey: vazxnT+ZHtxKslK6QlDGovcYnSjTk/lKMmZ+mHrBVE+YdVDkLgSuP6AszAKe9Gvq + basic: + enabled: true + authorization: + roleBindings: + - roleName: admin + users: + - loginType: basic + name: superuser + ports: + - 8080:8080 + depends_on: + redpanda-0: + condition: service_healthy + createtopic: + condition: service_completed_successfully + registerschema: + condition: service_completed_successfully + deploytransform: + condition: service_completed_successfully + #################### + # Redpanda Connect # + #################### + connect: + container_name: redpanda-connect + image: docker.redpanda.com/redpandadata/connect + networks: + - redpanda_network + entrypoint: /bin/sh + depends_on: + redpanda-0: + condition: service_healthy + command: -c 'echo "$$CONNECT_CFG_FILE" > /tmp/connect.yml; /redpanda-connect -c /tmp/connect.yml' + environment: + # This Redpanda Connect configuration creates fake data, + # processes it, and writes the output to a set of topics. + # + # Input: + # - Uses Redpanda Connect's generate input to generate fake data. 
+ # See https://docs.redpanda.com/redpanda-connect/components/inputs/generate/ + # Pipeline: + # - Bloblang mapping to batch each input and map 1 message to 'logins' + # topic, and a random number (1-3) of messages to 'transactions' topic + # - Unarchive processor to parse the JSON array and extract each + # element into its own message. + # See https://docs.redpanda.com/redpanda-connect/guides/bloblang/about/ + # Output: + # - kafka_franz output to write the messages to the Redpanda brokers. + # See https://docs.redpanda.com/redpanda-connect/components/outputs/kafka_franz/ + CONNECT_CFG_FILE: | + input: + generate: + interval: 1s + mapping: | + let first_name = fake("first_name") + let last_name = fake("last_name") + + root.user_id = counter() + root.name = $$first_name + " " + $$last_name + root.email = ($$first_name.slice(0,1) + $$last_name + "@" + fake("domain_name")).lowercase() + root.ip = fake("ipv4") + root.login_time = now() + pipeline: + processors: + - mapping: | + root = range(0, random_int(min:2, max:4)).map_each(cust -> this) + - unarchive: + format: "json_array" + - mapping: | + if batch_index() == 0 { + meta topic = "logins" + root = this + } else { + meta topic = "transactions" + root.user_id = this.user_id + root.email = this.email + root.index = batch_index() - 1 + root.product_url = fake("url") + root.price = fake("amount_with_currency") + root.timestamp = now() + } + output: + kafka_franz: + seed_brokers: [ "redpanda-0:9092" ] + topic: $${! 
metadata("topic") } + sasl: + - mechanism: SCRAM-SHA-256 + password: secretpassword + username: superuser + #################### + # rpk container to create the edu-filtered-domains topic # + # See https://docs.redpanda.com/current/reference/rpk/rpk-topic/rpk-topic-create/ + #################### + createtopic: + command: + - topic + - create + - edu-filtered-domains + - -X user=superuser + - -X pass=secretpassword + - -X brokers=redpanda-0:9092 + image: docker.redpanda.com/redpandadata/redpanda:v25.2.7 + networks: + - redpanda_network + depends_on: + redpanda-0: + condition: service_healthy + #################### + # rpk container to register the schema # + # See https://docs.redpanda.com/current/manage/schema-reg/schema-reg-api/ + #################### + registerschema: + command: + - registry + - schema + - create + - transactions + - --schema + - /etc/redpanda/transactions-schema.json + - -X user=superuser + - -X pass=secretpassword + - -X registry.hosts=redpanda-0:8081 + image: docker.redpanda.com/redpandadata/redpanda:v25.2.7 + # Mount the local directory that contains your schema to the container. 
+ volumes: + - ./transactions-schema.json:/etc/redpanda/transactions-schema.json + networks: + - redpanda_network + depends_on: + redpanda-0: + condition: service_healthy + #################### + # rpk container to deploy a consumer group # + # See https://docs.redpanda.com/current/reference/rpk/rpk-topic/rpk-topic-consume/ + #################### + consumergroup: + command: + - topic + - consume + - transactions + - --group + - transactions-consumer + - -X user=superuser + - -X pass=secretpassword + - -X brokers=redpanda-0:9092 + image: docker.redpanda.com/redpandadata/redpanda:v25.2.7 + networks: + - redpanda_network + depends_on: + createtopic: + condition: service_completed_successfully + deploytransform: + condition: service_completed_successfully + #################### + # rpk container to deploy the pre-built data transform # + # See https://docs.redpanda.com/current/develop/data-transforms/deploy/ + #################### + deploytransform: + command: + - transform + - deploy + - --file=/etc/redpanda/regex.wasm + - --name=regex + - --input-topic=logins + - --output-topic=edu-filtered-domains + - --var=PATTERN="[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.edu" + - --var=MATCH_VALUE=true + - -X user=superuser + - -X pass=secretpassword + - -X admin.hosts=redpanda-0:9644 + image: docker.redpanda.com/redpandadata/redpanda:v25.2.7 + volumes: + - ./transform/regex.wasm:/etc/redpanda/regex.wasm + networks: + - redpanda_network + depends_on: + createtopic: + condition: service_completed_successfully + #################### + # MinIO for Tiered Storage # + # See https://min.io/ + # + # NOTE: MinIO is included in this quickstart for development and evaluation purposes only. + # It is not supported for production deployments of Redpanda. 
+ # + # For production environments, use one of the supported object storage providers: + # https://docs.redpanda.com/current/deploy/deployment-option/self-hosted/manual/production/requirements/#object-storage-providers-for-tiered-storage + #################### + minio: + container_name: minio + image: minio/minio:RELEASE.2025-05-24T17-08-30Z + command: server --console-address ":9001" /data + ports: + - 9000:9000 + - 9001:9001 + environment: + MINIO_ROOT_USER: minio + MINIO_ROOT_PASSWORD: redpandaTieredStorage7 + MINIO_SERVER_URL: "http://minio:9000" + MINIO_REGION_NAME: local + MINIO_DOMAIN: minio + volumes: + - minio:/data + networks: + redpanda_network: + aliases: + - redpanda.minio + healthcheck: + test: ["CMD", "curl", "-f", "http://localhost:9000/minio/health/ready"] + interval: 10s + timeout: 5s + retries: 3 + mc: + depends_on: + minio: + condition: service_healthy + image: minio/mc:RELEASE.2025-05-21T01-59-54Z + container_name: mc + networks: + - redpanda_network + environment: + - AWS_ACCESS_KEY_ID=minio + - AWS_SECRET_ACCESS_KEY=redpandaTieredStorage7 + - AWS_REGION=local + entrypoint: > + /bin/sh -c " + until (/usr/bin/mc alias set minio http://minio:9000 minio redpandaTieredStorage7) do echo '...waiting...' && sleep 1; done; + /usr/bin/mc mb minio/redpanda; + /usr/bin/mc anonymous set public minio/redpanda; + tail -f /dev/null + " diff --git a/docker-compose/redpanda-quickstart/docker-compose/generate-profiles.yaml b/docker-compose/redpanda-quickstart/docker-compose/generate-profiles.yaml new file mode 100644 index 0000000..baf9a6f --- /dev/null +++ b/docker-compose/redpanda-quickstart/docker-compose/generate-profiles.yaml @@ -0,0 +1,77 @@ +input: + # Use the 'generate' input + # https://docs.redpanda.com/redpanda-connect/components/inputs/generate/ + generate: + # The interval at which new records are generated. + interval: 1s + # The mapping section defines how each generated record is structured. + # The language used here is called Bloblang.
+ # https://docs.redpanda.com/redpanda-connect/guides/bloblang/about/ + mapping: | + # Generate a fake first name using the 'first_name' faker function. + let first_name = fake("first_name") + + # Generate a fake last name using the 'last_name' faker function. + let last_name = fake("last_name") + + # Define possible subscription levels for users. + let subscription_levels = ["Free", "Basic", "Premium"] + + # Define possible notification channels for user preferences. + let notifications = ["email", "sms", "push" ] + + # Define supported languages for user preferences. + let languages = ["en", "es", "fr", "de", "zh", "jp"] + + # Assign a unique user ID using a UUID digit generator. + root.user_id = fake("uuid_digit") + + # Assign the generated first name to the 'first_name' field. + root.first_name = $first_name + + # Assign the generated last name to the 'last_name' field. + root.last_name = $last_name + + # Construct the user's email by combining the first initial, last name, and a fake domain name. + # The email is converted to lowercase for consistency. + root.email = ($first_name.slice(0,1) + $last_name + "@" + fake("domain_name")).lowercase() + + # Assign a fake registration date using the 'date' faker function. + root.registration_date = fake("date") + + # Assign the current timestamp as the last login time. + root.last_login = now() + + # Randomly assign a subscription level by selecting an index from the 'subscription_levels' array. + root.subscription_level = $subscription_levels.index(random_int(min: 0, max: 2)) + + # Randomly assign a language preference by selecting an index from the 'languages' array. + root.preferences.language = $languages.index(random_int(min: 0, max: 5)) + + # Randomly assign a notification preference by selecting an index from the 'notifications' array. 
+ root.preferences.notifications = $notifications.index(random_int(min: 0, max: 2)) +pipeline: + processors: + - mapping: | + # Set the target topic for the generated records to 'profiles'. + meta topic = "profiles" + + # Assign the entire record (root) to be sent to the specified topic. + root = this +output: + # Use the 'kafka_franz' output to send the result back to Redpanda + # https://docs.redpanda.com/redpanda-connect/components/outputs/kafka_franz/ + kafka_franz: + # Define the list of seed brokers for the Kafka cluster. + seed_brokers: [ "localhost:19092", "localhost:29092", "localhost:39092"] + # Dynamically assign the topic based on the metadata specified in the processors. + # In this case, it resolves to the 'profiles' topic. + topic: ${! metadata("topic") } + # Configure SASL authentication to securely connect to the Kafka brokers. + sasl: + - # Specify the SASL mechanism to use for authentication. + mechanism: SCRAM-SHA-256 + # The password for the SASL authentication. + password: secretpassword + # The username for the SASL authentication. + username: superuser diff --git a/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/bootstrap.yml b/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/bootstrap.yml new file mode 100644 index 0000000..00f3003 --- /dev/null +++ b/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/bootstrap.yml @@ -0,0 +1,56 @@ +# ================================================================= +# This file defines initial cluster properties for a Redpanda cluster. +# Some of these settings are intended for quickstart development and evaluation +# and are not suitable for production environments. 
+# +# For more information on bootstrap files, see: +# https://docs.redpanda.com/current/deploy/deployment-option/self-hosted/manual/production/production-deployment/#configure-a-bootstrap-file +# ================================================================= + +# +# Enable SASL authentication for the Kafka and Admin APIs. +# https://docs.redpanda.com/current/reference/properties/cluster-properties/#admin_api_require_auth +admin_api_require_auth: true +# At least one superuser is required to be able to create other SASL users +# https://docs.redpanda.com/current/reference/properties/cluster-properties/#superusers +superusers: + - superuser +# https://docs.redpanda.com/current/reference/properties/cluster-properties/#enable_sasl +enable_sasl: true +# Allow topics to be created on first access. +# https://docs.redpanda.com/current/reference/properties/cluster-properties/#auto_create_topics_enabled +auto_create_topics_enabled: true +# Enable data transforms. +# https://docs.redpanda.com/current/develop/data-transforms/how-transforms-work/ +data_transforms_enabled: true +# Enable audit logging (enterprise feature). +# https://docs.redpanda.com/current/manage/audit-logging/ +audit_enabled: true +# Enable Tiered Storage (enterprise feature). +# https://docs.redpanda.com/current/manage/tiered-storage/ +cloud_storage_enabled: true +cloud_storage_region: local +cloud_storage_access_key: minio +cloud_storage_secret_key: redpandaTieredStorage7 +cloud_storage_api_endpoint: minio +cloud_storage_api_endpoint_port: 9000 +cloud_storage_disable_tls: true +cloud_storage_bucket: redpanda +# Forces segments to be uploaded to Tiered Storage faster for the purposes of the quickstart +# https://docs.redpanda.com/current/reference/properties/object-storage-properties/#cloud_storage_segment_max_upload_interval_sec +cloud_storage_segment_max_upload_interval_sec: 60 +# Continuous Data Balancing (enterprise feature) continuously monitors your node and rack availability and disk usage. 
This enables self-healing clusters that dynamically balance partitions, ensuring smooth operations and optimal cluster performance. +# https://docs.redpanda.com/current/manage/cluster-maintenance/continuous-data-balancing/ +partition_autobalancing_mode: continuous +# Enable Redpanda to collect consumer group metrics. +# https://docs.redpanda.com/current/reference/properties/cluster-properties/#enable_consumer_group_metrics +enable_consumer_group_metrics: + - "group" + - "partition" + - "consumer_lag" +# Lower the interval for the quickstart +# https://docs.redpanda.com/current/reference/properties/cluster-properties/#consumer_group_lag_collection_interval_sec +consumer_group_lag_collection_interval_sec: 60 +# Enable Redpanda to collect host metrics. +# https://docs.redpanda.com/current/reference/properties/cluster-properties/#enable_host_metrics +enable_host_metrics: true \ No newline at end of file diff --git a/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/docker-compose.yml b/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/docker-compose.yml new file mode 100644 index 0000000..9b636cb --- /dev/null +++ b/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/docker-compose.yml @@ -0,0 +1,403 @@ +name: redpanda-quickstart-multi-broker +networks: + redpanda_network: + driver: bridge +volumes: + redpanda-0: null + redpanda-1: null + redpanda-2: null + minio: null +services: + ################## + # Redpanda Brokers # + ################## + redpanda-0: + command: + - redpanda + - start + - --kafka-addr internal://0.0.0.0:9092,external://0.0.0.0:19092 + # Address the broker advertises to clients that connect to the Kafka API. + # Use the internal addresses to connect to the Redpanda brokers + # from inside the same Docker network. + # Use the external addresses to connect to the Redpanda brokers + # from outside the Docker network. 
+ - --advertise-kafka-addr internal://redpanda-0:9092,external://localhost:19092 + - --pandaproxy-addr internal://0.0.0.0:8082,external://0.0.0.0:18082 + # Address the broker advertises to clients that connect to the HTTP Proxy. + - --advertise-pandaproxy-addr internal://redpanda-0:8082,external://localhost:18082 + - --schema-registry-addr internal://0.0.0.0:8081,external://0.0.0.0:18081 + # Redpanda brokers use the RPC API to communicate with each other internally. + - --rpc-addr redpanda-0:33145 + - --advertise-rpc-addr redpanda-0:33145 + # Mode dev-container uses well-known configuration properties for development in containers. + - --mode dev-container + # Tells Seastar (the framework Redpanda uses under the hood) to use 1 core on the system. + - --smp 1 + - --default-log-level=info + image: docker.redpanda.com/redpandadata/redpanda:v25.2.7 + container_name: redpanda-0 + # Sets the username and password of the bootstrap SCRAM superuser + # See https://docs.redpanda.com/current/deploy/deployment-option/self-hosted/manual/production/production-deployment/#bootstrap-a-user-account + environment: + RP_BOOTSTRAP_USER: "superuser:secretpassword" + volumes: + - redpanda-0:/var/lib/redpanda/data + - ./bootstrap.yml:/etc/redpanda/.bootstrap.yaml + networks: + - redpanda_network + ports: + - 18081:18081 + - 18082:18082 + - 19092:19092 + - 19644:9644 + healthcheck: + test: ["CMD", "rpk", "cluster", "info", "-X", "user=superuser", "-X", "pass=secretpassword"] + interval: 10s + timeout: 15s + retries: 10 + depends_on: + minio: + condition: service_healthy + redpanda-1: + command: + - redpanda + - start + - --kafka-addr internal://0.0.0.0:9092,external://0.0.0.0:29092 + - --advertise-kafka-addr internal://redpanda-1:9092,external://localhost:29092 + - --pandaproxy-addr internal://0.0.0.0:8082,external://0.0.0.0:28082 + - --advertise-pandaproxy-addr internal://redpanda-1:8082,external://localhost:28082 + - --schema-registry-addr 
internal://0.0.0.0:8081,external://0.0.0.0:28081 + - --rpc-addr redpanda-1:33145 + - --advertise-rpc-addr redpanda-1:33145 + - --mode dev-container + - --smp 1 + - --default-log-level=info + - --seeds redpanda-0:33145 + image: docker.redpanda.com/redpandadata/redpanda:v25.2.7 + container_name: redpanda-1 + environment: + RP_BOOTSTRAP_USER: "superuser:secretpassword" + volumes: + - redpanda-1:/var/lib/redpanda/data + - ./bootstrap.yml:/etc/redpanda/.bootstrap.yaml + networks: + - redpanda_network + ports: + - 28081:28081 + - 28082:28082 + - 29092:29092 + - 29644:9644 + depends_on: + - redpanda-0 + - minio + redpanda-2: + command: + - redpanda + - start + - --kafka-addr internal://0.0.0.0:9092,external://0.0.0.0:39092 + - --advertise-kafka-addr internal://redpanda-2:9092,external://localhost:39092 + - --pandaproxy-addr internal://0.0.0.0:8082,external://0.0.0.0:38082 + - --advertise-pandaproxy-addr internal://redpanda-2:8082,external://localhost:38082 + - --schema-registry-addr internal://0.0.0.0:8081,external://0.0.0.0:38081 + - --rpc-addr redpanda-2:33145 + - --advertise-rpc-addr redpanda-2:33145 + - --mode dev-container + - --smp 1 + - --default-log-level=info + - --seeds redpanda-0:33145 + image: docker.redpanda.com/redpandadata/redpanda:v25.2.7 + container_name: redpanda-2 + environment: + RP_BOOTSTRAP_USER: "superuser:secretpassword" + volumes: + - redpanda-2:/var/lib/redpanda/data + - ./bootstrap.yml:/etc/redpanda/.bootstrap.yaml + networks: + - redpanda_network + ports: + - 38081:38081 + - 38082:38082 + - 39092:39092 + - 39644:9644 + depends_on: + - redpanda-0 + - minio + #################### + # Redpanda Console # + #################### + console: + container_name: redpanda-console + image: docker.redpanda.com/redpandadata/console:v3.2.2 + networks: + - redpanda_network + entrypoint: /bin/sh + command: -c 'echo "$$CONSOLE_CONFIG_FILE" > /tmp/config.yml && /app/console' + volumes: + - ./config:/tmp/config/ + environment: + CONFIG_FILEPATH: 
${CONFIG_FILEPATH:-/tmp/config.yml} + CONSOLE_CONFIG_FILE: | + # Configure a connection to the Redpanda cluster + # See https://docs.redpanda.com/current/console/config/connect-to-redpanda/ + kafka: + brokers: ["redpanda-0:9092"] + sasl: + enabled: true + impersonateUser: true + schemaRegistry: + enabled: true + urls: ["http://redpanda-0:8081","http://redpanda-1:8081","http://redpanda-2:8081"] + authentication: + impersonateUser: true + redpanda: + adminApi: + enabled: true + urls: ["http://redpanda-0:9644","http://redpanda-1:9644","http://redpanda-2:9644"] + authentication: + basic: + username: superuser + password: secretpassword + impersonateUser: false + console: + # Configures Redpanda Console to fetch topic documentation from GitHub and display it in the UI. + # See https://docs.redpanda.com/current/console/config/topic-documentation/ + topicDocumentation: + enabled: true + git: + enabled: true + repository: + url: https://github.com/redpanda-data/docs + branch: main + baseDirectory: tests/docker-compose + authentication: + jwtSigningKey: vazxnT+ZHtxKslK6QlDGovcYnSjTk/lKMmZ+mHrBVE+YdVDkLgSuP6AszAKe9Gvq + basic: + enabled: true + authorization: + roleBindings: + - roleName: admin + users: + - loginType: basic + name: superuser + ports: + - 8080:8080 + depends_on: + redpanda-0: + condition: service_healthy + createtopic: + condition: service_completed_successfully + registerschema: + condition: service_completed_successfully + deploytransform: + condition: service_completed_successfully + #################### + # Redpanda Connect # + #################### + connect: + container_name: redpanda-connect + image: docker.redpanda.com/redpandadata/connect + networks: + - redpanda_network + entrypoint: /bin/sh + depends_on: + redpanda-0: + condition: service_healthy + command: -c 'echo "$$CONNECT_CFG_FILE" > /tmp/connect.yml; /redpanda-connect -c /tmp/connect.yml' + environment: + # This Redpanda Connect configuration creates fake data, + # processes it, and writes the 
output to a set of topics. + # + # Input: + # - Uses Redpanda Connect's generate input to generate fake data. + # See https://docs.redpanda.com/redpanda-connect/components/inputs/generate/ + # Pipeline: + # - Bloblang mapping to batch each input and map 1 message to 'logins' + # topic, and a random number (1-3) of messages to 'transactions' topic + # - Unarchive processor to parse the JSON array and extract each + # element into its own message. + # See https://docs.redpanda.com/redpanda-connect/guides/bloblang/about/ + # Output: + # - kafka_franz output to write the messages to the Redpanda brokers. + # See https://docs.redpanda.com/redpanda-connect/components/outputs/kafka_franz/ + CONNECT_CFG_FILE: | + input: + generate: + interval: 1s + mapping: | + let first_name = fake("first_name") + let last_name = fake("last_name") + + root.user_id = counter() + root.name = $$first_name + " " + $$last_name + root.email = ($$first_name.slice(0,1) + $$last_name + "@" + fake("domain_name")).lowercase() + root.ip = fake("ipv4") + root.login_time = now() + pipeline: + processors: + - mapping: | + root = range(0, random_int(min:2, max:4)).map_each(cust -> this) + - unarchive: + format: "json_array" + - mapping: | + if batch_index() == 0 { + meta topic = "logins" + root = this + } else { + meta topic = "transactions" + root.user_id = this.user_id + root.email = this.email + root.index = batch_index() - 1 + root.product_url = fake("url") + root.price = fake("amount_with_currency") + root.timestamp = now() + } + output: + kafka_franz: + seed_brokers: [ "redpanda-0:9092" ] + topic: $${!
metadata("topic") } + sasl: + - mechanism: SCRAM-SHA-256 + password: secretpassword + username: superuser + #################### + # rpk container to create the edu-filtered-domains topic # + # See https://docs.redpanda.com/current/reference/rpk/rpk-topic/rpk-topic-create/ + #################### + createtopic: + command: + - topic + - create + - edu-filtered-domains + - -X user=superuser + - -X pass=secretpassword + - -X brokers=redpanda-0:9092 + image: docker.redpanda.com/redpandadata/redpanda:v25.2.7 + networks: + - redpanda_network + depends_on: + redpanda-0: + condition: service_healthy + #################### + # rpk container to register the schema # + # See https://docs.redpanda.com/current/manage/schema-reg/schema-reg-api/ + #################### + registerschema: + command: + - registry + - schema + - create + - transactions + - --schema + - /etc/redpanda/transactions-schema.json + - -X user=superuser + - -X pass=secretpassword + - -X registry.hosts=redpanda-0:8081 + image: docker.redpanda.com/redpandadata/redpanda:v25.2.7 + # Mount the local schema file (transactions-schema.json) into the container.
+ volumes: + - ./transactions-schema.json:/etc/redpanda/transactions-schema.json + networks: + - redpanda_network + depends_on: + redpanda-0: + condition: service_healthy + #################### + # rpk container to deploy a consumer group # + # See https://docs.redpanda.com/current/reference/rpk/rpk-topic/rpk-topic-consume/ + #################### + consumergroup: + command: + - topic + - consume + - transactions + - --group + - transactions-consumer + - -X user=superuser + - -X pass=secretpassword + - -X brokers=redpanda-0:9092 + image: docker.redpanda.com/redpandadata/redpanda:v25.2.7 + networks: + - redpanda_network + depends_on: + createtopic: + condition: service_completed_successfully + deploytransform: + condition: service_completed_successfully + #################### + # rpk container to deploy the pre-built data transform # + # See https://docs.redpanda.com/current/develop/data-transforms/deploy/ + #################### + deploytransform: + command: + - transform + - deploy + - --file=/etc/redpanda/regex.wasm + - --name=regex + - --input-topic=logins + - --output-topic=edu-filtered-domains + - --var=PATTERN="[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.edu" + - --var=MATCH_VALUE=true + - -X user=superuser + - -X pass=secretpassword + - -X admin.hosts=redpanda-0:9644 + image: docker.redpanda.com/redpandadata/redpanda:v25.2.7 + volumes: + - ./transform/regex.wasm:/etc/redpanda/regex.wasm + networks: + - redpanda_network + depends_on: + createtopic: + condition: service_completed_successfully + #################### + # MinIO for Tiered Storage # + # See https://min.io/ + # + # NOTE: MinIO is included in this quickstart for development and evaluation purposes only. + # It is not supported for production deployments of Redpanda. 
+ # + # For production environments, use one of the supported object storage providers: + # https://docs.redpanda.com/current/deploy/deployment-option/self-hosted/manual/production/requirements/#object-storage-providers-for-tiered-storage + #################### + minio: + container_name: minio + image: minio/minio:RELEASE.2025-05-24T17-08-30Z + command: server --console-address ":9001" /data + ports: + - 9000:9000 + - 9001:9001 + environment: + MINIO_ROOT_USER: minio + MINIO_ROOT_PASSWORD: redpandaTieredStorage7 + MINIO_SERVER_URL: "http://minio:9000" + MINIO_REGION_NAME: local + MINIO_DOMAIN: minio + volumes: + - minio:/data + networks: + redpanda_network: + aliases: + - redpanda.minio + healthcheck: + test: ["CMD", "curl", "-f", "http://localhost:9000/minio/health/ready"] + interval: 10s + timeout: 5s + retries: 3 + mc: + depends_on: + minio: + condition: service_healthy + image: minio/mc:RELEASE.2025-05-21T01-59-54Z + container_name: mc + networks: + - redpanda_network + environment: + - AWS_ACCESS_KEY_ID=minio + - AWS_SECRET_ACCESS_KEY=redpandaTieredStorage7 + - AWS_REGION=local + entrypoint: > + /bin/sh -c " + until (/usr/bin/mc alias set minio http://minio:9000 minio redpandaTieredStorage7) do echo '...waiting...' && sleep 1; done; + /usr/bin/mc mb minio/redpanda; + /usr/bin/mc anonymous set public minio/redpanda; + tail -f /dev/null + " diff --git a/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/generate-profiles.yaml b/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/generate-profiles.yaml new file mode 100644 index 0000000..baf9a6f --- /dev/null +++ b/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/generate-profiles.yaml @@ -0,0 +1,77 @@ +input: + # Use the 'generate' input + # https://docs.redpanda.com/redpanda-connect/components/inputs/generate/ + generate: + # The interval at which new records are generated.
+ interval: 1s + # The mapping section defines how each generated record is structured. + # The language used here is called Bloblang. + # https://docs.redpanda.com/redpanda-connect/guides/bloblang/about/ + mapping: | + # Generate a fake first name using the 'first_name' faker function. + let first_name = fake("first_name") + + # Generate a fake last name using the 'last_name' faker function. + let last_name = fake("last_name") + + # Define possible subscription levels for users. + let subscription_levels = ["Free", "Basic", "Premium"] + + # Define possible notification channels for user preferences. + let notifications = ["email", "sms", "push" ] + + # Define supported languages for user preferences. + let languages = ["en", "es", "fr", "de", "zh", "jp"] + + # Assign a unique user ID using a UUID digit generator. + root.user_id = fake("uuid_digit") + + # Assign the generated first name to the 'first_name' field. + root.first_name = $first_name + + # Assign the generated last name to the 'last_name' field. + root.last_name = $last_name + + # Construct the user's email by combining the first initial, last name, and a fake domain name. + # The email is converted to lowercase for consistency. + root.email = ($first_name.slice(0,1) + $last_name + "@" + fake("domain_name")).lowercase() + + # Assign a fake registration date using the 'date' faker function. + root.registration_date = fake("date") + + # Assign the current timestamp as the last login time. + root.last_login = now() + + # Randomly assign a subscription level by selecting an index from the 'subscription_levels' array. + root.subscription_level = $subscription_levels.index(random_int(min: 0, max: 2)) + + # Randomly assign a language preference by selecting an index from the 'languages' array. + root.preferences.language = $languages.index(random_int(min: 0, max: 5)) + + # Randomly assign a notification preference by selecting an index from the 'notifications' array. 
+ root.preferences.notifications = $notifications.index(random_int(min: 0, max: 2)) +pipeline: + processors: + - mapping: | + # Set the target topic for the generated records to 'profiles'. + meta topic = "profiles" + + # Assign the entire record (root) to be sent to the specified topic. + root = this +output: + # Use the 'kafka_franz' output to send the result back to Redpanda + # https://docs.redpanda.com/redpanda-connect/components/outputs/kafka_franz/ + kafka_franz: + # Define the list of seed brokers for the Kafka cluster. + seed_brokers: [ "localhost:19092", "localhost:29092", "localhost:39092"] + # Dynamically assign the topic based on the metadata specified in the processors. + # In this case, it resolves to the 'profiles' topic. + topic: ${! metadata("topic") } + # Configure SASL authentication to securely connect to the Kafka brokers. + sasl: + - # Specify the SASL mechanism to use for authentication. + mechanism: SCRAM-SHA-256 + # The password for the SASL authentication. + password: secretpassword + # The username for the SASL authentication. + username: superuser diff --git a/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/bootstrap.yml b/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/bootstrap.yml new file mode 100644 index 0000000..00f3003 --- /dev/null +++ b/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/bootstrap.yml @@ -0,0 +1,56 @@ +# ================================================================= +# This file defines initial cluster properties for a Redpanda cluster. +# Some of these settings are intended for quickstart development and evaluation +# and are not suitable for production environments. 
+# +# For more information on bootstrap files, see: +# https://docs.redpanda.com/current/deploy/deployment-option/self-hosted/manual/production/production-deployment/#configure-a-bootstrap-file +# ================================================================= + +# +# Enable SASL authentication for the Kafka and Admin APIs. +# https://docs.redpanda.com/current/reference/properties/cluster-properties/#admin_api_require_auth +admin_api_require_auth: true +# At least one superuser is required to be able to create other SASL users +# https://docs.redpanda.com/current/reference/properties/cluster-properties/#superusers +superusers: + - superuser +# https://docs.redpanda.com/current/reference/properties/cluster-properties/#enable_sasl +enable_sasl: true +# Allow topics to be created on first access. +# https://docs.redpanda.com/current/reference/properties/cluster-properties/#auto_create_topics_enabled +auto_create_topics_enabled: true +# Enable data transforms. +# https://docs.redpanda.com/current/develop/data-transforms/how-transforms-work/ +data_transforms_enabled: true +# Enable audit logging (enterprise feature). +# https://docs.redpanda.com/current/manage/audit-logging/ +audit_enabled: true +# Enable Tiered Storage (enterprise feature). +# https://docs.redpanda.com/current/manage/tiered-storage/ +cloud_storage_enabled: true +cloud_storage_region: local +cloud_storage_access_key: minio +cloud_storage_secret_key: redpandaTieredStorage7 +cloud_storage_api_endpoint: minio +cloud_storage_api_endpoint_port: 9000 +cloud_storage_disable_tls: true +cloud_storage_bucket: redpanda +# Forces segments to be uploaded to Tiered Storage faster for the purposes of the quickstart +# https://docs.redpanda.com/current/reference/properties/object-storage-properties/#cloud_storage_segment_max_upload_interval_sec +cloud_storage_segment_max_upload_interval_sec: 60 +# Continuous Data Balancing (enterprise feature) continuously monitors your node and rack availability and disk usage. 
This enables self-healing clusters that dynamically balance partitions, ensuring smooth operations and optimal cluster performance. +# https://docs.redpanda.com/current/manage/cluster-maintenance/continuous-data-balancing/ +partition_autobalancing_mode: continuous +# Enable Redpanda to collect consumer group metrics. +# https://docs.redpanda.com/current/reference/properties/cluster-properties/#enable_consumer_group_metrics +enable_consumer_group_metrics: + - "group" + - "partition" + - "consumer_lag" +# Lower the interval for the quickstart +# https://docs.redpanda.com/current/reference/properties/cluster-properties/#consumer_group_lag_collection_interval_sec +consumer_group_lag_collection_interval_sec: 60 +# Enable Redpanda to collect host metrics. +# https://docs.redpanda.com/current/reference/properties/cluster-properties/#enable_host_metrics +enable_host_metrics: true \ No newline at end of file diff --git a/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/docker-compose.yml b/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/docker-compose.yml new file mode 100644 index 0000000..9b636cb --- /dev/null +++ b/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/docker-compose.yml @@ -0,0 +1,403 @@ +name: redpanda-quickstart-multi-broker +networks: + redpanda_network: + driver: bridge +volumes: + redpanda-0: null + redpanda-1: null + redpanda-2: null + minio: null +services: + ################## + # Redpanda Brokers # + ################## + redpanda-0: + command: + - redpanda + - start + - --kafka-addr internal://0.0.0.0:9092,external://0.0.0.0:19092 + # Address the broker advertises to clients that connect to the Kafka API. + # Use the internal addresses to connect to the Redpanda brokers + # from inside the same Docker network. 
+ # Use the external addresses to connect to the Redpanda brokers + # from outside the Docker network. + - --advertise-kafka-addr internal://redpanda-0:9092,external://localhost:19092 + - --pandaproxy-addr internal://0.0.0.0:8082,external://0.0.0.0:18082 + # Address the broker advertises to clients that connect to the HTTP Proxy. + - --advertise-pandaproxy-addr internal://redpanda-0:8082,external://localhost:18082 + - --schema-registry-addr internal://0.0.0.0:8081,external://0.0.0.0:18081 + # Redpanda brokers use the RPC API to communicate with each other internally. + - --rpc-addr redpanda-0:33145 + - --advertise-rpc-addr redpanda-0:33145 + # Mode dev-container uses well-known configuration properties for development in containers. + - --mode dev-container + # Tells Seastar (the framework Redpanda uses under the hood) to use 1 core on the system. + - --smp 1 + - --default-log-level=info + image: docker.redpanda.com/redpandadata/redpanda:v25.2.7 + container_name: redpanda-0 + # Sets the username and password of the bootstrap SCRAM superuser + # See https://docs.redpanda.com/current/deploy/deployment-option/self-hosted/manual/production/production-deployment/#bootstrap-a-user-account + environment: + RP_BOOTSTRAP_USER: "superuser:secretpassword" + volumes: + - redpanda-0:/var/lib/redpanda/data + - ./bootstrap.yml:/etc/redpanda/.bootstrap.yaml + networks: + - redpanda_network + ports: + - 18081:18081 + - 18082:18082 + - 19092:19092 + - 19644:9644 + healthcheck: + test: ["CMD", "rpk", "cluster", "info", "-X", "user=superuser", "-X", "pass=secretpassword"] + interval: 10s + timeout: 15s + retries: 10 + depends_on: + minio: + condition: service_healthy + redpanda-1: + command: + - redpanda + - start + - --kafka-addr internal://0.0.0.0:9092,external://0.0.0.0:29092 + - --advertise-kafka-addr internal://redpanda-1:9092,external://localhost:29092 + - --pandaproxy-addr internal://0.0.0.0:8082,external://0.0.0.0:28082 + - --advertise-pandaproxy-addr 
internal://redpanda-1:8082,external://localhost:28082 + - --schema-registry-addr internal://0.0.0.0:8081,external://0.0.0.0:28081 + - --rpc-addr redpanda-1:33145 + - --advertise-rpc-addr redpanda-1:33145 + - --mode dev-container + - --smp 1 + - --default-log-level=info + - --seeds redpanda-0:33145 + image: docker.redpanda.com/redpandadata/redpanda:v25.2.7 + container_name: redpanda-1 + environment: + RP_BOOTSTRAP_USER: "superuser:secretpassword" + volumes: + - redpanda-1:/var/lib/redpanda/data + - ./bootstrap.yml:/etc/redpanda/.bootstrap.yaml + networks: + - redpanda_network + ports: + - 28081:28081 + - 28082:28082 + - 29092:29092 + - 29644:9644 + depends_on: + - redpanda-0 + - minio + redpanda-2: + command: + - redpanda + - start + - --kafka-addr internal://0.0.0.0:9092,external://0.0.0.0:39092 + - --advertise-kafka-addr internal://redpanda-2:9092,external://localhost:39092 + - --pandaproxy-addr internal://0.0.0.0:8082,external://0.0.0.0:38082 + - --advertise-pandaproxy-addr internal://redpanda-2:8082,external://localhost:38082 + - --schema-registry-addr internal://0.0.0.0:8081,external://0.0.0.0:38081 + - --rpc-addr redpanda-2:33145 + - --advertise-rpc-addr redpanda-2:33145 + - --mode dev-container + - --smp 1 + - --default-log-level=info + - --seeds redpanda-0:33145 + image: docker.redpanda.com/redpandadata/redpanda:v25.2.7 + container_name: redpanda-2 + environment: + RP_BOOTSTRAP_USER: "superuser:secretpassword" + volumes: + - redpanda-2:/var/lib/redpanda/data + - ./bootstrap.yml:/etc/redpanda/.bootstrap.yaml + networks: + - redpanda_network + ports: + - 38081:38081 + - 38082:38082 + - 39092:39092 + - 39644:9644 + depends_on: + - redpanda-0 + - minio + #################### + # Redpanda Console # + #################### + console: + container_name: redpanda-console + image: docker.redpanda.com/redpandadata/console:v3.2.2 + networks: + - redpanda_network + entrypoint: /bin/sh + command: -c 'echo "$$CONSOLE_CONFIG_FILE" > /tmp/config.yml && /app/console' + 
volumes: + - ./config:/tmp/config/ + environment: + CONFIG_FILEPATH: ${CONFIG_FILEPATH:-/tmp/config.yml} + CONSOLE_CONFIG_FILE: | + # Configure a connection to the Redpanda cluster + # See https://docs.redpanda.com/current/console/config/connect-to-redpanda/ + kafka: + brokers: ["redpanda-0:9092"] + sasl: + enabled: true + impersonateUser: true + schemaRegistry: + enabled: true + urls: ["http://redpanda-0:8081","http://redpanda-1:8081","http://redpanda-2:8081"] + authentication: + impersonateUser: true + redpanda: + adminApi: + enabled: true + urls: ["http://redpanda-0:9644","http://redpanda-1:9644","http://redpanda-2:9644"] + authentication: + basic: + username: superuser + password: secretpassword + impersonateUser: false + console: + # Configures Redpanda Console to fetch topic documentation from GitHub and display it in the UI. + # See https://docs.redpanda.com/current/console/config/topic-documentation/ + topicDocumentation: + enabled: true + git: + enabled: true + repository: + url: https://github.com/redpanda-data/docs + branch: main + baseDirectory: tests/docker-compose + authentication: + jwtSigningKey: vazxnT+ZHtxKslK6QlDGovcYnSjTk/lKMmZ+mHrBVE+YdVDkLgSuP6AszAKe9Gvq + basic: + enabled: true + authorization: + roleBindings: + - roleName: admin + users: + - loginType: basic + name: superuser + ports: + - 8080:8080 + depends_on: + redpanda-0: + condition: service_healthy + createtopic: + condition: service_completed_successfully + registerschema: + condition: service_completed_successfully + deploytransform: + condition: service_completed_successfully + #################### + # Redpanda Connect # + #################### + connect: + container_name: redpanda-connect + image: docker.redpanda.com/redpandadata/connect + networks: + - redpanda_network + entrypoint: /bin/sh + depends_on: + redpanda-0: + condition: service_healthy + command: -c 'echo "$$CONNECT_CFG_FILE" > /tmp/connect.yml; /redpanda-connect -c /tmp/connect.yml' + environment: + # This Redpanda 
Connect configuration creates fake data, + # processes it, and writes the output to a set of topics. + # + # Input: + # - Uses Redpanda Connect's generate input to generate fake data. + # See https://docs.redpanda.com/redpanda-connect/components/inputs/generate/ + # Pipeline: + # - Bloblang mapping to batch each input and map 1 message to 'logins' + # topic, and a random number (1-3) of messages to 'transactions' topic + # - Unarchive processor to parse the JSON array and extract each + # element into its own message. + # See https://docs.redpanda.com/redpanda-connect/guides/bloblang/about/ + # Output: + # - kafka_franz output to write the messages to the Redpanda brokers. + # See https://docs.redpanda.com/redpanda-connect/components/outputs/kafka_franz/ + CONNECT_CFG_FILE: | + input: + generate: + interval: 1s + mapping: | + let first_name = fake("first_name") + let last_name = fake("last_name") + + root.user_id = counter() + root.name = $$first_name + " " + $$last_name + root.email = ($$first_name.slice(0,1) + $$last_name + "@" + fake("domain_name")).lowercase() + root.ip = fake("ipv4") + root.login_time = now() + pipeline: + processors: + - mapping: | + root = range(0, random_int(min:2, max:4)).map_each(cust -> this) + - unarchive: + format: "json_array" + - mapping: | + if batch_index() == 0 { + meta topic = "logins" + root = this + } else { + meta topic = "transactions" + root.user_id = this.user_id + root.email = this.email + root.index = batch_index() - 1 + root.product_url = fake("url") + root.price = fake("amount_with_currency") + root.timestamp = now() + } + output: + kafka_franz: + seed_brokers: [ "redpanda-0:9092" ] + topic: $${!
metadata("topic") } + sasl: + - mechanism: SCRAM-SHA-256 + password: secretpassword + username: superuser + #################### + # rpk container to create the edu-filtered-domains topic # + # See https://docs.redpanda.com/current/reference/rpk/rpk-topic/rpk-topic-create/ + #################### + createtopic: + command: + - topic + - create + - edu-filtered-domains + - -X user=superuser + - -X pass=secretpassword + - -X brokers=redpanda-0:9092 + image: docker.redpanda.com/redpandadata/redpanda:v25.2.7 + networks: + - redpanda_network + depends_on: + redpanda-0: + condition: service_healthy + #################### + # rpk container to register the schema # + # See https://docs.redpanda.com/current/manage/schema-reg/schema-reg-api/ + #################### + registerschema: + command: + - registry + - schema + - create + - transactions + - --schema + - /etc/redpanda/transactions-schema.json + - -X user=superuser + - -X pass=secretpassword + - -X registry.hosts=redpanda-0:8081 + image: docker.redpanda.com/redpandadata/redpanda:v25.2.7 + # Mount the local schema file into the container.
+ volumes: + - ./transactions-schema.json:/etc/redpanda/transactions-schema.json + networks: + - redpanda_network + depends_on: + redpanda-0: + condition: service_healthy + #################### + # rpk container to deploy a consumer group # + # See https://docs.redpanda.com/current/reference/rpk/rpk-topic/rpk-topic-consume/ + #################### + consumergroup: + command: + - topic + - consume + - transactions + - --group + - transactions-consumer + - -X user=superuser + - -X pass=secretpassword + - -X brokers=redpanda-0:9092 + image: docker.redpanda.com/redpandadata/redpanda:v25.2.7 + networks: + - redpanda_network + depends_on: + createtopic: + condition: service_completed_successfully + deploytransform: + condition: service_completed_successfully + #################### + # rpk container to deploy the pre-built data transform # + # See https://docs.redpanda.com/current/develop/data-transforms/deploy/ + #################### + deploytransform: + command: + - transform + - deploy + - --file=/etc/redpanda/regex.wasm + - --name=regex + - --input-topic=logins + - --output-topic=edu-filtered-domains + - --var=PATTERN="[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.edu" + - --var=MATCH_VALUE=true + - -X user=superuser + - -X pass=secretpassword + - -X admin.hosts=redpanda-0:9644 + image: docker.redpanda.com/redpandadata/redpanda:v25.2.7 + volumes: + - ./transform/regex.wasm:/etc/redpanda/regex.wasm + networks: + - redpanda_network + depends_on: + createtopic: + condition: service_completed_successfully + #################### + # MinIO for Tiered Storage # + # See https://min.io/ + # + # NOTE: MinIO is included in this quickstart for development and evaluation purposes only. + # It is not supported for production deployments of Redpanda. 
+ # + # For production environments, use one of the supported object storage providers: + # https://docs.redpanda.com/current/deploy/deployment-option/self-hosted/manual/production/requirements/#object-storage-providers-for-tiered-storage + #################### + minio: + container_name: minio + image: minio/minio:RELEASE.2025-05-24T17-08-30Z + command: server --console-address ":9001" /data + ports: + - 9000:9000 + - 9001:9001 + environment: + MINIO_ROOT_USER: minio + MINIO_ROOT_PASSWORD: redpandaTieredStorage7 + MINIO_SERVER_URL: "http://minio:9000" + MINIO_REGION_NAME: local + MINIO_DOMAIN: minio + volumes: + - minio:/data + networks: + redpanda_network: + aliases: + - redpanda.minio + healthcheck: + test: ["CMD", "curl", "-f", "http://localhost:9000/minio/health/ready"] + interval: 10s + timeout: 5s + retries: 3 + mc: + depends_on: + minio: + condition: service_healthy + image: minio/mc:RELEASE.2025-05-21T01-59-54Z + container_name: mc + networks: + - redpanda_network + environment: + - AWS_ACCESS_KEY_ID=minio + - AWS_SECRET_ACCESS_KEY=redpandaTieredStorage7 + - AWS_REGION=local + entrypoint: > + /bin/sh -c " + until (/usr/bin/mc alias set minio http://minio:9000 minio redpandaTieredStorage7) do echo '...waiting...' 
&& sleep 1; done; + /usr/bin/mc mb minio/redpanda; + /usr/bin/mc policy set public minio/redpanda; + tail -f /dev/null + " diff --git a/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/generate-profiles.yaml b/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/generate-profiles.yaml new file mode 100644 index 0000000..baf9a6f --- /dev/null +++ b/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/generate-profiles.yaml @@ -0,0 +1,77 @@ +input: + # Use the 'generate' input + # https://docs.redpanda.com/redpanda-connect/components/inputs/generate/ + generate: + # The interval at which new records are generated. + interval: 1s + # The mapping section defines how each generated record is structured. + # The language used here is called Bloblang. + # https://docs.redpanda.com/redpanda-connect/guides/bloblang/about/ + mapping: | + # Generate a fake first name using the 'first_name' faker function. + let first_name = fake("first_name") + + # Generate a fake last name using the 'last_name' faker function. + let last_name = fake("last_name") + + # Define possible subscription levels for users. + let subscription_levels = ["Free", "Basic", "Premium"] + + # Define possible notification channels for user preferences. + let notifications = ["email", "sms", "push" ] + + # Define supported languages for user preferences. + let languages = ["en", "es", "fr", "de", "zh", "jp"] + + # Assign a unique user ID using a UUID digit generator. + root.user_id = fake("uuid_digit") + + # Assign the generated first name to the 'first_name' field. + root.first_name = $first_name + + # Assign the generated last name to the 'last_name' field. + root.last_name = $last_name + + # Construct the user's email by combining the first initial, last name, and a fake domain name. 
+ # The email is converted to lowercase for consistency. + root.email = ($first_name.slice(0,1) + $last_name + "@" + fake("domain_name")).lowercase() + + # Assign a fake registration date using the 'date' faker function. + root.registration_date = fake("date") + + # Assign the current timestamp as the last login time. + root.last_login = now() + + # Randomly assign a subscription level by selecting an index from the 'subscription_levels' array. + root.subscription_level = $subscription_levels.index(random_int(min: 0, max: 2)) + + # Randomly assign a language preference by selecting an index from the 'languages' array. + root.preferences.language = $languages.index(random_int(min: 0, max: 5)) + + # Randomly assign a notification preference by selecting an index from the 'notifications' array. + root.preferences.notifications = $notifications.index(random_int(min: 0, max: 2)) +pipeline: + processors: + - mapping: | + # Set the target topic for the generated records to 'profiles'. + meta topic = "profiles" + + # Assign the entire record (root) to be sent to the specified topic. + root = this +output: + # Use the 'kafka_franz' output to send the result back to Redpanda + # https://docs.redpanda.com/redpanda-connect/components/outputs/kafka_franz/ + kafka_franz: + # Define the list of seed brokers for the Kafka cluster. + seed_brokers: [ "localhost:19092", "localhost:29092", "localhost:39092"] + # Dynamically assign the topic based on the metadata specified in the processors. + # In this case, it resolves to the 'profiles' topic. + topic: ${! metadata("topic") } + # Configure SASL authentication to securely connect to the Kafka brokers. + sasl: + - # Specify the SASL mechanism to use for authentication. + mechanism: SCRAM-SHA-256 + # The password for the SASL authentication. + password: secretpassword + # The username for the SASL authentication. 
+ username: superuser diff --git a/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/bootstrap.yml b/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/bootstrap.yml new file mode 100644 index 0000000..00f3003 --- /dev/null +++ b/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/bootstrap.yml @@ -0,0 +1,56 @@ +# ================================================================= +# This file defines initial cluster properties for a Redpanda cluster. +# Some of these settings are intended for quickstart development and evaluation +# and are not suitable for production environments. +# +# For more information on bootstrap files, see: +# https://docs.redpanda.com/current/deploy/deployment-option/self-hosted/manual/production/production-deployment/#configure-a-bootstrap-file +# ================================================================= + +# +# Enable SASL authentication for the Kafka and Admin APIs. +# https://docs.redpanda.com/current/reference/properties/cluster-properties/#admin_api_require_auth +admin_api_require_auth: true +# At least one superuser is required to be able to create other SASL users +# https://docs.redpanda.com/current/reference/properties/cluster-properties/#superusers +superusers: + - superuser +# https://docs.redpanda.com/current/reference/properties/cluster-properties/#enable_sasl +enable_sasl: true +# Allow topics to be created on first access. +# https://docs.redpanda.com/current/reference/properties/cluster-properties/#auto_create_topics_enabled +auto_create_topics_enabled: true +# Enable data transforms. 
+# https://docs.redpanda.com/current/develop/data-transforms/how-transforms-work/ +data_transforms_enabled: true +# Enable audit logging (enterprise feature). +# https://docs.redpanda.com/current/manage/audit-logging/ +audit_enabled: true +# Enable Tiered Storage (enterprise feature). +# https://docs.redpanda.com/current/manage/tiered-storage/ +cloud_storage_enabled: true +cloud_storage_region: local +cloud_storage_access_key: minio +cloud_storage_secret_key: redpandaTieredStorage7 +cloud_storage_api_endpoint: minio +cloud_storage_api_endpoint_port: 9000 +cloud_storage_disable_tls: true +cloud_storage_bucket: redpanda +# Forces segments to be uploaded to Tiered Storage faster for the purposes of the quickstart +# https://docs.redpanda.com/current/reference/properties/object-storage-properties/#cloud_storage_segment_max_upload_interval_sec +cloud_storage_segment_max_upload_interval_sec: 60 +# Continuous Data Balancing (enterprise feature) continuously monitors your node and rack availability and disk usage. This enables self-healing clusters that dynamically balance partitions, ensuring smooth operations and optimal cluster performance. +# https://docs.redpanda.com/current/manage/cluster-maintenance/continuous-data-balancing/ +partition_autobalancing_mode: continuous +# Enable Redpanda to collect consumer group metrics. +# https://docs.redpanda.com/current/reference/properties/cluster-properties/#enable_consumer_group_metrics +enable_consumer_group_metrics: + - "group" + - "partition" + - "consumer_lag" +# Lower the interval for the quickstart +# https://docs.redpanda.com/current/reference/properties/cluster-properties/#consumer_group_lag_collection_interval_sec +consumer_group_lag_collection_interval_sec: 60 +# Enable Redpanda to collect host metrics. 
+# https://docs.redpanda.com/current/reference/properties/cluster-properties/#enable_host_metrics +enable_host_metrics: true \ No newline at end of file diff --git a/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/docker-compose.yml b/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/docker-compose.yml new file mode 100644 index 0000000..9b636cb --- /dev/null +++ b/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/docker-compose.yml @@ -0,0 +1,403 @@ +name: redpanda-quickstart-multi-broker +networks: + redpanda_network: + driver: bridge +volumes: + redpanda-0: null + redpanda-1: null + redpanda-2: null + minio: null +services: + ################## + # Redpanda Brokers # + ################## + redpanda-0: + command: + - redpanda + - start + - --kafka-addr internal://0.0.0.0:9092,external://0.0.0.0:19092 + # Address the broker advertises to clients that connect to the Kafka API. + # Use the internal addresses to connect to the Redpanda brokers + # from inside the same Docker network. + # Use the external addresses to connect to the Redpanda brokers + # from outside the Docker network. + - --advertise-kafka-addr internal://redpanda-0:9092,external://localhost:19092 + - --pandaproxy-addr internal://0.0.0.0:8082,external://0.0.0.0:18082 + # Address the broker advertises to clients that connect to the HTTP Proxy. + - --advertise-pandaproxy-addr internal://redpanda-0:8082,external://localhost:18082 + - --schema-registry-addr internal://0.0.0.0:8081,external://0.0.0.0:18081 + # Redpanda brokers use the RPC API to communicate with each other internally. 
+ - --rpc-addr redpanda-0:33145 + - --advertise-rpc-addr redpanda-0:33145 + # Mode dev-container uses well-known configuration properties for development in containers. + - --mode dev-container + # Tells Seastar (the framework Redpanda uses under the hood) to use 1 core on the system. + - --smp 1 + - --default-log-level=info + image: docker.redpanda.com/redpandadata/redpanda:v25.2.7 + container_name: redpanda-0 + # Sets the username and password of the bootstrap SCRAM superuser + # See https://docs.redpanda.com/current/deploy/deployment-option/self-hosted/manual/production/production-deployment/#bootstrap-a-user-account + environment: + RP_BOOTSTRAP_USER: "superuser:secretpassword" + volumes: + - redpanda-0:/var/lib/redpanda/data + - ./bootstrap.yml:/etc/redpanda/.bootstrap.yaml + networks: + - redpanda_network + ports: + - 18081:18081 + - 18082:18082 + - 19092:19092 + - 19644:9644 + healthcheck: + test: ["CMD", "rpk", "cluster", "info", "-X", "user=superuser", "-X", "pass=secretpassword"] + interval: 10s + timeout: 15s + retries: 10 + depends_on: + minio: + condition: service_healthy + redpanda-1: + command: + - redpanda + - start + - --kafka-addr internal://0.0.0.0:9092,external://0.0.0.0:29092 + - --advertise-kafka-addr internal://redpanda-1:9092,external://localhost:29092 + - --pandaproxy-addr internal://0.0.0.0:8082,external://0.0.0.0:28082 + - --advertise-pandaproxy-addr internal://redpanda-1:8082,external://localhost:28082 + - --schema-registry-addr internal://0.0.0.0:8081,external://0.0.0.0:28081 + - --rpc-addr redpanda-1:33145 + - --advertise-rpc-addr redpanda-1:33145 + - --mode dev-container + - --smp 1 + - --default-log-level=info + - --seeds redpanda-0:33145 + image: docker.redpanda.com/redpandadata/redpanda:v25.2.7 + container_name: redpanda-1 + environment: + RP_BOOTSTRAP_USER: "superuser:secretpassword" + volumes: + - redpanda-1:/var/lib/redpanda/data + - ./bootstrap.yml:/etc/redpanda/.bootstrap.yaml + networks: + - redpanda_network + ports: + - 
28081:28081 + - 28082:28082 + - 29092:29092 + - 29644:9644 + depends_on: + - redpanda-0 + - minio + redpanda-2: + command: + - redpanda + - start + - --kafka-addr internal://0.0.0.0:9092,external://0.0.0.0:39092 + - --advertise-kafka-addr internal://redpanda-2:9092,external://localhost:39092 + - --pandaproxy-addr internal://0.0.0.0:8082,external://0.0.0.0:38082 + - --advertise-pandaproxy-addr internal://redpanda-2:8082,external://localhost:38082 + - --schema-registry-addr internal://0.0.0.0:8081,external://0.0.0.0:38081 + - --rpc-addr redpanda-2:33145 + - --advertise-rpc-addr redpanda-2:33145 + - --mode dev-container + - --smp 1 + - --default-log-level=info + - --seeds redpanda-0:33145 + image: docker.redpanda.com/redpandadata/redpanda:v25.2.7 + container_name: redpanda-2 + environment: + RP_BOOTSTRAP_USER: "superuser:secretpassword" + volumes: + - redpanda-2:/var/lib/redpanda/data + - ./bootstrap.yml:/etc/redpanda/.bootstrap.yaml + networks: + - redpanda_network + ports: + - 38081:38081 + - 38082:38082 + - 39092:39092 + - 39644:9644 + depends_on: + - redpanda-0 + - minio + #################### + # Redpanda Console # + #################### + console: + container_name: redpanda-console + image: docker.redpanda.com/redpandadata/console:v3.2.2 + networks: + - redpanda_network + entrypoint: /bin/sh + command: -c 'echo "$$CONSOLE_CONFIG_FILE" > /tmp/config.yml && /app/console' + volumes: + - ./config:/tmp/config/ + environment: + CONFIG_FILEPATH: ${CONFIG_FILEPATH:-/tmp/config.yml} + CONSOLE_CONFIG_FILE: | + # Configure a connection to the Redpanda cluster + # See https://docs.redpanda.com/current/console/config/connect-to-redpanda/ + kafka: + brokers: ["redpanda-0:9092"] + sasl: + enabled: true + impersonateUser: true + schemaRegistry: + enabled: true + urls: ["http://redpanda-0:8081","http://redpanda-1:8081","http://redpanda-2:8081"] + authentication: + impersonateUser: true + redpanda: + adminApi: + enabled: true + urls: 
["http://redpanda-0:9644","http://redpanda-1:9644","http://redpanda-2:9644"] + authentication: + basic: + username: superuser + password: secretpassword + impersonateUser: false + console: + # Configures Redpanda Console to fetch topic documentation from GitHub and display it in the UI. + # See https://docs.redpanda.com/current/console/config/topic-documentation/ + topicDocumentation: + enabled: true + git: + enabled: true + repository: + url: https://github.com/redpanda-data/docs + branch: main + baseDirectory: tests/docker-compose + authentication: + jwtSigningKey: vazxnT+ZHtxKslK6QlDGovcYnSjTk/lKMmZ+mHrBVE+YdVDkLgSuP6AszAKe9Gvq + basic: + enabled: true + authorization: + roleBindings: + - roleName: admin + users: + - loginType: basic + name: superuser + ports: + - 8080:8080 + depends_on: + redpanda-0: + condition: service_healthy + createtopic: + condition: service_completed_successfully + registerschema: + condition: service_completed_successfully + deploytransform: + condition: service_completed_successfully + #################### + # Redpanda Connect # + #################### + connect: + container_name: redpanda-connect + image: docker.redpanda.com/redpandadata/connect + networks: + - redpanda_network + entrypoint: /bin/sh + depends_on: + redpanda-0: + condition: service_healthy + command: -c 'echo "$$CONNECT_CFG_FILE" > /tmp/connect.yml; /redpanda-connect -c /tmp/connect.yml' + environment: + # This Redpanda Connect configuration creates fake data, + # processes it, and writes the output to a set of topics. + # + # Input: + # - Uses Redpanda Connect's generate input to generate fake data. + # See https://docs.redpanda.com/redpanda-connect/components/inputs/generate/ + # Pipeline: + # - Bloblang mapping to batch each input and map 1 message to 'logins' + # topic, and a random number (1-3) of messages to 'transactions' topic + # - Unarchive processor to parse the JSON array and extract each + # element into its own message.
+ # See https://docs.redpanda.com/redpanda-connect/guides/bloblang/about/ + # Output: + # - kafka_franz output to write the messages to the Redpanda brokers. + # See https://docs.redpanda.com/redpanda-connect/components/outputs/kafka_franz/ + CONNECT_CFG_FILE: | + input: + generate: + interval: 1s + mapping: | + let first_name = fake("first_name") + let last_name = fake("last_name") + + root.user_id = counter() + root.name = $$first_name + " " + $$last_name + root.email = ($$first_name.slice(0,1) + $$last_name + "@" + fake("domain_name")).lowercase() + root.ip = fake("ipv4") + root.login_time = now() + pipeline: + processors: + - mapping: | + root = range(0, random_int(min:2, max:4)).map_each(cust -> this) + - unarchive: + format: "json_array" + - mapping: | + if batch_index() == 0 { + meta topic = "logins" + root = this + } else { + meta topic = "transactions" + root.user_id = this.user_id + root.email = this.email + root.index = batch_index() - 1 + root.product_url = fake("url") + root.price = fake("amount_with_currency") + root.timestamp = now() + } + output: + kafka_franz: + seed_brokers: [ "redpanda-0:9092" ] + topic: $${! 
metadata("topic") } + sasl: + - mechanism: SCRAM-SHA-256 + password: secretpassword + username: superuser + #################### + # rpk container to create the edu-filtered-domains topic # + # See https://docs.redpanda.com/current/reference/rpk/rpk-topic/rpk-topic-create/ + #################### + createtopic: + command: + - topic + - create + - edu-filtered-domains + - -X user=superuser + - -X pass=secretpassword + - -X brokers=redpanda-0:9092 + image: docker.redpanda.com/redpandadata/redpanda:v25.2.7 + networks: + - redpanda_network + depends_on: + redpanda-0: + condition: service_healthy + #################### + # rpk container to register the schema # + # See https://docs.redpanda.com/current/manage/schema-reg/schema-reg-api/ + #################### + registerschema: + command: + - registry + - schema + - create + - transactions + - --schema + - /etc/redpanda/transactions-schema.json + - -X user=superuser + - -X pass=secretpassword + - -X registry.hosts=redpanda-0:8081 + image: docker.redpanda.com/redpandadata/redpanda:v25.2.7 + # Mount the local schema file into the container.
+ volumes: + - ./transactions-schema.json:/etc/redpanda/transactions-schema.json + networks: + - redpanda_network + depends_on: + redpanda-0: + condition: service_healthy + #################### + # rpk container to deploy a consumer group # + # See https://docs.redpanda.com/current/reference/rpk/rpk-topic/rpk-topic-consume/ + #################### + consumergroup: + command: + - topic + - consume + - transactions + - --group + - transactions-consumer + - -X user=superuser + - -X pass=secretpassword + - -X brokers=redpanda-0:9092 + image: docker.redpanda.com/redpandadata/redpanda:v25.2.7 + networks: + - redpanda_network + depends_on: + createtopic: + condition: service_completed_successfully + deploytransform: + condition: service_completed_successfully + #################### + # rpk container to deploy the pre-built data transform # + # See https://docs.redpanda.com/current/develop/data-transforms/deploy/ + #################### + deploytransform: + command: + - transform + - deploy + - --file=/etc/redpanda/regex.wasm + - --name=regex + - --input-topic=logins + - --output-topic=edu-filtered-domains + - --var=PATTERN="[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.edu" + - --var=MATCH_VALUE=true + - -X user=superuser + - -X pass=secretpassword + - -X admin.hosts=redpanda-0:9644 + image: docker.redpanda.com/redpandadata/redpanda:v25.2.7 + volumes: + - ./transform/regex.wasm:/etc/redpanda/regex.wasm + networks: + - redpanda_network + depends_on: + createtopic: + condition: service_completed_successfully + #################### + # MinIO for Tiered Storage # + # See https://min.io/ + # + # NOTE: MinIO is included in this quickstart for development and evaluation purposes only. + # It is not supported for production deployments of Redpanda. 
+ # + # For production environments, use one of the supported object storage providers: + # https://docs.redpanda.com/current/deploy/deployment-option/self-hosted/manual/production/requirements/#object-storage-providers-for-tiered-storage + #################### + minio: + container_name: minio + image: minio/minio:RELEASE.2025-05-24T17-08-30Z + command: server --console-address ":9001" /data + ports: + - 9000:9000 + - 9001:9001 + environment: + MINIO_ROOT_USER: minio + MINIO_ROOT_PASSWORD: redpandaTieredStorage7 + MINIO_SERVER_URL: "http://minio:9000" + MINIO_REGION_NAME: local + MINIO_DOMAIN: minio + volumes: + - minio:/data + networks: + redpanda_network: + aliases: + - redpanda.minio + healthcheck: + test: ["CMD", "curl", "-f", "http://localhost:9000/minio/health/ready"] + interval: 10s + timeout: 5s + retries: 3 + mc: + depends_on: + minio: + condition: service_healthy + image: minio/mc:RELEASE.2025-05-21T01-59-54Z + container_name: mc + networks: + - redpanda_network + environment: + - AWS_ACCESS_KEY_ID=minio + - AWS_SECRET_ACCESS_KEY=redpandaTieredStorage7 + - AWS_REGION=local + entrypoint: > + /bin/sh -c " + until (/usr/bin/mc alias set minio http://minio:9000 minio redpandaTieredStorage7) do echo '...waiting...' 
&& sleep 1; done; + /usr/bin/mc mb minio/redpanda; + /usr/bin/mc policy set public minio/redpanda; + tail -f /dev/null + " diff --git a/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/generate-profiles.yaml b/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/generate-profiles.yaml new file mode 100644 index 0000000..baf9a6f --- /dev/null +++ b/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/generate-profiles.yaml @@ -0,0 +1,77 @@ +input: + # Use the 'generate' input + # https://docs.redpanda.com/redpanda-connect/components/inputs/generate/ + generate: + # The interval at which new records are generated. + interval: 1s + # The mapping section defines how each generated record is structured. + # The language used here is called Bloblang. + # https://docs.redpanda.com/redpanda-connect/guides/bloblang/about/ + mapping: | + # Generate a fake first name using the 'first_name' faker function. + let first_name = fake("first_name") + + # Generate a fake last name using the 'last_name' faker function. + let last_name = fake("last_name") + + # Define possible subscription levels for users. + let subscription_levels = ["Free", "Basic", "Premium"] + + # Define possible notification channels for user preferences. + let notifications = ["email", "sms", "push" ] + + # Define supported languages for user preferences. + let languages = ["en", "es", "fr", "de", "zh", "jp"] + + # Assign a unique user ID using a UUID digit generator. + root.user_id = fake("uuid_digit") + + # Assign the generated first name to the 'first_name' field. + root.first_name = $first_name + + # Assign the generated last name to the 'last_name' field. 
+ root.last_name = $last_name + + # Construct the user's email by combining the first initial, last name, and a fake domain name. + # The email is converted to lowercase for consistency. + root.email = ($first_name.slice(0,1) + $last_name + "@" + fake("domain_name")).lowercase() + + # Assign a fake registration date using the 'date' faker function. + root.registration_date = fake("date") + + # Assign the current timestamp as the last login time. + root.last_login = now() + + # Randomly assign a subscription level by selecting an index from the 'subscription_levels' array. + root.subscription_level = $subscription_levels.index(random_int(min: 0, max: 2)) + + # Randomly assign a language preference by selecting an index from the 'languages' array. + root.preferences.language = $languages.index(random_int(min: 0, max: 5)) + + # Randomly assign a notification preference by selecting an index from the 'notifications' array. + root.preferences.notifications = $notifications.index(random_int(min: 0, max: 2)) +pipeline: + processors: + - mapping: | + # Set the target topic for the generated records to 'profiles'. + meta topic = "profiles" + + # Assign the entire record (root) to be sent to the specified topic. + root = this +output: + # Use the 'kafka_franz' output to send the result back to Redpanda + # https://docs.redpanda.com/redpanda-connect/components/outputs/kafka_franz/ + kafka_franz: + # Define the list of seed brokers for the Kafka cluster. + seed_brokers: [ "localhost:19092", "localhost:29092", "localhost:39092"] + # Dynamically assign the topic based on the metadata specified in the processors. + # In this case, it resolves to the 'profiles' topic. + topic: ${! metadata("topic") } + # Configure SASL authentication to securely connect to the Kafka brokers. + sasl: + - # Specify the SASL mechanism to use for authentication. + mechanism: SCRAM-SHA-256 + # The password for the SASL authentication. 
+ password: secretpassword + # The username for the SASL authentication. + username: superuser diff --git a/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/bootstrap.yml b/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/bootstrap.yml new file mode 100644 index 0000000..00f3003 --- /dev/null +++ b/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/bootstrap.yml @@ -0,0 +1,56 @@ +# ================================================================= +# This file defines initial cluster properties for a Redpanda cluster. +# Some of these settings are intended for quickstart development and evaluation +# and are not suitable for production environments. +# +# For more information on bootstrap files, see: +# https://docs.redpanda.com/current/deploy/deployment-option/self-hosted/manual/production/production-deployment/#configure-a-bootstrap-file +# ================================================================= + +# +# Enable SASL authentication for the Kafka and Admin APIs. +# https://docs.redpanda.com/current/reference/properties/cluster-properties/#admin_api_require_auth +admin_api_require_auth: true +# At least one superuser is required to be able to create other SASL users +# https://docs.redpanda.com/current/reference/properties/cluster-properties/#superusers +superusers: + - superuser +# https://docs.redpanda.com/current/reference/properties/cluster-properties/#enable_sasl +enable_sasl: true +# Allow topics to be created on first access. 
+# https://docs.redpanda.com/current/reference/properties/cluster-properties/#auto_create_topics_enabled +auto_create_topics_enabled: true +# Enable data transforms. +# https://docs.redpanda.com/current/develop/data-transforms/how-transforms-work/ +data_transforms_enabled: true +# Enable audit logging (enterprise feature). +# https://docs.redpanda.com/current/manage/audit-logging/ +audit_enabled: true +# Enable Tiered Storage (enterprise feature). +# https://docs.redpanda.com/current/manage/tiered-storage/ +cloud_storage_enabled: true +cloud_storage_region: local +cloud_storage_access_key: minio +cloud_storage_secret_key: redpandaTieredStorage7 +cloud_storage_api_endpoint: minio +cloud_storage_api_endpoint_port: 9000 +cloud_storage_disable_tls: true +cloud_storage_bucket: redpanda +# Forces segments to be uploaded to Tiered Storage faster for the purposes of the quickstart +# https://docs.redpanda.com/current/reference/properties/object-storage-properties/#cloud_storage_segment_max_upload_interval_sec +cloud_storage_segment_max_upload_interval_sec: 60 +# Continuous Data Balancing (enterprise feature) continuously monitors your node and rack availability and disk usage. This enables self-healing clusters that dynamically balance partitions, ensuring smooth operations and optimal cluster performance. +# https://docs.redpanda.com/current/manage/cluster-maintenance/continuous-data-balancing/ +partition_autobalancing_mode: continuous +# Enable Redpanda to collect consumer group metrics. +# https://docs.redpanda.com/current/reference/properties/cluster-properties/#enable_consumer_group_metrics +enable_consumer_group_metrics: + - "group" + - "partition" + - "consumer_lag" +# Lower the interval for the quickstart +# https://docs.redpanda.com/current/reference/properties/cluster-properties/#consumer_group_lag_collection_interval_sec +consumer_group_lag_collection_interval_sec: 60 +# Enable Redpanda to collect host metrics. 
+# https://docs.redpanda.com/current/reference/properties/cluster-properties/#enable_host_metrics +enable_host_metrics: true \ No newline at end of file diff --git a/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/docker-compose.yml b/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/docker-compose.yml new file mode 100644 index 0000000..9b636cb --- /dev/null +++ b/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/docker-compose.yml @@ -0,0 +1,403 @@ +name: redpanda-quickstart-multi-broker +networks: + redpanda_network: + driver: bridge +volumes: + redpanda-0: null + redpanda-1: null + redpanda-2: null + minio: null +services: + ################## + # Redpanda Brokers # + ################## + redpanda-0: + command: + - redpanda + - start + - --kafka-addr internal://0.0.0.0:9092,external://0.0.0.0:19092 + # Address the broker advertises to clients that connect to the Kafka API. + # Use the internal addresses to connect to the Redpanda brokers + # from inside the same Docker network. + # Use the external addresses to connect to the Redpanda brokers + # from outside the Docker network. + - --advertise-kafka-addr internal://redpanda-0:9092,external://localhost:19092 + - --pandaproxy-addr internal://0.0.0.0:8082,external://0.0.0.0:18082 + # Address the broker advertises to clients that connect to the HTTP Proxy. + - --advertise-pandaproxy-addr internal://redpanda-0:8082,external://localhost:18082 + - --schema-registry-addr internal://0.0.0.0:8081,external://0.0.0.0:18081 + # Redpanda brokers use the RPC API to communicate with each other internally. 
+ - --rpc-addr redpanda-0:33145 + - --advertise-rpc-addr redpanda-0:33145 + # Mode dev-container uses well-known configuration properties for development in containers. + - --mode dev-container + # Tells Seastar (the framework Redpanda uses under the hood) to use 1 core on the system. + - --smp 1 + - --default-log-level=info + image: docker.redpanda.com/redpandadata/redpanda:v25.2.7 + container_name: redpanda-0 + # Sets the username and password of the bootstrap SCRAM superuser + # See https://docs.redpanda.com/current/deploy/deployment-option/self-hosted/manual/production/production-deployment/#bootstrap-a-user-account + environment: + RP_BOOTSTRAP_USER: "superuser:secretpassword" + volumes: + - redpanda-0:/var/lib/redpanda/data + - ./bootstrap.yml:/etc/redpanda/.bootstrap.yaml + networks: + - redpanda_network + ports: + - 18081:18081 + - 18082:18082 + - 19092:19092 + - 19644:9644 + healthcheck: + test: ["CMD", "rpk", "cluster", "info", "-X", "user=superuser", "-X", "pass=secretpassword"] + interval: 10s + timeout: 15s + retries: 10 + depends_on: + minio: + condition: service_healthy + redpanda-1: + command: + - redpanda + - start + - --kafka-addr internal://0.0.0.0:9092,external://0.0.0.0:29092 + - --advertise-kafka-addr internal://redpanda-1:9092,external://localhost:29092 + - --pandaproxy-addr internal://0.0.0.0:8082,external://0.0.0.0:28082 + - --advertise-pandaproxy-addr internal://redpanda-1:8082,external://localhost:28082 + - --schema-registry-addr internal://0.0.0.0:8081,external://0.0.0.0:28081 + - --rpc-addr redpanda-1:33145 + - --advertise-rpc-addr redpanda-1:33145 + - --mode dev-container + - --smp 1 + - --default-log-level=info + - --seeds redpanda-0:33145 + image: docker.redpanda.com/redpandadata/redpanda:v25.2.7 + container_name: redpanda-1 + environment: + RP_BOOTSTRAP_USER: "superuser:secretpassword" + volumes: + - redpanda-1:/var/lib/redpanda/data + - ./bootstrap.yml:/etc/redpanda/.bootstrap.yaml + networks: + - redpanda_network + ports: + - 
28081:28081 + - 28082:28082 + - 29092:29092 + - 29644:9644 + depends_on: + - redpanda-0 + - minio + redpanda-2: + command: + - redpanda + - start + - --kafka-addr internal://0.0.0.0:9092,external://0.0.0.0:39092 + - --advertise-kafka-addr internal://redpanda-2:9092,external://localhost:39092 + - --pandaproxy-addr internal://0.0.0.0:8082,external://0.0.0.0:38082 + - --advertise-pandaproxy-addr internal://redpanda-2:8082,external://localhost:38082 + - --schema-registry-addr internal://0.0.0.0:8081,external://0.0.0.0:38081 + - --rpc-addr redpanda-2:33145 + - --advertise-rpc-addr redpanda-2:33145 + - --mode dev-container + - --smp 1 + - --default-log-level=info + - --seeds redpanda-0:33145 + image: docker.redpanda.com/redpandadata/redpanda:v25.2.7 + container_name: redpanda-2 + environment: + RP_BOOTSTRAP_USER: "superuser:secretpassword" + volumes: + - redpanda-2:/var/lib/redpanda/data + - ./bootstrap.yml:/etc/redpanda/.bootstrap.yaml + networks: + - redpanda_network + ports: + - 38081:38081 + - 38082:38082 + - 39092:39092 + - 39644:9644 + depends_on: + - redpanda-0 + - minio + #################### + # Redpanda Console # + #################### + console: + container_name: redpanda-console + image: docker.redpanda.com/redpandadata/console:v3.2.2 + networks: + - redpanda_network + entrypoint: /bin/sh + command: -c 'echo "$$CONSOLE_CONFIG_FILE" > /tmp/config.yml && /app/console' + volumes: + - ./config:/tmp/config/ + environment: + CONFIG_FILEPATH: ${CONFIG_FILEPATH:-/tmp/config.yml} + CONSOLE_CONFIG_FILE: | + # Configure a connection to the Redpanda cluster + # See https://docs.redpanda.com/current/console/config/connect-to-redpanda/ + kafka: + brokers: ["redpanda-0:9092"] + sasl: + enabled: true + impersonateUser: true + schemaRegistry: + enabled: true + urls: ["http://redpanda-0:8081","http://redpanda-1:8081","http://redpanda-2:8081"] + authentication: + impersonateUser: true + redpanda: + adminApi: + enabled: true + urls: 
["http://redpanda-0:9644","http://redpanda-1:9644","http://redpanda-2:9644"] + authentication: + basic: + username: superuser + password: secretpassword + impersonateUser: false + console: + # Configures Redpanda Console to fetch topic documentation from GitHub and display it in the UI. + # See https://docs.redpanda.com/current/console/config/topic-documentation/ + topicDocumentation: + enabled: true + git: + enabled: true + repository: + url: https://github.com/redpanda-data/docs + branch: main + baseDirectory: tests/docker-compose + authentication: + jwtSigningKey: vazxnT+ZHtxKslK6QlDGovcYnSjTk/lKMmZ+mHrBVE+YdVDkLgSuP6AszAKe9Gvq + basic: + enabled: true + authorization: + roleBindings: + - roleName: admin + users: + - loginType: basic + name: superuser + ports: + - 8080:8080 + depends_on: + redpanda-0: + condition: service_healthy + createtopic: + condition: service_completed_successfully + registerschema: + condition: service_completed_successfully + deploytransform: + condition: service_completed_successfully + #################### + # Redpanda Connect # + #################### + connect: + container_name: redpanda-connect + image: docker.redpanda.com/redpandadata/connect + networks: + - redpanda_network + entrypoint: /bin/sh + depends_on: + redpanda-0: + condition: service_healthy + command: -c 'echo "$$CONNECT_CFG_FILE" > /tmp/connect.yml; /redpanda-connect -c /tmp/connect.yml' + environment: + # This Redpanda Connect configuration creates fake data, + # processes it, and writes the output to a set of topics. + # + # Input: + # - Uses Redpanda Connect's generate input to generate fake data. + # See https://docs.redpanda.com/redpanda-connect/components/inputs/generate/ + # Pipeline: + # - Bloblang mapping to batch each input and map 1 message to 'logins' + # topic, and a random number (1-3) of messages to 'transactions' topic + # - Unarchive processor to parse the JSON array and extract each + # element into its own message.
+ # See https://docs.redpanda.com/redpanda-connect/guides/bloblang/about/ + # Output: + # - kafka_franz output to write the messages to the Redpanda brokers. + # See https://docs.redpanda.com/redpanda-connect/components/outputs/kafka_franz/ + CONNECT_CFG_FILE: | + input: + generate: + interval: 1s + mapping: | + let first_name = fake("first_name") + let last_name = fake("last_name") + + root.user_id = counter() + root.name = $$first_name + " " + $$last_name + root.email = ($$first_name.slice(0,1) + $$last_name + "@" + fake("domain_name")).lowercase() + root.ip = fake("ipv4") + root.login_time = now() + pipeline: + processors: + - mapping: | + root = range(0, random_int(min:2, max:4)).map_each(cust -> this) + - unarchive: + format: "json_array" + - mapping: | + if batch_index() == 0 { + meta topic = "logins" + root = this + } else { + meta topic = "transactions" + root.user_id = this.user_id + root.email = this.email + root.index = batch_index() - 1 + root.product_url = fake("url") + root.price = fake("amount_with_currency") + root.timestamp = now() + } + output: + kafka_franz: + seed_brokers: [ "redpanda-0:9092" ] + topic: $${! 
metadata("topic") } + sasl: + - mechanism: SCRAM-SHA-256 + password: secretpassword + username: superuser + #################### + # rpk container to create the edu-filtered-domains topic # + # See https://docs.redpanda.com/current/reference/rpk/rpk-topic/rpk-topic-create/ + #################### + createtopic: + command: + - topic + - create + - edu-filtered-domains + - -X user=superuser + - -X pass=secretpassword + - -X brokers=redpanda-0:9092 + image: docker.redpanda.com/redpandadata/redpanda:v25.2.7 + networks: + - redpanda_network + depends_on: + redpanda-0: + condition: service_healthy + #################### + # rpk container to register the schema # + # See https://docs.redpanda.com/current/manage/schema-reg/schema-reg-api/ + #################### + registerschema: + command: + - registry + - schema + - create + - transactions + - --schema + - /etc/redpanda/transactions-schema.json + - -X user=superuser + - -X pass=secretpassword + - -X registry.hosts=redpanda-0:8081 + image: docker.redpanda.com/redpandadata/redpanda:v25.2.7 + # Mount the local directory that contains your schema to the container. 
+ volumes: + - ./transactions-schema.json:/etc/redpanda/transactions-schema.json + networks: + - redpanda_network + depends_on: + redpanda-0: + condition: service_healthy + #################### + # rpk container to deploy a consumer group # + # See https://docs.redpanda.com/current/reference/rpk/rpk-topic/rpk-topic-consume/ + #################### + consumergroup: + command: + - topic + - consume + - transactions + - --group + - transactions-consumer + - -X user=superuser + - -X pass=secretpassword + - -X brokers=redpanda-0:9092 + image: docker.redpanda.com/redpandadata/redpanda:v25.2.7 + networks: + - redpanda_network + depends_on: + createtopic: + condition: service_completed_successfully + deploytransform: + condition: service_completed_successfully + #################### + # rpk container to deploy the pre-built data transform # + # See https://docs.redpanda.com/current/develop/data-transforms/deploy/ + #################### + deploytransform: + command: + - transform + - deploy + - --file=/etc/redpanda/regex.wasm + - --name=regex + - --input-topic=logins + - --output-topic=edu-filtered-domains + - --var=PATTERN="[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.edu" + - --var=MATCH_VALUE=true + - -X user=superuser + - -X pass=secretpassword + - -X admin.hosts=redpanda-0:9644 + image: docker.redpanda.com/redpandadata/redpanda:v25.2.7 + volumes: + - ./transform/regex.wasm:/etc/redpanda/regex.wasm + networks: + - redpanda_network + depends_on: + createtopic: + condition: service_completed_successfully + #################### + # MinIO for Tiered Storage # + # See https://min.io/ + # + # NOTE: MinIO is included in this quickstart for development and evaluation purposes only. + # It is not supported for production deployments of Redpanda. 
+ # + # For production environments, use one of the supported object storage providers: + # https://docs.redpanda.com/current/deploy/deployment-option/self-hosted/manual/production/requirements/#object-storage-providers-for-tiered-storage + #################### + minio: + container_name: minio + image: minio/minio:RELEASE.2025-05-24T17-08-30Z + command: server --console-address ":9001" /data + ports: + - 9000:9000 + - 9001:9001 + environment: + MINIO_ROOT_USER: minio + MINIO_ROOT_PASSWORD: redpandaTieredStorage7 + MINIO_SERVER_URL: "http://minio:9000" + MINIO_REGION_NAME: local + MINIO_DOMAIN: minio + volumes: + - minio:/data + networks: + redpanda_network: + aliases: + - redpanda.minio + healthcheck: + test: ["CMD", "curl", "-f", "http://localhost:9000/minio/health/ready"] + interval: 10s + timeout: 5s + retries: 3 + mc: + depends_on: + minio: + condition: service_healthy + image: minio/mc:RELEASE.2025-05-21T01-59-54Z + container_name: mc + networks: + - redpanda_network + environment: + - AWS_ACCESS_KEY_ID=minio + - AWS_SECRET_ACCESS_KEY=redpandaTieredStorage7 + - AWS_REGION=local + entrypoint: > + /bin/sh -c " + until (/usr/bin/mc alias set minio http://minio:9000 minio redpandaTieredStorage7) do echo '...waiting...' 
&& sleep 1; done; + /usr/bin/mc mb minio/redpanda; + /usr/bin/mc policy set public minio/redpanda; + tail -f /dev/null + " diff --git a/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/generate-profiles.yaml b/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/generate-profiles.yaml new file mode 100644 index 0000000..baf9a6f --- /dev/null +++ b/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/generate-profiles.yaml @@ -0,0 +1,77 @@ +input: + # Use the 'generate' input + # https://docs.redpanda.com/redpanda-connect/components/inputs/generate/ + generate: + # The interval at which new records are generated. + interval: 1s + # The mapping section defines how each generated record is structured. + # The language used here is called Bloblang. + # https://docs.redpanda.com/redpanda-connect/guides/bloblang/about/ + mapping: | + # Generate a fake first name using the 'first_name' faker function. + let first_name = fake("first_name") + + # Generate a fake last name using the 'last_name' faker function. + let last_name = fake("last_name") + + # Define possible subscription levels for users. + let subscription_levels = ["Free", "Basic", "Premium"] + + # Define possible notification channels for user preferences. + let notifications = ["email", "sms", "push" ] + + # Define supported languages for user preferences. + let languages = ["en", "es", "fr", "de", "zh", "jp"] + + # Assign a unique user ID using a UUID digit generator. + root.user_id = fake("uuid_digit") + + # Assign the generated first name to the 'first_name' field. 
+ root.first_name = $first_name + + # Assign the generated last name to the 'last_name' field. + root.last_name = $last_name + + # Construct the user's email by combining the first initial, last name, and a fake domain name. + # The email is converted to lowercase for consistency. + root.email = ($first_name.slice(0,1) + $last_name + "@" + fake("domain_name")).lowercase() + + # Assign a fake registration date using the 'date' faker function. + root.registration_date = fake("date") + + # Assign the current timestamp as the last login time. + root.last_login = now() + + # Randomly assign a subscription level by selecting an index from the 'subscription_levels' array. + root.subscription_level = $subscription_levels.index(random_int(min: 0, max: 2)) + + # Randomly assign a language preference by selecting an index from the 'languages' array. + root.preferences.language = $languages.index(random_int(min: 0, max: 5)) + + # Randomly assign a notification preference by selecting an index from the 'notifications' array. + root.preferences.notifications = $notifications.index(random_int(min: 0, max: 2)) +pipeline: + processors: + - mapping: | + # Set the target topic for the generated records to 'profiles'. + meta topic = "profiles" + + # Assign the entire record (root) to be sent to the specified topic. + root = this +output: + # Use the 'kafka_franz' output to send the result back to Redpanda + # https://docs.redpanda.com/redpanda-connect/components/outputs/kafka_franz/ + kafka_franz: + # Define the list of seed brokers for the Kafka cluster. + seed_brokers: [ "localhost:19092", "localhost:29092", "localhost:39092"] + # Dynamically assign the topic based on the metadata specified in the processors. + # In this case, it resolves to the 'profiles' topic. + topic: ${! metadata("topic") } + # Configure SASL authentication to securely connect to the Kafka brokers. + sasl: + - # Specify the SASL mechanism to use for authentication. 
+ mechanism: SCRAM-SHA-256 + # The password for the SASL authentication. + password: secretpassword + # The username for the SASL authentication. + username: superuser diff --git a/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/bootstrap.yml b/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/bootstrap.yml new file mode 100644 index 0000000..00f3003 --- /dev/null +++ b/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/bootstrap.yml @@ -0,0 +1,56 @@ +# ================================================================= +# This file defines initial cluster properties for a Redpanda cluster. +# Some of these settings are intended for quickstart development and evaluation +# and are not suitable for production environments. +# +# For more information on bootstrap files, see: +# https://docs.redpanda.com/current/deploy/deployment-option/self-hosted/manual/production/production-deployment/#configure-a-bootstrap-file +# ================================================================= + +# +# Enable SASL authentication for the Kafka and Admin APIs. 
+# https://docs.redpanda.com/current/reference/properties/cluster-properties/#admin_api_require_auth +admin_api_require_auth: true +# At least one superuser is required to be able to create other SASL users +# https://docs.redpanda.com/current/reference/properties/cluster-properties/#superusers +superusers: + - superuser +# https://docs.redpanda.com/current/reference/properties/cluster-properties/#enable_sasl +enable_sasl: true +# Allow topics to be created on first access. +# https://docs.redpanda.com/current/reference/properties/cluster-properties/#auto_create_topics_enabled +auto_create_topics_enabled: true +# Enable data transforms. +# https://docs.redpanda.com/current/develop/data-transforms/how-transforms-work/ +data_transforms_enabled: true +# Enable audit logging (enterprise feature). +# https://docs.redpanda.com/current/manage/audit-logging/ +audit_enabled: true +# Enable Tiered Storage (enterprise feature). +# https://docs.redpanda.com/current/manage/tiered-storage/ +cloud_storage_enabled: true +cloud_storage_region: local +cloud_storage_access_key: minio +cloud_storage_secret_key: redpandaTieredStorage7 +cloud_storage_api_endpoint: minio +cloud_storage_api_endpoint_port: 9000 +cloud_storage_disable_tls: true +cloud_storage_bucket: redpanda +# Forces segments to be uploaded to Tiered Storage faster for the purposes of the quickstart +# https://docs.redpanda.com/current/reference/properties/object-storage-properties/#cloud_storage_segment_max_upload_interval_sec +cloud_storage_segment_max_upload_interval_sec: 60 +# Continuous Data Balancing (enterprise feature) continuously monitors your node and rack availability and disk usage. This enables self-healing clusters that dynamically balance partitions, ensuring smooth operations and optimal cluster performance. +# https://docs.redpanda.com/current/manage/cluster-maintenance/continuous-data-balancing/ +partition_autobalancing_mode: continuous +# Enable Redpanda to collect consumer group metrics. 
+# https://docs.redpanda.com/current/reference/properties/cluster-properties/#enable_consumer_group_metrics +enable_consumer_group_metrics: + - "group" + - "partition" + - "consumer_lag" +# Lower the interval for the quickstart +# https://docs.redpanda.com/current/reference/properties/cluster-properties/#consumer_group_lag_collection_interval_sec +consumer_group_lag_collection_interval_sec: 60 +# Enable Redpanda to collect host metrics. +# https://docs.redpanda.com/current/reference/properties/cluster-properties/#enable_host_metrics +enable_host_metrics: true \ No newline at end of file diff --git a/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/docker-compose.yml b/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/docker-compose.yml new file mode 100644 index 0000000..9b636cb --- /dev/null +++ b/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/docker-compose.yml @@ -0,0 +1,403 @@ +name: redpanda-quickstart-multi-broker +networks: + redpanda_network: + driver: bridge +volumes: + redpanda-0: null + redpanda-1: null + redpanda-2: null + minio: null +services: + ################## + # Redpanda Brokers # + ################## + redpanda-0: + command: + - redpanda + - start + - --kafka-addr internal://0.0.0.0:9092,external://0.0.0.0:19092 + # Address the broker advertises to clients that connect to the Kafka API. + # Use the internal addresses to connect to the Redpanda brokers + # from inside the same Docker network. 
+ # Use the external addresses to connect to the Redpanda brokers + # from outside the Docker network. + - --advertise-kafka-addr internal://redpanda-0:9092,external://localhost:19092 + - --pandaproxy-addr internal://0.0.0.0:8082,external://0.0.0.0:18082 + # Address the broker advertises to clients that connect to the HTTP Proxy. + - --advertise-pandaproxy-addr internal://redpanda-0:8082,external://localhost:18082 + - --schema-registry-addr internal://0.0.0.0:8081,external://0.0.0.0:18081 + # Redpanda brokers use the RPC API to communicate with each other internally. + - --rpc-addr redpanda-0:33145 + - --advertise-rpc-addr redpanda-0:33145 + # Mode dev-container uses well-known configuration properties for development in containers. + - --mode dev-container + # Tells Seastar (the framework Redpanda uses under the hood) to use 1 core on the system. + - --smp 1 + - --default-log-level=info + image: docker.redpanda.com/redpandadata/redpanda:v25.2.7 + container_name: redpanda-0 + # Sets the username and password of the bootstrap SCRAM superuser + # See https://docs.redpanda.com/current/deploy/deployment-option/self-hosted/manual/production/production-deployment/#bootstrap-a-user-account + environment: + RP_BOOTSTRAP_USER: "superuser:secretpassword" + volumes: + - redpanda-0:/var/lib/redpanda/data + - ./bootstrap.yml:/etc/redpanda/.bootstrap.yaml + networks: + - redpanda_network + ports: + - 18081:18081 + - 18082:18082 + - 19092:19092 + - 19644:9644 + healthcheck: + test: ["CMD", "rpk", "cluster", "info", "-X", "user=superuser", "-X", "pass=secretpassword"] + interval: 10s + timeout: 15s + retries: 10 + depends_on: + minio: + condition: service_healthy + redpanda-1: + command: + - redpanda + - start + - --kafka-addr internal://0.0.0.0:9092,external://0.0.0.0:29092 + - --advertise-kafka-addr internal://redpanda-1:9092,external://localhost:29092 + - --pandaproxy-addr internal://0.0.0.0:8082,external://0.0.0.0:28082 + - --advertise-pandaproxy-addr 
internal://redpanda-1:8082,external://localhost:28082 + - --schema-registry-addr internal://0.0.0.0:8081,external://0.0.0.0:28081 + - --rpc-addr redpanda-1:33145 + - --advertise-rpc-addr redpanda-1:33145 + - --mode dev-container + - --smp 1 + - --default-log-level=info + - --seeds redpanda-0:33145 + image: docker.redpanda.com/redpandadata/redpanda:v25.2.7 + container_name: redpanda-1 + environment: + RP_BOOTSTRAP_USER: "superuser:secretpassword" + volumes: + - redpanda-1:/var/lib/redpanda/data + - ./bootstrap.yml:/etc/redpanda/.bootstrap.yaml + networks: + - redpanda_network + ports: + - 28081:28081 + - 28082:28082 + - 29092:29092 + - 29644:9644 + depends_on: + - redpanda-0 + - minio + redpanda-2: + command: + - redpanda + - start + - --kafka-addr internal://0.0.0.0:9092,external://0.0.0.0:39092 + - --advertise-kafka-addr internal://redpanda-2:9092,external://localhost:39092 + - --pandaproxy-addr internal://0.0.0.0:8082,external://0.0.0.0:38082 + - --advertise-pandaproxy-addr internal://redpanda-2:8082,external://localhost:38082 + - --schema-registry-addr internal://0.0.0.0:8081,external://0.0.0.0:38081 + - --rpc-addr redpanda-2:33145 + - --advertise-rpc-addr redpanda-2:33145 + - --mode dev-container + - --smp 1 + - --default-log-level=info + - --seeds redpanda-0:33145 + image: docker.redpanda.com/redpandadata/redpanda:v25.2.7 + container_name: redpanda-2 + environment: + RP_BOOTSTRAP_USER: "superuser:secretpassword" + volumes: + - redpanda-2:/var/lib/redpanda/data + - ./bootstrap.yml:/etc/redpanda/.bootstrap.yaml + networks: + - redpanda_network + ports: + - 38081:38081 + - 38082:38082 + - 39092:39092 + - 39644:9644 + depends_on: + - redpanda-0 + - minio + #################### + # Redpanda Console # + #################### + console: + container_name: redpanda-console + image: docker.redpanda.com/redpandadata/console:v3.2.2 + networks: + - redpanda_network + entrypoint: /bin/sh + command: -c 'echo "$$CONSOLE_CONFIG_FILE" > /tmp/config.yml && /app/console' + 
volumes: + - ./config:/tmp/config/ + environment: + CONFIG_FILEPATH: ${CONFIG_FILEPATH:-/tmp/config.yml} + CONSOLE_CONFIG_FILE: | + # Configure a connection to the Redpanda cluster + # See https://docs.redpanda.com/current/console/config/connect-to-redpanda/ + kafka: + brokers: ["redpanda-0:9092"] + sasl: + enabled: true + impersonateUser: true + schemaRegistry: + enabled: true + urls: ["http://redpanda-0:8081","http://redpanda-1:8081","http://redpanda-2:8081"] + authentication: + impersonateUser: true + redpanda: + adminApi: + enabled: true + urls: ["http://redpanda-0:9644","http://redpanda-1:9644","http://redpanda-2:9644"] + authentication: + basic: + username: superuser + password: secretpassword + impersonateUser: false + console: + # Configures Redpanda Console to fetch topic documentation from GitHub and display it in the UI. + # See https://docs.redpanda.com/current/console/config/topic-documentation/ + topicDocumentation: + enabled: true + git: + enabled: true + repository: + url: https://github.com/redpanda-data/docs + branch: main + baseDirectory: tests/docker-compose + authentication: + jwtSigningKey: vazxnT+ZHtxKslK6QlDGovcYnSjTk/lKMmZ+mHrBVE+YdVDkLgSuP6AszAKe9Gvq + basic: + enabled: true + authorization: + roleBindings: + - roleName: admin + users: + - loginType: basic + name: superuser + ports: + - 8080:8080 + depends_on: + redpanda-0: + condition: service_healthy + createtopic: + condition: service_completed_successfully + registerschema: + condition: service_completed_successfully + deploytransform: + condition: service_completed_successfully + #################### + # Redpanda Connect # + #################### + connect: + container_name: redpanda-connect + image: docker.redpanda.com/redpandadata/connect + networks: + - redpanda_network + entrypoint: /bin/sh + depends_on: + redpanda-0: + condition: service_healthy + command: -c 'echo "$$CONNECT_CFG_FILE" > /tmp/connect.yml; /redpanda-connect -c /tmp/connect.yml' + environment: + # This Redpanda 
Connect configuration creates fake data, + # processes it, and writes the output to a set of topics. + # + # Input: + # - Uses Redpanda Connect's generate input to generate fake data. + # See https://docs.redpanda.com/redpanda-connect/components/inputs/generate/ + # Pipeline: + # - Bloblang mapping to batch each input and map 1 message to 'logins' + # topic, and a random number (1-3) of messages to 'transactions' topic + # - Unarchive processor to parse the JSON array and extract each + # element into its own message. + # See https://docs.redpanda.com/redpanda-connect/guides/bloblang/about/ + # Output: + # - kafka_franz output to write the messages to the Redpanda brokers. + # See https://docs.redpanda.com/redpanda-connect/components/outputs/kafka_franz/ + CONNECT_CFG_FILE: | + input: + generate: + interval: 1s + mapping: | + let first_name = fake("first_name") + let last_name = fake("last_name") + + root.user_id = counter() + root.name = $$first_name + " " + $$last_name + root.email = ($$first_name.slice(0,1) + $$last_name + "@" + fake("domain_name")).lowercase() + root.ip = fake("ipv4") + root.login_time = now() + pipeline: + processors: + - mapping: | + root = range(0, random_int(min:2, max:4)).map_each(cust -> this) + - unarchive: + format: "json_array" + - mapping: | + if batch_index() == 0 { + meta topic = "logins" + root = this + } else { + meta topic = "transactions" + root.user_id = this.user_id + root.email = this.email + root.index = batch_index() - 1 + root.product_url = fake("url") + root.price = fake("amount_with_currency") + root.timestamp = now() + } + output: + kafka_franz: + seed_brokers: [ "redpanda-0:9092" ] + topic: $${!
metadata("topic") } + sasl: + - mechanism: SCRAM-SHA-256 + password: secretpassword + username: superuser + #################### + # rpk container to create the edu-filtered-domains topic # + # See https://docs.redpanda.com/current/reference/rpk/rpk-topic/rpk-topic-create/ + #################### + createtopic: + command: + - topic + - create + - edu-filtered-domains + - -X user=superuser + - -X pass=secretpassword + - -X brokers=redpanda-0:9092 + image: docker.redpanda.com/redpandadata/redpanda:v25.2.7 + networks: + - redpanda_network + depends_on: + redpanda-0: + condition: service_healthy + #################### + # rpk container to register the schema # + # See https://docs.redpanda.com/current/manage/schema-reg/schema-reg-api/ + #################### + registerschema: + command: + - registry + - schema + - create + - transactions + - --schema + - /etc/redpanda/transactions-schema.json + - -X user=superuser + - -X pass=secretpassword + - -X registry.hosts=redpanda-0:8081 + image: docker.redpanda.com/redpandadata/redpanda:v25.2.7 + # Mount the local schema file into the container.
+ volumes: + - ./transactions-schema.json:/etc/redpanda/transactions-schema.json + networks: + - redpanda_network + depends_on: + redpanda-0: + condition: service_healthy + #################### + # rpk container to deploy a consumer group # + # See https://docs.redpanda.com/current/reference/rpk/rpk-topic/rpk-topic-consume/ + #################### + consumergroup: + command: + - topic + - consume + - transactions + - --group + - transactions-consumer + - -X user=superuser + - -X pass=secretpassword + - -X brokers=redpanda-0:9092 + image: docker.redpanda.com/redpandadata/redpanda:v25.2.7 + networks: + - redpanda_network + depends_on: + createtopic: + condition: service_completed_successfully + deploytransform: + condition: service_completed_successfully + #################### + # rpk container to deploy the pre-built data transform # + # See https://docs.redpanda.com/current/develop/data-transforms/deploy/ + #################### + deploytransform: + command: + - transform + - deploy + - --file=/etc/redpanda/regex.wasm + - --name=regex + - --input-topic=logins + - --output-topic=edu-filtered-domains + - --var=PATTERN="[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.edu" + - --var=MATCH_VALUE=true + - -X user=superuser + - -X pass=secretpassword + - -X admin.hosts=redpanda-0:9644 + image: docker.redpanda.com/redpandadata/redpanda:v25.2.7 + volumes: + - ./transform/regex.wasm:/etc/redpanda/regex.wasm + networks: + - redpanda_network + depends_on: + createtopic: + condition: service_completed_successfully + #################### + # MinIO for Tiered Storage # + # See https://min.io/ + # + # NOTE: MinIO is included in this quickstart for development and evaluation purposes only. + # It is not supported for production deployments of Redpanda. 
+ # + # For production environments, use one of the supported object storage providers: + # https://docs.redpanda.com/current/deploy/deployment-option/self-hosted/manual/production/requirements/#object-storage-providers-for-tiered-storage + #################### + minio: + container_name: minio + image: minio/minio:RELEASE.2025-05-24T17-08-30Z + command: server --console-address ":9001" /data + ports: + - 9000:9000 + - 9001:9001 + environment: + MINIO_ROOT_USER: minio + MINIO_ROOT_PASSWORD: redpandaTieredStorage7 + MINIO_SERVER_URL: "http://minio:9000" + MINIO_REGION_NAME: local + MINIO_DOMAIN: minio + volumes: + - minio:/data + networks: + redpanda_network: + aliases: + - redpanda.minio + healthcheck: + test: ["CMD", "curl", "-f", "http://localhost:9000/minio/health/ready"] + interval: 10s + timeout: 5s + retries: 3 + mc: + depends_on: + minio: + condition: service_healthy + image: minio/mc:RELEASE.2025-05-21T01-59-54Z + container_name: mc + networks: + - redpanda_network + environment: + - AWS_ACCESS_KEY_ID=minio + - AWS_SECRET_ACCESS_KEY=redpandaTieredStorage7 + - AWS_REGION=local + entrypoint: > + /bin/sh -c " + until (/usr/bin/mc alias set minio http://minio:9000 minio redpandaTieredStorage7) do echo '...waiting...' 
&& sleep 1; done; + /usr/bin/mc mb minio/redpanda; + /usr/bin/mc policy set public minio/redpanda; + tail -f /dev/null + " diff --git a/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/generate-profiles.yaml b/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/generate-profiles.yaml new file mode 100644 index 0000000..baf9a6f --- /dev/null +++ b/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/generate-profiles.yaml @@ -0,0 +1,77 @@ +input: + # Use the 'generate' input + # https://docs.redpanda.com/redpanda-connect/components/inputs/generate/ + generate: + # The interval at which new records are generated. + interval: 1s + # The mapping section defines how each generated record is structured. + # The language used here is called Bloblang. + # https://docs.redpanda.com/redpanda-connect/guides/bloblang/about/ + mapping: | + # Generate a fake first name using the 'first_name' faker function. + let first_name = fake("first_name") + + # Generate a fake last name using the 'last_name' faker function. + let last_name = fake("last_name") + + # Define possible subscription levels for users. + let subscription_levels = ["Free", "Basic", "Premium"] + + # Define possible notification channels for user preferences. + let notifications = ["email", "sms", "push" ] + + # Define supported languages for user preferences. + let languages = ["en", "es", "fr", "de", "zh", "jp"] + + # Assign a unique user ID using a UUID digit generator. 
+ root.user_id = fake("uuid_digit") + + # Assign the generated first name to the 'first_name' field. + root.first_name = $first_name + + # Assign the generated last name to the 'last_name' field. + root.last_name = $last_name + + # Construct the user's email by combining the first initial, last name, and a fake domain name. + # The email is converted to lowercase for consistency. + root.email = ($first_name.slice(0,1) + $last_name + "@" + fake("domain_name")).lowercase() + + # Assign a fake registration date using the 'date' faker function. + root.registration_date = fake("date") + + # Assign the current timestamp as the last login time. + root.last_login = now() + + # Randomly assign a subscription level by selecting an index from the 'subscription_levels' array. + root.subscription_level = $subscription_levels.index(random_int(min: 0, max: 2)) + + # Randomly assign a language preference by selecting an index from the 'languages' array. + root.preferences.language = $languages.index(random_int(min: 0, max: 5)) + + # Randomly assign a notification preference by selecting an index from the 'notifications' array. + root.preferences.notifications = $notifications.index(random_int(min: 0, max: 2)) +pipeline: + processors: + - mapping: | + # Set the target topic for the generated records to 'profiles'. + meta topic = "profiles" + + # Assign the entire record (root) to be sent to the specified topic. + root = this +output: + # Use the 'kafka_franz' output to send the result back to Redpanda + # https://docs.redpanda.com/redpanda-connect/components/outputs/kafka_franz/ + kafka_franz: + # Define the list of seed brokers for the Kafka cluster. + seed_brokers: [ "localhost:19092", "localhost:29092", "localhost:39092"] + # Dynamically assign the topic based on the metadata specified in the processors. + # In this case, it resolves to the 'profiles' topic. + topic: ${! metadata("topic") } + # Configure SASL authentication to securely connect to the Kafka brokers. 
+ sasl: + - # Specify the SASL mechanism to use for authentication. + mechanism: SCRAM-SHA-256 + # The password for the SASL authentication. + password: secretpassword + # The username for the SASL authentication. + username: superuser diff --git a/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/bootstrap.yml b/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/bootstrap.yml new file mode 100644 index 0000000..00f3003 --- /dev/null +++ b/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/bootstrap.yml @@ -0,0 +1,56 @@ +# ================================================================= +# This file defines initial cluster properties for a Redpanda cluster. +# Some of these settings are intended for quickstart development and evaluation +# and are not suitable for production environments. +# +# For more information on bootstrap files, see: +# https://docs.redpanda.com/current/deploy/deployment-option/self-hosted/manual/production/production-deployment/#configure-a-bootstrap-file +# ================================================================= + +# +# Enable SASL authentication for the Kafka and Admin APIs. 
+# https://docs.redpanda.com/current/reference/properties/cluster-properties/#admin_api_require_auth +admin_api_require_auth: true +# At least one superuser is required to be able to create other SASL users +# https://docs.redpanda.com/current/reference/properties/cluster-properties/#superusers +superusers: + - superuser +# https://docs.redpanda.com/current/reference/properties/cluster-properties/#enable_sasl +enable_sasl: true +# Allow topics to be created on first access. +# https://docs.redpanda.com/current/reference/properties/cluster-properties/#auto_create_topics_enabled +auto_create_topics_enabled: true +# Enable data transforms. +# https://docs.redpanda.com/current/develop/data-transforms/how-transforms-work/ +data_transforms_enabled: true +# Enable audit logging (enterprise feature). +# https://docs.redpanda.com/current/manage/audit-logging/ +audit_enabled: true +# Enable Tiered Storage (enterprise feature). +# https://docs.redpanda.com/current/manage/tiered-storage/ +cloud_storage_enabled: true +cloud_storage_region: local +cloud_storage_access_key: minio +cloud_storage_secret_key: redpandaTieredStorage7 +cloud_storage_api_endpoint: minio +cloud_storage_api_endpoint_port: 9000 +cloud_storage_disable_tls: true +cloud_storage_bucket: redpanda +# Forces segments to be uploaded to Tiered Storage faster for the purposes of the quickstart +# https://docs.redpanda.com/current/reference/properties/object-storage-properties/#cloud_storage_segment_max_upload_interval_sec +cloud_storage_segment_max_upload_interval_sec: 60 +# Continuous Data Balancing (enterprise feature) continuously monitors your node and rack availability and disk usage. This enables self-healing clusters that dynamically balance partitions, ensuring smooth operations and optimal cluster performance. +# https://docs.redpanda.com/current/manage/cluster-maintenance/continuous-data-balancing/ +partition_autobalancing_mode: continuous +# Enable Redpanda to collect consumer group metrics. 
+# https://docs.redpanda.com/current/reference/properties/cluster-properties/#enable_consumer_group_metrics +enable_consumer_group_metrics: + - "group" + - "partition" + - "consumer_lag" +# Lower the interval for the quickstart +# https://docs.redpanda.com/current/reference/properties/cluster-properties/#consumer_group_lag_collection_interval_sec +consumer_group_lag_collection_interval_sec: 60 +# Enable Redpanda to collect host metrics. +# https://docs.redpanda.com/current/reference/properties/cluster-properties/#enable_host_metrics +enable_host_metrics: true \ No newline at end of file diff --git a/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/docker-compose.yml b/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/docker-compose.yml new file mode 100644 index 0000000..9b636cb --- /dev/null +++ b/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/docker-compose.yml @@ -0,0 +1,403 @@ +name: redpanda-quickstart-multi-broker +networks: + redpanda_network: + driver: bridge +volumes: + redpanda-0: null + redpanda-1: null + redpanda-2: null + minio: null +services: + ################## + # Redpanda Brokers # + ################## + redpanda-0: + command: + - redpanda + - start + - --kafka-addr internal://0.0.0.0:9092,external://0.0.0.0:19092 + # Address the broker advertises to clients that connect to the Kafka API. 
+ # Use the internal addresses to connect to the Redpanda brokers + # from inside the same Docker network. + # Use the external addresses to connect to the Redpanda brokers + # from outside the Docker network. + - --advertise-kafka-addr internal://redpanda-0:9092,external://localhost:19092 + - --pandaproxy-addr internal://0.0.0.0:8082,external://0.0.0.0:18082 + # Address the broker advertises to clients that connect to the HTTP Proxy. + - --advertise-pandaproxy-addr internal://redpanda-0:8082,external://localhost:18082 + - --schema-registry-addr internal://0.0.0.0:8081,external://0.0.0.0:18081 + # Redpanda brokers use the RPC API to communicate with each other internally. + - --rpc-addr redpanda-0:33145 + - --advertise-rpc-addr redpanda-0:33145 + # Mode dev-container uses well-known configuration properties for development in containers. + - --mode dev-container + # Tells Seastar (the framework Redpanda uses under the hood) to use 1 core on the system. + - --smp 1 + - --default-log-level=info + image: docker.redpanda.com/redpandadata/redpanda:v25.2.7 + container_name: redpanda-0 + # Sets the username and password of the bootstrap SCRAM superuser + # See https://docs.redpanda.com/current/deploy/deployment-option/self-hosted/manual/production/production-deployment/#bootstrap-a-user-account + environment: + RP_BOOTSTRAP_USER: "superuser:secretpassword" + volumes: + - redpanda-0:/var/lib/redpanda/data + - ./bootstrap.yml:/etc/redpanda/.bootstrap.yaml + networks: + - redpanda_network + ports: + - 18081:18081 + - 18082:18082 + - 19092:19092 + - 19644:9644 + healthcheck: + test: ["CMD", "rpk", "cluster", "info", "-X", "user=superuser", "-X", "pass=secretpassword"] + interval: 10s + timeout: 15s + retries: 10 + depends_on: + minio: + condition: service_healthy + redpanda-1: + command: + - redpanda + - start + - --kafka-addr internal://0.0.0.0:9092,external://0.0.0.0:29092 + - --advertise-kafka-addr internal://redpanda-1:9092,external://localhost:29092 + - --pandaproxy-addr 
internal://0.0.0.0:8082,external://0.0.0.0:28082 + - --advertise-pandaproxy-addr internal://redpanda-1:8082,external://localhost:28082 + - --schema-registry-addr internal://0.0.0.0:8081,external://0.0.0.0:28081 + - --rpc-addr redpanda-1:33145 + - --advertise-rpc-addr redpanda-1:33145 + - --mode dev-container + - --smp 1 + - --default-log-level=info + - --seeds redpanda-0:33145 + image: docker.redpanda.com/redpandadata/redpanda:v25.2.7 + container_name: redpanda-1 + environment: + RP_BOOTSTRAP_USER: "superuser:secretpassword" + volumes: + - redpanda-1:/var/lib/redpanda/data + - ./bootstrap.yml:/etc/redpanda/.bootstrap.yaml + networks: + - redpanda_network + ports: + - 28081:28081 + - 28082:28082 + - 29092:29092 + - 29644:9644 + depends_on: + - redpanda-0 + - minio + redpanda-2: + command: + - redpanda + - start + - --kafka-addr internal://0.0.0.0:9092,external://0.0.0.0:39092 + - --advertise-kafka-addr internal://redpanda-2:9092,external://localhost:39092 + - --pandaproxy-addr internal://0.0.0.0:8082,external://0.0.0.0:38082 + - --advertise-pandaproxy-addr internal://redpanda-2:8082,external://localhost:38082 + - --schema-registry-addr internal://0.0.0.0:8081,external://0.0.0.0:38081 + - --rpc-addr redpanda-2:33145 + - --advertise-rpc-addr redpanda-2:33145 + - --mode dev-container + - --smp 1 + - --default-log-level=info + - --seeds redpanda-0:33145 + image: docker.redpanda.com/redpandadata/redpanda:v25.2.7 + container_name: redpanda-2 + environment: + RP_BOOTSTRAP_USER: "superuser:secretpassword" + volumes: + - redpanda-2:/var/lib/redpanda/data + - ./bootstrap.yml:/etc/redpanda/.bootstrap.yaml + networks: + - redpanda_network + ports: + - 38081:38081 + - 38082:38082 + - 39092:39092 + - 39644:9644 + depends_on: + - redpanda-0 + - minio + #################### + # Redpanda Console # + #################### + console: + container_name: redpanda-console + image: docker.redpanda.com/redpandadata/console:v3.2.2 + networks: + - redpanda_network + entrypoint: /bin/sh + 
command: -c 'echo "$$CONSOLE_CONFIG_FILE" > /tmp/config.yml && /app/console' + volumes: + - ./config:/tmp/config/ + environment: + CONFIG_FILEPATH: ${CONFIG_FILEPATH:-/tmp/config.yml} + CONSOLE_CONFIG_FILE: | + # Configure a connection to the Redpanda cluster + # See https://docs.redpanda.com/current/console/config/connect-to-redpanda/ + kafka: + brokers: ["redpanda-0:9092"] + sasl: + enabled: true + impersonateUser: true + schemaRegistry: + enabled: true + urls: ["http://redpanda-0:8081","http://redpanda-1:8081","http://redpanda-2:8081"] + authentication: + impersonateUser: true + redpanda: + adminApi: + enabled: true + urls: ["http://redpanda-0:9644","http://redpanda-1:9644","http://redpanda-2:9644"] + authentication: + basic: + username: superuser + password: secretpassword + impersonateUser: false + console: + # Configures Redpanda Console to fetch topic documentation from GitHub and display it in the UI. + # See https://docs.redpanda.com/current/console/config/topic-documentation/ + topicDocumentation: + enabled: true + git: + enabled: true + repository: + url: https://github.com/redpanda-data/docs + branch: main + baseDirectory: tests/docker-compose + authentication: + jwtSigningKey: vazxnT+ZHtxKslK6QlDGovcYnSjTk/lKMmZ+mHrBVE+YdVDkLgSuP6AszAKe9Gvq + basic: + enabled: true + authorization: + roleBindings: + - roleName: admin + users: + - loginType: basic + name: superuser + ports: + - 8080:8080 + depends_on: + redpanda-0: + condition: service_healthy + createtopic: + condition: service_completed_successfully + registerschema: + condition: service_completed_successfully + deploytransform: + condition: service_completed_successfully + #################### + # Redpanda Connect # + #################### + connect: + container_name: redpanda-connect + image: docker.redpanda.com/redpandadata/connect + networks: + - redpanda_network + entrypoint: /bin/sh + depends_on: + redpanda-0: + condition: service_healthy + command: -c 'echo "$$CONNECT_CFG_FILE" > 
/tmp/connect.yml; /redpanda-connect -c /tmp/connect.yml' + environment: + # This Redpanda Connect configuration creates fake data, + # processes it, and writes the output to a set of topics. + # + # Input: + # - Uses Redpanda Connect's generate input to generate fake data. + # See https://docs.redpanda.com/redpanda-connect/components/inputs/generate/ + # Pipeline: + # - Bloblang mapping to batch each input and map 1 message to 'logins' + # topic, and a random number (1-3) of messages to 'transactions' topic + # - Unarchive processor to parse the JSON array and extract each + # element into its own message. + # See https://docs.redpanda.com/redpanda-connect/guides/bloblang/about/ + # Output: + # - kafka_franz output to write the messages to the Redpanda brokers. + # See https://docs.redpanda.com/redpanda-connect/components/outputs/kafka_franz/ + CONNECT_CFG_FILE: | + input: + generate: + interval: 1s + mapping: | + let first_name = fake("first_name") + let last_name = fake("last_name") + + root.user_id = counter() + root.name = $$first_name + " " + $$last_name + root.email = ($$first_name.slice(0,1) + $$last_name + "@" + fake("domain_name")).lowercase() + root.ip = fake("ipv4") + root.login_time = now() + pipeline: + processors: + - mapping: | + root = range(0, random_int(min:2, max:4)).map_each(cust -> this) + - unarchive: + format: "json_array" + - mapping: | + if batch_index() == 0 { + meta topic = "logins" + root = this + } else { + meta topic = "transactions" + root.user_id = this.user_id + root.email = this.email + root.index = batch_index() - 1 + root.product_url = fake("url") + root.price = fake("amount_with_currency") + root.timestamp = now() + } + output: + kafka_franz: + seed_brokers: [ "redpanda-0:9092" ] + topic: $${!
metadata("topic") } + sasl: + - mechanism: SCRAM-SHA-256 + password: secretpassword + username: superuser + #################### + # rpk container to create the edu-filtered-domains topic # + # See https://docs.redpanda.com/current/reference/rpk/rpk-topic/rpk-topic-create/ + #################### + createtopic: + command: + - topic + - create + - edu-filtered-domains + - -X user=superuser + - -X pass=secretpassword + - -X brokers=redpanda-0:9092 + image: docker.redpanda.com/redpandadata/redpanda:v25.2.7 + networks: + - redpanda_network + depends_on: + redpanda-0: + condition: service_healthy + #################### + # rpk container to register the schema # + # See https://docs.redpanda.com/current/manage/schema-reg/schema-reg-api/ + #################### + registerschema: + command: + - registry + - schema + - create + - transactions + - --schema + - /etc/redpanda/transactions-schema.json + - -X user=superuser + - -X pass=secretpassword + - -X registry.hosts=redpanda-0:8081 + image: docker.redpanda.com/redpandadata/redpanda:v25.2.7 + # Mount the local schema file into the container.
+ volumes: + - ./transactions-schema.json:/etc/redpanda/transactions-schema.json + networks: + - redpanda_network + depends_on: + redpanda-0: + condition: service_healthy + #################### + # rpk container to deploy a consumer group # + # See https://docs.redpanda.com/current/reference/rpk/rpk-topic/rpk-topic-consume/ + #################### + consumergroup: + command: + - topic + - consume + - transactions + - --group + - transactions-consumer + - -X user=superuser + - -X pass=secretpassword + - -X brokers=redpanda-0:9092 + image: docker.redpanda.com/redpandadata/redpanda:v25.2.7 + networks: + - redpanda_network + depends_on: + createtopic: + condition: service_completed_successfully + deploytransform: + condition: service_completed_successfully + #################### + # rpk container to deploy the pre-built data transform # + # See https://docs.redpanda.com/current/develop/data-transforms/deploy/ + #################### + deploytransform: + command: + - transform + - deploy + - --file=/etc/redpanda/regex.wasm + - --name=regex + - --input-topic=logins + - --output-topic=edu-filtered-domains + - --var=PATTERN="[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.edu" + - --var=MATCH_VALUE=true + - -X user=superuser + - -X pass=secretpassword + - -X admin.hosts=redpanda-0:9644 + image: docker.redpanda.com/redpandadata/redpanda:v25.2.7 + volumes: + - ./transform/regex.wasm:/etc/redpanda/regex.wasm + networks: + - redpanda_network + depends_on: + createtopic: + condition: service_completed_successfully + #################### + # MinIO for Tiered Storage # + # See https://min.io/ + # + # NOTE: MinIO is included in this quickstart for development and evaluation purposes only. + # It is not supported for production deployments of Redpanda. 
+ # + # For production environments, use one of the supported object storage providers: + # https://docs.redpanda.com/current/deploy/deployment-option/self-hosted/manual/production/requirements/#object-storage-providers-for-tiered-storage + #################### + minio: + container_name: minio + image: minio/minio:RELEASE.2025-05-24T17-08-30Z + command: server --console-address ":9001" /data + ports: + - 9000:9000 + - 9001:9001 + environment: + MINIO_ROOT_USER: minio + MINIO_ROOT_PASSWORD: redpandaTieredStorage7 + MINIO_SERVER_URL: "http://minio:9000" + MINIO_REGION_NAME: local + MINIO_DOMAIN: minio + volumes: + - minio:/data + networks: + redpanda_network: + aliases: + - redpanda.minio + healthcheck: + test: ["CMD", "curl", "-f", "http://localhost:9000/minio/health/ready"] + interval: 10s + timeout: 5s + retries: 3 + mc: + depends_on: + minio: + condition: service_healthy + image: minio/mc:RELEASE.2025-05-21T01-59-54Z + container_name: mc + networks: + - redpanda_network + environment: + - AWS_ACCESS_KEY_ID=minio + - AWS_SECRET_ACCESS_KEY=redpandaTieredStorage7 + - AWS_REGION=local + entrypoint: > + /bin/sh -c " + until (/usr/bin/mc alias set minio http://minio:9000 minio redpandaTieredStorage7) do echo '...waiting...' 
&& sleep 1; done; + /usr/bin/mc mb minio/redpanda; + /usr/bin/mc policy set public minio/redpanda; + tail -f /dev/null + " diff --git a/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/generate-profiles.yaml b/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/generate-profiles.yaml new file mode 100644 index 0000000..baf9a6f --- /dev/null +++ b/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/generate-profiles.yaml @@ -0,0 +1,77 @@ +input: + # Use the 'generate' input + # https://docs.redpanda.com/redpanda-connect/components/inputs/generate/ + generate: + # The interval at which new records are generated. + interval: 1s + # The mapping section defines how each generated record is structured. + # The language used here is called Bloblang. + # https://docs.redpanda.com/redpanda-connect/guides/bloblang/about/ + mapping: | + # Generate a fake first name using the 'first_name' faker function. + let first_name = fake("first_name") + + # Generate a fake last name using the 'last_name' faker function. + let last_name = fake("last_name") + + # Define possible subscription levels for users. + let subscription_levels = ["Free", "Basic", "Premium"] + + # Define possible notification channels for user preferences. + let notifications = ["email", "sms", "push" ] + + # Define supported languages for user preferences. 
+ let languages = ["en", "es", "fr", "de", "zh", "jp"] + + # Assign a unique user ID using a UUID digit generator. + root.user_id = fake("uuid_digit") + + # Assign the generated first name to the 'first_name' field. + root.first_name = $first_name + + # Assign the generated last name to the 'last_name' field. + root.last_name = $last_name + + # Construct the user's email by combining the first initial, last name, and a fake domain name. + # The email is converted to lowercase for consistency. + root.email = ($first_name.slice(0,1) + $last_name + "@" + fake("domain_name")).lowercase() + + # Assign a fake registration date using the 'date' faker function. + root.registration_date = fake("date") + + # Assign the current timestamp as the last login time. + root.last_login = now() + + # Randomly assign a subscription level by selecting an index from the 'subscription_levels' array. + root.subscription_level = $subscription_levels.index(random_int(min: 0, max: 2)) + + # Randomly assign a language preference by selecting an index from the 'languages' array. + root.preferences.language = $languages.index(random_int(min: 0, max: 5)) + + # Randomly assign a notification preference by selecting an index from the 'notifications' array. + root.preferences.notifications = $notifications.index(random_int(min: 0, max: 2)) +pipeline: + processors: + - mapping: | + # Set the target topic for the generated records to 'profiles'. + meta topic = "profiles" + + # Assign the entire record (root) to be sent to the specified topic. + root = this +output: + # Use the 'kafka_franz' output to send the result back to Redpanda + # https://docs.redpanda.com/redpanda-connect/components/outputs/kafka_franz/ + kafka_franz: + # Define the list of seed brokers for the Kafka cluster. + seed_brokers: [ "localhost:19092", "localhost:29092", "localhost:39092"] + # Dynamically assign the topic based on the metadata specified in the processors. + # In this case, it resolves to the 'profiles' topic. 
+ topic: ${! metadata("topic") } + # Configure SASL authentication to securely connect to the Kafka brokers. + sasl: + - # Specify the SASL mechanism to use for authentication. + mechanism: SCRAM-SHA-256 + # The password for the SASL authentication. + password: secretpassword + # The username for the SASL authentication. + username: superuser diff --git a/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/bootstrap.yml b/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/bootstrap.yml new file mode 100644 index 0000000..00f3003 --- /dev/null +++ b/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/bootstrap.yml @@ -0,0 +1,56 @@ +# ================================================================= +# This file defines initial cluster properties for a Redpanda cluster. +# Some of these settings are intended for quickstart development and evaluation +# and are not suitable for production environments. +# +# For more information on bootstrap files, see: +# https://docs.redpanda.com/current/deploy/deployment-option/self-hosted/manual/production/production-deployment/#configure-a-bootstrap-file +# ================================================================= + +# +# Enable SASL authentication for the Kafka and Admin APIs. 
+# https://docs.redpanda.com/current/reference/properties/cluster-properties/#admin_api_require_auth +admin_api_require_auth: true +# At least one superuser is required to be able to create other SASL users +# https://docs.redpanda.com/current/reference/properties/cluster-properties/#superusers +superusers: + - superuser +# https://docs.redpanda.com/current/reference/properties/cluster-properties/#enable_sasl +enable_sasl: true +# Allow topics to be created on first access. +# https://docs.redpanda.com/current/reference/properties/cluster-properties/#auto_create_topics_enabled +auto_create_topics_enabled: true +# Enable data transforms. +# https://docs.redpanda.com/current/develop/data-transforms/how-transforms-work/ +data_transforms_enabled: true +# Enable audit logging (enterprise feature). +# https://docs.redpanda.com/current/manage/audit-logging/ +audit_enabled: true +# Enable Tiered Storage (enterprise feature). +# https://docs.redpanda.com/current/manage/tiered-storage/ +cloud_storage_enabled: true +cloud_storage_region: local +cloud_storage_access_key: minio +cloud_storage_secret_key: redpandaTieredStorage7 +cloud_storage_api_endpoint: minio +cloud_storage_api_endpoint_port: 9000 +cloud_storage_disable_tls: true +cloud_storage_bucket: redpanda +# Forces segments to be uploaded to Tiered Storage faster for the purposes of the quickstart +# https://docs.redpanda.com/current/reference/properties/object-storage-properties/#cloud_storage_segment_max_upload_interval_sec +cloud_storage_segment_max_upload_interval_sec: 60 +# Continuous Data Balancing (enterprise feature) continuously monitors your node and rack availability and disk usage. This enables self-healing clusters that dynamically balance partitions, ensuring smooth operations and optimal cluster performance. +# https://docs.redpanda.com/current/manage/cluster-maintenance/continuous-data-balancing/ +partition_autobalancing_mode: continuous +# Enable Redpanda to collect consumer group metrics. 
+# https://docs.redpanda.com/current/reference/properties/cluster-properties/#enable_consumer_group_metrics +enable_consumer_group_metrics: + - "group" + - "partition" + - "consumer_lag" +# Lower the interval for the quickstart +# https://docs.redpanda.com/current/reference/properties/cluster-properties/#consumer_group_lag_collection_interval_sec +consumer_group_lag_collection_interval_sec: 60 +# Enable Redpanda to collect host metrics. +# https://docs.redpanda.com/current/reference/properties/cluster-properties/#enable_host_metrics +enable_host_metrics: true \ No newline at end of file diff --git a/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/docker-compose.yml b/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/docker-compose.yml new file mode 100644 index 0000000..9b636cb --- /dev/null +++ b/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/docker-compose.yml @@ -0,0 +1,403 @@ +name: redpanda-quickstart-multi-broker +networks: + redpanda_network: + driver: bridge +volumes: + redpanda-0: null + redpanda-1: null + redpanda-2: null + minio: null +services: + ################## + # Redpanda Brokers # + ################## + redpanda-0: + command: + - redpanda + - start + - --kafka-addr internal://0.0.0.0:9092,external://0.0.0.0:19092 + # 
Address the broker advertises to clients that connect to the Kafka API. + # Use the internal addresses to connect to the Redpanda brokers + # from inside the same Docker network. + # Use the external addresses to connect to the Redpanda brokers + # from outside the Docker network. + - --advertise-kafka-addr internal://redpanda-0:9092,external://localhost:19092 + - --pandaproxy-addr internal://0.0.0.0:8082,external://0.0.0.0:18082 + # Address the broker advertises to clients that connect to the HTTP Proxy. + - --advertise-pandaproxy-addr internal://redpanda-0:8082,external://localhost:18082 + - --schema-registry-addr internal://0.0.0.0:8081,external://0.0.0.0:18081 + # Redpanda brokers use the RPC API to communicate with each other internally. + - --rpc-addr redpanda-0:33145 + - --advertise-rpc-addr redpanda-0:33145 + # Mode dev-container uses well-known configuration properties for development in containers. + - --mode dev-container + # Tells Seastar (the framework Redpanda uses under the hood) to use 1 core on the system. 
+ - --smp 1 + - --default-log-level=info + image: docker.redpanda.com/redpandadata/redpanda:v25.2.7 + container_name: redpanda-0 + # Sets the username and password of the bootstrap SCRAM superuser + # See https://docs.redpanda.com/current/deploy/deployment-option/self-hosted/manual/production/production-deployment/#bootstrap-a-user-account + environment: + RP_BOOTSTRAP_USER: "superuser:secretpassword" + volumes: + - redpanda-0:/var/lib/redpanda/data + - ./bootstrap.yml:/etc/redpanda/.bootstrap.yaml + networks: + - redpanda_network + ports: + - 18081:18081 + - 18082:18082 + - 19092:19092 + - 19644:9644 + healthcheck: + test: ["CMD", "rpk", "cluster", "info", "-X", "user=superuser", "-X", "pass=secretpassword"] + interval: 10s + timeout: 15s + retries: 10 + depends_on: + minio: + condition: service_healthy + redpanda-1: + command: + - redpanda + - start + - --kafka-addr internal://0.0.0.0:9092,external://0.0.0.0:29092 + - --advertise-kafka-addr internal://redpanda-1:9092,external://localhost:29092 + - --pandaproxy-addr internal://0.0.0.0:8082,external://0.0.0.0:28082 + - --advertise-pandaproxy-addr internal://redpanda-1:8082,external://localhost:28082 + - --schema-registry-addr internal://0.0.0.0:8081,external://0.0.0.0:28081 + - --rpc-addr redpanda-1:33145 + - --advertise-rpc-addr redpanda-1:33145 + - --mode dev-container + - --smp 1 + - --default-log-level=info + - --seeds redpanda-0:33145 + image: docker.redpanda.com/redpandadata/redpanda:v25.2.7 + container_name: redpanda-1 + environment: + RP_BOOTSTRAP_USER: "superuser:secretpassword" + volumes: + - redpanda-1:/var/lib/redpanda/data + - ./bootstrap.yml:/etc/redpanda/.bootstrap.yaml + networks: + - redpanda_network + ports: + - 28081:28081 + - 28082:28082 + - 29092:29092 + - 29644:9644 + depends_on: + - redpanda-0 + - minio + redpanda-2: + command: + - redpanda + - start + - --kafka-addr internal://0.0.0.0:9092,external://0.0.0.0:39092 + - --advertise-kafka-addr 
internal://redpanda-2:9092,external://localhost:39092 + - --pandaproxy-addr internal://0.0.0.0:8082,external://0.0.0.0:38082 + - --advertise-pandaproxy-addr internal://redpanda-2:8082,external://localhost:38082 + - --schema-registry-addr internal://0.0.0.0:8081,external://0.0.0.0:38081 + - --rpc-addr redpanda-2:33145 + - --advertise-rpc-addr redpanda-2:33145 + - --mode dev-container + - --smp 1 + - --default-log-level=info + - --seeds redpanda-0:33145 + image: docker.redpanda.com/redpandadata/redpanda:v25.2.7 + container_name: redpanda-2 + environment: + RP_BOOTSTRAP_USER: "superuser:secretpassword" + volumes: + - redpanda-2:/var/lib/redpanda/data + - ./bootstrap.yml:/etc/redpanda/.bootstrap.yaml + networks: + - redpanda_network + ports: + - 38081:38081 + - 38082:38082 + - 39092:39092 + - 39644:9644 + depends_on: + - redpanda-0 + - minio + #################### + # Redpanda Console # + #################### + console: + container_name: redpanda-console + image: docker.redpanda.com/redpandadata/console:v3.2.2 + networks: + - redpanda_network + entrypoint: /bin/sh + command: -c 'echo "$$CONSOLE_CONFIG_FILE" > /tmp/config.yml && /app/console' + volumes: + - ./config:/tmp/config/ + environment: + CONFIG_FILEPATH: ${CONFIG_FILEPATH:-/tmp/config.yml} + CONSOLE_CONFIG_FILE: | + # Configure a connection to the Redpanda cluster + # See https://docs.redpanda.com/current/console/config/connect-to-redpanda/ + kafka: + brokers: ["redpanda-0:9092"] + sasl: + enabled: true + impersonateUser: true + schemaRegistry: + enabled: true + urls: ["http://redpanda-0:8081","http://redpanda-1:8081","http://redpanda-2:8081"] + authentication: + impersonateUser: true + redpanda: + adminApi: + enabled: true + urls: ["http://redpanda-0:9644","http://redpanda-1:9644","http://redpanda-2:9644"] + authentication: + basic: + username: superuser + password: secretpassword + impersonateUser: false + console: + # Configures Redpanda Console to fetch topic documentation from GitHub and display it in the 
UI. + # See https://docs.redpanda.com/current/console/config/topic-documentation/ + topicDocumentation: + enabled: true + git: + enabled: true + repository: + url: https://github.com/redpanda-data/docs + branch: main + baseDirectory: tests/docker-compose + authentication: + jwtSigningKey: vazxnT+ZHtxKslK6QlDGovcYnSjTk/lKMmZ+mHrBVE+YdVDkLgSuP6AszAKe9Gvq + basic: + enabled: true + authorization: + roleBindings: + - roleName: admin + users: + - loginType: basic + name: superuser + ports: + - 8080:8080 + depends_on: + redpanda-0: + condition: service_healthy + createtopic: + condition: service_completed_successfully + registerschema: + condition: service_completed_successfully + deploytransform: + condition: service_completed_successfully + #################### + # Redpanda Connect # + #################### + connect: + container_name: redpanda-connect + image: docker.redpanda.com/redpandadata/connect + networks: + - redpanda_network + entrypoint: /bin/sh + depends_on: + redpanda-0: + condition: service_healthy + command: -c 'echo "$$CONNECT_CFG_FILE" > /tmp/connect.yml; /redpanda-connect -c /tmp/connect.yml' + environment: + # This Redpanda Connect configuration creates fake data, + # processes it, and writes the output to a set of topics. + # + # Input: + # - Uses Redpanda Connect's generate input to generate fake data. + # See https://docs.redpanda.com/redpanda-connect/components/inputs/generate/ + # Pipeline: + # - Bloblang mapping to batch each input and map 1 message to 'logins' + # topic, and a random number (1-3) of messages to 'transactions' topic + # - Unarchive processor to parse the JSON array and extract each + # element into its own message. + # See https://docs.redpanda.com/redpanda-connect/guides/bloblang/about/ + # Output: + # - kafka_franz output to write the messages to the Redpanda brokers.
+ # See https://docs.redpanda.com/redpanda-connect/components/outputs/kafka_franz/ + CONNECT_CFG_FILE: | + input: + generate: + interval: 1s + mapping: | + let first_name = fake("first_name") + let last_name = fake("last_name") + + root.user_id = counter() + root.name = $$first_name + " " + $$last_name + root.email = ($$first_name.slice(0,1) + $$last_name + "@" + fake("domain_name")).lowercase() + root.ip = fake("ipv4") + root.login_time = now() + pipeline: + processors: + - mapping: | + root = range(0, random_int(min:2, max:4)).map_each(cust -> this) + - unarchive: + format: "json_array" + - mapping: | + if batch_index() == 0 { + meta topic = "logins" + root = this + } else { + meta topic = "transactions" + root.user_id = this.user_id + root.email = this.email + root.index = batch_index() - 1 + root.product_url = fake("url") + root.price = fake("amount_with_currency") + root.timestamp = now() + } + output: + kafka_franz: + seed_brokers: [ "redpanda-0:9092" ] + topic: $${! metadata("topic") } + sasl: + - mechanism: SCRAM-SHA-256 + password: secretpassword + username: superuser + #################### + # rpk container to create the edu-filtered-domains topic # + # See https://docs.redpanda.com/current/reference/rpk/rpk-topic/rpk-topic-create/ + #################### + createtopic: + command: + - topic + - create + - edu-filtered-domains + - -X user=superuser + - -X pass=secretpassword + - -X brokers=redpanda-0:9092 + image: docker.redpanda.com/redpandadata/redpanda:v25.2.7 + networks: + - redpanda_network + depends_on: + redpanda-0: + condition: service_healthy + #################### + # rpk container to register the schema # + # See https://docs.redpanda.com/current/manage/schema-reg/schema-reg-api/ + #################### + registerschema: + command: + - registry + - schema + - create + - transactions + - --schema + - /etc/redpanda/transactions-schema.json + - -X user=superuser + - -X pass=secretpassword + - -X registry.hosts=redpanda-0:8081 + image: 
docker.redpanda.com/redpandadata/redpanda:v25.2.7 + # Mount the local schema file into the container. + volumes: + - ./transactions-schema.json:/etc/redpanda/transactions-schema.json + networks: + - redpanda_network + depends_on: + redpanda-0: + condition: service_healthy + #################### + # rpk container to deploy a consumer group # + # See https://docs.redpanda.com/current/reference/rpk/rpk-topic/rpk-topic-consume/ + #################### + consumergroup: + command: + - topic + - consume + - transactions + - --group + - transactions-consumer + - -X user=superuser + - -X pass=secretpassword + - -X brokers=redpanda-0:9092 + image: docker.redpanda.com/redpandadata/redpanda:v25.2.7 + networks: + - redpanda_network + depends_on: + createtopic: + condition: service_completed_successfully + deploytransform: + condition: service_completed_successfully + #################### + # rpk container to deploy the pre-built data transform # + # See https://docs.redpanda.com/current/develop/data-transforms/deploy/ + #################### + deploytransform: + command: + - transform + - deploy + - --file=/etc/redpanda/regex.wasm + - --name=regex + - --input-topic=logins + - --output-topic=edu-filtered-domains + - --var=PATTERN="[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.edu" + - --var=MATCH_VALUE=true + - -X user=superuser + - -X pass=secretpassword + - -X admin.hosts=redpanda-0:9644 + image: docker.redpanda.com/redpandadata/redpanda:v25.2.7 + volumes: + - ./transform/regex.wasm:/etc/redpanda/regex.wasm + networks: + - redpanda_network + depends_on: + createtopic: + condition: service_completed_successfully + #################### + # MinIO for Tiered Storage # + # See https://min.io/ + # + # NOTE: MinIO is included in this quickstart for development and evaluation purposes only. + # It is not supported for production deployments of Redpanda.
+ # + # For production environments, use one of the supported object storage providers: + # https://docs.redpanda.com/current/deploy/deployment-option/self-hosted/manual/production/requirements/#object-storage-providers-for-tiered-storage + #################### + minio: + container_name: minio + image: minio/minio:RELEASE.2025-05-24T17-08-30Z + command: server --console-address ":9001" /data + ports: + - 9000:9000 + - 9001:9001 + environment: + MINIO_ROOT_USER: minio + MINIO_ROOT_PASSWORD: redpandaTieredStorage7 + MINIO_SERVER_URL: "http://minio:9000" + MINIO_REGION_NAME: local + MINIO_DOMAIN: minio + volumes: + - minio:/data + networks: + redpanda_network: + aliases: + - redpanda.minio + healthcheck: + test: ["CMD", "curl", "-f", "http://localhost:9000/minio/health/ready"] + interval: 10s + timeout: 5s + retries: 3 + mc: + depends_on: + minio: + condition: service_healthy + image: minio/mc:RELEASE.2025-05-21T01-59-54Z + container_name: mc + networks: + - redpanda_network + environment: + - AWS_ACCESS_KEY_ID=minio + - AWS_SECRET_ACCESS_KEY=redpandaTieredStorage7 + - AWS_REGION=local + entrypoint: > + /bin/sh -c " + until (/usr/bin/mc alias set minio http://minio:9000 minio redpandaTieredStorage7) do echo '...waiting...' 
&& sleep 1; done; + /usr/bin/mc mb minio/redpanda; + /usr/bin/mc policy set public minio/redpanda; + tail -f /dev/null + " diff --git a/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/generate-profiles.yaml b/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/generate-profiles.yaml new file mode 100644 index 0000000..baf9a6f --- /dev/null +++ b/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/generate-profiles.yaml @@ -0,0 +1,77 @@ +input: + # Use the 'generate' input + # https://docs.redpanda.com/redpanda-connect/components/inputs/generate/ + generate: + # The interval at which new records are generated. + interval: 1s + # The mapping section defines how each generated record is structured. + # The language used here is called Bloblang. + # https://docs.redpanda.com/redpanda-connect/guides/bloblang/about/ + mapping: | + # Generate a fake first name using the 'first_name' faker function. + let first_name = fake("first_name") + + # Generate a fake last name using the 'last_name' faker function. + let last_name = fake("last_name") + + # Define possible subscription levels for users. + let subscription_levels = ["Free", "Basic", "Premium"] + + # Define possible notification channels for user preferences. 
+ let notifications = ["email", "sms", "push" ] + + # Define supported languages for user preferences. + let languages = ["en", "es", "fr", "de", "zh", "jp"] + + # Assign a unique user ID using a UUID digit generator. + root.user_id = fake("uuid_digit") + + # Assign the generated first name to the 'first_name' field. + root.first_name = $first_name + + # Assign the generated last name to the 'last_name' field. + root.last_name = $last_name + + # Construct the user's email by combining the first initial, last name, and a fake domain name. + # The email is converted to lowercase for consistency. + root.email = ($first_name.slice(0,1) + $last_name + "@" + fake("domain_name")).lowercase() + + # Assign a fake registration date using the 'date' faker function. + root.registration_date = fake("date") + + # Assign the current timestamp as the last login time. + root.last_login = now() + + # Randomly assign a subscription level by selecting an index from the 'subscription_levels' array. + root.subscription_level = $subscription_levels.index(random_int(min: 0, max: 2)) + + # Randomly assign a language preference by selecting an index from the 'languages' array. + root.preferences.language = $languages.index(random_int(min: 0, max: 5)) + + # Randomly assign a notification preference by selecting an index from the 'notifications' array. + root.preferences.notifications = $notifications.index(random_int(min: 0, max: 2)) +pipeline: + processors: + - mapping: | + # Set the target topic for the generated records to 'profiles'. + meta topic = "profiles" + + # Assign the entire record (root) to be sent to the specified topic. + root = this +output: + # Use the 'kafka_franz' output to send the result back to Redpanda + # https://docs.redpanda.com/redpanda-connect/components/outputs/kafka_franz/ + kafka_franz: + # Define the list of seed brokers for the Kafka cluster. 
+ seed_brokers: [ "localhost:19092", "localhost:29092", "localhost:39092"] + # Dynamically assign the topic based on the metadata specified in the processors. + # In this case, it resolves to the 'profiles' topic. + topic: ${! metadata("topic") } + # Configure SASL authentication to securely connect to the Kafka brokers. + sasl: + - # Specify the SASL mechanism to use for authentication. + mechanism: SCRAM-SHA-256 + # The password for the SASL authentication. + password: secretpassword + # The username for the SASL authentication. + username: superuser diff --git a/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/rpk-profile.yaml b/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/rpk-profile.yaml new file mode 100644 index 0000000..174e1ec --- /dev/null +++ b/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/rpk-profile.yaml @@ -0,0 +1,24 @@ +# This file configures `rpk` to connect to a remote Redpanda cluster running in the same local network as `rpk`. + +# Configuration for connecting to the Kafka API of the Redpanda cluster. +kafka_api: + # SASL (Simple Authentication and Security Layer) settings for authentication. 
+ sasl: + user: superuser # The username used for authentication + password: secretpassword # The password associated with the username + mechanism: scram-sha-256 # Authentication mechanism; SCRAM-SHA-256 provides secure password-based authentication + # List of Kafka brokers in the Redpanda cluster. + # These brokers ensure high availability and fault tolerance for Kafka-based communication. + brokers: + - 127.0.0.1:19092 # Broker 1: Accessible on localhost, port 19092 + - 127.0.0.1:29092 # Broker 2: Accessible on localhost, port 29092 + - 127.0.0.1:39092 # Broker 3: Accessible on localhost, port 39092 + +# Configuration for connecting to the Redpanda Admin API. +# The Admin API allows you to perform administrative tasks such as managing configurations, monitoring, and scaling. +admin_api: + # List of Admin API endpoints for managing the cluster. + addresses: + - 127.0.0.1:19644 # Admin API for Broker 1: Accessible on localhost, port 19644 + - 127.0.0.1:29644 # Admin API for Broker 2: Accessible on localhost, port 29644 + - 127.0.0.1:39644 # Admin API for Broker 3: Accessible on localhost, port 39644 diff --git a/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/transactions-schema.json b/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/transactions-schema.json new file mode 100644 index 0000000..342c495 --- /dev/null +++ 
b/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/transactions-schema.json @@ -0,0 +1,37 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "Transactions", + "type": "object", + "properties": { + "email": { + "type": "string", + "format": "email", + "description": "The email address of the user involved in the transaction." + }, + "index": { + "type": "integer", + "description": "A numeric index associated with the transaction." + }, + "price": { + "type": "string", + "pattern": "^XXX \\d+\\.\\d{6}$", + "description": "A string representing the price of the product, including a currency code followed by the amount." + }, + "product_url": { + "type": "string", + "format": "uri", + "description": "A URL that points to the product involved in the transaction." + }, + "timestamp": { + "type": "string", + "format": "date-time", + "description": "The timestamp of when the transaction occurred, formatted in ISO 8601." + }, + "user_id": { + "type": "integer", + "description": "A numeric identifier for the user." 
+ } + }, + "required": ["email", "index", "price", "product_url", "timestamp", "user_id"], + "additionalProperties": false +} diff --git a/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/transactions.md b/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/transactions.md new file mode 100644 index 0000000..e230916 --- /dev/null +++ b/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/transactions.md @@ -0,0 +1,46 @@ +# Transactions Topic Documentation + +This document provides an overview of the `transactions` topic in the Redpanda cluster. The topic is designed to capture autogenerated transaction events with various attributes. + +## Schema Overview + +Each message in the `transactions` topic is a JSON object with the following fields and types: + +```json +{ + "email": "string", + "index": "integer", + "price": "string", + "product_url": "string", + "timestamp": "string", + "user_id": "integer" +} +``` + +- **email**: The email address of the user involved in the transaction. +- **index**: A numeric index associated with the transaction. This could represent the position or order of the transaction in a sequence. +- **price**: A string representing the price of the product. It includes a currency code (e.g., "XXX") followed by the amount.
+- **product_url**: A URL that points to the product involved in the transaction. +- **timestamp**: The timestamp of when the transaction occurred, formatted in ISO 8601. +- **user_id**: A numeric identifier for the user. This is typically a unique ID assigned to each user in the system. + +## Example message + +```json +{ + "email": "wzieme@ykczius.edu", + "index": 0, + "price": "XXX 5651308.100000", + "product_url": "http://yjomdta.top/DxvGsCn.php", + "timestamp": "2024-08-16T15:51:19.799474084Z", + "user_id": 1 +} +``` + +## Use cases + +You can use the `transactions` topic for various purposes, including: + +- **Analytics**: Tracking and analyzing user transactions to understand buying behavior, popular products, etc. +- **Monitoring**: Observing transaction patterns to detect anomalies, such as unusual spikes or drops in transaction volume. +- **Data Processing**: Feeding transaction data into other systems, such as a data warehouse or real-time processing pipelines, for further processing and analysis. 
diff --git a/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/transform/README.adoc b/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/transform/README.adoc new file mode 100644 index 0000000..4485435 --- /dev/null +++ b/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/transform/README.adoc @@ -0,0 +1,73 @@ += Modify the Wasm Transform in the Quickstart + +This directory contains the Go source code (`transform.go`) for the data transform that is used in the Redpanda Self-Managed quickstart. +If you're following the quickstart, you *do not* need to modify or rebuild this code. The Docker Compose configuration automatically deploys a pre-built transform called `regex.wasm`. + +However, if you want to customize the data transform logic, continue reading. + +== Why customize the transform? + +- **Custom filtering**: Filter by a different regex or apply multiple conditions. +- **Data manipulation**: Transform records before writing them out. For example, redacting sensitive data or combining fields. +- **Extended functionality**: Add advanced logging, error handling, or multi-topic routing. + +== Prerequisites + +You need the following: + +- At least Go 1.20 installed. 
++ +[source,bash] +---- +go version +---- + +- The Redpanda CLI (`rpk`) installed. + +- A running Redpanda cluster. If you're using the local quickstart with Docker Compose, ensure the cluster is up and running. Or, point `rpk` to another Redpanda environment. + +== Modify and deploy your transform + +. Open link:transform.go[transform.go] and make your changes. For example: ++ +-- +- Change the regex logic to handle different use cases. +- Add environment variables to control new features. +- Extend the `doRegexFilter()` function to manipulate records. +-- + +. Compile your Go code into a `.wasm` file: ++ +[source,bash] +---- +rpk transform build +---- ++ +This command compiles your Go source and produces a `.wasm` file that you can deploy to Redpanda. + +. Deploy the new transform. ++ +If your Docker Compose setup already has a service to deploy the transform, you can restart that service. ++ +Otherwise, you can deploy your updated `.wasm` manually using `rpk transform deploy`. + +. Produce messages into the input topic. For example: ++ +[source,bash] +---- +echo '{"key":"alice@university.edu","value":"test message"}' | rpk topic produce logins +---- + +. Consume from the output topic. For example: ++ +[source,bash] +---- +rpk topic consume edu-filtered-domains --num 1 +---- + +== Suggested reading + +- link:https://docs.redpanda.com/current/reference/rpk/[Redpanda `rpk` CLI Reference^]. +- link:https://docs.redpanda.com/current/develop/data-transforms/build/[Develop Data Transforms^]. +- https://golang.org/ref/mod[Go Modules^] for managing dependencies and builds in Go. +- https://docs.docker.com/compose/[Docker Compose^] for customizing your environment. 
diff --git a/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/transform/go.mod b/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/transform/go.mod new file mode 100644 index 0000000..4bb9a33 --- /dev/null +++ b/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/transform/go.mod @@ -0,0 +1,5 @@ +module regex + +go 1.20 + +require github.com/redpanda-data/redpanda/src/transform-sdk/go/transform v1.1.0 diff --git a/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/transform/go.sum b/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/transform/go.sum new file mode 100644 index 0000000..8745dd1 --- /dev/null +++ 
b/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/transform/go.sum @@ -0,0 +1,2 @@ +github.com/redpanda-data/redpanda/src/transform-sdk/go/transform v1.1.0 h1:KxgHJZsHsrT3YX7DMpu/vJN4TZN3KFm1jzrCFLyOepA= +github.com/redpanda-data/redpanda/src/transform-sdk/go/transform v1.1.0/go.mod h1:QGgiwwf/BIsD1b7EiyQ/Apzw+RLSpasRDdpOCiefQFQ= diff --git a/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/transform/regex.wasm b/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/transform/regex.wasm new file mode 100644 index 0000000..303962f Binary files /dev/null and b/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/transform/regex.wasm differ diff --git a/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/transform/transform.go 
b/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/transform/transform.go new file mode 100644 index 0000000..7bbd628 --- /dev/null +++ b/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/transform/transform.go @@ -0,0 +1,122 @@ +package main +// This data transform filters records based on a customizable regex pattern. +// If a record's key or value +// (determined by an environment variable) matches the specified regex, +// the record is forwarded to the output. +// Otherwise, it is dropped. +// +// Usage: +// 1. Provide the following environment variables in your Docker or configuration setup: +// - PATTERN : (required) a regular expression that determines what you want to match. +// - MATCH_VALUE : (optional) a boolean to decide whether to check the record value. If false, +// the record key is checked. Default is false. +// +// Example environment variables: +// PATTERN=".*\\.edu$" +// MATCH_VALUE="true" +// +// Logs: +// This transform logs information about each record and whether it matched. +// The logs appear in the _redpanda.transform_logs topic, so you can debug how your records are being processed. 
// isTrueVar reports whether v is one of the accepted "truthy" spellings:
// "yes", "ok", "1" or "true". The comparison ignores letter case (the input
// is lower-cased first) but does not trim whitespace; every other input,
// including the empty string, yields false.
func isTrueVar(v string) bool {
	lowered := strings.ToLower(v)
	return lowered == "yes" || lowered == "ok" || lowered == "1" || lowered == "true"
}
+// It checks whether the record's key or value (based on MATCH_VALUE) matches the compiled regex. +// If it matches, the record is forwarded, if not, it's dropped. +func doRegexFilter(e transform.WriteEvent, w transform.RecordWriter) error { + // This stores the data to be checked (either the key or value). + var dataToCheck []byte + + // Depending on the MATCH_VALUE environment variable, decide whether to check the record's key or value. + if checkValue { + // Use the value of the record if MATCH_VALUE is true. + dataToCheck = e.Record().Value + log.Printf("Checking record value: %s\n", string(dataToCheck)) + } else { + // Use the key of the record if MATCH_VALUE is false. + dataToCheck = e.Record().Key + log.Printf("Checking record key: %s\n", string(dataToCheck)) + } + + // If there is no key or value to check, log and skip the record. + if dataToCheck == nil { + log.Println("Record has no key/value to check, skipping.") + return nil + } + + // Check if the data matches the regex pattern. + pass := re.Match(dataToCheck) + if pass { + // If the record matches the pattern, log and write the record to the output topic. + log.Printf("Record matched pattern, passing through. Key: %s, Value: %s\n", string(e.Record().Key), string(e.Record().Value)) + return w.Write(e.Record()) + } else { + // If the record does not match the pattern, log and drop the record. + log.Printf("Record did not match pattern, dropping. Key: %s, Value: %s\n", string(e.Record().Key), string(e.Record().Value)) + // Do not write the record if it doesn't match the pattern. 
+ return nil + } +} \ No newline at end of file diff --git a/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/transform/transform.yaml b/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/transform/transform.yaml new file mode 100644 index 0000000..d415530 --- /dev/null +++ b/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/transform/transform.yaml @@ -0,0 +1,33 @@ +# Transform metadata used by the rpk transform build command. +# This metadata file tells rpk: +# 1) The transform’s display name, which also becomes the base for the .wasm file name. +# 2) A brief description of what it does. +# 3) Defaults for environment variables. +# 4) Input and output topics (if you want to define them here rather than in the deploy command). + +# Human-readable name of the transform. rpk transform build uses this for the generated .wasm file. +name: regex + +description: | + Filters the input topic to records that only match a regular expression. + + Regular expressions are implemented using Go's regexp library, which uses the syntax of RE2. + See the RE2 wiki for allowed syntax: https://github.com/google/re2/wiki/Syntax + + Environment variables: + - PATTERN: The regular expression that will match against records (required). 
+ - MATCH_VALUE: By default, the regex matches keys, but if set to "true", the regex matches values. + +# By default, no input topic is set here. (You can set it in your deploy command if preferred.) +input-topic: "" + +# By default, no output topic is set here. (You can set it in your deploy command if preferred.) +output-topic: "" + +# Indicates the specific TinyGo environment used to compile your transform. +language: tinygo-no-goroutines + +env: + # The PATTERN variable must be provided at deploy time. + # Example: --var=PATTERN=".*@example.com" + PATTERN: '' diff --git a/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/rpk-profile.yaml b/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/rpk-profile.yaml new file mode 100644 index 0000000..174e1ec --- /dev/null +++ b/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/rpk-profile.yaml @@ -0,0 +1,24 @@ +# This file configures `rpk` to connect to a remote Redpanda cluster running in the same local network as `rpk`. + +# Configuration for connecting to the Kafka API of the Redpanda cluster. +kafka_api: + # SASL (Simple Authentication and Security Layer) settings for authentication. 
+ sasl: + user: superuser # The username used for authentication + password: secretpassword # The password associated with the username + mechanism: scram-sha-256 # Authentication mechanism; SCRAM-SHA-256 provides secure password-based authentication + # List of Kafka brokers in the Redpanda cluster. + # These brokers ensure high availability and fault tolerance for Kafka-based communication. + brokers: + - 127.0.0.1:19092 # Broker 1: Accessible on localhost, port 19092 + - 127.0.0.1:29092 # Broker 2: Accessible on localhost, port 29092 + - 127.0.0.1:39092 # Broker 3: Accessible on localhost, port 39092 + +# Configuration for connecting to the Redpanda Admin API. +# The Admin API allows you to perform administrative tasks such as managing configurations, monitoring, and scaling. +admin_api: + # List of Admin API endpoints for managing the cluster. + addresses: + - 127.0.0.1:19644 # Admin API for Broker 1: Accessible on localhost, port 19644 + - 127.0.0.1:29644 # Admin API for Broker 2: Accessible on localhost, port 29644 + - 127.0.0.1:39644 # Admin API for Broker 3: Accessible on localhost, port 39644 diff --git a/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/transactions-schema.json b/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/transactions-schema.json new file mode 100644 index 0000000..342c495 --- /dev/null +++ 
b/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/transactions-schema.json @@ -0,0 +1,37 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "Transactions", + "type": "object", + "properties": { + "email": { + "type": "string", + "format": "email", + "description": "The email address of the user involved in the transaction." + }, + "index": { + "type": "integer", + "description": "A numeric index associated with the transaction." + }, + "price": { + "type": "string", + "pattern": "^XXX \\d+\\.\\d{6}$", + "description": "A string representing the price of the product, including a currency code followed by the amount." + }, + "product_url": { + "type": "string", + "format": "uri", + "description": "A URL that points to the product involved in the transaction." + }, + "timestamp": { + "type": "string", + "format": "date-time", + "description": "The timestamp of when the transaction occurred, formatted in ISO 8601." + }, + "user_id": { + "type": "integer", + "description": "A numeric identifier for the user." 
+ } + }, + "required": ["email", "index", "price", "product_url", "timestamp", "user_id"], + "additionalProperties": false +} diff --git a/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/transactions.md b/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/transactions.md new file mode 100644 index 0000000..e230916 --- /dev/null +++ b/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/transactions.md @@ -0,0 +1,46 @@ +# Transactions Topic Documentation + +This document provides an overview of the `transactions` topic in the Redpanda cluster. The topic is designed to capture autogenerated transaction events with various attributes. + +## Schema Overview + +Each message in the `transactions` topic adheres to the following JSON schema: + +```json +{ + "email": "string", + "index": "integer", + "price": "string", + "product_url": "string", + "timestamp": "string", + "user_id": "integer" +} +``` + +- **email**: The email address of the user involved in the transaction. +- **index**: A numeric index associated with the transaction. This could represent the position or order of the transaction in a sequence. +- **price**: A string representing the price of the product. It includes a currency code (e.g., "XXX") followed by the amount. +- **product_url**: A URL that points to the product involved in the transaction. 
+- **timestamp**: The timestamp of when the transaction occurred, formatted in ISO 8601. +- **user_id**: A numeric identifier for the user. This is typically a unique ID assigned to each user in the system. + +## Example message + +```json +{ + "email": "wzieme@ykczius.edu", + "index": 0, + "price": "XXX 5651308.100000", + "product_url": "http://yjomdta.top/DxvGsCn.php", + "timestamp": "2024-08-16T15:51:19.799474084Z", + "user_id": 1 +} +``` + +## Use cases + +You can use the `transactions` topic for various purposes, including: + +- **Analytics**: Tracking and analyzing user transactions to understand buying behavior, popular products, etc. +- **Monitoring**: Observing transaction patterns to detect anomalies, such as unusual spikes or drops in transaction volume. +- **Data Processing**: Feeding transaction data into other systems, such as a data warehouse or real-time processing pipelines, for further processing and analysis. diff --git a/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/transform/README.adoc b/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/transform/README.adoc new file mode 100644 index 0000000..4485435 --- /dev/null +++ b/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/transform/README.adoc @@ -0,0 +1,73 @@ += Modify the Wasm Transform in the Quickstart + +This directory contains the Go source code (`transform.go`) for 
the data transform that is used in the Redpanda Self-Managed quickstart. +If you're following the quickstart, you *do not* need to modify or rebuild this code. The Docker Compose configuration automatically deploys a pre-built transform called `regex.wasm`. + +However, if you want to customize the data transform logic, continue reading. + +== Why customize the transform? + +- **Custom filtering**: Filter by a different regex or apply multiple conditions. +- **Data manipulation**: Transform records before writing them out. For example, redacting sensitive data or combining fields. +- **Extended functionality**: Add advanced logging, error handling, or multi-topic routing. + +== Prerequisites + +You need the following: + +- At least Go 1.20 installed. ++ +[source,bash] +---- +go version +---- + +- The Redpanda CLI (`rpk`) installed. + +- A running Redpanda cluster. If you're using the local quickstart with Docker Compose, ensure the cluster is up and running. Or, point `rpk` to another Redpanda environment. + +== Modify and deploy your transform + +. Open link:transform.go[transform.go] and make your changes. For example: ++ +-- +- Change the regex logic to handle different use cases. +- Add environment variables to control new features. +- Extend the `doRegexFilter()` function to manipulate records. +-- + +. Compile your Go code into a `.wasm` file: ++ +[source,bash] +---- +rpk transform build +---- ++ +This command compiles your Go source and produces a `.wasm` file that you can deploy to Redpanda. + +. Deploy the new transform. ++ +If your Docker Compose setup already has a service to deploy the transform, you can restart that service. ++ +Otherwise, you can deploy your updated `.wasm` manually using `rpk transform deploy`. + +. Produce messages into the input topic. For example: ++ +[source,bash] +---- +echo '{"key":"alice@university.edu","value":"test message"}' | rpk topic produce logins +---- + +. Consume from the output topic. 
For example: ++ +[source,bash] +---- +rpk topic consume edu-filtered-domains --num 1 +---- + +== Suggested reading + +- link:https://docs.redpanda.com/current/reference/rpk/[Redpanda `rpk` CLI Reference^]. +- link:https://docs.redpanda.com/current/develop/data-transforms/build/[Develop Data Transforms^]. +- https://golang.org/ref/mod[Go Modules^] for managing dependencies and builds in Go. +- https://docs.docker.com/compose/[Docker Compose^] for customizing your environment. diff --git a/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/transform/go.mod b/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/transform/go.mod new file mode 100644 index 0000000..4bb9a33 --- /dev/null +++ b/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/transform/go.mod @@ -0,0 +1,5 @@ +module regex + +go 1.20 + +require github.com/redpanda-data/redpanda/src/transform-sdk/go/transform v1.1.0 diff --git a/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/transform/go.sum 
b/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/transform/go.sum new file mode 100644 index 0000000..8745dd1 --- /dev/null +++ b/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/transform/go.sum @@ -0,0 +1,2 @@ +github.com/redpanda-data/redpanda/src/transform-sdk/go/transform v1.1.0 h1:KxgHJZsHsrT3YX7DMpu/vJN4TZN3KFm1jzrCFLyOepA= +github.com/redpanda-data/redpanda/src/transform-sdk/go/transform v1.1.0/go.mod h1:QGgiwwf/BIsD1b7EiyQ/Apzw+RLSpasRDdpOCiefQFQ= diff --git a/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/transform/regex.wasm b/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/transform/regex.wasm new file mode 100644 index 0000000..303962f Binary files /dev/null and b/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/transform/regex.wasm differ diff --git 
a/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/transform/transform.go b/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/transform/transform.go new file mode 100644 index 0000000..7bbd628 --- /dev/null +++ b/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/transform/transform.go @@ -0,0 +1,122 @@ +package main +// This data transform filters records based on a customizable regex pattern. +// If a record's key or value +// (determined by an environment variable) matches the specified regex, +// the record is forwarded to the output. +// Otherwise, it is dropped. +// +// Usage: +// 1. Provide the following environment variables in your Docker or configuration setup: +// - PATTERN : (required) a regular expression that determines what you want to match. +// - MATCH_VALUE : (optional) a boolean to decide whether to check the record value. If false, +// the record key is checked. Default is false. +// +// Example environment variables: +// PATTERN=".*\\.edu$" +// MATCH_VALUE="true" +// +// Logs: +// This transform logs information about each record and whether it matched. +// The logs appear in the _redpanda.transform_logs topic, so you can debug how your records are being processed. 
+// +// Build instructions: +// go mod tidy +// rpk transform build +// +// For more details on building transforms with the Redpanda SDK, see: +// https://docs.redpanda.com/current/develop/data-transforms +// + +import ( + "log" + "os" + "regexp" + "strings" + + "github.com/redpanda-data/redpanda/src/transform-sdk/go/transform" +) + +var ( + re *regexp.Regexp + checkValue bool +) + +func isTrueVar(v string) bool { + switch strings.ToLower(v) { + case "yes", "ok", "1", "true": + return true + default: + return false + } +} + +// The main() function runs only once at startup. It performs all initialization steps: +// - Reads and compiles the regex pattern. +// - Determines whether to match on the key or value. +// - Registers the doRegexFilter() function to process records. +func main() { + // Set logging preferences, including timestamp and UTC time. + log.SetPrefix("[regex-transform] ") + log.SetFlags(log.Ldate | log.Ltime | log.LUTC | log.Lmicroseconds) + + // Start logging the transformation process + log.Println("Starting transform...") + + // Read the PATTERN environment variable to get the regex pattern. + pattern, ok := os.LookupEnv("PATTERN") + if !ok { + log.Fatal("Missing PATTERN environment variable") + } + // Log the regex pattern being used. + log.Printf("Using PATTERN: %q\n", pattern) + // Compile the regex pattern for later use. + re = regexp.MustCompile(pattern) + + // Read the MATCH_VALUE environment variable to determine whether to check the record's value. + mk, ok := os.LookupEnv("MATCH_VALUE") + checkValue = ok && isTrueVar(mk) + log.Printf("MATCH_VALUE set to: %t\n", checkValue) + + log.Println("Initialization complete, waiting for records...") + + // Listen for records to be written, calling doRegexFilter() for each record. + transform.OnRecordWritten(doRegexFilter) +} + +// The doRegexFilter() function executes each time a new record is written. 
+// It checks whether the record's key or value (based on MATCH_VALUE) matches the compiled regex. +// If it matches, the record is forwarded, if not, it's dropped. +func doRegexFilter(e transform.WriteEvent, w transform.RecordWriter) error { + // This stores the data to be checked (either the key or value). + var dataToCheck []byte + + // Depending on the MATCH_VALUE environment variable, decide whether to check the record's key or value. + if checkValue { + // Use the value of the record if MATCH_VALUE is true. + dataToCheck = e.Record().Value + log.Printf("Checking record value: %s\n", string(dataToCheck)) + } else { + // Use the key of the record if MATCH_VALUE is false. + dataToCheck = e.Record().Key + log.Printf("Checking record key: %s\n", string(dataToCheck)) + } + + // If there is no key or value to check, log and skip the record. + if dataToCheck == nil { + log.Println("Record has no key/value to check, skipping.") + return nil + } + + // Check if the data matches the regex pattern. + pass := re.Match(dataToCheck) + if pass { + // If the record matches the pattern, log and write the record to the output topic. + log.Printf("Record matched pattern, passing through. Key: %s, Value: %s\n", string(e.Record().Key), string(e.Record().Value)) + return w.Write(e.Record()) + } else { + // If the record does not match the pattern, log and drop the record. + log.Printf("Record did not match pattern, dropping. Key: %s, Value: %s\n", string(e.Record().Key), string(e.Record().Value)) + // Do not write the record if it doesn't match the pattern. 
+ return nil + } +} \ No newline at end of file diff --git a/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/transform/transform.yaml b/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/transform/transform.yaml new file mode 100644 index 0000000..d415530 --- /dev/null +++ b/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/transform/transform.yaml @@ -0,0 +1,33 @@ +# Transform metadata used by the rpk transform build command. +# This metadata file tells rpk: +# 1) The transform’s display name, which also becomes the base for the .wasm file name. +# 2) A brief description of what it does. +# 3) Defaults for environment variables. +# 4) Input and output topics (if you want to define them here rather than in the deploy command). + +# Human-readable name of the transform. rpk transform build uses this for the generated .wasm file. +name: regex + +description: | + Filters the input topic to records that only match a regular expression. + + Regular expressions are implemented using Go's regexp library, which uses the syntax of RE2. + See the RE2 wiki for allowed syntax: https://github.com/google/re2/wiki/Syntax + + Environment variables: + - PATTERN: The regular expression that will match against records (required). + - MATCH_VALUE: By default, the regex matches keys, but if set to "true", the regex matches values. 
+ +# By default, no input topic is set here. (You can set it in your deploy command if preferred.) +input-topic: "" + +# By default, no output topic is set here. (You can set it in your deploy command if preferred.) +output-topic: "" + +# Indicates the specific TinyGo environment used to compile your transform. +language: tinygo-no-goroutines + +env: + # The PATTERN variable must be provided at deploy time. + # Example: --var=PATTERN=".*@example.com" + PATTERN: '' diff --git a/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/rpk-profile.yaml b/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/rpk-profile.yaml new file mode 100644 index 0000000..174e1ec --- /dev/null +++ b/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/rpk-profile.yaml @@ -0,0 +1,24 @@ +# This file configures `rpk` to connect to a remote Redpanda cluster running in the same local network as `rpk`. + +# Configuration for connecting to the Kafka API of the Redpanda cluster. +kafka_api: + # SASL (Simple Authentication and Security Layer) settings for authentication. + sasl: + user: superuser # The username used for authentication + password: secretpassword # The password associated with the username + mechanism: scram-sha-256 # Authentication mechanism; SCRAM-SHA-256 provides secure password-based authentication + # List of Kafka brokers in the Redpanda cluster. + # These brokers ensure high availability and fault tolerance for Kafka-based communication. 
+ brokers: + - 127.0.0.1:19092 # Broker 1: Accessible on localhost, port 19092 + - 127.0.0.1:29092 # Broker 2: Accessible on localhost, port 29092 + - 127.0.0.1:39092 # Broker 3: Accessible on localhost, port 39092 + +# Configuration for connecting to the Redpanda Admin API. +# The Admin API allows you to perform administrative tasks such as managing configurations, monitoring, and scaling. +admin_api: + # List of Admin API endpoints for managing the cluster. + addresses: + - 127.0.0.1:19644 # Admin API for Broker 1: Accessible on localhost, port 19644 + - 127.0.0.1:29644 # Admin API for Broker 2: Accessible on localhost, port 29644 + - 127.0.0.1:39644 # Admin API for Broker 3: Accessible on localhost, port 39644 diff --git a/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/transactions-schema.json b/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/transactions-schema.json new file mode 100644 index 0000000..342c495 --- /dev/null +++ b/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/transactions-schema.json @@ -0,0 +1,37 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "Transactions", + "type": "object", + "properties": { + "email": { + "type": "string", + "format": "email", + "description": "The email address of the user involved in the transaction." + }, + "index": { + "type": "integer", + "description": "A numeric index associated with the transaction." 
+ }, + "price": { + "type": "string", + "pattern": "^XXX \\d+\\.\\d{6}$", + "description": "A string representing the price of the product, including a currency code followed by the amount." + }, + "product_url": { + "type": "string", + "format": "uri", + "description": "A URL that points to the product involved in the transaction." + }, + "timestamp": { + "type": "string", + "format": "date-time", + "description": "The timestamp of when the transaction occurred, formatted in ISO 8601." + }, + "user_id": { + "type": "integer", + "description": "A numeric identifier for the user." + } + }, + "required": ["email", "index", "price", "product_url", "timestamp", "user_id"], + "additionalProperties": false +} diff --git a/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/transactions.md b/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/transactions.md new file mode 100644 index 0000000..e230916 --- /dev/null +++ b/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/transactions.md @@ -0,0 +1,46 @@ +# Transactions Topic Documentation + +This document provides an overview of the `transactions` topic in the Redpanda cluster. The topic is designed to capture autogenerated transaction events with various attributes. 
+ +## Schema Overview + +Each message in the `transactions` topic adheres to the following JSON schema: + +```json +{ + "email": "string", + "index": "integer", + "price": "string", + "product_url": "string", + "timestamp": "string", + "user_id": "integer" +} +``` + +- **email**: The email address of the user involved in the transaction. +- **index**: A numeric index associated with the transaction. This could represent the position or order of the transaction in a sequence. +- **price**: A string representing the price of the product. It includes a currency code (e.g., "XXX") followed by the amount. +- **product_url**: A URL that points to the product involved in the transaction. +- **timestamp**: The timestamp of when the transaction occurred, formatted in ISO 8601. +- **user_id**: A numeric identifier for the user. This is typically a unique ID assigned to each user in the system. + +## Example message + +```json +{ + "email": "wzieme@ykczius.edu", + "index": 0, + "price": "XXX 5651308.100000", + "product_url": "http://yjomdta.top/DxvGsCn.php", + "timestamp": "2024-08-16T15:51:19.799474084Z", + "user_id": 1 +} +``` + +## Use cases + +You can use the `transactions` topic for various purposes, including: + +- **Analytics**: Tracking and analyzing user transactions to understand buying behavior, popular products, etc. +- **Monitoring**: Observing transaction patterns to detect anomalies, such as unusual spikes or drops in transaction volume. +- **Data Processing**: Feeding transaction data into other systems, such as a data warehouse or real-time processing pipelines, for further processing and analysis. 
diff --git a/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/transform/README.adoc b/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/transform/README.adoc new file mode 100644 index 0000000..4485435 --- /dev/null +++ b/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/transform/README.adoc @@ -0,0 +1,73 @@ += Modify the Wasm Transform in the Quickstart + +This directory contains the Go source code (`transform.go`) for the data transform that is used in the Redpanda Self-Managed quickstart. +If you're following the quickstart, you *do not* need to modify or rebuild this code. The Docker Compose configuration automatically deploys a pre-built transform called `regex.wasm`. + +However, if you want to customize the data transform logic, continue reading. + +== Why customize the transform? + +- **Custom filtering**: Filter by a different regex or apply multiple conditions. +- **Data manipulation**: Transform records before writing them out. For example, redacting sensitive data or combining fields. +- **Extended functionality**: Add advanced logging, error handling, or multi-topic routing. + +== Prerequisites + +You need the following: + +- At least Go 1.20 installed. ++ +[source,bash] +---- +go version +---- + +- The Redpanda CLI (`rpk`) installed. + +- A running Redpanda cluster. If you're using the local quickstart with Docker Compose, ensure the cluster is up and running. Or, point `rpk` to another Redpanda environment. 
+ +== Modify and deploy your transform + +. Open link:transform.go[transform.go] and make your changes. For example: ++ +-- +- Change the regex logic to handle different use cases. +- Add environment variables to control new features. +- Extend the `doRegexFilter()` function to manipulate records. +-- + +. Compile your Go code into a `.wasm` file: ++ +[source,bash] +---- +rpk transform build +---- ++ +This command compiles your Go source and produces a `.wasm` file that you can deploy to Redpanda. + +. Deploy the new transform. ++ +If your Docker Compose setup already has a service to deploy the transform, you can restart that service. ++ +Otherwise, you can deploy your updated `.wasm` manually using `rpk transform deploy`. + +. Produce messages into the input topic. For example: ++ +[source,bash] +---- +echo '{"key":"alice@university.edu","value":"test message"}' | rpk topic produce logins +---- + +. Consume from the output topic. For example: ++ +[source,bash] +---- +rpk topic consume edu-filtered-domains --num 1 +---- + +== Suggested reading + +- link:https://docs.redpanda.com/current/reference/rpk/[Redpanda `rpk` CLI Reference^]. +- link:https://docs.redpanda.com/current/develop/data-transforms/build/[Develop Data Transforms^]. +- https://golang.org/ref/mod[Go Modules^] for managing dependencies and builds in Go. +- https://docs.docker.com/compose/[Docker Compose^] for customizing your environment. 
diff --git a/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/transform/go.mod b/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/transform/go.mod new file mode 100644 index 0000000..4bb9a33 --- /dev/null +++ b/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/transform/go.mod @@ -0,0 +1,5 @@ +module regex + +go 1.20 + +require github.com/redpanda-data/redpanda/src/transform-sdk/go/transform v1.1.0 diff --git a/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/transform/go.sum b/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/transform/go.sum new file mode 100644 index 0000000..8745dd1 --- /dev/null +++ b/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/transform/go.sum @@ -0,0 +1,2 @@ +github.com/redpanda-data/redpanda/src/transform-sdk/go/transform v1.1.0 h1:KxgHJZsHsrT3YX7DMpu/vJN4TZN3KFm1jzrCFLyOepA= +github.com/redpanda-data/redpanda/src/transform-sdk/go/transform v1.1.0/go.mod h1:QGgiwwf/BIsD1b7EiyQ/Apzw+RLSpasRDdpOCiefQFQ= 
diff --git a/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/transform/regex.wasm b/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/transform/regex.wasm new file mode 100644 index 0000000..303962f Binary files /dev/null and b/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/transform/regex.wasm differ diff --git a/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/transform/transform.go b/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/transform/transform.go new file mode 100644 index 0000000..7bbd628 --- /dev/null +++ b/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/transform/transform.go @@ -0,0 +1,122 @@ +package main +// This data transform filters records based on a customizable regex pattern. +// If a record's key or value +// (determined by an environment variable) matches the specified regex, +// the record is forwarded to the output. +// Otherwise, it is dropped. +// +// Usage: +// 1. 
Provide the following environment variables in your Docker or configuration setup: +// - PATTERN : (required) a regular expression that determines what you want to match. +// - MATCH_VALUE : (optional) a boolean to decide whether to check the record value. If false, +// the record key is checked. Default is false. +// +// Example environment variables: +// PATTERN=".*\\.edu$" +// MATCH_VALUE="true" +// +// Logs: +// This transform logs information about each record and whether it matched. +// The logs appear in the _redpanda.transform_logs topic, so you can debug how your records are being processed. +// +// Build instructions: +// go mod tidy +// rpk transform build +// +// For more details on building transforms with the Redpanda SDK, see: +// https://docs.redpanda.com/current/develop/data-transforms +// + +import ( + "log" + "os" + "regexp" + "strings" + + "github.com/redpanda-data/redpanda/src/transform-sdk/go/transform" +) + +var ( + re *regexp.Regexp + checkValue bool +) + +func isTrueVar(v string) bool { + switch strings.ToLower(v) { + case "yes", "ok", "1", "true": + return true + default: + return false + } +} + +// The main() function runs only once at startup. It performs all initialization steps: +// - Reads and compiles the regex pattern. +// - Determines whether to match on the key or value. +// - Registers the doRegexFilter() function to process records. +func main() { + // Set logging preferences, including timestamp and UTC time. + log.SetPrefix("[regex-transform] ") + log.SetFlags(log.Ldate | log.Ltime | log.LUTC | log.Lmicroseconds) + + // Start logging the transformation process + log.Println("Starting transform...") + + // Read the PATTERN environment variable to get the regex pattern. + pattern, ok := os.LookupEnv("PATTERN") + if !ok { + log.Fatal("Missing PATTERN environment variable") + } + // Log the regex pattern being used. + log.Printf("Using PATTERN: %q\n", pattern) + // Compile the regex pattern for later use. 
+ re = regexp.MustCompile(pattern) + + // Read the MATCH_VALUE environment variable to determine whether to check the record's value. + mk, ok := os.LookupEnv("MATCH_VALUE") + checkValue = ok && isTrueVar(mk) + log.Printf("MATCH_VALUE set to: %t\n", checkValue) + + log.Println("Initialization complete, waiting for records...") + + // Listen for records to be written, calling doRegexFilter() for each record. + transform.OnRecordWritten(doRegexFilter) +} + +// The doRegexFilter() function executes each time a new record is written. +// It checks whether the record's key or value (based on MATCH_VALUE) matches the compiled regex. +// If it matches, the record is forwarded, if not, it's dropped. +func doRegexFilter(e transform.WriteEvent, w transform.RecordWriter) error { + // This stores the data to be checked (either the key or value). + var dataToCheck []byte + + // Depending on the MATCH_VALUE environment variable, decide whether to check the record's key or value. + if checkValue { + // Use the value of the record if MATCH_VALUE is true. + dataToCheck = e.Record().Value + log.Printf("Checking record value: %s\n", string(dataToCheck)) + } else { + // Use the key of the record if MATCH_VALUE is false. + dataToCheck = e.Record().Key + log.Printf("Checking record key: %s\n", string(dataToCheck)) + } + + // If there is no key or value to check, log and skip the record. + if dataToCheck == nil { + log.Println("Record has no key/value to check, skipping.") + return nil + } + + // Check if the data matches the regex pattern. + pass := re.Match(dataToCheck) + if pass { + // If the record matches the pattern, log and write the record to the output topic. + log.Printf("Record matched pattern, passing through. Key: %s, Value: %s\n", string(e.Record().Key), string(e.Record().Value)) + return w.Write(e.Record()) + } else { + // If the record does not match the pattern, log and drop the record. + log.Printf("Record did not match pattern, dropping. 
Key: %s, Value: %s\n", string(e.Record().Key), string(e.Record().Value)) + // Do not write the record if it doesn't match the pattern. + return nil + } +} \ No newline at end of file diff --git a/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/transform/transform.yaml b/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/transform/transform.yaml new file mode 100644 index 0000000..d415530 --- /dev/null +++ b/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/transform/transform.yaml @@ -0,0 +1,33 @@ +# Transform metadata used by the rpk transform build command. +# This metadata file tells rpk: +# 1) The transform’s display name, which also becomes the base for the .wasm file name. +# 2) A brief description of what it does. +# 3) Defaults for environment variables. +# 4) Input and output topics (if you want to define them here rather than in the deploy command). + +# Human-readable name of the transform. rpk transform build uses this for the generated .wasm file. +name: regex + +description: | + Filters the input topic to records that only match a regular expression. + + Regular expressions are implemented using Go's regexp library, which uses the syntax of RE2. + See the RE2 wiki for allowed syntax: https://github.com/google/re2/wiki/Syntax + + Environment variables: + - PATTERN: The regular expression that will match against records (required). + - MATCH_VALUE: By default, the regex matches keys, but if set to "true", the regex matches values. 
+ +# By default, no input topic is set here. (You can set it in your deploy command if preferred.) +input-topic: "" + +# By default, no output topic is set here. (You can set it in your deploy command if preferred.) +output-topic: "" + +# Indicates the specific TinyGo environment used to compile your transform. +language: tinygo-no-goroutines + +env: + # The PATTERN variable must be provided at deploy time. + # Example: --var=PATTERN=".*@example.com" + PATTERN: '' diff --git a/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/rpk-profile.yaml b/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/rpk-profile.yaml new file mode 100644 index 0000000..174e1ec --- /dev/null +++ b/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/rpk-profile.yaml @@ -0,0 +1,24 @@ +# This file configures `rpk` to connect to a remote Redpanda cluster running in the same local network as `rpk`. + +# Configuration for connecting to the Kafka API of the Redpanda cluster. +kafka_api: + # SASL (Simple Authentication and Security Layer) settings for authentication. + sasl: + user: superuser # The username used for authentication + password: secretpassword # The password associated with the username + mechanism: scram-sha-256 # Authentication mechanism; SCRAM-SHA-256 provides secure password-based authentication + # List of Kafka brokers in the Redpanda cluster. + # These brokers ensure high availability and fault tolerance for Kafka-based communication. 
+ brokers: + - 127.0.0.1:19092 # Broker 1: Accessible on localhost, port 19092 + - 127.0.0.1:29092 # Broker 2: Accessible on localhost, port 29092 + - 127.0.0.1:39092 # Broker 3: Accessible on localhost, port 39092 + +# Configuration for connecting to the Redpanda Admin API. +# The Admin API allows you to perform administrative tasks such as managing configurations, monitoring, and scaling. +admin_api: + # List of Admin API endpoints for managing the cluster. + addresses: + - 127.0.0.1:19644 # Admin API for Broker 1: Accessible on localhost, port 19644 + - 127.0.0.1:29644 # Admin API for Broker 2: Accessible on localhost, port 29644 + - 127.0.0.1:39644 # Admin API for Broker 3: Accessible on localhost, port 39644 diff --git a/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/transactions-schema.json b/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/transactions-schema.json new file mode 100644 index 0000000..342c495 --- /dev/null +++ b/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/transactions-schema.json @@ -0,0 +1,37 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "Transactions", + "type": "object", + "properties": { + "email": { + "type": "string", + "format": "email", + "description": "The email address of the user involved in the transaction." + }, + "index": { + "type": "integer", + "description": "A numeric index associated with the transaction." 
+ }, + "price": { + "type": "string", + "pattern": "^XXX \\d+\\.\\d{6}$", + "description": "A string representing the price of the product, including a currency code followed by the amount." + }, + "product_url": { + "type": "string", + "format": "uri", + "description": "A URL that points to the product involved in the transaction." + }, + "timestamp": { + "type": "string", + "format": "date-time", + "description": "The timestamp of when the transaction occurred, formatted in ISO 8601." + }, + "user_id": { + "type": "integer", + "description": "A numeric identifier for the user." + } + }, + "required": ["email", "index", "price", "product_url", "timestamp", "user_id"], + "additionalProperties": false +} diff --git a/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/transactions.md b/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/transactions.md new file mode 100644 index 0000000..e230916 --- /dev/null +++ b/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/transactions.md @@ -0,0 +1,46 @@ +# Transactions Topic Documentation + +This document provides an overview of the `transactions` topic in the Redpanda cluster. The topic is designed to capture autogenerated transaction events with various attributes. + +## Schema Overview + +Each message in the `transactions` topic adheres to the following JSON schema: + +```json +{ + "email": "string", + "index": "integer", + "price": "string", + "product_url": "string", + "timestamp": "string", + "user_id": "integer" +} +``` + +- **email**: The email address of the user involved in the transaction. 
+- **index**: A numeric index associated with the transaction. This could represent the position or order of the transaction in a sequence. +- **price**: A string representing the price of the product. It includes a currency code (e.g., "XXX") followed by the amount. +- **product_url**: A URL that points to the product involved in the transaction. +- **timestamp**: The timestamp of when the transaction occurred, formatted in ISO 8601. +- **user_id**: A numeric identifier for the user. This is typically a unique ID assigned to each user in the system. + +## Example message + +```json +{ + "email": "wzieme@ykczius.edu", + "index": 0, + "price": "XXX 5651308.100000", + "product_url": "http://yjomdta.top/DxvGsCn.php", + "timestamp": "2024-08-16T15:51:19.799474084Z", + "user_id": 1 +} +``` + +## Use cases + +You can use the `transactions` topic for various purposes, including: + +- **Analytics**: Tracking and analyzing user transactions to understand buying behavior, popular products, etc. +- **Monitoring**: Observing transaction patterns to detect anomalies, such as unusual spikes or drops in transaction volume. +- **Data Processing**: Feeding transaction data into other systems, such as a data warehouse or real-time processing pipelines, for further processing and analysis. 
diff --git a/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/transform/README.adoc b/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/transform/README.adoc new file mode 100644 index 0000000..4485435 --- /dev/null +++ b/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/transform/README.adoc @@ -0,0 +1,73 @@ += Modify the Wasm Transform in the Quickstart + +This directory contains the Go source code (`transform.go`) for the data transform that is used in the Redpanda Self-Managed quickstart. +If you're following the quickstart, you *do not* need to modify or rebuild this code. The Docker Compose configuration automatically deploys a pre-built transform called `regex.wasm`. + +However, if you want to customize the data transform logic, continue reading. + +== Why customize the transform? + +- **Custom filtering**: Filter by a different regex or apply multiple conditions. +- **Data manipulation**: Transform records before writing them out. For example, redacting sensitive data or combining fields. +- **Extended functionality**: Add advanced logging, error handling, or multi-topic routing. + +== Prerequisites + +You need the following: + +- At least Go 1.20 installed. ++ +[source,bash] +---- +go version +---- + +- The Redpanda CLI (`rpk`) installed. + +- A running Redpanda cluster. If you're using the local quickstart with Docker Compose, ensure the cluster is up and running. Or, point `rpk` to another Redpanda environment. + +== Modify and deploy your transform + +. Open link:transform.go[transform.go] and make your changes. 
For example: ++ +-- +- Change the regex logic to handle different use cases. +- Add environment variables to control new features. +- Extend the `doRegexFilter()` function to manipulate records. +-- + +. Compile your Go code into a `.wasm` file: ++ +[source,bash] +---- +rpk transform build +---- ++ +This command compiles your Go source and produces a `.wasm` file that you can deploy to Redpanda. + +. Deploy the new transform. ++ +If your Docker Compose setup already has a service to deploy the transform, you can restart that service. ++ +Otherwise, you can deploy your updated `.wasm` manually using `rpk transform deploy`. + +. Produce messages into the input topic. For example: ++ +[source,bash] +---- +echo '{"key":"alice@university.edu","value":"test message"}' | rpk topic produce logins +---- + +. Consume from the output topic. For example: ++ +[source,bash] +---- +rpk topic consume edu-filtered-domains --num 1 +---- + +== Suggested reading + +- link:https://docs.redpanda.com/current/reference/rpk/[Redpanda `rpk` CLI Reference^]. +- link:https://docs.redpanda.com/current/develop/data-transforms/build/[Develop Data Transforms^]. +- https://golang.org/ref/mod[Go Modules^] for managing dependencies and builds in Go. +- https://docs.docker.com/compose/[Docker Compose^] for customizing your environment. 
diff --git a/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/transform/go.mod b/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/transform/go.mod new file mode 100644 index 0000000..4bb9a33 --- /dev/null +++ b/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/transform/go.mod @@ -0,0 +1,5 @@ +module regex + +go 1.20 + +require github.com/redpanda-data/redpanda/src/transform-sdk/go/transform v1.1.0 diff --git a/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/transform/go.sum b/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/transform/go.sum new file mode 100644 index 0000000..8745dd1 --- /dev/null +++ b/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/transform/go.sum @@ -0,0 +1,2 @@ +github.com/redpanda-data/redpanda/src/transform-sdk/go/transform v1.1.0 h1:KxgHJZsHsrT3YX7DMpu/vJN4TZN3KFm1jzrCFLyOepA= +github.com/redpanda-data/redpanda/src/transform-sdk/go/transform v1.1.0/go.mod h1:QGgiwwf/BIsD1b7EiyQ/Apzw+RLSpasRDdpOCiefQFQ= diff --git 
a/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/transform/regex.wasm b/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/transform/regex.wasm new file mode 100644 index 0000000..303962f Binary files /dev/null and b/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/transform/regex.wasm differ diff --git a/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/transform/transform.go b/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/transform/transform.go new file mode 100644 index 0000000..7bbd628 --- /dev/null +++ b/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/transform/transform.go @@ -0,0 +1,122 @@ +package main +// This data transform filters records based on a customizable regex pattern. +// If a record's key or value +// (determined by an environment variable) matches the specified regex, +// the record is forwarded to the output. +// Otherwise, it is dropped. +// +// Usage: +// 1. Provide the following environment variables in your Docker or configuration setup: +// - PATTERN : (required) a regular expression that determines what you want to match. 
+// - MATCH_VALUE : (optional) a boolean to decide whether to check the record value. If false, +// the record key is checked. Default is false. +// +// Example environment variables: +// PATTERN=".*\\.edu$" +// MATCH_VALUE="true" +// +// Logs: +// This transform logs information about each record and whether it matched. +// The logs appear in the _redpanda.transform_logs topic, so you can debug how your records are being processed. +// +// Build instructions: +// go mod tidy +// rpk transform build +// +// For more details on building transforms with the Redpanda SDK, see: +// https://docs.redpanda.com/current/develop/data-transforms +// + +import ( + "log" + "os" + "regexp" + "strings" + + "github.com/redpanda-data/redpanda/src/transform-sdk/go/transform" +) + +var ( + re *regexp.Regexp + checkValue bool +) + +func isTrueVar(v string) bool { + switch strings.ToLower(v) { + case "yes", "ok", "1", "true": + return true + default: + return false + } +} + +// The main() function runs only once at startup. It performs all initialization steps: +// - Reads and compiles the regex pattern. +// - Determines whether to match on the key or value. +// - Registers the doRegexFilter() function to process records. +func main() { + // Set logging preferences, including timestamp and UTC time. + log.SetPrefix("[regex-transform] ") + log.SetFlags(log.Ldate | log.Ltime | log.LUTC | log.Lmicroseconds) + + // Start logging the transformation process + log.Println("Starting transform...") + + // Read the PATTERN environment variable to get the regex pattern. + pattern, ok := os.LookupEnv("PATTERN") + if !ok { + log.Fatal("Missing PATTERN environment variable") + } + // Log the regex pattern being used. + log.Printf("Using PATTERN: %q\n", pattern) + // Compile the regex pattern for later use. + re = regexp.MustCompile(pattern) + + // Read the MATCH_VALUE environment variable to determine whether to check the record's value. 
+ mk, ok := os.LookupEnv("MATCH_VALUE") + checkValue = ok && isTrueVar(mk) + log.Printf("MATCH_VALUE set to: %t\n", checkValue) + + log.Println("Initialization complete, waiting for records...") + + // Listen for records to be written, calling doRegexFilter() for each record. + transform.OnRecordWritten(doRegexFilter) +} + +// The doRegexFilter() function executes each time a new record is written. +// It checks whether the record's key or value (based on MATCH_VALUE) matches the compiled regex. +// If it matches, the record is forwarded, if not, it's dropped. +func doRegexFilter(e transform.WriteEvent, w transform.RecordWriter) error { + // This stores the data to be checked (either the key or value). + var dataToCheck []byte + + // Depending on the MATCH_VALUE environment variable, decide whether to check the record's key or value. + if checkValue { + // Use the value of the record if MATCH_VALUE is true. + dataToCheck = e.Record().Value + log.Printf("Checking record value: %s\n", string(dataToCheck)) + } else { + // Use the key of the record if MATCH_VALUE is false. + dataToCheck = e.Record().Key + log.Printf("Checking record key: %s\n", string(dataToCheck)) + } + + // If there is no key or value to check, log and skip the record. + if dataToCheck == nil { + log.Println("Record has no key/value to check, skipping.") + return nil + } + + // Check if the data matches the regex pattern. + pass := re.Match(dataToCheck) + if pass { + // If the record matches the pattern, log and write the record to the output topic. + log.Printf("Record matched pattern, passing through. Key: %s, Value: %s\n", string(e.Record().Key), string(e.Record().Value)) + return w.Write(e.Record()) + } else { + // If the record does not match the pattern, log and drop the record. + log.Printf("Record did not match pattern, dropping. Key: %s, Value: %s\n", string(e.Record().Key), string(e.Record().Value)) + // Do not write the record if it doesn't match the pattern. 
+ return nil + } +} \ No newline at end of file diff --git a/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/transform/transform.yaml b/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/transform/transform.yaml new file mode 100644 index 0000000..d415530 --- /dev/null +++ b/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/transform/transform.yaml @@ -0,0 +1,33 @@ +# Transform metadata used by the rpk transform build command. +# This metadata file tells rpk: +# 1) The transform’s display name, which also becomes the base for the .wasm file name. +# 2) A brief description of what it does. +# 3) Defaults for environment variables. +# 4) Input and output topics (if you want to define them here rather than in the deploy command). + +# Human-readable name of the transform. rpk transform build uses this for the generated .wasm file. +name: regex + +description: | + Filters the input topic to records that only match a regular expression. + + Regular expressions are implemented using Go's regexp library, which uses the syntax of RE2. + See the RE2 wiki for allowed syntax: https://github.com/google/re2/wiki/Syntax + + Environment variables: + - PATTERN: The regular expression that will match against records (required). + - MATCH_VALUE: By default, the regex matches keys, but if set to "true", the regex matches values. + +# By default, no input topic is set here. (You can set it in your deploy command if preferred.) +input-topic: "" + +# By default, no output topic is set here. (You can set it in your deploy command if preferred.) 
+output-topic: "" + +# Indicates the specific TinyGo environment used to compile your transform. +language: tinygo-no-goroutines + +env: + # The PATTERN variable must be provided at deploy time. + # Example: --var=PATTERN=".*@example.com" + PATTERN: '' diff --git a/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/rpk-profile.yaml b/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/rpk-profile.yaml new file mode 100644 index 0000000..174e1ec --- /dev/null +++ b/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/rpk-profile.yaml @@ -0,0 +1,24 @@ +# This file configures `rpk` to connect to a remote Redpanda cluster running in the same local network as `rpk`. + +# Configuration for connecting to the Kafka API of the Redpanda cluster. +kafka_api: + # SASL (Simple Authentication and Security Layer) settings for authentication. + sasl: + user: superuser # The username used for authentication + password: secretpassword # The password associated with the username + mechanism: scram-sha-256 # Authentication mechanism; SCRAM-SHA-256 provides secure password-based authentication + # List of Kafka brokers in the Redpanda cluster. + # These brokers ensure high availability and fault tolerance for Kafka-based communication. + brokers: + - 127.0.0.1:19092 # Broker 1: Accessible on localhost, port 19092 + - 127.0.0.1:29092 # Broker 2: Accessible on localhost, port 29092 + - 127.0.0.1:39092 # Broker 3: Accessible on localhost, port 39092 + +# Configuration for connecting to the Redpanda Admin API. +# The Admin API allows you to perform administrative tasks such as managing configurations, monitoring, and scaling. 
+admin_api: + # List of Admin API endpoints for managing the cluster. + addresses: + - 127.0.0.1:19644 # Admin API for Broker 1: Accessible on localhost, port 19644 + - 127.0.0.1:29644 # Admin API for Broker 2: Accessible on localhost, port 29644 + - 127.0.0.1:39644 # Admin API for Broker 3: Accessible on localhost, port 39644 diff --git a/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/transactions-schema.json b/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/transactions-schema.json new file mode 100644 index 0000000..342c495 --- /dev/null +++ b/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/transactions-schema.json @@ -0,0 +1,37 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "Transactions", + "type": "object", + "properties": { + "email": { + "type": "string", + "format": "email", + "description": "The email address of the user involved in the transaction." + }, + "index": { + "type": "integer", + "description": "A numeric index associated with the transaction." + }, + "price": { + "type": "string", + "pattern": "^XXX \\d+\\.\\d{6}$", + "description": "A string representing the price of the product, including a currency code followed by the amount." + }, + "product_url": { + "type": "string", + "format": "uri", + "description": "A URL that points to the product involved in the transaction." + }, + "timestamp": { + "type": "string", + "format": "date-time", + "description": "The timestamp of when the transaction occurred, formatted in ISO 8601." + }, + "user_id": { + "type": "integer", + "description": "A numeric identifier for the user." 
+ } + }, + "required": ["email", "index", "price", "product_url", "timestamp", "user_id"], + "additionalProperties": false +} diff --git a/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/transactions.md b/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/transactions.md new file mode 100644 index 0000000..e230916 --- /dev/null +++ b/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/transactions.md @@ -0,0 +1,46 @@ +# Transactions Topic Documentation + +This document provides an overview of the `transactions` topic in the Redpanda cluster. The topic is designed to capture autogenerated transaction events with various attributes. + +## Schema Overview + +Each message in the `transactions` topic adheres to the following JSON schema: + +```json +{ + "email": "string", + "index": "integer", + "price": "string", + "product_url": "string", + "timestamp": "string", + "user_id": "integer" +} +``` + +- **email**: The email address of the user involved in the transaction. +- **index**: A numeric index associated with the transaction. This could represent the position or order of the transaction in a sequence. +- **price**: A string representing the price of the product. It includes a currency code (e.g., "XXX") followed by the amount. +- **product_url**: A URL that points to the product involved in the transaction. +- **timestamp**: The timestamp of when the transaction occurred, formatted in ISO 8601. +- **user_id**: A numeric identifier for the user. This is typically a unique ID assigned to each user in the system. 
+ +## Example message + +```json +{ + "email": "wzieme@ykczius.edu", + "index": 0, + "price": "XXX 5651308.100000", + "product_url": "http://yjomdta.top/DxvGsCn.php", + "timestamp": "2024-08-16T15:51:19.799474084Z", + "user_id": 1 +} +``` + +## Use cases + +You can use the `transactions` topic for various purposes, including: + +- **Analytics**: Tracking and analyzing user transactions to understand buying behavior, popular products, etc. +- **Monitoring**: Observing transaction patterns to detect anomalies, such as unusual spikes or drops in transaction volume. +- **Data Processing**: Feeding transaction data into other systems, such as a data warehouse or real-time processing pipelines, for further processing and analysis. diff --git a/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/transform/README.adoc b/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/transform/README.adoc new file mode 100644 index 0000000..4485435 --- /dev/null +++ b/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/transform/README.adoc @@ -0,0 +1,73 @@ += Modify the Wasm Transform in the Quickstart + +This directory contains the Go source code (`transform.go`) for the data transform that is used in the Redpanda Self-Managed quickstart. +If you're following the quickstart, you *do not* need to modify or rebuild this code. The Docker Compose configuration automatically deploys a pre-built transform called `regex.wasm`. + +However, if you want to customize the data transform logic, continue reading. + +== Why customize the transform? + +- **Custom filtering**: Filter by a different regex or apply multiple conditions. 
+- **Data manipulation**: Transform records before writing them out. For example, redacting sensitive data or combining fields. +- **Extended functionality**: Add advanced logging, error handling, or multi-topic routing. + +== Prerequisites + +You need the following: + +- At least Go 1.20 installed. ++ +[source,bash] +---- +go version +---- + +- The Redpanda CLI (`rpk`) installed. + +- A running Redpanda cluster. If you're using the local quickstart with Docker Compose, ensure the cluster is up and running. Or, point `rpk` to another Redpanda environment. + +== Modify and deploy your transform + +. Open link:transform.go[transform.go] and make your changes. For example: ++ +-- +- Change the regex logic to handle different use cases. +- Add environment variables to control new features. +- Extend the `doRegexFilter()` function to manipulate records. +-- + +. Compile your Go code into a `.wasm` file: ++ +[source,bash] +---- +rpk transform build +---- ++ +This command compiles your Go source and produces a `.wasm` file that you can deploy to Redpanda. + +. Deploy the new transform. ++ +If your Docker Compose setup already has a service to deploy the transform, you can restart that service. ++ +Otherwise, you can deploy your updated `.wasm` manually using `rpk transform deploy`. + +. Produce messages into the input topic. For example: ++ +[source,bash] +---- +echo '{"key":"alice@university.edu","value":"test message"}' | rpk topic produce logins +---- + +. Consume from the output topic. For example: ++ +[source,bash] +---- +rpk topic consume edu-filtered-domains --num 1 +---- + +== Suggested reading + +- link:https://docs.redpanda.com/current/reference/rpk/[Redpanda `rpk` CLI Reference^]. +- link:https://docs.redpanda.com/current/develop/data-transforms/build/[Develop Data Transforms^]. +- https://golang.org/ref/mod[Go Modules^] for managing dependencies and builds in Go. +- https://docs.docker.com/compose/[Docker Compose^] for customizing your environment. 
diff --git a/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/transform/go.mod b/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/transform/go.mod new file mode 100644 index 0000000..4bb9a33 --- /dev/null +++ b/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/transform/go.mod @@ -0,0 +1,5 @@ +module regex + +go 1.20 + +require github.com/redpanda-data/redpanda/src/transform-sdk/go/transform v1.1.0 diff --git a/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/transform/go.sum b/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/transform/go.sum new file mode 100644 index 0000000..8745dd1 --- /dev/null +++ b/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/transform/go.sum @@ -0,0 +1,2 @@ +github.com/redpanda-data/redpanda/src/transform-sdk/go/transform v1.1.0 h1:KxgHJZsHsrT3YX7DMpu/vJN4TZN3KFm1jzrCFLyOepA= +github.com/redpanda-data/redpanda/src/transform-sdk/go/transform v1.1.0/go.mod h1:QGgiwwf/BIsD1b7EiyQ/Apzw+RLSpasRDdpOCiefQFQ= diff --git a/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/transform/regex.wasm b/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/transform/regex.wasm new file mode 100644 index 0000000..303962f Binary 
files /dev/null and b/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/transform/regex.wasm differ diff --git a/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/transform/transform.go b/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/transform/transform.go new file mode 100644 index 0000000..7bbd628 --- /dev/null +++ b/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/transform/transform.go @@ -0,0 +1,122 @@ +package main +// This data transform filters records based on a customizable regex pattern. +// If a record's key or value +// (determined by an environment variable) matches the specified regex, +// the record is forwarded to the output. +// Otherwise, it is dropped. +// +// Usage: +// 1. Provide the following environment variables in your Docker or configuration setup: +// - PATTERN : (required) a regular expression that determines what you want to match. +// - MATCH_VALUE : (optional) a boolean to decide whether to check the record value. If false, +// the record key is checked. Default is false. +// +// Example environment variables: +// PATTERN=".*\\.edu$" +// MATCH_VALUE="true" +// +// Logs: +// This transform logs information about each record and whether it matched. +// The logs appear in the _redpanda.transform_logs topic, so you can debug how your records are being processed. 
+// +// Build instructions: +// go mod tidy +// rpk transform build +// +// For more details on building transforms with the Redpanda SDK, see: +// https://docs.redpanda.com/current/develop/data-transforms +// + +import ( + "log" + "os" + "regexp" + "strings" + + "github.com/redpanda-data/redpanda/src/transform-sdk/go/transform" +) + +var ( + re *regexp.Regexp + checkValue bool +) + +func isTrueVar(v string) bool { + switch strings.ToLower(v) { + case "yes", "ok", "1", "true": + return true + default: + return false + } +} + +// The main() function runs only once at startup. It performs all initialization steps: +// - Reads and compiles the regex pattern. +// - Determines whether to match on the key or value. +// - Registers the doRegexFilter() function to process records. +func main() { + // Set logging preferences, including timestamp and UTC time. + log.SetPrefix("[regex-transform] ") + log.SetFlags(log.Ldate | log.Ltime | log.LUTC | log.Lmicroseconds) + + // Start logging the transformation process + log.Println("Starting transform...") + + // Read the PATTERN environment variable to get the regex pattern. + pattern, ok := os.LookupEnv("PATTERN") + if !ok { + log.Fatal("Missing PATTERN environment variable") + } + // Log the regex pattern being used. + log.Printf("Using PATTERN: %q\n", pattern) + // Compile the regex pattern for later use. + re = regexp.MustCompile(pattern) + + // Read the MATCH_VALUE environment variable to determine whether to check the record's value. + mk, ok := os.LookupEnv("MATCH_VALUE") + checkValue = ok && isTrueVar(mk) + log.Printf("MATCH_VALUE set to: %t\n", checkValue) + + log.Println("Initialization complete, waiting for records...") + + // Listen for records to be written, calling doRegexFilter() for each record. + transform.OnRecordWritten(doRegexFilter) +} + +// The doRegexFilter() function executes each time a new record is written. 
+// It checks whether the record's key or value (based on MATCH_VALUE) matches the compiled regex. +// If it matches, the record is forwarded, if not, it's dropped. +func doRegexFilter(e transform.WriteEvent, w transform.RecordWriter) error { + // This stores the data to be checked (either the key or value). + var dataToCheck []byte + + // Depending on the MATCH_VALUE environment variable, decide whether to check the record's key or value. + if checkValue { + // Use the value of the record if MATCH_VALUE is true. + dataToCheck = e.Record().Value + log.Printf("Checking record value: %s\n", string(dataToCheck)) + } else { + // Use the key of the record if MATCH_VALUE is false. + dataToCheck = e.Record().Key + log.Printf("Checking record key: %s\n", string(dataToCheck)) + } + + // If there is no key or value to check, log and skip the record. + if dataToCheck == nil { + log.Println("Record has no key/value to check, skipping.") + return nil + } + + // Check if the data matches the regex pattern. + pass := re.Match(dataToCheck) + if pass { + // If the record matches the pattern, log and write the record to the output topic. + log.Printf("Record matched pattern, passing through. Key: %s, Value: %s\n", string(e.Record().Key), string(e.Record().Value)) + return w.Write(e.Record()) + } else { + // If the record does not match the pattern, log and drop the record. + log.Printf("Record did not match pattern, dropping. Key: %s, Value: %s\n", string(e.Record().Key), string(e.Record().Value)) + // Do not write the record if it doesn't match the pattern. 
+ return nil + } +} \ No newline at end of file diff --git a/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/transform/transform.yaml b/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/transform/transform.yaml new file mode 100644 index 0000000..d415530 --- /dev/null +++ b/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/transform/transform.yaml @@ -0,0 +1,33 @@ +# Transform metadata used by the rpk transform build command. +# This metadata file tells rpk: +# 1) The transform’s display name, which also becomes the base for the .wasm file name. +# 2) A brief description of what it does. +# 3) Defaults for environment variables. +# 4) Input and output topics (if you want to define them here rather than in the deploy command). + +# Human-readable name of the transform. rpk transform build uses this for the generated .wasm file. +name: regex + +description: | + Filters the input topic to records that only match a regular expression. + + Regular expressions are implemented using Go's regexp library, which uses the syntax of RE2. + See the RE2 wiki for allowed syntax: https://github.com/google/re2/wiki/Syntax + + Environment variables: + - PATTERN: The regular expression that will match against records (required). + - MATCH_VALUE: By default, the regex matches keys, but if set to "true", the regex matches values. + +# By default, no input topic is set here. (You can set it in your deploy command if preferred.) +input-topic: "" + +# By default, no output topic is set here. (You can set it in your deploy command if preferred.) +output-topic: "" + +# Indicates the specific TinyGo environment used to compile your transform. 
+language: tinygo-no-goroutines + +env: + # The PATTERN variable must be provided at deploy time. + # Example: --var=PATTERN=".*@example.com" + PATTERN: '' diff --git a/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/rpk-profile.yaml b/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/rpk-profile.yaml new file mode 100644 index 0000000..174e1ec --- /dev/null +++ b/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/rpk-profile.yaml @@ -0,0 +1,24 @@ +# This file configures `rpk` to connect to a remote Redpanda cluster running in the same local network as `rpk`. + +# Configuration for connecting to the Kafka API of the Redpanda cluster. +kafka_api: + # SASL (Simple Authentication and Security Layer) settings for authentication. + sasl: + user: superuser # The username used for authentication + password: secretpassword # The password associated with the username + mechanism: scram-sha-256 # Authentication mechanism; SCRAM-SHA-256 provides secure password-based authentication + # List of Kafka brokers in the Redpanda cluster. + # These brokers ensure high availability and fault tolerance for Kafka-based communication. + brokers: + - 127.0.0.1:19092 # Broker 1: Accessible on localhost, port 19092 + - 127.0.0.1:29092 # Broker 2: Accessible on localhost, port 29092 + - 127.0.0.1:39092 # Broker 3: Accessible on localhost, port 39092 + +# Configuration for connecting to the Redpanda Admin API. +# The Admin API allows you to perform administrative tasks such as managing configurations, monitoring, and scaling. +admin_api: + # List of Admin API endpoints for managing the cluster. 
+ addresses: + - 127.0.0.1:19644 # Admin API for Broker 1: Accessible on localhost, port 19644 + - 127.0.0.1:29644 # Admin API for Broker 2: Accessible on localhost, port 29644 + - 127.0.0.1:39644 # Admin API for Broker 3: Accessible on localhost, port 39644 diff --git a/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/transactions-schema.json b/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/transactions-schema.json new file mode 100644 index 0000000..342c495 --- /dev/null +++ b/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/transactions-schema.json @@ -0,0 +1,37 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "Transactions", + "type": "object", + "properties": { + "email": { + "type": "string", + "format": "email", + "description": "The email address of the user involved in the transaction." + }, + "index": { + "type": "integer", + "description": "A numeric index associated with the transaction." + }, + "price": { + "type": "string", + "pattern": "^XXX \\d+\\.\\d{6}$", + "description": "A string representing the price of the product, including a currency code followed by the amount." + }, + "product_url": { + "type": "string", + "format": "uri", + "description": "A URL that points to the product involved in the transaction." + }, + "timestamp": { + "type": "string", + "format": "date-time", + "description": "The timestamp of when the transaction occurred, formatted in ISO 8601." + }, + "user_id": { + "type": "integer", + "description": "A numeric identifier for the user." 
+ } + }, + "required": ["email", "index", "price", "product_url", "timestamp", "user_id"], + "additionalProperties": false +} diff --git a/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/transactions.md b/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/transactions.md new file mode 100644 index 0000000..e230916 --- /dev/null +++ b/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/transactions.md @@ -0,0 +1,46 @@ +# Transactions Topic Documentation + +This document provides an overview of the `transactions` topic in the Redpanda cluster. The topic is designed to capture autogenerated transaction events with various attributes. + +## Schema Overview + +Each message in the `transactions` topic adheres to the following JSON schema: + +```json +{ + "email": "string", + "index": "integer", + "price": "string", + "product_url": "string", + "timestamp": "string", + "user_id": "integer" +} +``` + +- **email**: The email address of the user involved in the transaction. +- **index**: A numeric index associated with the transaction. This could represent the position or order of the transaction in a sequence. +- **price**: A string representing the price of the product. It includes a currency code (e.g., "XXX") followed by the amount. +- **product_url**: A URL that points to the product involved in the transaction. +- **timestamp**: The timestamp of when the transaction occurred, formatted in ISO 8601. +- **user_id**: A numeric identifier for the user. This is typically a unique ID assigned to each user in the system. 
+ +## Example message + +```json +{ + "email": "wzieme@ykczius.edu", + "index": 0, + "price": "XXX 5651308.100000", + "product_url": "http://yjomdta.top/DxvGsCn.php", + "timestamp": "2024-08-16T15:51:19.799474084Z", + "user_id": 1 +} +``` + +## Use cases + +You can use the `transactions` topic for various purposes, including: + +- **Analytics**: Tracking and analyzing user transactions to understand buying behavior, popular products, etc. +- **Monitoring**: Observing transaction patterns to detect anomalies, such as unusual spikes or drops in transaction volume. +- **Data Processing**: Feeding transaction data into other systems, such as a data warehouse or real-time processing pipelines, for further processing and analysis. diff --git a/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/transform/README.adoc b/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/transform/README.adoc new file mode 100644 index 0000000..4485435 --- /dev/null +++ b/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/transform/README.adoc @@ -0,0 +1,73 @@ += Modify the Wasm Transform in the Quickstart + +This directory contains the Go source code (`transform.go`) for the data transform that is used in the Redpanda Self-Managed quickstart. +If you're following the quickstart, you *do not* need to modify or rebuild this code. The Docker Compose configuration automatically deploys a pre-built transform called `regex.wasm`. + +However, if you want to customize the data transform logic, continue reading. + +== Why customize the transform? + +- **Custom filtering**: Filter by a different regex or apply multiple conditions. +- **Data manipulation**: Transform records before writing them out. For example, redacting sensitive data or combining fields. 
+- **Extended functionality**: Add advanced logging, error handling, or multi-topic routing. + +== Prerequisites + +You need the following: + +- At least Go 1.20 installed. ++ +[source,bash] +---- +go version +---- + +- The Redpanda CLI (`rpk`) installed. + +- A running Redpanda cluster. If you're using the local quickstart with Docker Compose, ensure the cluster is up and running. Or, point `rpk` to another Redpanda environment. + +== Modify and deploy your transform + +. Open link:transform.go[transform.go] and make your changes. For example: ++ +-- +- Change the regex logic to handle different use cases. +- Add environment variables to control new features. +- Extend the `doRegexFilter()` function to manipulate records. +-- + +. Compile your Go code into a `.wasm` file: ++ +[source,bash] +---- +rpk transform build +---- ++ +This command compiles your Go source and produces a `.wasm` file that you can deploy to Redpanda. + +. Deploy the new transform. ++ +If your Docker Compose setup already has a service to deploy the transform, you can restart that service. ++ +Otherwise, you can deploy your updated `.wasm` manually using `rpk transform deploy`. + +. Produce messages into the input topic. For example: ++ +[source,bash] +---- +echo '{"key":"alice@university.edu","value":"test message"}' | rpk topic produce logins +---- + +. Consume from the output topic. For example: ++ +[source,bash] +---- +rpk topic consume edu-filtered-domains --num 1 +---- + +== Suggested reading + +- link:https://docs.redpanda.com/current/reference/rpk/[Redpanda `rpk` CLI Reference^]. +- link:https://docs.redpanda.com/current/develop/data-transforms/build/[Develop Data Transforms^]. +- https://golang.org/ref/mod[Go Modules^] for managing dependencies and builds in Go. +- https://docs.docker.com/compose/[Docker Compose^] for customizing your environment. 
diff --git a/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/transform/go.mod b/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/transform/go.mod new file mode 100644 index 0000000..4bb9a33 --- /dev/null +++ b/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/transform/go.mod @@ -0,0 +1,5 @@ +module regex + +go 1.20 + +require github.com/redpanda-data/redpanda/src/transform-sdk/go/transform v1.1.0 diff --git a/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/transform/go.sum b/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/transform/go.sum new file mode 100644 index 0000000..8745dd1 --- /dev/null +++ b/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/transform/go.sum @@ -0,0 +1,2 @@ +github.com/redpanda-data/redpanda/src/transform-sdk/go/transform v1.1.0 h1:KxgHJZsHsrT3YX7DMpu/vJN4TZN3KFm1jzrCFLyOepA= +github.com/redpanda-data/redpanda/src/transform-sdk/go/transform v1.1.0/go.mod h1:QGgiwwf/BIsD1b7EiyQ/Apzw+RLSpasRDdpOCiefQFQ= diff --git a/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/transform/regex.wasm b/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/transform/regex.wasm new file mode 100644 index 0000000..303962f Binary files /dev/null and b/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/transform/regex.wasm differ diff --git 
// re holds the compiled PATTERN expression. It is assigned once in main()
// and read by doRegexFilter() for every record.
var re *regexp.Regexp

// checkValue selects what the expression is matched against: the record
// value when true, the record key when false. It is assigned once in main().
var checkValue bool

// isTrueVar reports whether v is one of the affirmative spellings accepted
// for boolean settings: "yes", "ok", "1", or "true". The comparison ignores
// letter case; anything else, including the empty string, yields false.
func isTrueVar(v string) bool {
	lowered := strings.ToLower(v)
	for _, accepted := range [...]string{"yes", "ok", "1", "true"} {
		if lowered == accepted {
			return true
		}
	}
	return false
}
It performs all initialization steps: +// - Reads and compiles the regex pattern. +// - Determines whether to match on the key or value. +// - Registers the doRegexFilter() function to process records. +func main() { + // Set logging preferences, including timestamp and UTC time. + log.SetPrefix("[regex-transform] ") + log.SetFlags(log.Ldate | log.Ltime | log.LUTC | log.Lmicroseconds) + + // Start logging the transformation process + log.Println("Starting transform...") + + // Read the PATTERN environment variable to get the regex pattern. + pattern, ok := os.LookupEnv("PATTERN") + if !ok { + log.Fatal("Missing PATTERN environment variable") + } + // Log the regex pattern being used. + log.Printf("Using PATTERN: %q\n", pattern) + // Compile the regex pattern for later use. + re = regexp.MustCompile(pattern) + + // Read the MATCH_VALUE environment variable to determine whether to check the record's value. + mk, ok := os.LookupEnv("MATCH_VALUE") + checkValue = ok && isTrueVar(mk) + log.Printf("MATCH_VALUE set to: %t\n", checkValue) + + log.Println("Initialization complete, waiting for records...") + + // Listen for records to be written, calling doRegexFilter() for each record. + transform.OnRecordWritten(doRegexFilter) +} + +// The doRegexFilter() function executes each time a new record is written. +// It checks whether the record's key or value (based on MATCH_VALUE) matches the compiled regex. +// If it matches, the record is forwarded, if not, it's dropped. +func doRegexFilter(e transform.WriteEvent, w transform.RecordWriter) error { + // This stores the data to be checked (either the key or value). + var dataToCheck []byte + + // Depending on the MATCH_VALUE environment variable, decide whether to check the record's key or value. + if checkValue { + // Use the value of the record if MATCH_VALUE is true. 
+ dataToCheck = e.Record().Value + log.Printf("Checking record value: %s\n", string(dataToCheck)) + } else { + // Use the key of the record if MATCH_VALUE is false. + dataToCheck = e.Record().Key + log.Printf("Checking record key: %s\n", string(dataToCheck)) + } + + // If there is no key or value to check, log and skip the record. + if dataToCheck == nil { + log.Println("Record has no key/value to check, skipping.") + return nil + } + + // Check if the data matches the regex pattern. + pass := re.Match(dataToCheck) + if pass { + // If the record matches the pattern, log and write the record to the output topic. + log.Printf("Record matched pattern, passing through. Key: %s, Value: %s\n", string(e.Record().Key), string(e.Record().Value)) + return w.Write(e.Record()) + } else { + // If the record does not match the pattern, log and drop the record. + log.Printf("Record did not match pattern, dropping. Key: %s, Value: %s\n", string(e.Record().Key), string(e.Record().Value)) + // Do not write the record if it doesn't match the pattern. + return nil + } +} \ No newline at end of file diff --git a/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/transform/transform.yaml b/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/transform/transform.yaml new file mode 100644 index 0000000..d415530 --- /dev/null +++ b/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/transform/transform.yaml @@ -0,0 +1,33 @@ +# Transform metadata used by the rpk transform build command. +# This metadata file tells rpk: +# 1) The transform’s display name, which also becomes the base for the .wasm file name. +# 2) A brief description of what it does. +# 3) Defaults for environment variables. +# 4) Input and output topics (if you want to define them here rather than in the deploy command). 
+ +# Human-readable name of the transform. rpk transform build uses this for the generated .wasm file. +name: regex + +description: | + Filters the input topic to records that only match a regular expression. + + Regular expressions are implemented using Go's regexp library, which uses the syntax of RE2. + See the RE2 wiki for allowed syntax: https://github.com/google/re2/wiki/Syntax + + Environment variables: + - PATTERN: The regular expression that will match against records (required). + - MATCH_VALUE: By default, the regex matches keys, but if set to "true", the regex matches values. + +# By default, no input topic is set here. (You can set it in your deploy command if preferred.) +input-topic: "" + +# By default, no output topic is set here. (You can set it in your deploy command if preferred.) +output-topic: "" + +# Indicates the specific TinyGo environment used to compile your transform. +language: tinygo-no-goroutines + +env: + # The PATTERN variable must be provided at deploy time. + # Example: --var=PATTERN=".*@example.com" + PATTERN: '' diff --git a/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/rpk-profile.yaml b/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/rpk-profile.yaml new file mode 100644 index 0000000..174e1ec --- /dev/null +++ b/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/rpk-profile.yaml @@ -0,0 +1,24 @@ +# This file configures `rpk` to connect to a remote Redpanda cluster running in the same local network as `rpk`. + +# Configuration for connecting to the Kafka API of the Redpanda cluster. +kafka_api: + # SASL (Simple Authentication and Security Layer) settings for authentication. 
+ sasl: + user: superuser # The username used for authentication + password: secretpassword # The password associated with the username + mechanism: scram-sha-256 # Authentication mechanism; SCRAM-SHA-256 provides secure password-based authentication + # List of Kafka brokers in the Redpanda cluster. + # These brokers ensure high availability and fault tolerance for Kafka-based communication. + brokers: + - 127.0.0.1:19092 # Broker 1: Accessible on localhost, port 19092 + - 127.0.0.1:29092 # Broker 2: Accessible on localhost, port 29092 + - 127.0.0.1:39092 # Broker 3: Accessible on localhost, port 39092 + +# Configuration for connecting to the Redpanda Admin API. +# The Admin API allows you to perform administrative tasks such as managing configurations, monitoring, and scaling. +admin_api: + # List of Admin API endpoints for managing the cluster. + addresses: + - 127.0.0.1:19644 # Admin API for Broker 1: Accessible on localhost, port 19644 + - 127.0.0.1:29644 # Admin API for Broker 2: Accessible on localhost, port 29644 + - 127.0.0.1:39644 # Admin API for Broker 3: Accessible on localhost, port 39644 diff --git a/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/transactions-schema.json b/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/transactions-schema.json new file mode 100644 index 0000000..342c495 --- /dev/null +++ b/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/transactions-schema.json @@ -0,0 +1,37 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "Transactions", + "type": "object", + "properties": { + "email": { + "type": "string", + "format": "email", + "description": "The email address of the user involved in the transaction." + }, + "index": { + "type": "integer", + "description": "A numeric index associated with the transaction." 
+ }, + "price": { + "type": "string", + "pattern": "^XXX \\d+\\.\\d{6}$", + "description": "A string representing the price of the product, including a currency code followed by the amount." + }, + "product_url": { + "type": "string", + "format": "uri", + "description": "A URL that points to the product involved in the transaction." + }, + "timestamp": { + "type": "string", + "format": "date-time", + "description": "The timestamp of when the transaction occurred, formatted in ISO 8601." + }, + "user_id": { + "type": "integer", + "description": "A numeric identifier for the user." + } + }, + "required": ["email", "index", "price", "product_url", "timestamp", "user_id"], + "additionalProperties": false +} diff --git a/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/transactions.md b/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/transactions.md new file mode 100644 index 0000000..e230916 --- /dev/null +++ b/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/transactions.md @@ -0,0 +1,46 @@ +# Transactions Topic Documentation + +This document provides an overview of the `transactions` topic in the Redpanda cluster. The topic is designed to capture autogenerated transaction events with various attributes. + +## Schema Overview + +Each message in the `transactions` topic adheres to the following JSON schema: + +```json +{ + "email": "string", + "index": "integer", + "price": "string", + "product_url": "string", + "timestamp": "string", + "user_id": "integer" +} +``` + +- **email**: The email address of the user involved in the transaction. +- **index**: A numeric index associated with the transaction. This could represent the position or order of the transaction in a sequence. +- **price**: A string representing the price of the product. It includes a currency code (e.g., "XXX") followed by the amount. 
+- **product_url**: A URL that points to the product involved in the transaction. +- **timestamp**: The timestamp of when the transaction occurred, formatted in ISO 8601. +- **user_id**: A numeric identifier for the user. This is typically a unique ID assigned to each user in the system. + +## Example message + +```json +{ + "email": "wzieme@ykczius.edu", + "index": 0, + "price": "XXX 5651308.100000", + "product_url": "http://yjomdta.top/DxvGsCn.php", + "timestamp": "2024-08-16T15:51:19.799474084Z", + "user_id": 1 +} +``` + +## Use cases + +You can use the `transactions` topic for various purposes, including: + +- **Analytics**: Tracking and analyzing user transactions to understand buying behavior, popular products, etc. +- **Monitoring**: Observing transaction patterns to detect anomalies, such as unusual spikes or drops in transaction volume. +- **Data Processing**: Feeding transaction data into other systems, such as a data warehouse or real-time processing pipelines, for further processing and analysis. diff --git a/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/transform/README.adoc b/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/transform/README.adoc new file mode 100644 index 0000000..4485435 --- /dev/null +++ b/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/transform/README.adoc @@ -0,0 +1,73 @@ += Modify the Wasm Transform in the Quickstart + +This directory contains the Go source code (`transform.go`) for the data transform that is used in the Redpanda Self-Managed quickstart. +If you're following the quickstart, you *do not* need to modify or rebuild this code. The Docker Compose configuration automatically deploys a pre-built transform called `regex.wasm`. + +However, if you want to customize the data transform logic, continue reading. + +== Why customize the transform? 
+ +- **Custom filtering**: Filter by a different regex or apply multiple conditions. +- **Data manipulation**: Transform records before writing them out. For example, redacting sensitive data or combining fields. +- **Extended functionality**: Add advanced logging, error handling, or multi-topic routing. + +== Prerequisites + +You need the following: + +- At least Go 1.20 installed. ++ +[source,bash] +---- +go version +---- + +- The Redpanda CLI (`rpk`) installed. + +- A running Redpanda cluster. If you're using the local quickstart with Docker Compose, ensure the cluster is up and running. Or, point `rpk` to another Redpanda environment. + +== Modify and deploy your transform + +. Open link:transform.go[transform.go] and make your changes. For example: ++ +-- +- Change the regex logic to handle different use cases. +- Add environment variables to control new features. +- Extend the `doRegexFilter()` function to manipulate records. +-- + +. Compile your Go code into a `.wasm` file: ++ +[source,bash] +---- +rpk transform build +---- ++ +This command compiles your Go source and produces a `.wasm` file that you can deploy to Redpanda. + +. Deploy the new transform. ++ +If your Docker Compose setup already has a service to deploy the transform, you can restart that service. ++ +Otherwise, you can deploy your updated `.wasm` manually using `rpk transform deploy`. + +. Produce messages into the input topic. For example: ++ +[source,bash] +---- +echo '{"key":"alice@university.edu","value":"test message"}' | rpk topic produce logins +---- + +. Consume from the output topic. For example: ++ +[source,bash] +---- +rpk topic consume edu-filtered-domains --num 1 +---- + +== Suggested reading + +- link:https://docs.redpanda.com/current/reference/rpk/[Redpanda `rpk` CLI Reference^]. +- link:https://docs.redpanda.com/current/develop/data-transforms/build/[Develop Data Transforms^]. +- https://golang.org/ref/mod[Go Modules^] for managing dependencies and builds in Go. 
+- https://docs.docker.com/compose/[Docker Compose^] for customizing your environment. diff --git a/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/transform/go.mod b/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/transform/go.mod new file mode 100644 index 0000000..4bb9a33 --- /dev/null +++ b/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/transform/go.mod @@ -0,0 +1,5 @@ +module regex + +go 1.20 + +require github.com/redpanda-data/redpanda/src/transform-sdk/go/transform v1.1.0 diff --git a/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/transform/go.sum b/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/transform/go.sum new file mode 100644 index 0000000..8745dd1 --- /dev/null +++ b/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/transform/go.sum @@ -0,0 +1,2 @@ +github.com/redpanda-data/redpanda/src/transform-sdk/go/transform v1.1.0 h1:KxgHJZsHsrT3YX7DMpu/vJN4TZN3KFm1jzrCFLyOepA= +github.com/redpanda-data/redpanda/src/transform-sdk/go/transform v1.1.0/go.mod h1:QGgiwwf/BIsD1b7EiyQ/Apzw+RLSpasRDdpOCiefQFQ= diff --git a/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/transform/regex.wasm b/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/transform/regex.wasm new file mode 100644 index 0000000..303962f Binary files /dev/null and b/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/transform/regex.wasm differ diff --git a/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/transform/transform.go b/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/transform/transform.go new file mode 100644 index 0000000..7bbd628 --- /dev/null +++ 
b/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/transform/transform.go @@ -0,0 +1,122 @@ +package main +// This data transform filters records based on a customizable regex pattern. +// If a record's key or value +// (determined by an environment variable) matches the specified regex, +// the record is forwarded to the output. +// Otherwise, it is dropped. +// +// Usage: +// 1. Provide the following environment variables in your Docker or configuration setup: +// - PATTERN : (required) a regular expression that determines what you want to match. +// - MATCH_VALUE : (optional) a boolean to decide whether to check the record value. If false, +// the record key is checked. Default is false. +// +// Example environment variables: +// PATTERN=".*\\.edu$" +// MATCH_VALUE="true" +// +// Logs: +// This transform logs information about each record and whether it matched. +// The logs appear in the _redpanda.transform_logs topic, so you can debug how your records are being processed. +// +// Build instructions: +// go mod tidy +// rpk transform build +// +// For more details on building transforms with the Redpanda SDK, see: +// https://docs.redpanda.com/current/develop/data-transforms +// + +import ( + "log" + "os" + "regexp" + "strings" + + "github.com/redpanda-data/redpanda/src/transform-sdk/go/transform" +) + +var ( + re *regexp.Regexp + checkValue bool +) + +func isTrueVar(v string) bool { + switch strings.ToLower(v) { + case "yes", "ok", "1", "true": + return true + default: + return false + } +} + +// The main() function runs only once at startup. It performs all initialization steps: +// - Reads and compiles the regex pattern. +// - Determines whether to match on the key or value. +// - Registers the doRegexFilter() function to process records. +func main() { + // Set logging preferences, including timestamp and UTC time. 
+ log.SetPrefix("[regex-transform] ") + log.SetFlags(log.Ldate | log.Ltime | log.LUTC | log.Lmicroseconds) + + // Start logging the transformation process + log.Println("Starting transform...") + + // Read the PATTERN environment variable to get the regex pattern. + pattern, ok := os.LookupEnv("PATTERN") + if !ok { + log.Fatal("Missing PATTERN environment variable") + } + // Log the regex pattern being used. + log.Printf("Using PATTERN: %q\n", pattern) + // Compile the regex pattern for later use. + re = regexp.MustCompile(pattern) + + // Read the MATCH_VALUE environment variable to determine whether to check the record's value. + mk, ok := os.LookupEnv("MATCH_VALUE") + checkValue = ok && isTrueVar(mk) + log.Printf("MATCH_VALUE set to: %t\n", checkValue) + + log.Println("Initialization complete, waiting for records...") + + // Listen for records to be written, calling doRegexFilter() for each record. + transform.OnRecordWritten(doRegexFilter) +} + +// The doRegexFilter() function executes each time a new record is written. +// It checks whether the record's key or value (based on MATCH_VALUE) matches the compiled regex. +// If it matches, the record is forwarded, if not, it's dropped. +func doRegexFilter(e transform.WriteEvent, w transform.RecordWriter) error { + // This stores the data to be checked (either the key or value). + var dataToCheck []byte + + // Depending on the MATCH_VALUE environment variable, decide whether to check the record's key or value. + if checkValue { + // Use the value of the record if MATCH_VALUE is true. + dataToCheck = e.Record().Value + log.Printf("Checking record value: %s\n", string(dataToCheck)) + } else { + // Use the key of the record if MATCH_VALUE is false. + dataToCheck = e.Record().Key + log.Printf("Checking record key: %s\n", string(dataToCheck)) + } + + // If there is no key or value to check, log and skip the record. 
+ if dataToCheck == nil { + log.Println("Record has no key/value to check, skipping.") + return nil + } + + // Check if the data matches the regex pattern. + pass := re.Match(dataToCheck) + if pass { + // If the record matches the pattern, log and write the record to the output topic. + log.Printf("Record matched pattern, passing through. Key: %s, Value: %s\n", string(e.Record().Key), string(e.Record().Value)) + return w.Write(e.Record()) + } else { + // If the record does not match the pattern, log and drop the record. + log.Printf("Record did not match pattern, dropping. Key: %s, Value: %s\n", string(e.Record().Key), string(e.Record().Value)) + // Do not write the record if it doesn't match the pattern. + return nil + } +} \ No newline at end of file diff --git a/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/transform/transform.yaml b/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/transform/transform.yaml new file mode 100644 index 0000000..d415530 --- /dev/null +++ b/docker-compose/redpanda-quickstart/docker-compose/redpanda-quickstart/docker-compose/transform/transform.yaml @@ -0,0 +1,33 @@ +# Transform metadata used by the rpk transform build command. +# This metadata file tells rpk: +# 1) The transform’s display name, which also becomes the base for the .wasm file name. +# 2) A brief description of what it does. +# 3) Defaults for environment variables. +# 4) Input and output topics (if you want to define them here rather than in the deploy command). + +# Human-readable name of the transform. rpk transform build uses this for the generated .wasm file. +name: regex + +description: | + Filters the input topic to records that only match a regular expression. + + Regular expressions are implemented using Go's regexp library, which uses the syntax of RE2. 
+ See the RE2 wiki for allowed syntax: https://github.com/google/re2/wiki/Syntax + + Environment variables: + - PATTERN: The regular expression that will match against records (required). + - MATCH_VALUE: By default, the regex matches keys, but if set to "true", the regex matches values. + +# By default, no input topic is set here. (You can set it in your deploy command if preferred.) +input-topic: "" + +# By default, no output topic is set here. (You can set it in your deploy command if preferred.) +output-topic: "" + +# Indicates the specific TinyGo environment used to compile your transform. +language: tinygo-no-goroutines + +env: + # The PATTERN variable must be provided at deploy time. + # Example: --var=PATTERN=".*@example.com" + PATTERN: '' diff --git a/docker-compose/redpanda-quickstart/docker-compose/rpk-profile.yaml b/docker-compose/redpanda-quickstart/docker-compose/rpk-profile.yaml new file mode 100644 index 0000000..174e1ec --- /dev/null +++ b/docker-compose/redpanda-quickstart/docker-compose/rpk-profile.yaml @@ -0,0 +1,24 @@ +# This file configures `rpk` to connect to a remote Redpanda cluster running in the same local network as `rpk`. + +# Configuration for connecting to the Kafka API of the Redpanda cluster. +kafka_api: + # SASL (Simple Authentication and Security Layer) settings for authentication. + sasl: + user: superuser # The username used for authentication + password: secretpassword # The password associated with the username + mechanism: scram-sha-256 # Authentication mechanism; SCRAM-SHA-256 provides secure password-based authentication + # List of Kafka brokers in the Redpanda cluster. + # These brokers ensure high availability and fault tolerance for Kafka-based communication. 
+ brokers: + - 127.0.0.1:19092 # Broker 1: Accessible on localhost, port 19092 + - 127.0.0.1:29092 # Broker 2: Accessible on localhost, port 29092 + - 127.0.0.1:39092 # Broker 3: Accessible on localhost, port 39092 + +# Configuration for connecting to the Redpanda Admin API. +# The Admin API allows you to perform administrative tasks such as managing configurations, monitoring, and scaling. +admin_api: + # List of Admin API endpoints for managing the cluster. + addresses: + - 127.0.0.1:19644 # Admin API for Broker 1: Accessible on localhost, port 19644 + - 127.0.0.1:29644 # Admin API for Broker 2: Accessible on localhost, port 29644 + - 127.0.0.1:39644 # Admin API for Broker 3: Accessible on localhost, port 39644 diff --git a/docker-compose/redpanda-quickstart/docker-compose/transactions-schema.json b/docker-compose/redpanda-quickstart/docker-compose/transactions-schema.json new file mode 100644 index 0000000..342c495 --- /dev/null +++ b/docker-compose/redpanda-quickstart/docker-compose/transactions-schema.json @@ -0,0 +1,37 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "Transactions", + "type": "object", + "properties": { + "email": { + "type": "string", + "format": "email", + "description": "The email address of the user involved in the transaction." + }, + "index": { + "type": "integer", + "description": "A numeric index associated with the transaction." + }, + "price": { + "type": "string", + "pattern": "^XXX \\d+\\.\\d{6}$", + "description": "A string representing the price of the product, including a currency code followed by the amount." + }, + "product_url": { + "type": "string", + "format": "uri", + "description": "A URL that points to the product involved in the transaction." + }, + "timestamp": { + "type": "string", + "format": "date-time", + "description": "The timestamp of when the transaction occurred, formatted in ISO 8601." + }, + "user_id": { + "type": "integer", + "description": "A numeric identifier for the user." 
+ } + }, + "required": ["email", "index", "price", "product_url", "timestamp", "user_id"], + "additionalProperties": false +} diff --git a/docker-compose/redpanda-quickstart/docker-compose/transactions.md b/docker-compose/redpanda-quickstart/docker-compose/transactions.md new file mode 100644 index 0000000..e230916 --- /dev/null +++ b/docker-compose/redpanda-quickstart/docker-compose/transactions.md @@ -0,0 +1,46 @@ +# Transactions Topic Documentation + +This document provides an overview of the `transactions` topic in the Redpanda cluster. The topic is designed to capture autogenerated transaction events with various attributes. + +## Schema Overview + +Each message in the `transactions` topic adheres to the following JSON schema: + +```json +{ + "email": "string", + "index": "integer", + "price": "string", + "product_url": "string", + "timestamp": "string", + "user_id": "integer" +} +``` + +- **email**: The email address of the user involved in the transaction. +- **index**: A numeric index associated with the transaction. This could represent the position or order of the transaction in a sequence. +- **price**: A string representing the price of the product. It includes a currency code (e.g., "XXX") followed by the amount. +- **product_url**: A URL that points to the product involved in the transaction. +- **timestamp**: The timestamp of when the transaction occurred, formatted in ISO 8601. +- **user_id**: A numeric identifier for the user. This is typically a unique ID assigned to each user in the system. + +## Example message + +```json +{ + "email": "wzieme@ykczius.edu", + "index": 0, + "price": "XXX 5651308.100000", + "product_url": "http://yjomdta.top/DxvGsCn.php", + "timestamp": "2024-08-16T15:51:19.799474084Z", + "user_id": 1 +} +``` + +## Use cases + +You can use the `transactions` topic for various purposes, including: + +- **Analytics**: Tracking and analyzing user transactions to understand buying behavior, popular products, etc. 
+- **Monitoring**: Observing transaction patterns to detect anomalies, such as unusual spikes or drops in transaction volume. +- **Data Processing**: Feeding transaction data into other systems, such as a data warehouse or real-time processing pipelines, for further processing and analysis. diff --git a/docker-compose/redpanda-quickstart/docker-compose/transform/README.adoc b/docker-compose/redpanda-quickstart/docker-compose/transform/README.adoc new file mode 100644 index 0000000..4485435 --- /dev/null +++ b/docker-compose/redpanda-quickstart/docker-compose/transform/README.adoc @@ -0,0 +1,73 @@ += Modify the Wasm Transform in the Quickstart + +This directory contains the Go source code (`transform.go`) for the data transform that is used in the Redpanda Self-Managed quickstart. +If you're following the quickstart, you *do not* need to modify or rebuild this code. The Docker Compose configuration automatically deploys a pre-built transform called `regex.wasm`. + +However, if you want to customize the data transform logic, continue reading. + +== Why customize the transform? + +- **Custom filtering**: Filter by a different regex or apply multiple conditions. +- **Data manipulation**: Transform records before writing them out. For example, redacting sensitive data or combining fields. +- **Extended functionality**: Add advanced logging, error handling, or multi-topic routing. + +== Prerequisites + +You need the following: + +- At least Go 1.20 installed. ++ +[source,bash] +---- +go version +---- + +- The Redpanda CLI (`rpk`) installed. + +- A running Redpanda cluster. If you're using the local quickstart with Docker Compose, ensure the cluster is up and running. Or, point `rpk` to another Redpanda environment. + +== Modify and deploy your transform + +. Open link:transform.go[transform.go] and make your changes. For example: ++ +-- +- Change the regex logic to handle different use cases. +- Add environment variables to control new features. 
+- Extend the `doRegexFilter()` function to manipulate records. +-- + +. Compile your Go code into a `.wasm` file: ++ +[source,bash] +---- +rpk transform build +---- ++ +This command compiles your Go source and produces a `.wasm` file that you can deploy to Redpanda. + +. Deploy the new transform. ++ +If your Docker Compose setup already has a service to deploy the transform, you can restart that service. ++ +Otherwise, you can deploy your updated `.wasm` manually using `rpk transform deploy`. + +. Produce messages into the input topic. For example: ++ +[source,bash] +---- +echo '{"key":"alice@university.edu","value":"test message"}' | rpk topic produce logins +---- + +. Consume from the output topic. For example: ++ +[source,bash] +---- +rpk topic consume edu-filtered-domains --num 1 +---- + +== Suggested reading + +- link:https://docs.redpanda.com/current/reference/rpk/[Redpanda `rpk` CLI Reference^]. +- link:https://docs.redpanda.com/current/develop/data-transforms/build/[Develop Data Transforms^]. +- https://golang.org/ref/mod[Go Modules^] for managing dependencies and builds in Go. +- https://docs.docker.com/compose/[Docker Compose^] for customizing your environment. 
diff --git a/docker-compose/redpanda-quickstart/docker-compose/transform/go.mod b/docker-compose/redpanda-quickstart/docker-compose/transform/go.mod new file mode 100644 index 0000000..4bb9a33 --- /dev/null +++ b/docker-compose/redpanda-quickstart/docker-compose/transform/go.mod @@ -0,0 +1,5 @@ +module regex + +go 1.20 + +require github.com/redpanda-data/redpanda/src/transform-sdk/go/transform v1.1.0 diff --git a/docker-compose/redpanda-quickstart/docker-compose/transform/go.sum b/docker-compose/redpanda-quickstart/docker-compose/transform/go.sum new file mode 100644 index 0000000..8745dd1 --- /dev/null +++ b/docker-compose/redpanda-quickstart/docker-compose/transform/go.sum @@ -0,0 +1,2 @@ +github.com/redpanda-data/redpanda/src/transform-sdk/go/transform v1.1.0 h1:KxgHJZsHsrT3YX7DMpu/vJN4TZN3KFm1jzrCFLyOepA= +github.com/redpanda-data/redpanda/src/transform-sdk/go/transform v1.1.0/go.mod h1:QGgiwwf/BIsD1b7EiyQ/Apzw+RLSpasRDdpOCiefQFQ= diff --git a/docker-compose/redpanda-quickstart/docker-compose/transform/regex.wasm b/docker-compose/redpanda-quickstart/docker-compose/transform/regex.wasm new file mode 100644 index 0000000..303962f Binary files /dev/null and b/docker-compose/redpanda-quickstart/docker-compose/transform/regex.wasm differ diff --git a/docker-compose/redpanda-quickstart/docker-compose/transform/transform.go b/docker-compose/redpanda-quickstart/docker-compose/transform/transform.go new file mode 100644 index 0000000..7bbd628 --- /dev/null +++ b/docker-compose/redpanda-quickstart/docker-compose/transform/transform.go @@ -0,0 +1,122 @@ +package main +// This data transform filters records based on a customizable regex pattern. +// If a record's key or value +// (determined by an environment variable) matches the specified regex, +// the record is forwarded to the output. +// Otherwise, it is dropped. +// +// Usage: +// 1. 
Provide the following environment variables in your Docker or configuration setup: +// - PATTERN : (required) a regular expression that determines what you want to match. +// - MATCH_VALUE : (optional) a boolean to decide whether to check the record value. If false, +// the record key is checked. Default is false. +// +// Example environment variables: +// PATTERN=".*\\.edu$" +// MATCH_VALUE="true" +// +// Logs: +// This transform logs information about each record and whether it matched. +// The logs appear in the _redpanda.transform_logs topic, so you can debug how your records are being processed. +// +// Build instructions: +// go mod tidy +// rpk transform build +// +// For more details on building transforms with the Redpanda SDK, see: +// https://docs.redpanda.com/current/develop/data-transforms +// + +import ( + "log" + "os" + "regexp" + "strings" + + "github.com/redpanda-data/redpanda/src/transform-sdk/go/transform" +) + +var ( + re *regexp.Regexp + checkValue bool +) + +func isTrueVar(v string) bool { + switch strings.ToLower(v) { + case "yes", "ok", "1", "true": + return true + default: + return false + } +} + +// The main() function runs only once at startup. It performs all initialization steps: +// - Reads and compiles the regex pattern. +// - Determines whether to match on the key or value. +// - Registers the doRegexFilter() function to process records. +func main() { + // Set logging preferences, including timestamp and UTC time. + log.SetPrefix("[regex-transform] ") + log.SetFlags(log.Ldate | log.Ltime | log.LUTC | log.Lmicroseconds) + + // Start logging the transformation process + log.Println("Starting transform...") + + // Read the PATTERN environment variable to get the regex pattern. + pattern, ok := os.LookupEnv("PATTERN") + if !ok { + log.Fatal("Missing PATTERN environment variable") + } + // Log the regex pattern being used. + log.Printf("Using PATTERN: %q\n", pattern) + // Compile the regex pattern for later use. 
+ re = regexp.MustCompile(pattern) + + // Read the MATCH_VALUE environment variable to determine whether to check the record's value. + mk, ok := os.LookupEnv("MATCH_VALUE") + checkValue = ok && isTrueVar(mk) + log.Printf("MATCH_VALUE set to: %t\n", checkValue) + + log.Println("Initialization complete, waiting for records...") + + // Listen for records to be written, calling doRegexFilter() for each record. + transform.OnRecordWritten(doRegexFilter) +} + +// The doRegexFilter() function executes each time a new record is written. +// It checks whether the record's key or value (based on MATCH_VALUE) matches the compiled regex. +// If it matches, the record is forwarded, if not, it's dropped. +func doRegexFilter(e transform.WriteEvent, w transform.RecordWriter) error { + // This stores the data to be checked (either the key or value). + var dataToCheck []byte + + // Depending on the MATCH_VALUE environment variable, decide whether to check the record's key or value. + if checkValue { + // Use the value of the record if MATCH_VALUE is true. + dataToCheck = e.Record().Value + log.Printf("Checking record value: %s\n", string(dataToCheck)) + } else { + // Use the key of the record if MATCH_VALUE is false. + dataToCheck = e.Record().Key + log.Printf("Checking record key: %s\n", string(dataToCheck)) + } + + // If there is no key or value to check, log and skip the record. + if dataToCheck == nil { + log.Println("Record has no key/value to check, skipping.") + return nil + } + + // Check if the data matches the regex pattern. + pass := re.Match(dataToCheck) + if pass { + // If the record matches the pattern, log and write the record to the output topic. + log.Printf("Record matched pattern, passing through. Key: %s, Value: %s\n", string(e.Record().Key), string(e.Record().Value)) + return w.Write(e.Record()) + } else { + // If the record does not match the pattern, log and drop the record. + log.Printf("Record did not match pattern, dropping. 
Key: %s, Value: %s\n", string(e.Record().Key), string(e.Record().Value)) + // Do not write the record if it doesn't match the pattern. + return nil + } +} \ No newline at end of file diff --git a/docker-compose/redpanda-quickstart/docker-compose/transform/transform.yaml b/docker-compose/redpanda-quickstart/docker-compose/transform/transform.yaml new file mode 100644 index 0000000..d415530 --- /dev/null +++ b/docker-compose/redpanda-quickstart/docker-compose/transform/transform.yaml @@ -0,0 +1,33 @@ +# Transform metadata used by the rpk transform build command. +# This metadata file tells rpk: +# 1) The transform’s display name, which also becomes the base for the .wasm file name. +# 2) A brief description of what it does. +# 3) Defaults for environment variables. +# 4) Input and output topics (if you want to define them here rather than in the deploy command). + +# Human-readable name of the transform. rpk transform build uses this for the generated .wasm file. +name: regex + +description: | + Filters the input topic to records that only match a regular expression. + + Regular expressions are implemented using Go's regexp library, which uses the syntax of RE2. + See the RE2 wiki for allowed syntax: https://github.com/google/re2/wiki/Syntax + + Environment variables: + - PATTERN: The regular expression that will match against records (required). + - MATCH_VALUE: By default, the regex matches keys, but if set to "true", the regex matches values. + +# By default, no input topic is set here. (You can set it in your deploy command if preferred.) +input-topic: "" + +# By default, no output topic is set here. (You can set it in your deploy command if preferred.) +output-topic: "" + +# Indicates the specific TinyGo environment used to compile your transform. +language: tinygo-no-goroutines + +env: + # The PATTERN variable must be provided at deploy time. 
+ # Example: --var=PATTERN=".*@example.com" + PATTERN: '' diff --git a/docker-compose/rpk-profile.yaml b/docker-compose/rpk-profile.yaml new file mode 100644 index 0000000..174e1ec --- /dev/null +++ b/docker-compose/rpk-profile.yaml @@ -0,0 +1,24 @@ +# This file configures `rpk` to connect to a remote Redpanda cluster running in the same local network as `rpk`. + +# Configuration for connecting to the Kafka API of the Redpanda cluster. +kafka_api: + # SASL (Simple Authentication and Security Layer) settings for authentication. + sasl: + user: superuser # The username used for authentication + password: secretpassword # The password associated with the username + mechanism: scram-sha-256 # Authentication mechanism; SCRAM-SHA-256 provides secure password-based authentication + # List of Kafka brokers in the Redpanda cluster. + # These brokers ensure high availability and fault tolerance for Kafka-based communication. + brokers: + - 127.0.0.1:19092 # Broker 1: Accessible on localhost, port 19092 + - 127.0.0.1:29092 # Broker 2: Accessible on localhost, port 29092 + - 127.0.0.1:39092 # Broker 3: Accessible on localhost, port 39092 + +# Configuration for connecting to the Redpanda Admin API. +# The Admin API allows you to perform administrative tasks such as managing configurations, monitoring, and scaling. +admin_api: + # List of Admin API endpoints for managing the cluster. 
+ addresses: + - 127.0.0.1:19644 # Admin API for Broker 1: Accessible on localhost, port 19644 + - 127.0.0.1:29644 # Admin API for Broker 2: Accessible on localhost, port 29644 + - 127.0.0.1:39644 # Admin API for Broker 3: Accessible on localhost, port 39644 diff --git a/docker-compose/transactions-schema.json b/docker-compose/transactions-schema.json new file mode 100644 index 0000000..342c495 --- /dev/null +++ b/docker-compose/transactions-schema.json @@ -0,0 +1,37 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "Transactions", + "type": "object", + "properties": { + "email": { + "type": "string", + "format": "email", + "description": "The email address of the user involved in the transaction." + }, + "index": { + "type": "integer", + "description": "A numeric index associated with the transaction." + }, + "price": { + "type": "string", + "pattern": "^XXX \\d+\\.\\d{6}$", + "description": "A string representing the price of the product, including a currency code followed by the amount." + }, + "product_url": { + "type": "string", + "format": "uri", + "description": "A URL that points to the product involved in the transaction." + }, + "timestamp": { + "type": "string", + "format": "date-time", + "description": "The timestamp of when the transaction occurred, formatted in ISO 8601." + }, + "user_id": { + "type": "integer", + "description": "A numeric identifier for the user." + } + }, + "required": ["email", "index", "price", "product_url", "timestamp", "user_id"], + "additionalProperties": false +} diff --git a/docker-compose/transactions.md b/docker-compose/transactions.md new file mode 100644 index 0000000..e230916 --- /dev/null +++ b/docker-compose/transactions.md @@ -0,0 +1,46 @@ +# Transactions Topic Documentation + +This document provides an overview of the `transactions` topic in the Redpanda cluster. The topic is designed to capture autogenerated transaction events with various attributes. 
+ +## Schema Overview + +Each message in the `transactions` topic adheres to the following JSON schema: + +```json +{ + "email": "string", + "index": "integer", + "price": "string", + "product_url": "string", + "timestamp": "string", + "user_id": "integer" +} +``` + +- **email**: The email address of the user involved in the transaction. +- **index**: A numeric index associated with the transaction. This could represent the position or order of the transaction in a sequence. +- **price**: A string representing the price of the product. It includes a currency code (e.g., "XXX") followed by the amount. +- **product_url**: A URL that points to the product involved in the transaction. +- **timestamp**: The timestamp of when the transaction occurred, formatted in ISO 8601. +- **user_id**: A numeric identifier for the user. This is typically a unique ID assigned to each user in the system. + +## Example message + +```json +{ + "email": "wzieme@ykczius.edu", + "index": 0, + "price": "XXX 5651308.100000", + "product_url": "http://yjomdta.top/DxvGsCn.php", + "timestamp": "2024-08-16T15:51:19.799474084Z", + "user_id": 1 +} +``` + +## Use cases + +You can use the `transactions` topic for various purposes, including: + +- **Analytics**: Tracking and analyzing user transactions to understand buying behavior, popular products, etc. +- **Monitoring**: Observing transaction patterns to detect anomalies, such as unusual spikes or drops in transaction volume. +- **Data Processing**: Feeding transaction data into other systems, such as a data warehouse or real-time processing pipelines, for further processing and analysis. 
diff --git a/docker-compose/transform/README.adoc b/docker-compose/transform/README.adoc new file mode 100644 index 0000000..4485435 --- /dev/null +++ b/docker-compose/transform/README.adoc @@ -0,0 +1,73 @@ += Modify the Wasm Transform in the Quickstart + +This directory contains the Go source code (`transform.go`) for the data transform that is used in the Redpanda Self-Managed quickstart. +If you're following the quickstart, you *do not* need to modify or rebuild this code. The Docker Compose configuration automatically deploys a pre-built transform called `regex.wasm`. + +However, if you want to customize the data transform logic, continue reading. + +== Why customize the transform? + +- **Custom filtering**: Filter by a different regex or apply multiple conditions. +- **Data manipulation**: Transform records before writing them out. For example, redacting sensitive data or combining fields. +- **Extended functionality**: Add advanced logging, error handling, or multi-topic routing. + +== Prerequisites + +You need the following: + +- At least Go 1.20 installed. ++ +[source,bash] +---- +go version +---- + +- The Redpanda CLI (`rpk`) installed. + +- A running Redpanda cluster. If you're using the local quickstart with Docker Compose, ensure the cluster is up and running. Or, point `rpk` to another Redpanda environment. + +== Modify and deploy your transform + +. Open link:transform.go[transform.go] and make your changes. For example: ++ +-- +- Change the regex logic to handle different use cases. +- Add environment variables to control new features. +- Extend the `doRegexFilter()` function to manipulate records. +-- + +. Compile your Go code into a `.wasm` file: ++ +[source,bash] +---- +rpk transform build +---- ++ +This command compiles your Go source and produces a `.wasm` file that you can deploy to Redpanda. + +. Deploy the new transform. ++ +If your Docker Compose setup already has a service to deploy the transform, you can restart that service. 
++ +Otherwise, you can deploy your updated `.wasm` manually using `rpk transform deploy`. + +. Produce messages into the input topic. For example: ++ +[source,bash] +---- +echo '{"key":"alice@university.edu","value":"test message"}' | rpk topic produce logins +---- + +. Consume from the output topic. For example: ++ +[source,bash] +---- +rpk topic consume edu-filtered-domains --num 1 +---- + +== Suggested reading + +- link:https://docs.redpanda.com/current/reference/rpk/[Redpanda `rpk` CLI Reference^]. +- link:https://docs.redpanda.com/current/develop/data-transforms/build/[Develop Data Transforms^]. +- https://golang.org/ref/mod[Go Modules^] for managing dependencies and builds in Go. +- https://docs.docker.com/compose/[Docker Compose^] for customizing your environment. diff --git a/docker-compose/transform/go.mod b/docker-compose/transform/go.mod new file mode 100644 index 0000000..4bb9a33 --- /dev/null +++ b/docker-compose/transform/go.mod @@ -0,0 +1,5 @@ +module regex + +go 1.20 + +require github.com/redpanda-data/redpanda/src/transform-sdk/go/transform v1.1.0 diff --git a/docker-compose/transform/go.sum b/docker-compose/transform/go.sum new file mode 100644 index 0000000..8745dd1 --- /dev/null +++ b/docker-compose/transform/go.sum @@ -0,0 +1,2 @@ +github.com/redpanda-data/redpanda/src/transform-sdk/go/transform v1.1.0 h1:KxgHJZsHsrT3YX7DMpu/vJN4TZN3KFm1jzrCFLyOepA= +github.com/redpanda-data/redpanda/src/transform-sdk/go/transform v1.1.0/go.mod h1:QGgiwwf/BIsD1b7EiyQ/Apzw+RLSpasRDdpOCiefQFQ= diff --git a/docker-compose/transform/regex.wasm b/docker-compose/transform/regex.wasm new file mode 100644 index 0000000..303962f Binary files /dev/null and b/docker-compose/transform/regex.wasm differ diff --git a/docker-compose/transform/transform.go b/docker-compose/transform/transform.go new file mode 100644 index 0000000..7bbd628 --- /dev/null +++ b/docker-compose/transform/transform.go @@ -0,0 +1,122 @@ +package main +// This data transform filters records based on a 
customizable regex pattern. +// If a record's key or value +// (determined by an environment variable) matches the specified regex, +// the record is forwarded to the output. +// Otherwise, it is dropped. +// +// Usage: +// 1. Provide the following environment variables in your Docker or configuration setup: +// - PATTERN : (required) a regular expression that determines what you want to match. +// - MATCH_VALUE : (optional) a boolean to decide whether to check the record value. If false, +// the record key is checked. Default is false. +// +// Example environment variables: +// PATTERN=".*\\.edu$" +// MATCH_VALUE="true" +// +// Logs: +// This transform logs information about each record and whether it matched. +// The logs appear in the _redpanda.transform_logs topic, so you can debug how your records are being processed. +// +// Build instructions: +// go mod tidy +// rpk transform build +// +// For more details on building transforms with the Redpanda SDK, see: +// https://docs.redpanda.com/current/develop/data-transforms +// + +import ( + "log" + "os" + "regexp" + "strings" + + "github.com/redpanda-data/redpanda/src/transform-sdk/go/transform" +) + +var ( + re *regexp.Regexp + checkValue bool +) + +func isTrueVar(v string) bool { + switch strings.ToLower(v) { + case "yes", "ok", "1", "true": + return true + default: + return false + } +} + +// The main() function runs only once at startup. It performs all initialization steps: +// - Reads and compiles the regex pattern. +// - Determines whether to match on the key or value. +// - Registers the doRegexFilter() function to process records. +func main() { + // Set logging preferences, including timestamp and UTC time. + log.SetPrefix("[regex-transform] ") + log.SetFlags(log.Ldate | log.Ltime | log.LUTC | log.Lmicroseconds) + + // Start logging the transformation process + log.Println("Starting transform...") + + // Read the PATTERN environment variable to get the regex pattern. 
+ pattern, ok := os.LookupEnv("PATTERN") + if !ok { + log.Fatal("Missing PATTERN environment variable") + } + // Log the regex pattern being used. + log.Printf("Using PATTERN: %q\n", pattern) + // Compile the regex pattern for later use. + re = regexp.MustCompile(pattern) + + // Read the MATCH_VALUE environment variable to determine whether to check the record's value. + mk, ok := os.LookupEnv("MATCH_VALUE") + checkValue = ok && isTrueVar(mk) + log.Printf("MATCH_VALUE set to: %t\n", checkValue) + + log.Println("Initialization complete, waiting for records...") + + // Listen for records to be written, calling doRegexFilter() for each record. + transform.OnRecordWritten(doRegexFilter) +} + +// The doRegexFilter() function executes each time a new record is written. +// It checks whether the record's key or value (based on MATCH_VALUE) matches the compiled regex. +// If it matches, the record is forwarded, if not, it's dropped. +func doRegexFilter(e transform.WriteEvent, w transform.RecordWriter) error { + // This stores the data to be checked (either the key or value). + var dataToCheck []byte + + // Depending on the MATCH_VALUE environment variable, decide whether to check the record's key or value. + if checkValue { + // Use the value of the record if MATCH_VALUE is true. + dataToCheck = e.Record().Value + log.Printf("Checking record value: %s\n", string(dataToCheck)) + } else { + // Use the key of the record if MATCH_VALUE is false. + dataToCheck = e.Record().Key + log.Printf("Checking record key: %s\n", string(dataToCheck)) + } + + // If there is no key or value to check, log and skip the record. + if dataToCheck == nil { + log.Println("Record has no key/value to check, skipping.") + return nil + } + + // Check if the data matches the regex pattern. + pass := re.Match(dataToCheck) + if pass { + // If the record matches the pattern, log and write the record to the output topic. + log.Printf("Record matched pattern, passing through. 
Key: %s, Value: %s\n", string(e.Record().Key), string(e.Record().Value)) + return w.Write(e.Record()) + } else { + // If the record does not match the pattern, log and drop the record. + log.Printf("Record did not match pattern, dropping. Key: %s, Value: %s\n", string(e.Record().Key), string(e.Record().Value)) + // Do not write the record if it doesn't match the pattern. + return nil + } +} \ No newline at end of file diff --git a/docker-compose/transform/transform.yaml b/docker-compose/transform/transform.yaml new file mode 100644 index 0000000..d415530 --- /dev/null +++ b/docker-compose/transform/transform.yaml @@ -0,0 +1,33 @@ +# Transform metadata used by the rpk transform build command. +# This metadata file tells rpk: +# 1) The transform’s display name, which also becomes the base for the .wasm file name. +# 2) A brief description of what it does. +# 3) Defaults for environment variables. +# 4) Input and output topics (if you want to define them here rather than in the deploy command). + +# Human-readable name of the transform. rpk transform build uses this for the generated .wasm file. +name: regex + +description: | + Filters the input topic to records that only match a regular expression. + + Regular expressions are implemented using Go's regexp library, which uses the syntax of RE2. + See the RE2 wiki for allowed syntax: https://github.com/google/re2/wiki/Syntax + + Environment variables: + - PATTERN: The regular expression that will match against records (required). + - MATCH_VALUE: By default, the regex matches keys, but if set to "true", the regex matches values. + +# By default, no input topic is set here. (You can set it in your deploy command if preferred.) +input-topic: "" + +# By default, no output topic is set here. (You can set it in your deploy command if preferred.) +output-topic: "" + +# Indicates the specific TinyGo environment used to compile your transform. 
+language: tinygo-no-goroutines + +env: + # The PATTERN variable must be provided at deploy time. + # Example: --var=PATTERN=".*@example.com" + PATTERN: ''