diff --git a/docker/signoz/.env.example b/docker/signoz/.env.example index 0c16104..512db75 100644 --- a/docker/signoz/.env.example +++ b/docker/signoz/.env.example @@ -14,21 +14,35 @@ # ----------------------------------------------------------------------------- # Pin a specific version for reproducibility. Check releases at: # https://github.com/SigNoz/signoz/releases -SIGNOZ_VERSION=latest +SIGNOZ_VERSION=v0.128.0 + +# ----------------------------------------------------------------------------- +# OTEL Collector Version +# ----------------------------------------------------------------------------- +OTELCOL_VERSION=v0.144.5 + +# ----------------------------------------------------------------------------- +# ZooKeeper Version +# ----------------------------------------------------------------------------- +ZOOKEEPER_VERSION=3.7.1 # ----------------------------------------------------------------------------- # ClickHouse # ----------------------------------------------------------------------------- CLICKHOUSE_VERSION=25.5 -CLICKHOUSE_DB=signoz -CLICKHOUSE_USER=admin -CLICKHOUSE_PASSWORD=change-me-clickhouse-password + +# ----------------------------------------------------------------------------- +# JWT Secret (REQUIRED) +# ----------------------------------------------------------------------------- +# Generate with: openssl rand -hex 32 +# Without this, user sessions are vulnerable to tampering. 
+SIGNOZ_JWT_SECRET=change-me-generate-with-openssl-rand-hex-32 # ----------------------------------------------------------------------------- # Exposed Ports # ----------------------------------------------------------------------------- # SigNoz UI -EXPOSE_SIGNOZ_UI_PORT=3301 +EXPOSE_SIGNOZ_UI_PORT=8080 # OTLP gRPC receiver (used by instrumented apps/services) EXPOSE_OTLP_GRPC_PORT=4317 # OTLP HTTP receiver (used by instrumented apps/services) diff --git a/docker/signoz/clickhouse/config.d/cluster.xml b/docker/signoz/clickhouse/config.d/cluster.xml new file mode 100644 index 0000000..5d785ac --- /dev/null +++ b/docker/signoz/clickhouse/config.d/cluster.xml @@ -0,0 +1,19 @@ + + + + + zookeeper + 2181 + + + + + + + clickhouse + 9000 + + + + + diff --git a/docker/signoz/clickhouse/config.xml b/docker/signoz/clickhouse/config.xml new file mode 100644 index 0000000..5740efe --- /dev/null +++ b/docker/signoz/clickhouse/config.xml @@ -0,0 +1,144 @@ + + + + information + json + /var/log/clickhouse-server/clickhouse-server.log + /var/log/clickhouse-server/clickhouse-server.err.log + 1000M + 10 + + 8123 + 9000 + 9004 + 9005 + 9009 + 4096 + 3 + 100 + 0 + 10000 + 0.9 + 4194304 + 0 + 8589934592 + 5368709120 + 1000 + 134217728 + 10000 + /var/lib/clickhouse/ + /var/lib/clickhouse/tmp/ + /var/lib/clickhouse/user_files/ + /var/lib/clickhouse/user_scripts/ + true + false + default + default + + 3600 + 3600 + 60 + + /metrics + 9363 + true + true + true + true + + + system + query_log
+ toYYYYMM(event_date) + 7500 +
+ + system + trace_log
+ toYYYYMM(event_date) + 7500 +
+ + system + query_thread_log
+ toYYYYMM(event_date) + 7500 +
+ + system + query_views_log
+ toYYYYMM(event_date) + 7500 +
+ + system + part_log
+ toYYYYMM(event_date) + 7500 +
+ + system + metric_log
+ 7500 + 1000 +
+ + system + asynchronous_metric_log
+ 7000 +
+ + + engine MergeTree + partition by toYYYYMM(finish_date) + order by (finish_date, finish_time_us, trace_id) + + system + opentelemetry_span_log
+ 7500 +
+ + system + crash_log
+ + 1000 +
+ + system + processors_profile_log
+ toYYYYMM(event_date) + 7500 +
+ + 268435456 + true + + /var/lib/clickhouse/format_schemas/ + + + users.xml + /var/lib/clickhouse/access/ + + 360000000 + + /clickhouse/task_queue/ddl + + + click_costany036008640060 + max0603600300864003600 + + + + hide encrypt/decrypt arguments + ((?:aes_)?(?:encrypt|decrypt)(?:_mysql)?)\s*\(\s*\'(?:\\\\\'|.)+\'|.*?\s*\) + \1(???) + + + + false + false + + + *_dictionary.xml + *function.xml +
diff --git a/docker/signoz/clickhouse/custom-function.xml b/docker/signoz/clickhouse/custom-function.xml new file mode 100644 index 0000000..5864e58 --- /dev/null +++ b/docker/signoz/clickhouse/custom-function.xml @@ -0,0 +1,12 @@ + + + executable + histogramQuantile + Float64 + Array(Float64)buckets + Array(Float64)counts + Float64quantile + CSV + ./histogramQuantile + + diff --git a/docker/signoz/clickhouse/users.xml b/docker/signoz/clickhouse/users.xml new file mode 100644 index 0000000..4273c66 --- /dev/null +++ b/docker/signoz/clickhouse/users.xml @@ -0,0 +1,30 @@ + + + + + 10000000000 + random + + 1 + + + + + ::/0 + default + default + + + + + + 3600 + 0 + 0 + 0 + 0 + 0 + + + + diff --git a/docker/signoz/compose.yaml b/docker/signoz/compose.yaml index 05acb08..a675ba3 100644 --- a/docker/signoz/compose.yaml +++ b/docker/signoz/compose.yaml @@ -1,71 +1,123 @@ name: signoz services: + # =========================================================================== + # ZooKeeper — required for ClickHouse coordination + # =========================================================================== + zookeeper: + image: signoz/zookeeper:${ZOOKEEPER_VERSION:-3.7.1} + restart: unless-stopped + user: root + environment: + ZOO_SERVER_ID: 1 + ALLOW_ANONYMOUS_LOGIN: "yes" + ZOO_AUTOPURGE_INTERVAL: 1 + ZOO_ENABLE_PROMETHEUS_METRICS: "yes" + ZOO_PROMETHEUS_METRICS_PORT_NUMBER: 9141 + volumes: + - ./zookeeper-data:/bitnami/zookeeper + healthcheck: + test: + - CMD-SHELL + - curl -s -m 2 http://localhost:8080/commands/ruok | grep error | grep null + interval: 30s + timeout: 5s + retries: 3 + networks: + - signoz + + # =========================================================================== + # Init ClickHouse — installs histogramQuantile user script + # =========================================================================== + init-clickhouse: + image: clickhouse/clickhouse-server:${CLICKHOUSE_VERSION:-25.5} + restart: on-failure + command: + - bash + - -c + - | + 
set -e; version="v0.0.1" + node_os=$$(uname -s | tr '[:upper:]' '[:lower:]') + node_arch=$$(uname -m | sed s/aarch64/arm64/ | sed s/x86_64/amd64/) + echo "Fetching histogram-binary for $${node_os}/$${node_arch}" + cd /tmp + wget -O histogram-quantile.tar.gz "https://github.com/SigNoz/signoz/releases/download/histogram-quantile%2F$${version}/histogram-quantile_$${node_os}_$${node_arch}.tar.gz" + tar -xvzf histogram-quantile.tar.gz + mv histogram-quantile /var/lib/clickhouse/user_scripts/histogramQuantile + echo "Done." + volumes: + - ./clickhouse/user_scripts:/var/lib/clickhouse/user_scripts + networks: + - signoz + # =========================================================================== # ClickHouse — columnar storage for all telemetry data # =========================================================================== clickhouse: image: clickhouse/clickhouse-server:${CLICKHOUSE_VERSION:-25.5} restart: unless-stopped + tty: true + depends_on: + init-clickhouse: + condition: service_completed_successfully + zookeeper: + condition: service_healthy environment: - CLICKHOUSE_DB: ${CLICKHOUSE_DB:-signoz} - CLICKHOUSE_USER: ${CLICKHOUSE_USER:-admin} - CLICKHOUSE_PASSWORD: ${CLICKHOUSE_PASSWORD:-change-me-clickhouse-password} + CLICKHOUSE_SKIP_USER_SETUP: "1" volumes: + - ./clickhouse/config.xml:/etc/clickhouse-server/config.xml:ro + - ./clickhouse/users.xml:/etc/clickhouse-server/users.xml:ro + - ./clickhouse/custom-function.xml:/etc/clickhouse-server/custom-function.xml:ro + - ./clickhouse/user_scripts:/var/lib/clickhouse/user_scripts:ro + - ./clickhouse/config.d/cluster.xml:/etc/clickhouse-server/config.d/cluster.xml:ro - ./clickhouse-data:/var/lib/clickhouse healthcheck: - test: wget --no-verbose --tries=1 --spider http://localhost:8123/ping || exit 1 - interval: 5s - timeout: 3s - retries: 30 - start_period: 10s + test: + - CMD + - wget + - --spider + - -q + - 0.0.0.0:8123/ping + interval: 30s + timeout: 5s + retries: 3 + ulimits: + nproc: 65535 + nofile: + soft: 262144 + hard: 
262144 networks: - signoz # =========================================================================== - # SigNoz — all-in-one observability platform (query service + UI + collector) - # =========================================================================== - # Replaces both Alloy (OTEL collector) and LGTM (Grafana/Prometheus/Tempo/Loki). - # Accepts OTLP gRPC (4317) and OTLP HTTP (4318) from all stacks. - # UI on port 3301. - # - # Docs: https://signoz.io/docs/install/docker/ + # SigNoz — query service + UI # =========================================================================== signoz: - image: signoz/signoz:${SIGNOZ_VERSION:-latest} + image: signoz/signoz:${SIGNOZ_VERSION:-v0.128.0} restart: unless-stopped depends_on: clickhouse: condition: service_healthy environment: - SIGNOZ_TELEMETRY_STORE: clickhouse - DSN: tcp://clickhouse:9000 - CLICKHOUSE_USER: ${CLICKHOUSE_USER:-admin} - CLICKHOUSE_PASSWORD: ${CLICKHOUSE_PASSWORD:-change-me-clickhouse-password} - CLICKHOUSE_DATABASE: ${CLICKHOUSE_DB:-signoz} - STORAGE: clickhouse - CLICKHOUSE_ENDPOINT: tcp://clickhouse:9000 - SIGNOZ_CLICKHOUSE_DSN: tcp://clickhouse:9000 + SIGNOZ_ALERTMANAGER_PROVIDER: signoz + SIGNOZ_TELEMETRYSTORE_CLICKHOUSE_DSN: tcp://clickhouse:9000 + SIGNOZ_SQLSTORE_SQLITE_PATH: /var/lib/signoz/signoz.db + SIGNOZ_TOKENIZER_JWT_SECRET: ${SIGNOZ_JWT_SECRET:?SIGNOZ_JWT_SECRET is required (generate with openssl rand -hex 32)} ports: # SigNoz UI - - ${EXPOSE_SIGNOZ_UI_PORT:-3301}:3301 - # OTLP gRPC receiver - - ${EXPOSE_OTLP_GRPC_PORT:-4317}:4317 - # OTLP HTTP receiver - - ${EXPOSE_OTLP_HTTP_PORT:-4318}:4318 + - ${EXPOSE_SIGNOZ_UI_PORT:-8080}:8080 volumes: - ./signoz-data:/var/lib/signoz healthcheck: test: - CMD - wget - - --no-verbose - - --tries=1 - --spider - - http://localhost:3301/api/v1/health - interval: 15s + - -q + - localhost:8080/api/v1/health + interval: 30s timeout: 5s - retries: 10 + retries: 3 start_period: 30s networks: signoz: {} @@ -78,6 +130,66 @@ services: # aliases: # - signoz + # 
=========================================================================== + # OTEL Collector — receives OTLP from all stacks + # =========================================================================== + otel-collector: + image: signoz/signoz-otel-collector:${OTELCOL_VERSION:-v0.144.5} + restart: unless-stopped + depends_on: + clickhouse: + condition: service_healthy + entrypoint: + - /bin/sh + command: + - -c + - | + /signoz-otel-collector migrate sync check && + /signoz-otel-collector --config=/etc/otel-collector-config.yaml --manager-config=/etc/manager-config.yaml --copy-path=/var/tmp/collector-config.yaml + volumes: + - ./otel-collector/otel-collector-config.yaml:/etc/otel-collector-config.yaml:ro + - ./otel-collector/manager-config.yaml:/etc/manager-config.yaml:ro + environment: + OTEL_RESOURCE_ATTRIBUTES: host.name=signoz-host,os.type=linux + LOW_CARDINAL_EXCEPTION_GROUPING: "false" + SIGNOZ_OTEL_COLLECTOR_CLICKHOUSE_DSN: tcp://clickhouse:9000 + SIGNOZ_OTEL_COLLECTOR_CLICKHOUSE_CLUSTER: cluster + SIGNOZ_OTEL_COLLECTOR_CLICKHOUSE_REPLICATION: "true" + SIGNOZ_OTEL_COLLECTOR_TIMEOUT: 10m + ports: + # OTLP gRPC receiver + - ${EXPOSE_OTLP_GRPC_PORT:-4317}:4317 + # OTLP HTTP receiver + - ${EXPOSE_OTLP_HTTP_PORT:-4318}:4318 + networks: + - signoz + - pipeline + + # =========================================================================== + # Telemetry Store Migrator — runs ClickHouse migrations + # =========================================================================== + migrator: + image: signoz/signoz-otel-collector:${OTELCOL_VERSION:-v0.144.5} + restart: on-failure + depends_on: + clickhouse: + condition: service_healthy + environment: + SIGNOZ_OTEL_COLLECTOR_CLICKHOUSE_DSN: tcp://clickhouse:9000 + SIGNOZ_OTEL_COLLECTOR_CLICKHOUSE_CLUSTER: cluster + SIGNOZ_OTEL_COLLECTOR_CLICKHOUSE_REPLICATION: "true" + SIGNOZ_OTEL_COLLECTOR_TIMEOUT: 10m + entrypoint: + - /bin/sh + command: + - -c + - | + /signoz-otel-collector migrate bootstrap && + 
/signoz-otel-collector migrate sync up && + /signoz-otel-collector migrate async up + networks: + - signoz + networks: signoz: name: signoz diff --git a/docker/signoz/otel-collector/manager-config.yaml b/docker/signoz/otel-collector/manager-config.yaml new file mode 100644 index 0000000..7267607 --- /dev/null +++ b/docker/signoz/otel-collector/manager-config.yaml @@ -0,0 +1 @@ +server_endpoint: ws://signoz:4320/v1/opamp diff --git a/docker/signoz/otel-collector/otel-collector-config.yaml b/docker/signoz/otel-collector/otel-collector-config.yaml new file mode 100644 index 0000000..ee7724c --- /dev/null +++ b/docker/signoz/otel-collector/otel-collector-config.yaml @@ -0,0 +1,118 @@ +connectors: + signozmeter: + metrics_flush_interval: 1h + dimensions: + - name: service.name + - name: deployment.environment + - name: host.name +receivers: + otlp: + protocols: + grpc: + endpoint: 0.0.0.0:4317 + http: + endpoint: 0.0.0.0:4318 + prometheus: + config: + global: + scrape_interval: 60s + scrape_configs: + - job_name: otel-collector + static_configs: + - targets: + - localhost:8888 + labels: + job_name: otel-collector +processors: + batch: + send_batch_size: 10000 + send_batch_max_size: 11000 + timeout: 10s + batch/meter: + send_batch_max_size: 25000 + send_batch_size: 20000 + timeout: 1s + resourcedetection: + # Using OTEL_RESOURCE_ATTRIBUTES envvar, env detector adds custom labels. 
+ detectors: [env, system] + timeout: 2s + signozspanmetrics/delta: + metrics_exporter: signozclickhousemetrics + metrics_flush_interval: 60s + latency_histogram_buckets: [100us, 1ms, 2ms, 6ms, 10ms, 50ms, 100ms, 250ms, 500ms, 1000ms, 1400ms, 2000ms, 5s, 10s, 20s, 40s, 60s ] + dimensions_cache_size: 100000 + aggregation_temporality: AGGREGATION_TEMPORALITY_DELTA + enable_exp_histogram: true + dimensions: + - name: service.namespace + default: default + - name: deployment.environment + default: default + # This is added to ensure the uniqueness of the timeseries + # Otherwise, identical timeseries produced by multiple replicas of + # collectors result in incorrect APM metrics + - name: signoz.collector.id + - name: service.version + - name: browser.platform + - name: browser.mobile + - name: k8s.cluster.name + - name: k8s.node.name + - name: k8s.namespace.name + - name: host.name + - name: host.type + - name: container.name +extensions: + health_check: + endpoint: 0.0.0.0:13133 + pprof: + endpoint: 0.0.0.0:1777 +exporters: + clickhousetraces: + datasource: tcp://clickhouse:9000/signoz_traces + low_cardinal_exception_grouping: ${env:LOW_CARDINAL_EXCEPTION_GROUPING} + use_new_schema: true + signozclickhousemetrics: + dsn: tcp://clickhouse:9000/signoz_metrics + clickhouselogsexporter: + dsn: tcp://clickhouse:9000/signoz_logs + timeout: 10s + use_new_schema: true + signozclickhousemeter: + dsn: tcp://clickhouse:9000/signoz_meter + timeout: 45s + sending_queue: + enabled: false + metadataexporter: + cache: + provider: in_memory + dsn: tcp://clickhouse:9000/signoz_metadata + enabled: true + timeout: 45s +service: + telemetry: + logs: + encoding: json + extensions: + - health_check + - pprof + pipelines: + traces: + receivers: [otlp] + processors: [signozspanmetrics/delta, batch] + exporters: [clickhousetraces, metadataexporter, signozmeter] + metrics: + receivers: [otlp] + processors: [batch] + exporters: [signozclickhousemetrics, metadataexporter, signozmeter] + 
metrics/prometheus: + receivers: [prometheus] + processors: [batch] + exporters: [signozclickhousemetrics, metadataexporter, signozmeter] + logs: + receivers: [otlp] + processors: [batch] + exporters: [clickhouselogsexporter, metadataexporter, signozmeter] + metrics/meter: + receivers: [signozmeter] + processors: [batch/meter] + exporters: [signozclickhousemeter] diff --git a/docker/signoz/swag/signoz.subdomain.conf b/docker/signoz/swag/signoz.subdomain.conf index 9a94fbe..ab471cf 100644 --- a/docker/signoz/swag/signoz.subdomain.conf +++ b/docker/signoz/swag/signoz.subdomain.conf @@ -1,7 +1,7 @@ ## ----------------------------------------------------------------------------- ## SWAG proxy config for SigNoz ## Domain: signoz.ld50.xyz -## Upstream: signoz:3301 (shared Docker network: ${NETWORKS_EXTERNAL_NAME:-swag}) +## Upstream: signoz:8080 (shared Docker network: ${NETWORKS_EXTERNAL_NAME:-swag}) ## ## Install: ## 1) Copy this file into SWAG: /config/nginx/proxy-confs/signoz.subdomain.conf @@ -22,7 +22,7 @@ server { include /config/nginx/proxy.conf; set $upstream_app signoz; - set $upstream_port 3301; + set $upstream_port 8080; set $upstream_proto http; proxy_pass $upstream_proto://$upstream_app:$upstream_port;