signoz updates

This commit is contained in:
Robbie 2026-06-16 12:19:45 -04:00
parent ebcc6e4d2d
commit 04e233216c
9 changed files with 491 additions and 41 deletions

View file

@ -14,21 +14,35 @@
# ----------------------------------------------------------------------------- # -----------------------------------------------------------------------------
# Pin a specific version for reproducibility. Check releases at: # Pin a specific version for reproducibility. Check releases at:
# https://github.com/SigNoz/signoz/releases # https://github.com/SigNoz/signoz/releases
SIGNOZ_VERSION=latest SIGNOZ_VERSION=v0.128.0
# -----------------------------------------------------------------------------
# OTEL Collector Version
# -----------------------------------------------------------------------------
OTELCOL_VERSION=v0.144.5
# -----------------------------------------------------------------------------
# ZooKeeper Version
# -----------------------------------------------------------------------------
ZOOKEEPER_VERSION=3.7.1
# ----------------------------------------------------------------------------- # -----------------------------------------------------------------------------
# ClickHouse # ClickHouse
# ----------------------------------------------------------------------------- # -----------------------------------------------------------------------------
CLICKHOUSE_VERSION=25.5 CLICKHOUSE_VERSION=25.5
CLICKHOUSE_DB=signoz
CLICKHOUSE_USER=admin # -----------------------------------------------------------------------------
CLICKHOUSE_PASSWORD=change-me-clickhouse-password # JWT Secret (REQUIRED)
# -----------------------------------------------------------------------------
# Generate with: openssl rand -hex 32
# Without this, user sessions are vulnerable to tampering.
SIGNOZ_JWT_SECRET=change-me-generate-with-openssl-rand-hex-32
# ----------------------------------------------------------------------------- # -----------------------------------------------------------------------------
# Exposed Ports # Exposed Ports
# ----------------------------------------------------------------------------- # -----------------------------------------------------------------------------
# SigNoz UI # SigNoz UI
EXPOSE_SIGNOZ_UI_PORT=3301 EXPOSE_SIGNOZ_UI_PORT=8080
# OTLP gRPC receiver (used by instrumented apps/services) # OTLP gRPC receiver (used by instrumented apps/services)
EXPOSE_OTLP_GRPC_PORT=4317 EXPOSE_OTLP_GRPC_PORT=4317
# OTLP HTTP receiver (used by instrumented apps/services) # OTLP HTTP receiver (used by instrumented apps/services)

View file

@ -0,0 +1,19 @@
<?xml version="1.0"?>
<!--
    Cluster topology override for ClickHouse.
    Declares one ZooKeeper node and a cluster named "cluster" made of a
    single shard with a single replica.
-->
<clickhouse>
    <!-- Coordination service: host "zookeeper", client port 2181. -->
    <zookeeper>
        <node index="1">
            <host>zookeeper</host>
            <port>2181</port>
        </node>
    </zookeeper>

    <!-- Cluster definition: one shard, one replica on host "clickhouse" (native port 9000). -->
    <remote_servers>
        <cluster>
            <shard>
                <replica>
                    <host>clickhouse</host>
                    <port>9000</port>
                </replica>
            </shard>
        </cluster>
    </remote_servers>
</clickhouse>

View file

@ -0,0 +1,144 @@
<?xml version="1.0"?>
<clickhouse>
<!-- Server logging: JSON-formatted, level "information", rotated at 1000M with 10 files kept. -->
<logger>
    <level>information</level>
    <formatting>
        <type>json</type>
    </formatting>
    <log>/var/log/clickhouse-server/clickhouse-server.log</log>
    <errorlog>/var/log/clickhouse-server/clickhouse-server.err.log</errorlog>
    <size>1000M</size>
    <count>10</count>
</logger>
<!-- Listening ports: HTTP, native TCP, MySQL and PostgreSQL wire protocols, inter-server HTTP. -->
<http_port>8123</http_port>
<tcp_port>9000</tcp_port>
<mysql_port>9004</mysql_port>
<postgresql_port>9005</postgresql_port>
<interserver_http_port>9009</interserver_http_port>

<!-- Connection and concurrency limits. -->
<max_connections>4096</max_connections>
<keep_alive_timeout>3</keep_alive_timeout>
<max_concurrent_queries>100</max_concurrent_queries>
<max_thread_pool_size>10000</max_thread_pool_size>

<!-- Memory limits and memory profiling. -->
<max_server_memory_usage>0</max_server_memory_usage>
<max_server_memory_usage_to_ram_ratio>0.9</max_server_memory_usage_to_ram_ratio>
<total_memory_profiler_step>4194304</total_memory_profiler_step>
<total_memory_tracker_sample_probability>0</total_memory_tracker_sample_probability>

<!-- Cache sizes. -->
<uncompressed_cache_size>8589934592</uncompressed_cache_size>
<mark_cache_size>5368709120</mark_cache_size>
<mmap_cache_size>1000</mmap_cache_size>
<compiled_expression_cache_size>134217728</compiled_expression_cache_size>
<compiled_expression_cache_elements_size>10000</compiled_expression_cache_elements_size>

<!-- Filesystem layout; all paths live under /var/lib/clickhouse/. -->
<path>/var/lib/clickhouse/</path>
<tmp_path>/var/lib/clickhouse/tmp/</tmp_path>
<user_files_path>/var/lib/clickhouse/user_files/</user_files_path>
<user_scripts_path>/var/lib/clickhouse/user_scripts/</user_scripts_path>

<!-- Executable memory handling. -->
<mlock_executable>true</mlock_executable>
<remap_executable>false</remap_executable>

<!-- Defaults applied to new sessions. -->
<default_profile>default</default_profile>
<default_database>default</default_database>
<custom_settings_prefixes></custom_settings_prefixes>
<builtin_dictionaries_reload_interval>3600</builtin_dictionaries_reload_interval>
<max_session_timeout>3600</max_session_timeout>
<default_session_timeout>60</default_session_timeout>
<!-- Prometheus scrape endpoint: /metrics on port 9363, with every metric family enabled. -->
<prometheus>
    <endpoint>/metrics</endpoint>
    <port>9363</port>
    <metrics>true</metrics>
    <events>true</events>
    <asynchronous_metrics>true</asynchronous_metrics>
    <status_info>true</status_info>
</prometheus>
<!--
    System log tables. Each section names the target table in the "system"
    database and how often buffered rows are flushed; most are partitioned
    by month of event_date.
-->
<query_log>
    <database>system</database>
    <table>query_log</table>
    <partition_by>toYYYYMM(event_date)</partition_by>
    <flush_interval_milliseconds>7500</flush_interval_milliseconds>
</query_log>

<trace_log>
    <database>system</database>
    <table>trace_log</table>
    <partition_by>toYYYYMM(event_date)</partition_by>
    <flush_interval_milliseconds>7500</flush_interval_milliseconds>
</trace_log>

<query_thread_log>
    <database>system</database>
    <table>query_thread_log</table>
    <partition_by>toYYYYMM(event_date)</partition_by>
    <flush_interval_milliseconds>7500</flush_interval_milliseconds>
</query_thread_log>

<query_views_log>
    <database>system</database>
    <table>query_views_log</table>
    <partition_by>toYYYYMM(event_date)</partition_by>
    <flush_interval_milliseconds>7500</flush_interval_milliseconds>
</query_views_log>

<part_log>
    <database>system</database>
    <table>part_log</table>
    <partition_by>toYYYYMM(event_date)</partition_by>
    <flush_interval_milliseconds>7500</flush_interval_milliseconds>
</part_log>

<!-- Metric history: sampled every 1000 ms, flushed every 7500 ms. -->
<metric_log>
    <database>system</database>
    <table>metric_log</table>
    <flush_interval_milliseconds>7500</flush_interval_milliseconds>
    <collect_interval_milliseconds>1000</collect_interval_milliseconds>
</metric_log>

<asynchronous_metric_log>
    <database>system</database>
    <table>asynchronous_metric_log</table>
    <flush_interval_milliseconds>7000</flush_interval_milliseconds>
</asynchronous_metric_log>

<!-- Span log uses an explicit engine definition instead of partition_by. -->
<opentelemetry_span_log>
    <engine>
        engine MergeTree
        partition by toYYYYMM(finish_date)
        order by (finish_date, finish_time_us, trace_id)
    </engine>
    <database>system</database>
    <table>opentelemetry_span_log</table>
    <flush_interval_milliseconds>7500</flush_interval_milliseconds>
</opentelemetry_span_log>

<!-- Crash log: empty partition_by, flushed every 1000 ms. -->
<crash_log>
    <database>system</database>
    <table>crash_log</table>
    <partition_by />
    <flush_interval_milliseconds>1000</flush_interval_milliseconds>
</crash_log>

<processors_profile_log>
    <database>system</database>
    <table>processors_profile_log</table>
    <partition_by>toYYYYMM(event_date)</partition_by>
    <flush_interval_milliseconds>7500</flush_interval_milliseconds>
</processors_profile_log>
<merge_tree_metadata_cache>
    <lru_cache_size>268435456</lru_cache_size>
    <continue_if_corrupted>true</continue_if_corrupted>
</merge_tree_metadata_cache>

<format_schema_path>/var/lib/clickhouse/format_schemas/</format_schema_path>
<top_level_domains_lists></top_level_domains_lists>

<!-- Where user definitions come from: users.xml plus the on-disk access directory. -->
<user_directories>
    <users_xml>
        <path>users.xml</path>
    </users_xml>
    <local_directory>
        <path>/var/lib/clickhouse/access/</path>
    </local_directory>
</user_directories>

<!-- "default" quota: one 3600-second interval with every limit set to 0. -->
<quotas>
    <default>
        <interval>
            <duration>3600</duration>
            <queries>0</queries>
            <errors>0</errors>
            <result_rows>0</result_rows>
            <read_rows>0</read_rows>
            <execution_time>0</execution_time>
        </interval>
    </default>
</quotas>

<!-- Coordination path for the distributed DDL task queue. -->
<distributed_ddl>
    <path>/clickhouse/task_queue/ddl</path>
</distributed_ddl>

<!-- Example Graphite rollup rules. -->
<graphite_rollup_example>
    <pattern>
        <regexp>click_cost</regexp>
        <function>any</function>
        <retention>
            <age>0</age>
            <precision>3600</precision>
        </retention>
        <retention>
            <age>86400</age>
            <precision>60</precision>
        </retention>
    </pattern>
    <default>
        <function>max</function>
        <retention>
            <age>0</age>
            <precision>60</precision>
        </retention>
        <retention>
            <age>3600</age>
            <precision>300</precision>
        </retention>
        <retention>
            <age>86400</age>
            <precision>3600</precision>
        </retention>
    </default>
</graphite_rollup_example>
<!--
    Query masking: hides the arguments of encrypt/decrypt calls before
    queries are written to logs.

    The argument alternatives (a quoted string, or anything up to the closing
    parenthesis) MUST stay inside one non-capturing group. Without that group
    the top-level "|" splits the whole pattern in two, and the second half
    matches any text that ends in ")", so almost every logged query would be
    rewritten to "(???)".
-->
<query_masking_rules>
    <rule>
        <name>hide encrypt/decrypt arguments</name>
        <regexp>((?:aes_)?(?:encrypt|decrypt)(?:_mysql)?)\s*\(\s*(?:'(?:\\'|.)+'|.*?)\s*\)</regexp>
        <replace>\1(???)</replace>
    </rule>
</query_masking_rules>
<!-- Crash report upload is switched off. -->
<send_crash_reports>
    <enabled>false</enabled>
    <anonymize>false</anonymize>
</send_crash_reports>

<encryption_codecs></encryption_codecs>

<!-- Glob patterns for extra config files: dictionaries and executable UDF definitions. -->
<dictionaries_config>*_dictionary.xml</dictionaries_config>
<user_defined_executable_functions_config>*function.xml</user_defined_executable_functions_config>
</clickhouse>

View file

@ -0,0 +1,12 @@
<!--
    Executable user-defined function "histogramQuantile".
    Takes two Float64 arrays (buckets, counts) and a Float64 quantile,
    exchanges data with the ./histogramQuantile command in CSV format,
    and returns a Float64.
-->
<functions>
    <function>
        <type>executable</type>
        <name>histogramQuantile</name>
        <return_type>Float64</return_type>
        <argument>
            <type>Array(Float64)</type>
            <name>buckets</name>
        </argument>
        <argument>
            <type>Array(Float64)</type>
            <name>counts</name>
        </argument>
        <argument>
            <type>Float64</type>
            <name>quantile</name>
        </argument>
        <format>CSV</format>
        <command>./histogramQuantile</command>
    </function>
</functions>

View file

@ -0,0 +1,30 @@
<?xml version="1.0"?>
<!--
    ClickHouse users, settings profiles and quotas.
    NOTE(review): the "default" user has an empty password and accepts
    connections from any address (::/0). Confirm the server is reachable
    only from trusted networks.
-->
<clickhouse>
    <!-- Settings profiles. -->
    <profiles>
        <default>
            <max_memory_usage>10000000000</max_memory_usage>
            <load_balancing>random</load_balancing>
        </default>
        <readonly>
            <readonly>1</readonly>
        </readonly>
    </profiles>

    <!-- Accounts. -->
    <users>
        <default>
            <password></password>
            <networks>
                <ip>::/0</ip>
            </networks>
            <profile>default</profile>
            <quota>default</quota>
        </default>
    </users>

    <!-- "default" quota: one 3600-second interval with every limit set to 0. -->
    <quotas>
        <default>
            <interval>
                <duration>3600</duration>
                <queries>0</queries>
                <errors>0</errors>
                <result_rows>0</result_rows>
                <read_rows>0</read_rows>
                <execution_time>0</execution_time>
            </interval>
        </default>
    </quotas>
</clickhouse>

View file

@ -1,71 +1,123 @@
name: signoz name: signoz
services: services:
# ===========================================================================
# ZooKeeper — coordination service that ClickHouse depends on
# ===========================================================================
zookeeper:
  image: signoz/zookeeper:${ZOOKEEPER_VERSION:-3.7.1}
  user: root
  restart: unless-stopped
  networks: [signoz]
  volumes:
    # Data is persisted on the host next to the compose file.
    - ./zookeeper-data:/bitnami/zookeeper
  environment:
    ZOO_SERVER_ID: 1
    ALLOW_ANONYMOUS_LOGIN: "yes"
    ZOO_AUTOPURGE_INTERVAL: 1
    ZOO_ENABLE_PROMETHEUS_METRICS: "yes"
    ZOO_PROMETHEUS_METRICS_PORT_NUMBER: 9141
  healthcheck:
    # Healthy when the "ruok" command output contains an "error" line that also contains "null".
    test: ["CMD-SHELL", "curl -s -m 2 http://localhost:8080/commands/ruok | grep error | grep null"]
    interval: 30s
    timeout: 5s
    retries: 3
# ===========================================================================
# Init ClickHouse — installs histogramQuantile user script
# ===========================================================================
# One-shot job: downloads the histogram-quantile binary that matches the
# container's OS/architecture and places it in the shared user_scripts
# directory, where the `clickhouse` service mounts it.
init-clickhouse:
  image: clickhouse/clickhouse-server:${CLICKHOUSE_VERSION:-25.5}
  restart: on-failure
  command:
    - bash
    - -c
    # Every shell `$` is written as `$$`: Compose interpolates `${...}` itself
    # before the container starts, and would replace the script's own
    # variables with empty strings, producing a broken download URL.
    # `set -e` makes a failed download/extract/move exit non-zero, so
    # `restart: on-failure` retries and dependants waiting on
    # `service_completed_successfully` are not started without the binary.
    - |
      set -e
      version="v0.0.1"
      node_os=$$(uname -s | tr '[:upper:]' '[:lower:]')
      node_arch=$$(uname -m | sed s/aarch64/arm64/ | sed s/x86_64/amd64/)
      echo "Fetching histogram-binary for $${node_os}/$${node_arch}"
      cd /tmp
      wget -O histogram-quantile.tar.gz "https://github.com/SigNoz/signoz/releases/download/histogram-quantile%2F$${version}/histogram-quantile_$${node_os}_$${node_arch}.tar.gz"
      tar -xvzf histogram-quantile.tar.gz
      mv histogram-quantile /var/lib/clickhouse/user_scripts/histogramQuantile
      echo "Done."
  volumes:
    - ./clickhouse/user_scripts:/var/lib/clickhouse/user_scripts
  networks:
    - signoz
# =========================================================================== # ===========================================================================
# ClickHouse — columnar storage for all telemetry data # ClickHouse — columnar storage for all telemetry data
# =========================================================================== # ===========================================================================
clickhouse: clickhouse:
image: clickhouse/clickhouse-server:${CLICKHOUSE_VERSION:-25.5} image: clickhouse/clickhouse-server:${CLICKHOUSE_VERSION:-25.5}
restart: unless-stopped restart: unless-stopped
tty: true
depends_on:
init-clickhouse:
condition: service_completed_successfully
zookeeper:
condition: service_healthy
environment: environment:
CLICKHOUSE_DB: ${CLICKHOUSE_DB:-signoz} CLICKHOUSE_SKIP_USER_SETUP: "1"
CLICKHOUSE_USER: ${CLICKHOUSE_USER:-admin}
CLICKHOUSE_PASSWORD: ${CLICKHOUSE_PASSWORD:-change-me-clickhouse-password}
volumes: volumes:
- ./clickhouse/config.xml:/etc/clickhouse-server/config.xml:ro
- ./clickhouse/users.xml:/etc/clickhouse-server/users.xml:ro
- ./clickhouse/custom-function.xml:/etc/clickhouse-server/custom-function.xml:ro
- ./clickhouse/user_scripts:/var/lib/clickhouse/user_scripts:ro
- ./clickhouse/config.d/cluster.xml:/etc/clickhouse-server/config.d/cluster.xml:ro
- ./clickhouse-data:/var/lib/clickhouse - ./clickhouse-data:/var/lib/clickhouse
healthcheck: healthcheck:
test: wget --no-verbose --tries=1 --spider http://localhost:8123/ping || exit 1 test:
interval: 5s - CMD
timeout: 3s - wget
retries: 30 - --spider
start_period: 10s - -q
- 0.0.0.0:8123/ping
interval: 30s
timeout: 5s
retries: 3
ulimits:
nproc: 65535
nofile:
soft: 262144
hard: 262144
networks: networks:
- signoz - signoz
# =========================================================================== # ===========================================================================
# SigNoz — all-in-one observability platform (query service + UI + collector) # SigNoz — query service + UI
# ===========================================================================
# Replaces both Alloy (OTEL collector) and LGTM (Grafana/Prometheus/Tempo/Loki).
# Accepts OTLP gRPC (4317) and OTLP HTTP (4318) from all stacks.
# UI on port 3301.
#
# Docs: https://signoz.io/docs/install/docker/
# =========================================================================== # ===========================================================================
signoz: signoz:
image: signoz/signoz:${SIGNOZ_VERSION:-latest} image: signoz/signoz:${SIGNOZ_VERSION:-v0.128.0}
restart: unless-stopped restart: unless-stopped
depends_on: depends_on:
clickhouse: clickhouse:
condition: service_healthy condition: service_healthy
environment: environment:
SIGNOZ_TELEMETRY_STORE: clickhouse SIGNOZ_ALERTMANAGER_PROVIDER: signoz
DSN: tcp://clickhouse:9000 SIGNOZ_TELEMETRYSTORE_CLICKHOUSE_DSN: tcp://clickhouse:9000
CLICKHOUSE_USER: ${CLICKHOUSE_USER:-admin} SIGNOZ_SQLSTORE_SQLITE_PATH: /var/lib/signoz/signoz.db
CLICKHOUSE_PASSWORD: ${CLICKHOUSE_PASSWORD:-change-me-clickhouse-password} SIGNOZ_TOKENIZER_JWT_SECRET: ${SIGNOZ_JWT_SECRET:-change-me-jwt-secret}
CLICKHOUSE_DATABASE: ${CLICKHOUSE_DB:-signoz}
STORAGE: clickhouse
CLICKHOUSE_ENDPOINT: tcp://clickhouse:9000
SIGNOZ_CLICKHOUSE_DSN: tcp://clickhouse:9000
ports: ports:
# SigNoz UI # SigNoz UI
- ${EXPOSE_SIGNOZ_UI_PORT:-3301}:3301 - ${EXPOSE_SIGNOZ_UI_PORT:-8080}:8080
# OTLP gRPC receiver
- ${EXPOSE_OTLP_GRPC_PORT:-4317}:4317
# OTLP HTTP receiver
- ${EXPOSE_OTLP_HTTP_PORT:-4318}:4318
volumes: volumes:
- ./signoz-data:/var/lib/signoz - ./signoz-data:/var/lib/signoz
healthcheck: healthcheck:
test: test:
- CMD - CMD
- wget - wget
- --no-verbose
- --tries=1
- --spider - --spider
- http://localhost:3301/api/v1/health - -q
interval: 15s - localhost:8080/api/v1/health
interval: 30s
timeout: 5s timeout: 5s
retries: 10 retries: 3
start_period: 30s start_period: 30s
networks: networks:
signoz: {} signoz: {}
@ -78,6 +130,66 @@ services:
# aliases: # aliases:
# - signoz # - signoz
# ===========================================================================
# OTEL Collector — OTLP ingestion endpoint for all stacks
# ===========================================================================
otel-collector:
  image: signoz/signoz-otel-collector:${OTELCOL_VERSION:-v0.144.5}
  restart: unless-stopped
  depends_on:
    clickhouse:
      condition: service_healthy
  # Run through a shell so the migration check can gate collector start-up.
  entrypoint: ["/bin/sh"]
  command:
    - -c
    - |
      /signoz-otel-collector migrate sync check &&
      /signoz-otel-collector --config=/etc/otel-collector-config.yaml --manager-config=/etc/manager-config.yaml --copy-path=/var/tmp/collector-config.yaml
  environment:
    OTEL_RESOURCE_ATTRIBUTES: host.name=signoz-host,os.type=linux
    LOW_CARDINAL_EXCEPTION_GROUPING: "false"
    SIGNOZ_OTEL_COLLECTOR_CLICKHOUSE_DSN: tcp://clickhouse:9000
    SIGNOZ_OTEL_COLLECTOR_CLICKHOUSE_CLUSTER: cluster
    SIGNOZ_OTEL_COLLECTOR_CLICKHOUSE_REPLICATION: "true"
    SIGNOZ_OTEL_COLLECTOR_TIMEOUT: 10m
  volumes:
    - ./otel-collector/otel-collector-config.yaml:/etc/otel-collector-config.yaml:ro
    - ./otel-collector/manager-config.yaml:/etc/manager-config.yaml:ro
  ports:
    # OTLP gRPC receiver
    - ${EXPOSE_OTLP_GRPC_PORT:-4317}:4317
    # OTLP HTTP receiver
    - ${EXPOSE_OTLP_HTTP_PORT:-4318}:4318
  # NOTE(review): `pipeline` must be declared under the top-level `networks:`
  # key; confirm it is, since that declaration is not visible here.
  networks: [signoz, pipeline]
# ===========================================================================
# Telemetry Store Migrator — applies the ClickHouse schema migrations
# ===========================================================================
migrator:
  image: signoz/signoz-otel-collector:${OTELCOL_VERSION:-v0.144.5}
  restart: on-failure
  depends_on:
    clickhouse:
      condition: service_healthy
  networks: [signoz]
  environment:
    SIGNOZ_OTEL_COLLECTOR_CLICKHOUSE_DSN: tcp://clickhouse:9000
    SIGNOZ_OTEL_COLLECTOR_CLICKHOUSE_CLUSTER: cluster
    SIGNOZ_OTEL_COLLECTOR_CLICKHOUSE_REPLICATION: "true"
    SIGNOZ_OTEL_COLLECTOR_TIMEOUT: 10m
  entrypoint: ["/bin/sh"]
  # Steps run in order; each one must succeed before the next starts.
  command:
    - -c
    - |
      /signoz-otel-collector migrate bootstrap &&
      /signoz-otel-collector migrate sync up &&
      /signoz-otel-collector migrate async up
networks: networks:
signoz: signoz:
name: signoz name: signoz

View file

@ -0,0 +1 @@
# OpAMP server endpoint for the collector's manager config:
# WebSocket on host `signoz`, port 4320, path /v1/opamp.
server_endpoint: ws://signoz:4320/v1/opamp

View file

@ -0,0 +1,118 @@
# Connectors: `signozmeter` is an exporter in the signal pipelines and the
# receiver of the `metrics/meter` pipeline.
connectors:
  signozmeter:
    metrics_flush_interval: 1h
    dimensions:
      - {name: service.name}
      - {name: deployment.environment}
      - {name: host.name}
# Receivers: OTLP over gRPC (4317) and HTTP (4318), plus a Prometheus
# scrape of localhost:8888 under the job name `otel-collector`.
receivers:
  otlp:
    protocols:
      grpc: {endpoint: 0.0.0.0:4317}
      http: {endpoint: 0.0.0.0:4318}
  prometheus:
    config:
      global:
        scrape_interval: 60s
      scrape_configs:
        - job_name: otel-collector
          static_configs:
            - targets: [localhost:8888]
              labels:
                job_name: otel-collector
processors:
  # Default batching for traces, metrics and logs.
  batch:
    send_batch_size: 10000
    send_batch_max_size: 11000
    timeout: 10s
  # Batching for the meter pipeline: larger batches, 1s timeout.
  batch/meter:
    send_batch_size: 20000
    send_batch_max_size: 25000
    timeout: 1s
  # NOTE(review): no pipeline in this file references `resourcedetection`.
  resourcedetection:
    # Using OTEL_RESOURCE_ATTRIBUTES envvar, env detector adds custom labels.
    detectors:
      - env
      - system
    timeout: 2s
  # Span-derived metrics, exported through `signozclickhousemetrics`.
  signozspanmetrics/delta:
    metrics_exporter: signozclickhousemetrics
    metrics_flush_interval: 60s
    latency_histogram_buckets: [100us, 1ms, 2ms, 6ms, 10ms, 50ms, 100ms, 250ms, 500ms, 1000ms, 1400ms, 2000ms, 5s, 10s, 20s, 40s, 60s]
    dimensions_cache_size: 100000
    aggregation_temporality: AGGREGATION_TEMPORALITY_DELTA
    enable_exp_histogram: true
    dimensions:
      - {name: service.namespace, default: default}
      - {name: deployment.environment, default: default}
      # This is added to ensure the uniqueness of the timeseries
      # Otherwise, identical timeseries produced by multiple replicas of
      # collectors result in incorrect APM metrics
      - {name: signoz.collector.id}
      - {name: service.version}
      - {name: browser.platform}
      - {name: browser.mobile}
      - {name: k8s.cluster.name}
      - {name: k8s.node.name}
      - {name: k8s.namespace.name}
      - {name: host.name}
      - {name: host.type}
      - {name: container.name}
# Extensions: health check on port 13133 and pprof on port 1777.
extensions:
  health_check: {endpoint: 0.0.0.0:13133}
  pprof: {endpoint: 0.0.0.0:1777}
# Exporters: each signal is written to its own database on clickhouse:9000.
exporters:
  # Traces -> signoz_traces
  clickhousetraces:
    datasource: tcp://clickhouse:9000/signoz_traces
    low_cardinal_exception_grouping: ${env:LOW_CARDINAL_EXCEPTION_GROUPING}
    use_new_schema: true
  # Metrics -> signoz_metrics
  signozclickhousemetrics:
    dsn: tcp://clickhouse:9000/signoz_metrics
  # Logs -> signoz_logs
  clickhouselogsexporter:
    dsn: tcp://clickhouse:9000/signoz_logs
    timeout: 10s
    use_new_schema: true
  # Meter data -> signoz_meter (sending queue disabled)
  signozclickhousemeter:
    dsn: tcp://clickhouse:9000/signoz_meter
    timeout: 45s
    sending_queue: {enabled: false}
  # Metadata -> signoz_metadata (in-memory cache)
  metadataexporter:
    cache: {provider: in_memory}
    dsn: tcp://clickhouse:9000/signoz_metadata
    enabled: true
    timeout: 45s
# Service wiring: collector telemetry, enabled extensions, and the pipelines
# that connect receivers, processors and exporters.
service:
  telemetry:
    logs: {encoding: json}
  extensions: [health_check, pprof]
  pipelines:
    traces:
      receivers:
        - otlp
      processors:
        - signozspanmetrics/delta
        - batch
      exporters:
        - clickhousetraces
        - metadataexporter
        - signozmeter
    metrics:
      receivers:
        - otlp
      processors:
        - batch
      exporters:
        - signozclickhousemetrics
        - metadataexporter
        - signozmeter
    metrics/prometheus:
      receivers:
        - prometheus
      processors:
        - batch
      exporters:
        - signozclickhousemetrics
        - metadataexporter
        - signozmeter
    logs:
      receivers:
        - otlp
      processors:
        - batch
      exporters:
        - clickhouselogsexporter
        - metadataexporter
        - signozmeter
    # Fed by the `signozmeter` connector rather than an external receiver.
    metrics/meter:
      receivers:
        - signozmeter
      processors:
        - batch/meter
      exporters:
        - signozclickhousemeter

View file

@ -1,7 +1,7 @@
## ----------------------------------------------------------------------------- ## -----------------------------------------------------------------------------
## SWAG proxy config for SigNoz ## SWAG proxy config for SigNoz
## Domain: signoz.ld50.xyz ## Domain: signoz.ld50.xyz
## Upstream: signoz:3301 (shared Docker network: ${NETWORKS_EXTERNAL_NAME:-swag}) ## Upstream: signoz:8080 (shared Docker network: ${NETWORKS_EXTERNAL_NAME:-swag})
## ##
## Install: ## Install:
## 1) Copy this file into SWAG: /config/nginx/proxy-confs/signoz.subdomain.conf ## 1) Copy this file into SWAG: /config/nginx/proxy-confs/signoz.subdomain.conf
@ -22,7 +22,7 @@ server {
include /config/nginx/proxy.conf; include /config/nginx/proxy.conf;
set $upstream_app signoz; set $upstream_app signoz;
set $upstream_port 3301; set $upstream_port 8080;
set $upstream_proto http; set $upstream_proto http;
proxy_pass $upstream_proto://$upstream_app:$upstream_port; proxy_pass $upstream_proto://$upstream_app:$upstream_port;