[ PROMPT_NODE_23792 ]
clickhouse-io
[ SKILL_DOCUMENTATION ]
# ClickHouse 分析模式
针对高性能分析和数据工程的 ClickHouse 特定模式。
## 概述
ClickHouse 是一个用于联机分析处理 (OLAP) 的列式数据库管理系统 (DBMS)。它针对大规模数据集上的快速分析查询进行了优化。
**关键特性:**
- 列式存储
- 数据压缩
- 并行查询执行
- 分布式查询
- 实时分析
## 表设计模式
### MergeTree 引擎 (最常用)
sql
CREATE TABLE markets_analytics (
date Date,
market_id String,
market_name String,
volume UInt64,
trades UInt32,
unique_traders UInt32,
avg_trade_size Float64,
created_at DateTime
) ENGINE = MergeTree()
PARTITION BY toYYYYMM(date)
ORDER BY (date, market_id)
SETTINGS index_granularity = 8192;
### ReplacingMergeTree (去重)
sql
-- 用于可能存在重复的数据 (例如来自多个源)
CREATE TABLE user_events (
event_id String,
user_id String,
event_type String,
timestamp DateTime,
properties String
) ENGINE = ReplacingMergeTree()
PARTITION BY toYYYYMM(timestamp)
ORDER BY (user_id, event_id, timestamp)
PRIMARY KEY (user_id, event_id);
### AggregatingMergeTree (预聚合)
sql
-- 用于维护聚合指标
CREATE TABLE market_stats_hourly (
hour DateTime,
market_id String,
total_volume AggregateFunction(sum, UInt64),
total_trades AggregateFunction(count, UInt32),
unique_users AggregateFunction(uniq, String)
) ENGINE = AggregatingMergeTree()
PARTITION BY toYYYYMM(hour)
ORDER BY (hour, market_id);
-- 查询聚合数据
SELECT
hour,
market_id,
sumMerge(total_volume) AS volume,
countMerge(total_trades) AS trades,
uniqMerge(unique_users) AS users
FROM market_stats_hourly
WHERE hour >= toStartOfHour(now() - INTERVAL 24 HOUR)
GROUP BY hour, market_id
ORDER BY hour DESC;
## 查询优化模式
### 高效过滤
sql
-- ✅ 好:优先使用索引列
SELECT *
FROM markets_analytics
WHERE date >= '2025-01-01'
AND market_id = 'market-123'
AND volume > 1000
ORDER BY date DESC
LIMIT 100;
-- ❌ 差:优先在非索引列上过滤
SELECT *
FROM markets_analytics
WHERE volume > 1000
AND market_name LIKE '%election%'
AND date >= '2025-01-01';
### 聚合
sql
-- ✅ 好:使用 ClickHouse 特有的聚合函数
SELECT
toStartOfDay(created_at) AS day,
market_id,
sum(volume) AS