SELECT, INSERT, агрегатные функции, оконные функции, функции для работы с массивами, JOIN, подзапросы
Полный гид по SQL-диалекту ClickHouse: от базовых запросов до оконных функций и работы с массивами
SELECT
column1,
column2,
function(column3) AS alias
FROM table_name
WHERE condition
GROUP BY column1
HAVING condition
ORDER BY column2 DESC
LIMIT 100;Важно: ORDER BY в CREATE TABLE и в SELECT — это разные вещи:
-- ORDER BY в CREATE TABLE: физическая сортировка данных
CREATE TABLE events
ENGINE = MergeTree()
ORDER BY (event_date, user_id); -- Определяет индекс
-- ORDER BY в SELECT: порядок результатов
SELECT * FROM events
ORDER BY event_date DESC; -- Сортировка результата-- Уникальные значения
SELECT DISTINCT user_id FROM events;
-- По нескольким колонкам
SELECT DISTINCT country, city FROM events;
-- DISTINCT с агрегатами
SELECT
country,
COUNT(DISTINCT user_id) AS unique_users
FROM events
GROUP BY country;SELECT
count() AS total_rows, -- Все строки
count(user_id) AS non_null, -- Без NULL
countDistinct(user_id) AS unique, -- COUNT(DISTINCT ...)
sum(amount) AS total_amount,
avg(amount) AS avg_amount,
min(amount) AS min_amount,
max(amount) AS max_amount,
groupArray(amount) AS amounts_array -- Массив значений
FROM transactions;SELECT
-- Квантили
quantile(0.5)(response_time) AS p50,
quantile(0.9)(response_time) AS p90,
quantile(0.95)(response_time) AS p95,
quantile(0.99)(response_time) AS p99,
-- Точные квантили (дороже)
quantileExact(0.5)(response_time) AS exact_p50,
-- Для временных данных (быстрее)
quantileTiming(0.5)(response_time) AS timing_p50,
-- Среднеквадратичное отклонение
stddevPop(response_time) AS stddev,
-- Медиана
median(response_time) AS median
FROM api_logs;SELECT
-- Приблизительный подсчёт (быстро, память ~1KB)
uniq(user_id) AS unique_users,
-- Точный подсчёт (медленно для больших данных)
uniqExact(user_id) AS exact_users,
-- Комбинированный (авто-выбор)
uniqCombined(user_id) AS combined_users,
-- Для больших данных (>1B)
uniqCombined64(user_id) AS users_64
FROM events;Когда какой использовать:
uniq — по умолчанию, ошибка ~1-2%uniqExact — когда нужна точностьuniqCombined — компромисс точности и скоростиSELECT
-- Фильтрация внутри агрегата
sumIf(amount, status = 'completed') AS completed_amount,
countIf(status = 'failed') AS failed_count,
avgIf(response_time, response_time < 1000) AS avg_fast,
-- Минимум/максимум с условием
minIf(created_at, status = 'active') AS first_active,
maxIf(created_at, status = 'active') AS last_active,
-- Аргумент для мин/макс
argMin(user_id, created_at) AS first_user,
argMax(user_id, created_at) AS last_user
FROM orders;-- Группировка по одной колонке
SELECT
country,
count() AS events
FROM page_views
GROUP BY country
ORDER BY events DESC
LIMIT 10;
-- Группировка по нескольким колонкам
SELECT
country,
city,
count() AS events
FROM page_views
GROUP BY country, city
ORDER BY country, events DESC;Автоматическое создание иерархических агрегатов:
SELECT
country,
city,
count() AS events
FROM page_views
GROUP BY country, city WITH ROLLUP
ORDER BY country, city;Результат:
country | city | events
---------|---------|--------
RU | Moscow | 1000
RU | SPb | 500
RU | NULL | 1500 ← Итог по стране
US | NYC | 2000
US | LA | 800
US | NULL | 2800 ← Итог по стране
NULL | NULL | 4300 ← Общий итог
Все возможные комбинации группировки:
SELECT
country,
platform,
count() AS events
FROM page_views
GROUP BY country, platform WITH CUBE;Результат включает:
Явное указание наборов группировки:
SELECT
country,
platform,
count() AS events
FROM page_views
GROUPING SETS (
(country, platform), -- По стране и платформе
(country), -- Только по стране
(platform), -- Только по платформе
() -- Общий итог
);SELECT
event_time,
user_id,
amount,
-- Сумма по окну
sum(amount) OVER (PARTITION BY user_id ORDER BY event_time) AS running_total,
-- Номер строки
row_number() OVER (PARTITION BY user_id ORDER BY event_time) AS row_num,
-- Ранг
rank() OVER (PARTITION BY user_id ORDER BY amount DESC) AS rank,
-- Предыдущее значение
lag(amount) OVER (PARTITION BY user_id ORDER BY event_time) AS prev_amount,
-- Следующее значение
lead(amount) OVER (PARTITION BY user_id ORDER BY event_time) AS next_amount,
-- Среднее скользящее
avg(amount) OVER (
PARTITION BY user_id
ORDER BY event_time
ROWS BETWEEN 2 PRECEDING AND CURRENT ROW
) AS moving_avg
FROM transactions;SELECT
event_time,
value,
-- Скользящее среднее за 7 дней
avg(value) OVER (
ORDER BY event_time
RANGE BETWEEN INTERVAL 6 DAY PRECEDING AND CURRENT ROW
) AS avg_7d,
-- Сумма за последние 3 строки
sum(value) OVER (
ORDER BY event_time
ROWS BETWEEN 2 PRECEDING AND CURRENT ROW
) AS sum_3rows,
-- Сумма от начала до текущей строки
sum(value) OVER (
ORDER BY event_time
ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
) AS running_total
FROM metrics;SELECT
user_id,
event_time,
amount,
-- Первое значение в окне
first_value(amount) OVER (
PARTITION BY user_id
ORDER BY event_time
) AS first_amount,
-- Последнее значение
last_value(amount) OVER (
PARTITION BY user_id
ORDER BY event_time
ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
) AS last_amount,
-- N-ное значение
nth_value(amount, 3) OVER (
PARTITION BY user_id
ORDER BY event_time
) AS third_amount
FROM transactions;SELECT
-- Создание массива
array(1, 2, 3) AS arr,
[1, 2, 3] AS arr_literal,
-- Доступ к элементу
arrayElement(arr, 1) AS first, -- Индекс с 1!
arr[1] AS first_short,
-- Размер массива
arraySize(arr) AS size,
-- Наличие значения
has(arr, 2) AS has_two,
-- Индекс значения
indexOf(arr, 2) AS index_of_two
FROM numbers(1);SELECT
arr,
-- Преобразование каждого элемента
arrayMap(x -> x * 2, arr) AS doubled,
-- Фильтрация
arrayFilter(x -> x > 2, arr) AS filtered,
-- Сортировка
arraySort(x -> x, arr) AS sorted,
-- Обратный порядок
arrayReverse(arr) AS reversed,
-- Уникальные значения
arrayDistinct(arr) AS distinct,
-- Объединение массивов
arrayConcat([1, 2], [3, 4]) AS concatenated
FROM (SELECT [1, 2, 3, 2, 4] AS arr FROM numbers(1));-- Исходные данные
SELECT
user_id,
tags
FROM articles;
-- Результат:
-- user_id | tags
-- 1 | ['sql', 'database']
-- 2 | ['python', 'web']
-- arrayJoin превращает каждый элемент в строку
SELECT
user_id,
arrayJoin(tags) AS tag
FROM articles;
-- Результат:
-- user_id | tag
-- 1 | sql
-- 1 | database
-- 2 | python
-- 2 | webSELECT
-- Группировка в массив
groupArray(user_id) AS user_ids,
-- Уникальные в массиве
groupUniqArray(user_id) AS unique_users,
-- С ограничением размера
groupArray(10)(user_id) AS top_10_users,
-- Массив сумм
groupArraySum(amount) AS amounts_by_group,
-- Массив максимумов
groupArrayMax(amount) AS max_by_group
FROM transactions
GROUP BY date;-- INNER JOIN (только совпадения)
SELECT
o.order_id,
u.name
FROM orders AS o
INNER JOIN users AS u ON o.user_id = u.id;
-- LEFT JOIN (все из левой + совпадения)
SELECT
u.id,
u.name,
o.order_id
FROM users AS u
LEFT JOIN orders AS o ON u.id = o.user_id;
-- RIGHT JOIN (все из правой + совпадения)
SELECT
u.id,
o.order_id
FROM users AS u
RIGHT JOIN orders AS o ON u.id = o.user_id;
-- FULL JOIN (все строки)
SELECT
u.id,
o.order_id
FROM users AS u
FULL JOIN orders AS o ON u.id = o.user_id;-- Когда имена колонок совпадают
SELECT
u.name,
o.amount
FROM users AS u
LEFT JOIN orders AS o USING (user_id);-- Декартово произведение
SELECT
c.name AS category,
p.name AS product
FROM categories AS c
CROSS JOIN products AS p;JOIN по диапазону (для временных рядов):
SELECT
t.time,
t.price AS trade_price,
q.quote_price
FROM trades AS t
ASOF LEFT JOIN quotes AS q
ON t.symbol = q.symbol
AND t.time >= q.time -- Последняя котировка до сделки
ORDER BY t.time;-- ANY: любая подходящая строка (быстрее)
SELECT
o.user_id,
any(u.name) AS user_name
FROM orders AS o
LEFT ANY JOIN users AS u ON o.user_id = u.id
GROUP BY o.user_id;
-- ALL: все подходящие строки (по умолчанию)
SELECT
o.user_id,
u.name
FROM orders AS o
LEFT JOIN users AS u ON o.user_id = u.id;SELECT
user_id,
amount,
(SELECT avg(amount) FROM orders) AS avg_amount
FROM orders;SELECT
date,
daily_total,
avg(daily_total) OVER () AS monthly_avg
FROM (
SELECT
toDate(created_at) AS date,
sum(amount) AS daily_total
FROM orders
GROUP BY date
) AS daily_orders;-- Пользователи с заказами выше среднего
SELECT * FROM users
WHERE id IN (
SELECT user_id
FROM orders
WHERE amount > (SELECT avg(amount) FROM orders)
);
-- EXISTS для проверки наличия
SELECT * FROM users u
WHERE EXISTS (
SELECT 1 FROM orders o WHERE o.user_id = u.id
);WITH
daily_sales AS (
SELECT
toDate(created_at) AS date,
sum(amount) AS daily_total
FROM orders
GROUP BY date
),
monthly_avg AS (
SELECT
toStartOfMonth(date) AS month,
avg(daily_total) AS avg_daily
FROM daily_sales
GROUP BY month
)
SELECT
d.date,
d.daily_total,
m.avg_daily AS monthly_avg
FROM daily_sales AS d
JOIN monthly_avg AS m ON toStartOfMonth(d.date) = m.month
ORDER BY d.date;-- UNION ALL (с дубликатами, быстрее)
SELECT user_id, 'order' AS source FROM orders
UNION ALL
SELECT user_id, 'payment' AS source FROM payments;
-- UNION DISTINCT (без дубликатов)
SELECT user_id FROM orders
UNION DISTINCT
SELECT user_id FROM payments;SELECT
now() AS current_timestamp,
today() AS current_date,
-- Извлечение частей
toYear(event_time) AS year,
toMonth(event_time) AS month,
toDayOfMonth(event_time) AS day,
toHour(event_time) AS hour,
toMinute(event_time) AS minute,
toSecond(event_time) AS second,
toDayOfWeek(event_time) AS weekday, -- 1=Monday
-- Округление
toStartOfDay(event_time) AS day_start,
toStartOfWeek(event_time) AS week_start,
toStartOfMonth(event_time) AS month_start,
toStartOfQuarter(event_time) AS quarter_start,
toStartOfYear(event_time) AS year_start,
-- Усечение
date_trunc('hour', event_time) AS truncated_hour,
-- Добавление/вычитание
addDays(event_time, 7) AS next_week,
subtractMonths(event_time, 1) AS last_month,
-- Разница
dateDiff('hour', start_time, end_time) AS hours_diff
FROM events;SELECT
-- Длина
length(text) AS chars,
lengthUTF8(text) AS utf8_chars,
-- Поиск
position(text, 'pattern') AS pos, -- Позиция (1-based)
locate(text, 'pattern') AS locate, -- То же
-- Извлечение
substring(text, 1, 5) AS first_5,
left(text, 5) AS left_5,
right(text, 5) AS right_5,
-- Замена
replaceOne(text, 'old', 'new') AS replaced,
replaceAll(text, 'old', 'new') AS replaced_all,
-- Разделение
splitByString(',', text) AS parts,
splitByWhitespace(text) AS words,
-- Соединение
arrayStringConcat(['a', 'b', 'c'], ',') AS joined,
-- Регистр
lower(text) AS lowercase,
upper(text) AS uppercase,
-- Trim
trim(text) AS trimmed,
ltrim(text) AS left_trimmed,
rtrim(text) AS right_trimmed,
-- Проверка
startsWith(text, 'prefix') AS starts_with,
endsWith(text, 'suffix') AS ends_with,
-- Regex
match(text, 'regex') AS matches,
extract(text, '(\\d+)') AS extracted
FROM strings_table;SELECT
-- Проверка
isNull(value) AS is_null,
isNotNull(value) AS is_not_null,
-- Замена NULL
ifNull(value, 'default') AS with_default,
coalesce(value, 'a', 'b', 'c') AS first_non_null,
-- NULL для условия
nullIf(value, 0) AS null_if_zero -- NULL если value=0
FROM nullable_table;SELECT
-- Простое условие
if(amount > 100, 'high', 'low') AS category,
-- Много условий
multiIf(
amount < 10, 'small',
amount < 100, 'medium',
amount < 1000, 'large',
'xlarge'
) AS size,
-- CASE (стандартный SQL)
CASE
WHEN amount < 10 THEN 'small'
WHEN amount < 100 THEN 'medium'
ELSE 'large'
END AS size_case
FROM products;count, sum, avg, uniq, quantilerow_number, rank, lag, lead, sum OVERarrayJoin, arrayMap, arrayFilter, groupArrayINNER, LEFT, RIGHT, FULL, ASOF, ANY/ALLWITH для читаемости сложных запросовИзучим индексы и производительность: первичные, вторичные индексы, проекции и оптимизацию запросов.
Вопросы ещё не добавлены
Вопросы для этой подтемы ещё не добавлены.