| Таблица | Кортежей | Размер |
|---|---|---|
| genres | 19 | 8 192 B |
| links | 58 098 | 2 968 kB |
| movie_genres | 101 841 | 4 408 kB |
| movie_tags | 1 108 997 | 55 MB |
| movies | 58 098 | 3 400 kB |
| rating | 27 753 444 | 1 594 MB |
| tags | 74 715 | 3 728 kB |
Общий размер: ~2 298 MB
FOREIGN KEY объявлены, но никакие индексы не создавались.
select m.id, m.title, avg(r.rating)
from movies m
join movie_genres gm on (gm.movie_id = m.id)
join genres g on (g.id = gm.genre_id)
join movie_tags tm on (tm.movie_id = m.id)
join tags t on (t.id = tm.tag_id)
join ratings r on (r.movie_id = m.id)
where lower(g.name) = lower('Comedy')
and lower(t.name) like lower('Zombie%')
group by m.id, m.title
order by avg(r.rating) desc;
select m.id, m.title, avg(r.rating)
from movies m
join movie_genres gm on (gm.movie_id = m.id)
join genres g on (g.id = gm.genre_id)
join movie_tags tm on (tm.movie_id = m.id)
join tags t on (t.id = tm.tag_id)
join ratings r on (r.movie_id = m.id)
where lower(g.name) = lower('Comedy')
and lower(t.name) like lower('Zombie%')
group by m.id, m.title
order by avg(r.rating) desc;
select m.id, m.title, avg(r.rating)
from (
select m.id, m.title
from movies m
join movie_genres gm on (gm.movie_id = m.id)
join genres g on (g.id = gm.genre_id)
join movie_tags tm on (tm.movie_id = m.id)
join tags t on (t.id = tm.tag_id)
where lower(g.name) = lower('Comedy')
and lower(t.name) like lower('Zombie%')
group by m.id
) m
join ratings r on (r.movie_id = m.id)
group by m.id, m.title
order by avg(r.rating) desc;
select m.id, m.title, avg(r.rating)
from (
select m.id, m.title
from movies m
join movie_genres gm on (gm.movie_id = m.id)
join genres g on (g.id = gm.genre_id)
join movie_tags tm on (tm.movie_id = m.id)
join tags t on (t.id = tm.tag_id)
where lower(g.name) = lower('Comedy')
and lower(t.name) like lower('Zombie%')
group by m.id
) m
join ratings r on (r.movie_id = m.id)
group by m.id, m.title
order by avg(r.rating) desc;
SELECT E.*
FROM Employee E WHERE EXISTS (
SELECT *
FROM Department D WHERE D.DepartmentID = E.DepartmentID
);
SELECT E.*
FROM Employee E WHERE E.DepartmentID IN (
SELECT DepartmentID
FROM Department D
);
SELECT E.*
FROM Employee E
JOIN Department D ON (E.DepartmentID = D.DepartmentID);
Удалось ускорить в 93.90 раз! Победа?
Заменим 'Zombie%' на 'Comedy%' и снова более 10 секунд :(
select m.id, m.title, avg(r.rating)
from (
select m.id, m.title
from movies m
join movie_genres gm on (gm.movie_id = m.id)
join genres g on (g.id = gm.genre_id)
join movie_tags tm on (tm.movie_id = m.id)
join tags t on (t.id = tm.tag_id)
where lower(g.name) = lower('Comedy')
and lower(t.name) like lower('ZombieComedy%')
group by m.id
) m
join ratings r on (r.movie_id = m.id)
group by m.id, m.title
order by avg(r.rating) desc;
alter table movies
add column rating_sum float default 0 not null,
add column rating_cnt int default 0 not null;
update movies m
set
rating_cnt = r.rating_cnt,
rating_sum = r.rating_sum
from (
select movie_id, count(*) as rating_cnt, sum(rating) as rating_sum
from ratings
group by movie_id
) r where (m.id = r.movie_id);
create trigger ...;
select m.id, m.title, m.rating_sum / m.rating_cnt
from movies m
join movie_genres gm on (gm.movie_id = m.id)
join genres g on (g.id = gm.genre_id)
join movie_tags tm on (tm.movie_id = m.id)
join tags t on (t.id = tm.tag_id)
where lower(g.name) = lower('Comedy')
and lower(t.name) like lower('Zombie%')
and m.rating_cnt > 0
group by m.id
order by 3 desc
DELETE FROM films;
Для удаления всех записей из таблицы/таблиц есть отдельная команда
TRUNCATE TABLE films CASCADE;
Особенности:
SELECT COUNT(*) FROM ratings;
Если нужно ориентировочное количество записей в таблице, то можно выполнить запрос вида:
SELECT reltuples::bigint AS estimate
FROM pg_class
WHERE oid = 'ratings'::regclass;
SELECT COUNT(*) FROM (
SELECT * FROM tags WHERE name LIKE 'Zombie%'
) as t;
Если нужно ориентировочное количество записей в результате выполнения запроса, то можно получить их из плана выполнения:
EXPLAIN SELECT * FROM tags WHERE name LIKE 'Comedy%';
CREATE FUNCTION count_estimate(query text) RETURNS INTEGER AS
$func$
DECLARE
rec record;
ROWS INTEGER;
BEGIN
FOR rec IN EXECUTE 'EXPLAIN ' || query LOOP
ROWS := SUBSTRING(rec."QUERY PLAN" FROM ' rows=([[:digit:]]+)');
EXIT WHEN ROWS IS NOT NULL;
END LOOP;
RETURN ROWS;
END
$func$ LANGUAGE plpgsql;
SELECT count_estimate('SELECT * FROM tags WHERE name LIKE ''Comedy%''');
SELECT * FROM movies
ORDER BY title LIMIT 100 OFFSET 2000;
SELECT * FROM movies
WHERE title > 'Angry Video Game Nerd: The Movie'
ORDER BY title LIMIT 100;
SELECT *
FROM movies
ORDER BY title
LIMIT 10 OFFSET 100;
SELECT m.*
FROM movies m
JOIN (
SELECT id
FROM movies m
ORDER BY title
LIMIT 10 OFFSET 100
) l ON (m.id = l.id)
ORDER BY title;
xxx: биг дата - это кладбище данных
xxx: раз в столетие туда приходит спиритист чтобы спросить какого-то мертвеца о чем-то - и снова тишина..
CREATE TABLE test (
id SERIAL PRIMARY KEY,
title TEXT,
created_on TIMESTAMPTZ NOT NULL DEFAULT NOW()
);
CREATE INDEX idx_test_created_on ON test (created_on);
CREATE TABLE test_1
(CHECK ( id >= 100 AND id < 200 ))
INHERITS (test);
CREATE TABLE test_2 ( LIKE test INCLUDING ALL );
ALTER TABLE test_2
INHERIT test,
ADD CONSTRAINT partition_check CHECK (id >= 200 and id < 300);
CREATE EXTENSION pg_pathman;
CREATE TABLE journal (
id SERIAL,
dt TIMESTAMP NOT NULL,
level INTEGER,
msg TEXT);
CREATE INDEX ON journal(dt);
INSERT INTO journal (dt, level, msg)
SELECT g, random() * 6, md5(g::text)
FROM generate_series('2015-01-01'::date, '2015-12-31'::date, '1 minute') as g;
SELECT create_range_partitions('journal', 'dt',
'2015-01-01'::date, '1 day'::interval);
CREATE TABLE list_customers (
id INTEGER,
status TEXT,
arr NUMERIC
) PARTITION BY LIST(status);
CREATE TABLE customers_active PARTITION OF list_customers FOR VALUES IN ('ACTIVE');
CREATE TABLE customers_archived PARTITION OF list_customers FOR VALUES IN ('EXPIRED');
CREATE TABLE customers_others PARTITION OF list_customers DEFAULT;
INSERT INTO list_customers VALUES
(1,'ACTIVE',100),
(2,'RECURRING',20),
(3,'EXPIRED',38),
(4,'REACTIVATED',144);
CREATE TABLE hash_customers (
id INTEGER,
status TEXT,
arr NUMERIC
) PARTITION BY HASH(id);
CREATE TABLE customers_part1 PARTITION OF hash_customers
FOR VALUES WITH (modulus 3, remainder 0);
CREATE TABLE customers_part2 PARTITION OF hash_customers
FOR VALUES WITH (modulus 3, remainder 1);
CREATE TABLE customers_part3 PARTITION OF hash_customers
FOR VALUES WITH (modulus 3, remainder 2);
INSERT INTO hash_customers VALUES
(1,'ACTIVE',100),
(2,'RECURRING',20),
(3,'EXPIRED',38),
(4,'REACTIVATED',144);
CREATE TABLE range_customers (
id INTEGER,
status TEXT,
arr NUMERIC
) PARTITION BY RANGE(arr);
CREATE TABLE customers_arr_small PARTITION OF range_customers
FOR VALUES FROM (MINVALUE) TO (25);
CREATE TABLE customers_arr_medium PARTITION OF range_customers
FOR VALUES FROM (25) TO (75);
CREATE TABLE customers_arr_large PARTITION OF range_customers
FOR VALUES FROM (75) TO (MAXVALUE);
INSERT INTO customers VALUES
(1,'ACTIVE',100),
(2,'RECURRING',20),
(3,'EXPIRED',38),
(4,'REACTIVATED',144);
Уменьшение времени блокировок за счет избавления от крупных запросов вида «обновить всё».
До:
DELETE FROM work_logs
WHERE created_at > NOW() - interval '90 days';
После (выполнять в цикле):
BEGIN;
DELETE FROM work_logs
WHERE id IN (
SELECT id
DELETE FROM work_logs
WHERE created_at > NOW() - interval '90 days'
LIMIT 10000
);
COMMIT;
Решение «в лоб»:
String sql = "INSERT INTO foo (name, description) VALUES (?, ?)";
try (PreparedStatement ps = db.prepareStatement(sql)) {
for (int i = 0; i < 100; ++i) {
ps.setString(1, "name-" + i);
ps.setString(2, "description-" + i);
ps.execute();
}
}
Более производительное решение:
String sql = "INSERT INTO foo (name, description) VALUES (?, ?)";
try (PreparedStatement ps = db.prepareStatement(sql)) {
for (int i = 0; i < 100; ++i) {
ps.setString(1, "name-" + i);
ps.setString(2, "description-" + i);
ps.addBatch();
}
ps.executeBatch();
}
Решение без покемонов:
String sql = "INSERT INTO foo (name, description) VALUES (?, ?)";
try (PreparedStatement ps = db.prepareStatement(sql,
Statement.NO_GENERATED_KEYS)) {
for (int i = 0; i < 100; ++i) {
ps.setString(1, "name-" + i);
ps.setString(2, "description-" + i);
ps.addBatch();
}
ps.executeBatch();
}
Загрузка данных через COPY гораздо быстрее:
COPY movie_tags (
user_id,
movie_id,
tag,
created)
FROM 'ml-latest/tags.csv'
WITH (
DELIMITER ',',
FORMAT CSV,
HEADER true,
ESCAPE '"',
ENCODING 'UTF-8'
);
BEGIN;
SELECT * FROM player WHERE id = 42 FOR UPDATE;
...
UPDATE player SET money = 100500 WHERE id = 42;
COMMIT;
BEGIN;
SELECT * FROM player WHERE id = 42;
COMMIT;
...
BEGIN;
UPDATE player SET money = 100500, ver = 13
WHERE id = 42 AND ver = 12;
COMMIT;
UPDATE players SET spent_daily = 0;
BEGIN;
UPDATE player SET money = 100500 WHERE id = 42;
COMMIT;
BEGIN;
SET LOCAL lock_timeout = '100ms';
UPDATE player SET money = 100500 WHERE id = 42;
COMMIT;
CREATE INDEX sales_quantity_index
ON sales_table (quantity);
CREATE INDEX CONCURRENTLY sales_quantity_index
ON sales_table (quantity);
ALTER TABLE ratings ADD COLUMN created_new TIMESTAMPTZ NULL;
UPDATE ratings SET
created_new = 'epoch'::TIMESTEMPTZ
+ created * INTERVAL '1 second';
ALTER TABLE ratings DROP COLUMN created;
ALTER TABLE ratings RENAME COLUMN created_new TO created;
ALTER TABLE ratings ALTER COLUMN created SET NOT NULL;
ALTER TABLE ratings
ALTER COLUMN created SET DATA TYPE TIMESTAMPTZ
USING 'epoch'::TIMESTEMPTZ + created * INTERVAL '1 second';