Optimize importing data from one table into another
0
votes
1
answer
34
views
Edited at mustaccio's request.
I have a simple table:
-- Table: public.objects
-- Root of the partition hierarchy: rows are list-partitioned on is_valid.
-- To rebuild from scratch, first run: DROP TABLE IF EXISTS public.objects;
CREATE TABLE IF NOT EXISTS public.objects (
    id       integer NOT NULL DEFAULT nextval('objects_id_seq'::regclass),
    value    text    COLLATE pg_catalog."default",
    key      text    COLLATE pg_catalog."default",
    is_valid boolean DEFAULT false
)
PARTITION BY LIST (is_valid);
ALTER TABLE IF EXISTS public.objects
    OWNER TO postgres;
-- Index: key_idx
-- B-tree index on public.objects.key (ascending, NULLs sorted last).
-- To rebuild, first run: DROP INDEX IF EXISTS public.key_idx;
CREATE INDEX IF NOT EXISTS key_idx
    ON public.objects
    USING btree (key COLLATE pg_catalog."default" ASC NULLS LAST);
-- Index: id_idx
-- B-tree index on public.objects.id (ascending, NULLs sorted last).
-- To rebuild, first run: DROP INDEX IF EXISTS public.id_idx;
CREATE INDEX IF NOT EXISTS id_idx
    ON public.objects
    USING btree (id ASC NULLS LAST);
-- Index: value_idx
-- B-tree index on public.objects.value (ascending, NULLs sorted last).
-- To rebuild, first run: DROP INDEX IF EXISTS public.value_idx;
CREATE INDEX IF NOT EXISTS value_idx
    ON public.objects
    USING btree (value COLLATE pg_catalog."default" ASC NULLS LAST);
-- Partitions SQL
-- Partition of public.objects for rows where is_valid is true. It is itself
-- list-partitioned on the lower-cased first character of value.
CREATE TABLE public.objects_with_valid
    PARTITION OF public.objects
    FOR VALUES IN (true)
    PARTITION BY LIST (lower("left"(value, 1)))
    TABLESPACE pg_default;
ALTER TABLE IF EXISTS public.objects_with_valid
    OWNER TO postgres;
-- Partition of public.objects for rows where is_valid is false. It is itself
-- list-partitioned on the lower-cased first character of value.
CREATE TABLE public.objects_without_valid
    PARTITION OF public.objects
    FOR VALUES IN (false)
    PARTITION BY LIST (lower("left"(value, 1)))
    TABLESPACE pg_default;
ALTER TABLE IF EXISTS public.objects_without_valid
    OWNER TO postgres;
The table has 92M+ rows. Selecting is quite fast: a query with a WHERE filter takes around 0.06 sec, and one with LIKE takes around 0.2 sec.
Then I imported data into a temporary table (around 500K rows). I want to insert the data from the temp table into the production one, but the method that I use is extremely slow.
-- Insert every staged (value, key) pair that is not already present in
-- public.objects. There is no WHEN MATCHED branch, so rows that already
-- exist are left untouched.
-- The target is written as public.objects because the bare word "table" is a
-- reserved key word in PostgreSQL and cannot be used as an unquoted relation
-- name.
-- NOTE: "=" never evaluates to true when either side is NULL, so staged rows
-- with a NULL value or key are always treated as NOT MATCHED and inserted.
MERGE INTO public.objects AS ca
USING temptable AS t
    ON t.value = ca.value
   AND t.key = ca.key
WHEN NOT MATCHED THEN
    INSERT (value, key, is_valid)
    VALUES (t.value, t.key, true);
EXPLAIN output for the statement above:
"Merge on objects ca (cost=40500054.86..42121761.54 rows=0 width=0)"
" Merge on objects_with_0 ca_1"
" Merge on objects_with_1 ca_2"
" Merge on objects_with_2 ca_3"
" Merge on objects_with_3 ca_4"
" Merge on objects_with_4 ca_5"
" Merge on objects_with_5 ca_6"
" Merge on objects_with_6 ca_7"
" Merge on objects_with_7 ca_8"
" Merge on objects_with_8 ca_9"
" Merge on objects_with_9 ca_10"
" Merge on objects_with_a ca_11"
" Merge on objects_with_b ca_12"
" Merge on objects_with_c ca_13"
" Merge on objects_with_d ca_14"
" Merge on objects_with_e ca_15"
" Merge on objects_with_f ca_16"
" Merge on objects_with_g ca_17"
" Merge on objects_with_h ca_18"
" Merge on objects_with_i ca_19"
" Merge on objects_with_j ca_20"
" Merge on objects_with_k ca_21"
" Merge on objects_with_l ca_22"
" Merge on objects_with_m ca_23"
" Merge on objects_with_n ca_24"
" Merge on objects_with_o ca_25"
" Merge on objects_with_p ca_26"
" Merge on objects_with_q ca_27"
" Merge on objects_with_r ca_28"
" Merge on objects_with_s ca_29"
" Merge on objects_with_t ca_30"
" Merge on objects_with_u ca_31"
" Merge on objects_with_v ca_32"
" Merge on objects_with_w ca_33"
" Merge on objects_with_x ca_34"
" Merge on objects_with_y ca_35"
" Merge on objects_with_z ca_36"
" Merge on objects_with_default ca_37"
" -> Merge Left Join (cost=40500054.86..42121761.54 rows=92459232 width=31)"
" Merge Cond: ((t.value = ca.value) AND (t.key = ca.key))"
" -> Sort (cost=19611819.49..19842967.57 rows=92459232 width=21)"
" Sort Key: t.value, t.key"
" -> Seq Scan on tempemails t (cost=0.00..1689976.32 rows=92459232 width=21)"
" -> Materialize (cost=20888235.37..21349648.96 rows=92282717 width=31)"
" -> Sort (cost=20888235.37..21118942.17 rows=92282717 width=31)"
" Sort Key: ca.value, ca.key"
" -> Append (cost=0.00..2055626.75 rows=92282717 width=31)"
" -> Seq Scan on objects_with_0 ca_1 (cost=0.00..777.18 rows=45418 width=31)"
" Filter: is_valid"
" -> Seq Scan on objects_with_1 ca_2 (cost=0.00..2561.07 rows=151407 width=30)"
" Filter: is_valid"
" -> Seq Scan on objects_with_2 ca_3 (cost=0.00..1427.53 rows=84653 width=29)"
" Filter: is_valid"
" -> Seq Scan on objects_with_3 ca_4 (cost=0.00..937.78 rows=56378 width=27)"
" Filter: is_valid"
" -> Seq Scan on objects_with_4 ca_5 (cost=0.00..853.42 rows=51242 width=27)"
" Filter: is_valid"
" -> Seq Scan on objects_with_5 ca_6 (cost=0.00..670.16 rows=40216 width=27)"
" Filter: is_valid"
" -> Seq Scan on objects_with_6 ca_7 (cost=0.00..521.06 rows=31206 width=27)"
" Filter: is_valid"
" -> Seq Scan on objects_with_7 ca_8 (cost=0.00..590.27 rows=35327 width=27)"
" Filter: is_valid"
" -> Seq Scan on objects_with_8 ca_9 (cost=0.00..515.52 rows=30752 width=27)"
" Filter: is_valid"
" -> Seq Scan on objects_with_9 ca_10 (cost=0.00..479.70 rows=28670 width=28)"
" Filter: is_valid"
" -> Seq Scan on objects_with_a ca_11 (cost=0.00..131448.62 rows=7602062 width=32)"
" Filter: is_valid"
" -> Seq Scan on objects_with_b ca_12 (cost=0.00..81150.92 rows=4700792 width=31)"
" Filter: is_valid"
" -> Seq Scan on objects_with_c ca_13 (cost=0.00..101959.91 rows=5871091 width=32)"
" Filter: is_valid"
" -> Seq Scan on objects_with_d ca_14 (cost=0.00..83177.18 rows=4819218 width=31)"
" Filter: is_valid"
" -> Seq Scan on objects_with_e ca_15 (cost=0.00..57243.55 rows=3315255 width=31)"
" Filter: is_valid"
" -> Seq Scan on objects_with_f ca_16 (cost=0.00..42702.34 rows=2466034 width=32)"
" Filter: is_valid"
" -> Seq Scan on objects_with_g ca_17 (cost=0.00..57396.93 rows=3321793 width=31)"
" Filter: is_valid"
" -> Seq Scan on objects_with_h ca_18 (cost=0.00..44526.97 rows=2579897 width=31)"
" Filter: is_valid"
" -> Seq Scan on objects_with_i ca_19 (cost=0.00..31954.73 rows=1852873 width=31)"
" Filter: is_valid"
" -> Seq Scan on objects_with_j ca_20 (cost=0.00..110328.16 rows=6384516 width=31)"
" Filter: is_valid"
" -> Seq Scan on objects_with_k ca_21 (cost=0.00..74663.27 rows=4339927 width=31)"
" Filter: is_valid"
" -> Seq Scan on objects_with_l ca_22 (cost=0.00..92877.07 rows=5369207 width=31)"
" Filter: is_valid"
" -> Seq Scan on objects_with_m ca_23 (cost=0.00..156545.37 rows=9049237 width=32)"
" Filter: is_valid"
" -> Seq Scan on objects_with_n ca_24 (cost=0.00..48544.22 rows=2814722 width=31)"
" Filter: is_valid"
" -> Seq Scan on objects_with_o ca_25 (cost=0.00..20505.73 rows=1193873 width=31)"
" Filter: is_valid"
" -> Seq Scan on objects_with_p ca_26 (cost=0.00..67841.19 rows=3923819 width=32)"
" Filter: is_valid"
" -> Seq Scan on objects_with_q ca_27 (cost=0.00..3265.34 rows=190334 width=31)"
" Filter: is_valid"
" -> Seq Scan on objects_with_r ca_28 (cost=0.00..74204.71 rows=4295571 width=31)"
" Filter: is_valid"
" -> Seq Scan on objects_with_s ca_29 (cost=0.00..132988.34 rows=7689234 width=32)"
" Filter: is_valid"
" -> Seq Scan on objects_with_t ca_30 (cost=0.00..68148.51 rows=3949151 width=31)"
" Filter: is_valid"
" -> Seq Scan on objects_with_u ca_31 (cost=0.00..7068.79 rows=412379 width=31)"
" Filter: is_valid"
" -> Seq Scan on objects_with_v ca_32 (cost=0.00..30176.51 rows=1746351 width=31)"
" Filter: is_valid"
" -> Seq Scan on objects_with_w ca_33 (cost=0.00..27847.93 rows=1613693 width=31)"
" Filter: is_valid"
" -> Seq Scan on objects_with_x ca_34 (cost=0.00..10339.96 rows=595496 width=32)"
" Filter: is_valid"
" -> Seq Scan on objects_with_y ca_35 (cost=0.00..15530.14 rows=902014 width=31)"
" Filter: is_valid"
" -> Seq Scan on objects_with_z ca_36 (cost=0.00..12222.85 rows=715885 width=30)"
" Filter: is_valid"
" -> Seq Scan on objects_with_default ca_37 (cost=0.00..220.24 rows=13024 width=29)"
" Filter: is_valid"
"JIT:"
" Functions: 196"
" Options: Inlining true, Optimization true, Expressions true, Deforming true"
I run the database inside Docker:
PostgreSQL 15.4 on x86_64-pc-linux-musl, compiled by gcc (Alpine 12.2.1_git20220924-r10) 12.2.1 20220924, 64-bit
host:
CPU i7 4 core
Ram 16GB 2133
Storage 256GB ssd
How can I optimize this process?
Asked by Solmorth
(1 rep)
Sep 7, 2023, 09:43 PM
Last activity: Sep 8, 2023, 01:17 PM
Last activity: Sep 8, 2023, 01:17 PM