Sample Header Ad - 728x90

Optimize importing data from one table into another

0 votes
1 answer
34 views
Edited at mustaccio's request. I have a simple table:
-- Parent table public.objects: an id, a text value, a text key and an is_valid flag.
-- Rows are routed to child partitions by is_valid (LIST partitioning).
-- The id default draws from the sequence objects_id_seq, which this script does
-- not create. Both text columns use the database default collation.
-- To rebuild from scratch, first run: DROP TABLE IF EXISTS public.objects;
CREATE TABLE IF NOT EXISTS public.objects (
    id       integer NOT NULL DEFAULT nextval('objects_id_seq'::regclass),
    value    text,
    key      text,
    is_valid boolean DEFAULT false
)
PARTITION BY LIST (is_valid);

ALTER TABLE IF EXISTS public.objects OWNER TO postgres;
-- Secondary indexes on the partitioned parent public.objects.
-- Each one is a single-column ascending B-tree with NULLs last, using the
-- column's default collation -- i.e. PostgreSQL's defaults for CREATE INDEX.
-- IF NOT EXISTS keeps the statements safe to re-run.

-- Index on key.
CREATE INDEX IF NOT EXISTS key_idx ON public.objects (key);

-- Index on id.
CREATE INDEX IF NOT EXISTS id_idx ON public.objects (id);

-- Index on value.
CREATE INDEX IF NOT EXISTS value_idx ON public.objects (value);

-- Partitions of public.objects, one per is_valid value.
-- Each partition is itself LIST-partitioned by the lower-cased first character
-- of value; the leaf partitions for individual characters are not created here.
-- IF NOT EXISTS makes these statements re-runnable, matching the parent table
-- and index definitions (a plain CREATE TABLE fails on a second run).

-- Rows with is_valid = true.
CREATE TABLE IF NOT EXISTS public.objects_with_valid PARTITION OF public.objects
    FOR VALUES IN (true)
    PARTITION BY LIST (lower("left"(value, 1)))
TABLESPACE pg_default;

ALTER TABLE IF EXISTS public.objects_with_valid
    OWNER to postgres;

-- Rows with is_valid = false.
CREATE TABLE IF NOT EXISTS public.objects_without_valid PARTITION OF public.objects
    FOR VALUES IN (false)
    PARTITION BY LIST (lower("left"(value, 1)))
TABLESPACE pg_default;

ALTER TABLE IF EXISTS public.objects_without_valid
    OWNER to postgres;
with 92M+ rows. Selecting is quite fast: a WHERE lookup takes around 0.06 sec and a LIKE search around 0.2 sec. Then I imported data into a temporary table (around 500K rows). I want to insert the data from the temp table into the production one, but the method that I use is extremely slow:
MERGE INTO table ca USING temptable t ON t.value = ca.value and t.key = ca.key WHEN NOT MATCHED THEN INSERT (value, key, is_valid) VALUES (t.value, t.key, true);
EXPLAIN for the above:
"Merge on objects ca  (cost=40500054.86..42121761.54 rows=0 width=0)"
"  Merge on objects_with_0 ca_1"
"  Merge on objects_with_1 ca_2"
"  Merge on objects_with_2 ca_3"
"  Merge on objects_with_3 ca_4"
"  Merge on objects_with_4 ca_5"
"  Merge on objects_with_5 ca_6"
"  Merge on objects_with_6 ca_7"
"  Merge on objects_with_7 ca_8"
"  Merge on objects_with_8 ca_9"
"  Merge on objects_with_9 ca_10"
"  Merge on objects_with_a ca_11"
"  Merge on objects_with_b ca_12"
"  Merge on objects_with_c ca_13"
"  Merge on objects_with_d ca_14"
"  Merge on objects_with_e ca_15"
"  Merge on objects_with_f ca_16"
"  Merge on objects_with_g ca_17"
"  Merge on objects_with_h ca_18"
"  Merge on objects_with_i ca_19"
"  Merge on objects_with_j ca_20"
"  Merge on objects_with_k ca_21"
"  Merge on objects_with_l ca_22"
"  Merge on objects_with_m ca_23"
"  Merge on objects_with_n ca_24"
"  Merge on objects_with_o ca_25"
"  Merge on objects_with_p ca_26"
"  Merge on objects_with_q ca_27"
"  Merge on objects_with_r ca_28"
"  Merge on objects_with_s ca_29"
"  Merge on objects_with_t ca_30"
"  Merge on objects_with_u ca_31"
"  Merge on objects_with_v ca_32"
"  Merge on objects_with_w ca_33"
"  Merge on objects_with_x ca_34"
"  Merge on objects_with_y ca_35"
"  Merge on objects_with_z ca_36"
"  Merge on objects_with_default ca_37"
"  ->  Merge Left Join  (cost=40500054.86..42121761.54 rows=92459232 width=31)"
"        Merge Cond: ((t.value = ca.value) AND (t.key = ca.key))"
"        ->  Sort  (cost=19611819.49..19842967.57 rows=92459232 width=21)"
"              Sort Key: t.value, t.key"
"              ->  Seq Scan on tempemails t  (cost=0.00..1689976.32 rows=92459232 width=21)"
"        ->  Materialize  (cost=20888235.37..21349648.96 rows=92282717 width=31)"
"              ->  Sort  (cost=20888235.37..21118942.17 rows=92282717 width=31)"
"                    Sort Key: ca.value, ca.key"
"                    ->  Append  (cost=0.00..2055626.75 rows=92282717 width=31)"
"                          ->  Seq Scan on objects_with_0 ca_1  (cost=0.00..777.18 rows=45418 width=31)"
"                                Filter: is_valid"
"                          ->  Seq Scan on objects_with_1 ca_2  (cost=0.00..2561.07 rows=151407 width=30)"
"                                Filter: is_valid"
"                          ->  Seq Scan on objects_with_2 ca_3  (cost=0.00..1427.53 rows=84653 width=29)"
"                                Filter: is_valid"
"                          ->  Seq Scan on objects_with_3 ca_4  (cost=0.00..937.78 rows=56378 width=27)"
"                                Filter: is_valid"
"                          ->  Seq Scan on objects_with_4 ca_5  (cost=0.00..853.42 rows=51242 width=27)"
"                                Filter: is_valid"
"                          ->  Seq Scan on objects_with_5 ca_6  (cost=0.00..670.16 rows=40216 width=27)"
"                                Filter: is_valid"
"                          ->  Seq Scan on objects_with_6 ca_7  (cost=0.00..521.06 rows=31206 width=27)"
"                                Filter: is_valid"
"                          ->  Seq Scan on objects_with_7 ca_8  (cost=0.00..590.27 rows=35327 width=27)"
"                                Filter: is_valid"
"                          ->  Seq Scan on objects_with_8 ca_9  (cost=0.00..515.52 rows=30752 width=27)"
"                                Filter: is_valid"
"                          ->  Seq Scan on objects_with_9 ca_10  (cost=0.00..479.70 rows=28670 width=28)"
"                                Filter: is_valid"
"                          ->  Seq Scan on objects_with_a ca_11  (cost=0.00..131448.62 rows=7602062 width=32)"
"                                Filter: is_valid"
"                          ->  Seq Scan on objects_with_b ca_12  (cost=0.00..81150.92 rows=4700792 width=31)"
"                                Filter: is_valid"
"                          ->  Seq Scan on objects_with_c ca_13  (cost=0.00..101959.91 rows=5871091 width=32)"
"                                Filter: is_valid"
"                          ->  Seq Scan on objects_with_d ca_14  (cost=0.00..83177.18 rows=4819218 width=31)"
"                                Filter: is_valid"
"                          ->  Seq Scan on objects_with_e ca_15  (cost=0.00..57243.55 rows=3315255 width=31)"
"                                Filter: is_valid"
"                          ->  Seq Scan on objects_with_f ca_16  (cost=0.00..42702.34 rows=2466034 width=32)"
"                                Filter: is_valid"
"                          ->  Seq Scan on objects_with_g ca_17  (cost=0.00..57396.93 rows=3321793 width=31)"
"                                Filter: is_valid"
"                          ->  Seq Scan on objects_with_h ca_18  (cost=0.00..44526.97 rows=2579897 width=31)"
"                                Filter: is_valid"
"                          ->  Seq Scan on objects_with_i ca_19  (cost=0.00..31954.73 rows=1852873 width=31)"
"                                Filter: is_valid"
"                          ->  Seq Scan on objects_with_j ca_20  (cost=0.00..110328.16 rows=6384516 width=31)"
"                                Filter: is_valid"
"                          ->  Seq Scan on objects_with_k ca_21  (cost=0.00..74663.27 rows=4339927 width=31)"
"                                Filter: is_valid"
"                          ->  Seq Scan on objects_with_l ca_22  (cost=0.00..92877.07 rows=5369207 width=31)"
"                                Filter: is_valid"
"                          ->  Seq Scan on objects_with_m ca_23  (cost=0.00..156545.37 rows=9049237 width=32)"
"                                Filter: is_valid"
"                          ->  Seq Scan on objects_with_n ca_24  (cost=0.00..48544.22 rows=2814722 width=31)"
"                                Filter: is_valid"
"                          ->  Seq Scan on objects_with_o ca_25  (cost=0.00..20505.73 rows=1193873 width=31)"
"                                Filter: is_valid"
"                          ->  Seq Scan on objects_with_p ca_26  (cost=0.00..67841.19 rows=3923819 width=32)"
"                                Filter: is_valid"
"                          ->  Seq Scan on objects_with_q ca_27  (cost=0.00..3265.34 rows=190334 width=31)"
"                                Filter: is_valid"
"                          ->  Seq Scan on objects_with_r ca_28  (cost=0.00..74204.71 rows=4295571 width=31)"
"                                Filter: is_valid"
"                          ->  Seq Scan on objects_with_s ca_29  (cost=0.00..132988.34 rows=7689234 width=32)"
"                                Filter: is_valid"
"                          ->  Seq Scan on objects_with_t ca_30  (cost=0.00..68148.51 rows=3949151 width=31)"
"                                Filter: is_valid"
"                          ->  Seq Scan on objects_with_u ca_31  (cost=0.00..7068.79 rows=412379 width=31)"
"                                Filter: is_valid"
"                          ->  Seq Scan on objects_with_v ca_32  (cost=0.00..30176.51 rows=1746351 width=31)"
"                                Filter: is_valid"
"                          ->  Seq Scan on objects_with_w ca_33  (cost=0.00..27847.93 rows=1613693 width=31)"
"                                Filter: is_valid"
"                          ->  Seq Scan on objects_with_x ca_34  (cost=0.00..10339.96 rows=595496 width=32)"
"                                Filter: is_valid"
"                          ->  Seq Scan on objects_with_y ca_35  (cost=0.00..15530.14 rows=902014 width=31)"
"                                Filter: is_valid"
"                          ->  Seq Scan on objects_with_z ca_36  (cost=0.00..12222.85 rows=715885 width=30)"
"                                Filter: is_valid"
"                          ->  Seq Scan on objects_with_default ca_37  (cost=0.00..220.24 rows=13024 width=29)"
"                                Filter: is_valid"
"JIT:"
"  Functions: 196"
"  Options: Inlining true, Optimization true, Expressions true, Deforming true"
I run the database inside Docker: PostgreSQL 15.4 on x86_64-pc-linux-musl, compiled by gcc (Alpine 12.2.1_git20220924-r10) 12.2.1 20220924, 64-bit. Host: CPU i7 (4 cores), RAM 16GB 2133, storage 256GB SSD. How can I optimize this process?
Asked by Solmorth (1 rep)
Sep 7, 2023, 09:43 PM
Last activity: Sep 8, 2023, 01:17 PM