I need help with server-side optimization.
I think it is a filesystem cache problem, but I'm not sure — maybe I am wrong.
I have 2 servers (S1, S2), both running PostgreSQL 9.6.2, and the database contents are the same on both.
Profiling query:
S1
Limit (cost=0.28..7.42 rows=1 width=32) (actual time=0.035..0.035 rows=1 loops=1)
-> Nested Loop (cost=0.28..14.56 rows=2 width=32) (actual time=0.035..0.035 rows=1 loops=1)
Join Filter: (djangocms_blog_blogcategory.id = djangocms_blog_post_categories.blogcategory_id)
Rows Removed by Join Filter: 10
-> Index Only Scan using djangocms_blog_post_categories_post_id_blogcategory_id_key on djangocms_blog_post_categories (cost=0.28..12.28 rows=2 width=4) (actual time=0.017..0.017 rows=1 loops=1)
Index Cond: (post_id = 20)
Heap Fetches: 1
-> Materialize (cost=0.00..1.45 rows=30 width=32) (actual time=0.012..0.014 rows=11 loops=1)
-> Seq Scan on djangocms_blog_blogcategory (cost=0.00..1.30 rows=30 width=32) (actual time=0.008..0.008 rows=11 loops=1)
Planning time: 0.539 ms
Execution time: 0.065 ms
S2
Limit (cost=0.28..7.42 rows=1 width=32) (actual time=0.059..0.060 rows=1 loops=1)
-> Nested Loop (cost=0.28..14.57 rows=2 width=32) (actual time=0.059..0.059 rows=1 loops=1)
Join Filter: (djangocms_blog_blogcategory.id = djangocms_blog_post_categories.blogcategory_id)
Rows Removed by Join Filter: 10
-> Index Only Scan using djangocms_blog_post_categories_post_id_blogcategory_id_key on djangocms_blog_post_categories (cost=0.28..12.29 rows=2 width=4) (actual time=0.032..0.032 rows=1 loops=1)
Index Cond: (post_id = 20)
Heap Fetches: 1
-> Materialize (cost=0.00..1.45 rows=30 width=32) (actual time=0.012..0.021 rows=11 loops=1)
-> Seq Scan on djangocms_blog_blogcategory (cost=0.00..1.30 rows=30 width=32) (actual time=0.006..0.008 rows=11 loops=1)
Planning time: 0.847 ms
Execution time: 0.126 ms
Why are the above execution times different — approximately 2 times slower on the S2 server?
System info:
S1: uses plain hardware storage (e.g. /dev/sda)
OS: CoreOS 1185.3.0, Linux 4.7.3-coreos-r2
CPU: Intel(R) Xeon(R) CPU E5-1650 v3 @ 3.50GHz
RAM: 8 banks DIMM Synchronous 2133 MHz (0.5 ns), vendor: Micron, size: 16GiB
Motherboard: ASUSTeK COMPUTER INC. Z10PA-U8 Series
S2: uses software RAID 1 (e.g. /dev/md)
OS: Ubuntu 16.04, Linux 4.8.0-49-generic
CPU: Intel(R) Xeon(R) CPU E5-1650 v3 @ 3.50GHz
RAM: 8 banks DIMM Synchronous 2400 MHz (0.4 ns), vendor: Micron, size: 31GiB
Motherboard: ASUSTeK COMPUTER INC. Z10PA-U8 Series
sysctl -a differences between S1 and S2
--- S1 2017-05-10 12:06:13.000000000 +0400
+++ S2 2017-05-10 12:06:17.000000000 +0400
@@ -2,0 +3,30 @@
+debug.kprobes-optimization = 1
@@ -3,0 +34,5 @@
+dev.mac_hid.mouse_button2_keycode = 97
+dev.mac_hid.mouse_button3_keycode = 100
+dev.mac_hid.mouse_button_emulation = 0
+dev.raid.speed_limit_max = 200000
+dev.raid.speed_limit_min = 1000
@@ -6 +41 @@
-fs.aio-nr = 16248
+fs.aio-nr = 0
@@ -8 +43 @@
-fs.dentry-state = 2479916 2429108 45 0 0 0
+fs.dentry-state = 801876 778676 45 0 0 0
@@ -10,5 +45,5 @@
-fs.epoll.max_user_watches = 27012874
-fs.file-max = 13160039
-fs.file-nr = 7936 0 13160039
-fs.inode-nr = 1219692 585566
-fs.inode-state = 1219692 585566 0 0 0 0 0
+fs.epoll.max_user_watches = 54067363
+fs.file-max = 131070
+fs.file-nr = 2944 0 131070
+fs.inode-nr = 743397 373
+fs.inode-state = 743397 373 0 0 0 0 0
@@ -19,0 +55 @@
+fs.mount-max = 100000
@@ -25,10 +60,0 @@
-fs.nfs.idmap_cache_timeout = 0
-fs.nfs.nfs_callback_tcpport = 0
-fs.nfs.nfs_congestion_kb = 262144
-fs.nfs.nfs_mountpoint_timeout = 500
-fs.nfs.nlm_grace_period = 0
-fs.nfs.nlm_tcpport = 0
-fs.nfs.nlm_timeout = 10
-fs.nfs.nlm_udpport = 0
-fs.nfs.nsm_local_state = 3
-fs.nfs.nsm_use_hostnames = 0
@@ -49 +75 @@
-fs.quota.syncs = 0
+fs.quota.syncs = 738
@@ -52,2 +77,0 @@
-fscache.object_max_active = 12
-fscache.operation_max_active = 6
@@ -62 +86 @@
-kernel.core_pattern = |/usr/lib/systemd/systemd-coredump %P %u %g %s %t %c %e
+kernel.core_pattern = core
@@ -64 +88 @@
-kernel.core_uses_pid = 1
+kernel.core_uses_pid = 0
@@ -71,2 +95,3 @@
-kernel.hardlockup_panic = 1
-kernel.hostname = db01
+kernel.hardlockup_panic = 0
+kernel.hostname = db02
+kernel.hotplug =
@@ -77 +102 @@
-kernel.io_delay_type = 0
+kernel.io_delay_type = 1
@@ -81,0 +107 @@
+kernel.keys.persistent_keyring_expiry = 259200
@@ -86 +111,0 @@
-kernel.latencytop = 0
@@ -89,0 +115 @@
+kernel.moksbstate_disabled = 0
@@ -96 +122 @@
-kernel.ns_last_pid = 30114
+kernel.ns_last_pid = 5983
@@ -102 +128 @@
-kernel.osrelease = 4.7.3-coreos-r2
+kernel.osrelease = 4.8.0-49-generic
@@ -109 +135 @@
-kernel.panic_on_stackoverflow = 0
+kernel.panic_on_rcu_stall = 0
@@ -114 +140 @@
-kernel.perf_event_max_sample_rate = 50000
+kernel.perf_event_max_sample_rate = 32000
@@ -117 +143 @@
-kernel.perf_event_paranoid = 2
+kernel.perf_event_paranoid = 3
@@ -121 +147 @@
-kernel.printk = 7 4 1 7
+kernel.printk = 4 4 1 7
@@ -122,0 +149 @@
+kernel.printk_devkmsg = ratelimit
@@ -126 +153 @@
-kernel.pty.nr = 3
+kernel.pty.nr = 8
@@ -128,2 +155,2 @@
-kernel.random.entropy_avail = 825
+kernel.random.entropy_avail = 3413
@@ -133,2 +160,2 @@
-kernel.random.write_wakeup_threshold = 896
+kernel.random.write_wakeup_threshold = 1024
@@ -148 +175 @@
-kernel.sched_domain.cpu0.domain0.max_newidle_lb_cost = 7553
+kernel.sched_domain.cpu0.domain0.max_newidle_lb_cost = 12934
@@ -161 +188 @@
-kernel.sched_domain.cpu0.domain1.max_newidle_lb_cost = 8457
+kernel.sched_domain.cpu0.domain1.max_newidle_lb_cost = 14562
@@ -174 +201 @@
-kernel.sched_domain.cpu1.domain0.max_newidle_lb_cost = 5790
+kernel.sched_domain.cpu1.domain0.max_newidle_lb_cost = 8807
@@ -187 +214 @@
-kernel.sched_domain.cpu1.domain1.max_newidle_lb_cost = 7589
+kernel.sched_domain.cpu1.domain1.max_newidle_lb_cost = 11616
@@ -200 +227 @@
-kernel.sched_domain.cpu10.domain0.max_newidle_lb_cost = 5613
+kernel.sched_domain.cpu10.domain0.max_newidle_lb_cost = 16533
@@ -213 +240 @@
-kernel.sched_domain.cpu10.domain1.max_newidle_lb_cost = 5614
+kernel.sched_domain.cpu10.domain1.max_newidle_lb_cost = 15852
@@ -226 +253 @@
-kernel.sched_domain.cpu11.domain0.max_newidle_lb_cost = 5731
+kernel.sched_domain.cpu11.domain0.max_newidle_lb_cost = 9695
@@ -239 +266 @@
-kernel.sched_domain.cpu11.domain1.max_newidle_lb_cost = 6304
+kernel.sched_domain.cpu11.domain1.max_newidle_lb_cost = 12636
@@ -252 +279 @@
-kernel.sched_domain.cpu2.domain0.max_newidle_lb_cost = 4771
+kernel.sched_domain.cpu2.domain0.max_newidle_lb_cost = 12337
@@ -265 +292 @@
-kernel.sched_domain.cpu2.domain1.max_newidle_lb_cost = 6913
+kernel.sched_domain.cpu2.domain1.max_newidle_lb_cost = 11912
@@ -278 +305 @@
-kernel.sched_domain.cpu3.domain0.max_newidle_lb_cost = 5315
+kernel.sched_domain.cpu3.domain0.max_newidle_lb_cost = 8975
@@ -291 +318 @@
-kernel.sched_domain.cpu3.domain1.max_newidle_lb_cost = 12228
+kernel.sched_domain.cpu3.domain1.max_newidle_lb_cost = 12429
@@ -304 +331 @@
-kernel.sched_domain.cpu4.domain0.max_newidle_lb_cost = 7697
+kernel.sched_domain.cpu4.domain0.max_newidle_lb_cost = 11457
@@ -317 +344 @@
-kernel.sched_domain.cpu4.domain1.max_newidle_lb_cost = 7813
+kernel.sched_domain.cpu4.domain1.max_newidle_lb_cost = 12466
@@ -330 +357 @@
-kernel.sched_domain.cpu5.domain0.max_newidle_lb_cost = 5840
+kernel.sched_domain.cpu5.domain0.max_newidle_lb_cost = 6009
@@ -343 +370 @@
-kernel.sched_domain.cpu5.domain1.max_newidle_lb_cost = 6493
+kernel.sched_domain.cpu5.domain1.max_newidle_lb_cost = 12096
@@ -356 +383 @@
-kernel.sched_domain.cpu6.domain0.max_newidle_lb_cost = 6487
+kernel.sched_domain.cpu6.domain0.max_newidle_lb_cost = 9209
@@ -369 +396 @@
-kernel.sched_domain.cpu6.domain1.max_newidle_lb_cost = 11182
+kernel.sched_domain.cpu6.domain1.max_newidle_lb_cost = 13395
@@ -382 +409 @@
-kernel.sched_domain.cpu7.domain0.max_newidle_lb_cost = 6140
+kernel.sched_domain.cpu7.domain0.max_newidle_lb_cost = 9542
@@ -395 +422 @@
-kernel.sched_domain.cpu7.domain1.max_newidle_lb_cost = 5866
+kernel.sched_domain.cpu7.domain1.max_newidle_lb_cost = 15411
@@ -408 +435 @@
-kernel.sched_domain.cpu8.domain0.max_newidle_lb_cost = 8012
+kernel.sched_domain.cpu8.domain0.max_newidle_lb_cost = 7499
@@ -421 +448 @@
-kernel.sched_domain.cpu8.domain1.max_newidle_lb_cost = 7803
+kernel.sched_domain.cpu8.domain1.max_newidle_lb_cost = 14383
@@ -434 +461 @@
-kernel.sched_domain.cpu9.domain0.max_newidle_lb_cost = 5389
+kernel.sched_domain.cpu9.domain0.max_newidle_lb_cost = 9134
@@ -447 +474 @@
-kernel.sched_domain.cpu9.domain1.max_newidle_lb_cost = 6507
+kernel.sched_domain.cpu9.domain1.max_newidle_lb_cost = 12891
@@ -456 +483 @@
-kernel.sched_rr_timeslice_ms = 100
+kernel.sched_rr_timeslice_ms = 25
@@ -463,0 +491 @@
+kernel.secure_boot = 0
@@ -465,0 +494 @@
+kernel.sg-big-buff = 32768
@@ -473 +502 @@
-kernel.softlockup_panic = 1
+kernel.softlockup_panic = 0
@@ -476 +505 @@
-kernel.sysrq = 16
+kernel.sysrq = 176
@@ -478 +507 @@
-kernel.threads-max = 1030459
+kernel.threads-max = 2062506
@@ -482,0 +512,3 @@
+kernel.unprivileged_bpf_disabled = 0
+kernel.unprivileged_userns_apparmor_policy = 1
+kernel.unprivileged_userns_clone = 1
@@ -485 +517 @@
-kernel.version = #1 SMP Tue Nov 1 01:38:43 UTC 2016
+kernel.version = #52~16.04.1-Ubuntu SMP Thu Apr 20 10:55:59 UTC 2017
@@ -488,0 +521 @@
+kernel.yama.ptrace_scope = 1
@@ -499 +532 @@
-net.core.default_qdisc = fq_codel
+net.core.default_qdisc = pfifo_fast
@@ -514 +547 @@
-net.core.somaxconn = 65535
+net.core.somaxconn = 131070
@@ -2452 +2059 @@
-vm.lowmem_reserve_ratio = 256 256 32
+vm.lowmem_reserve_ratio = 256 256 32 1
@@ -2459 +2066 @@
-vm.mmap_min_addr = 4096
+vm.mmap_min_addr = 65536
HDParm tests
S1
hdparm -Tt /dev/sda9
/dev/sda9:
Timing cached reads: 23880 MB in 1.99 seconds = 11974.99 MB/sec
Timing buffered disk reads: 1316 MB in 3.00 seconds = 438.57 MB/sec
S2
hdparm -Tt /dev/md2
/dev/md2:
Timing cached reads: 20508 MB in 2.00 seconds = 10262.05 MB/sec
Timing buffered disk reads: 1532 MB in 3.00 seconds = 510.02 MB/sec
The difference in the hdparm tests shows 16% lower cached reads on S2, but I do not think this is the main reason for the 2× slower execution time on S2.
All tests were repeated ~20 times on each server.
Asked by suquant
(121 rep)
May 11, 2017, 01:50 PM
Last activity: May 23, 2022, 09:02 AM