Resolving hostname takes 5 seconds

16 votes
5 answers
18670 views
                          I've a master bind9 DNS server and 2 slave servers running on IPv4 (Debian Jessie), using /etc/bind/named.conf:

    listen-on-v6 { none; };

When I try to connect from different server(s) each connection takes at least 5 seconds (I'm using Joseph's timing info  for debugging):

    $ curl -w "@curl-format.txt" -o /dev/null -s https://example.com 
                time_namelookup:  5.512
                   time_connect:  5.512
                time_appconnect:  5.529
               time_pretransfer:  5.529
                  time_redirect:  0.000
             time_starttransfer:  5.531
                                ----------
                     time_total:  5.531

According to curl, lookup takes most of the time, however standard nslookup is very fast:

    $ time nslookup example.com > /dev/null 2>&1
    
    real    0m0.018s
    user    0m0.016s
    sys     0m0.000s

After forcing curl to use IPv4, it gets much better:

    $ curl -4 -w "@curl-format.txt" -o /dev/null -s https://example.com 
    
                time_namelookup:  0.004
                   time_connect:  0.005
                time_appconnect:  0.020
               time_pretransfer:  0.020
                  time_redirect:  0.000
             time_starttransfer:  0.022
                                ----------
                     time_total:  0.022

I've disabled IPv6 on the host:

    echo 1 > /proc/sys/net/ipv6/conf/eth0/disable_ipv6

though the problem persists. I've tried running strace to see what's the reason of timeouts:

    write(2, "*", 1*)                        = 1
    write(2, " ", 1 )                        = 1
    write(2, "Hostname was NOT found in DNS ca"..., 36Hostname was NOT found in DNS cache
    ) = 36
    socket(PF_INET6, SOCK_DGRAM, IPPROTO_IP) = 4
    close(4)                                = 0
    mmap(NULL, 8392704, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS|MAP_STACK, -1, 0) = 0x7f220bcf8000
    mprotect(0x7f220bcf8000, 4096, PROT_NONE) = 0
    clone(child_stack=0x7f220c4f7fb0, flags=CLONE_VM|CLONE_FS|CLONE_FILES|CLONE_SIGHAND|CLONE_THREAD|CLONE_SYSVSEM|CLONE_SETTLS|CLONE_PARENT_SETTID|CLONE_CHILD_CLEARTID, parent_tidptr=0x7f220c4f89d0, tls=0x7f220c4f8700, child_tidptr=0x7f220c4f89d0) = 2004
    rt_sigaction(SIGPIPE, {SIG_IGN, [PIPE], SA_RESTORER|SA_RESTART, 0x7f22102e08d0}, NULL, 8) = 0
    rt_sigaction(SIGPIPE, NULL, {SIG_IGN, [PIPE], SA_RESTORER|SA_RESTART, 0x7f22102e08d0}, 8) = 0
    rt_sigaction(SIGPIPE, {SIG_IGN, [PIPE], SA_RESTORER|SA_RESTART, 0x7f22102e08d0}, NULL, 8) = 0
    rt_sigaction(SIGPIPE, {SIG_IGN, [PIPE], SA_RESTORER|SA_RESTART, 0x7f22102e08d0}, NULL, 8) = 0
    poll(0, 0, 4)                           = 0 (Timeout)
    rt_sigaction(SIGPIPE, NULL, {SIG_IGN, [PIPE], SA_RESTORER|SA_RESTART, 0x7f22102e08d0}, 8) = 0
    rt_sigaction(SIGPIPE, {SIG_IGN, [PIPE], SA_RESTORER|SA_RESTART, 0x7f22102e08d0}, NULL, 8) = 0
    rt_sigaction(SIGPIPE, {SIG_IGN, [PIPE], SA_RESTORER|SA_RESTART, 0x7f22102e08d0}, NULL, 8) = 0
    poll(0, 0, 8)                           = 0 (Timeout)
    rt_sigaction(SIGPIPE, NULL, {SIG_IGN, [PIPE], SA_RESTORER|SA_RESTART, 0x7f22102e08d0}, 8) = 0
    rt_sigaction(SIGPIPE, {SIG_IGN, [PIPE], SA_RESTORER|SA_RESTART, 0x7f22102e08d0}, NULL, 8) = 0
    rt_sigaction(SIGPIPE, {SIG_IGN, [PIPE], SA_RESTORER|SA_RESTART, 0x7f22102e08d0}, NULL, 8) = 0
    poll(0, 0, 16)                          = 0 (Timeout)
    rt_sigaction(SIGPIPE, NULL, {SIG_IGN, [PIPE], SA_RESTORER|SA_RESTART, 0x7f22102e08d0}, 8) = 0
    rt_sigaction(SIGPIPE, {SIG_IGN, [PIPE], SA_RESTORER|SA_RESTART, 0x7f22102e08d0}, NULL, 8) = 0
    rt_sigaction(SIGPIPE, {SIG_IGN, [PIPE], SA_RESTORER|SA_RESTART, 0x7f22102e08d0}, NULL, 8) = 0
    poll(0, 0, 32)                          = 0 (Timeout)
    rt_sigaction(SIGPIPE, NULL, {SIG_IGN, [PIPE], SA_RESTORER|SA_RESTART, 0x7f22102e08d0}, 8) = 0
    rt_sigaction(SIGPIPE, {SIG_IGN, [PIPE], SA_RESTORER|SA_RESTART, 0x7f22102e08d0}, NULL, 8) = 0
    rt_sigaction(SIGPIPE, {SIG_IGN, [PIPE], SA_RESTORER|SA_RESTART, 0x7f22102e08d0}, NULL, 8) = 0
    poll(0, 0, 64)                          = 0 (Timeout)

It doesn't seem to be a firewall issues as nslookup (or curl -4) is using the same DNS servers. Any idea what could be wrong?

Here's tcpdump from the host tcpdump -vvv -s 0 -l -n port 53:

    tcpdump: listening on eth0, link-type EN10MB (Ethernet), capture size 262144 bytes
    20:14:52.542526 IP (tos 0x0, ttl 64, id 35839, offset 0, flags [DF], proto UDP (17), length 63)
        192.168.1.1.59163 > 192.168.1.2.53: [bad udp cksum 0xf9f3 -> 0x96c7!] 39535+ A? example.com. (35)
    20:14:52.542540 IP (tos 0x0, ttl 64, id 35840, offset 0, flags [DF], proto UDP (17), length 63)
        192.168.1.1.59163 > 192.168.1.2.53: [bad udp cksum 0xf9f3 -> 0x6289!] 45997+ AAAA? example.com. (35)
    20:14:52.543281 IP (tos 0x0, ttl 61, id 63674, offset 0, flags [none], proto UDP (17), length 158)
        192.168.1.2.53 > 192.168.1.1.59163: [udp sum ok] 45997* q: AAAA? example.com. 1/1/0 example.com. [1h] CNAME s01.example.com. ns: example.com. [10m] SOA ns01.example.com. ns51.domaincontrol.com. 2016062008 28800 7200 1209600 600 (130)
    20:14:57.547439 IP (tos 0x0, ttl 64, id 36868, offset 0, flags [DF], proto UDP (17), length 63)
        192.168.1.1.59163 > 192.168.1.2.53: [bad udp cksum 0xf9f3 -> 0x96c7!] 39535+ A? example.com. (35)
    20:14:57.548188 IP (tos 0x0, ttl 61, id 64567, offset 0, flags [none], proto UDP (17), length 184)
        192.168.1.2.53 > 192.168.1.1.59163: [udp sum ok] 39535* q: A? example.com. 2/2/2 example.com. [1h] CNAME s01.example.com., s01.example.com. [1h] A 136.243.154.168 ns: example.com. [30m] NS ns01.example.com., example.com. [30m] NS ns02.example.com. ar: ns01.example.com. [1h] A 136.243.154.168, ns02.example.com. [1h] A 192.168.1.2 (156)
    20:14:57.548250 IP (tos 0x0, ttl 64, id 36869, offset 0, flags [DF], proto UDP (17), length 63)
        192.168.1.1.59163 > 192.168.1.2.53: [bad udp cksum 0xf9f3 -> 0x6289!] 45997+ AAAA? example.com. (35)
    20:14:57.548934 IP (tos 0x0, ttl 61, id 64568, offset 0, flags [none], proto UDP (17), length 158)
        192.168.1.2.53 > 192.168.1.1.59163: [udp sum ok] 45997* q: AAAA? example.com. 1/1/0 example.com. [1h] CNAME s01.example.com. ns: example.com. [10m] SOA ns01.example.com. ns51.domaincontrol.com. 2016062008 28800 7200 1209600 600 (130)

**EDIT:**
In bind logs frequently appears this message:

    error sending response: host unreachable

Though, each query is eventually answered (it just takes 5s). All machines are physical servers (it's not fault of NAT), it's more likely that packets are being blocked by a router. Here's quite likely related question: DNS lookups sometimes take 5 seconds .

                        
Asked by Tombart (3179 rep)
Jun 20, 2016, 08:21 PM
Last activity: Oct 30, 2023, 09:46 AM
Resolving hostname takes 5 seconds

Related Questions