From 5d4d53082d249e1fadb31fe369f74d25caff700d Mon Sep 17 00:00:00 2001 From: Alexey Lebedeff Date: Fri, 3 Feb 2017 19:13:14 +0300 Subject: [PATCH] Increase tcp_retries2 value The current value of 5 results in a socket operation timeout after ~12.9 seconds. This is a bit too low, e.g. we've seen RabbitMQ network splits in production. This 12.9s amount is equal to 0.2*(2^1+2^2+...+2^5), where 0.2 is the retransmission timeout (RTO) that is calculated by the kernel on a per-socket basis. But in fast local networks it is usually almost equal to the minimum value of 0.2s hardcoded in the Linux kernel (and BTW, the RFC says that the minimum value should be 1s). On the other hand, the comment in netconfig.pp says that our target timeout is ~54 seconds. And changing tcp_retries2 to 7 is consistent with that comment - tests on a live env show that the resulting timeout is ~52.2s Change-Id: Ib52f40ef1017a9da5a29cd62fb744a4597860763 diff --git a/deployment/puppet/osnailyfacter/manifests/netconfig/netconfig.pp b/deployment/puppet/osnailyfacter/manifests/netconfig/netconfig.pp index 78d59754a..42579ac60 100644 --- a/deployment/puppet/osnailyfacter/manifests/netconfig/netconfig.pp +++ b/deployment/puppet/osnailyfacter/manifests/netconfig/netconfig.pp @@ -42,7 +42,7 @@ class osnailyfacter::netconfig::netconfig { tcpka_time => '30', tcpka_probes => '8', tcpka_intvl => '3', - tcp_retries2 => '5', + tcp_retries2 => '7', } # increase network backlog for performance on fast networks diff --git a/tests/noop/spec/hosts/netconfig/netconfig_spec.rb b/tests/noop/spec/hosts/netconfig/netconfig_spec.rb index c175aed71..80ea0f242 100644 --- a/tests/noop/spec/hosts/netconfig/netconfig_spec.rb +++ b/tests/noop/spec/hosts/netconfig/netconfig_spec.rb @@ -27,7 +27,7 @@ describe manifest do 'tcpka_time' => '30', 'tcpka_probes' => '8', 'tcpka_intvl' => '3', - 'tcp_retries2' => '5', + 'tcp_retries2' => '7', ) } it { should contain_sysctl__value('net.core.netdev_max_backlog').with('value' => '261144') } it { should contain_class('sysfs') }