diff options
-rw-r--r-- | build/f_repos/Makefile | 3 | ||||
-rw-r--r-- | build/f_repos/patch/fuel-library/0003-Increase-tcp_retries2-value.patch | 51 | ||||
-rw-r--r-- | deploy/cloud/deployment.py | 13 |
3 files changed, 9 insertions, 58 deletions
diff --git a/build/f_repos/Makefile b/build/f_repos/Makefile index be6f82909..5dd1c1e7f 100644 --- a/build/f_repos/Makefile +++ b/build/f_repos/Makefile @@ -89,7 +89,8 @@ patches-import: sub .cachepatched SUB_FEATURE=$${p_dir#$$SUB_DIR}; \ SUB_TAG=${F_OPNFV_TAG}-fuel$$SUB_FEATURE/patch; \ echo "`tput setaf 2`-- patching $$name ($$SUB_TAG)`tput sgr0`";\ - git tag $$SUB_TAG-root && git am -3 --whitespace=nowarn \ + git tag $$SUB_TAG-root && \ + git am -3 --whitespace=nowarn --patch-format=mbox \ --committer-date-is-author-date $$SUB_PATCHES && \ git tag $$SUB_TAG || exit 1; \ fi \ diff --git a/build/f_repos/patch/fuel-library/0003-Increase-tcp_retries2-value.patch b/build/f_repos/patch/fuel-library/0003-Increase-tcp_retries2-value.patch deleted file mode 100644 index 02d0a038e..000000000 --- a/build/f_repos/patch/fuel-library/0003-Increase-tcp_retries2-value.patch +++ /dev/null @@ -1,51 +0,0 @@ -From: Alexey Lebedeff <alebedev@mirantis.com> -Date: Fri, 3 Feb 2017 19:13:14 +0300 -Subject: [PATCH] Increase tcp_retries2 value - -Current value of 5 results in socket operation timeout after ~12.9 -seconds. This is a bit too low, e.g. we've seen RabbitMQ network -splits in production. - -This 12.9s amount is equal as 0.2*(2^1+2^2+..2^5), where 0.2 is a -retry timeout (RTO) that is calculated by kernel on a per-socket -basis. But in fast local networks it usually almost equal to minimum -values of 0.2s hardcoded in linux kernel (and BTW, RFC says that -minimum value should be 1s). - -On the other hand, comment in netconfig.pp says that our target -timeout is ~54 seconds. And changing tcp_retries2 to 7 is consistent -with that comment - tests an live env show that resulting timeout is -~52.2s - -Change-Id: Ib52f40ef1017a9da5a29cd62fb744a4597860763 ---- - deployment/puppet/osnailyfacter/manifests/netconfig/netconfig.pp | 2 +- - tests/noop/spec/hosts/netconfig/netconfig_spec.rb | 2 +- - 2 files changed, 2 insertions(+), 2 deletions(-) - -diff --git a/deployment/puppet/osnailyfacter/manifests/netconfig/netconfig.pp b/deployment/puppet/osnailyfacter/manifests/netconfig/netconfig.pp -index 78d5975..42579ac 100644 ---- a/deployment/puppet/osnailyfacter/manifests/netconfig/netconfig.pp -+++ b/deployment/puppet/osnailyfacter/manifests/netconfig/netconfig.pp -@@ -42,7 +42,7 @@ class osnailyfacter::netconfig::netconfig { - tcpka_time => '30', - tcpka_probes => '8', - tcpka_intvl => '3', -- tcp_retries2 => '5', -+ tcp_retries2 => '7', - } - - # increase network backlog for performance on fast networks -diff --git a/tests/noop/spec/hosts/netconfig/netconfig_spec.rb b/tests/noop/spec/hosts/netconfig/netconfig_spec.rb -index c175aed..80ea0f2 100644 ---- a/tests/noop/spec/hosts/netconfig/netconfig_spec.rb -+++ b/tests/noop/spec/hosts/netconfig/netconfig_spec.rb -@@ -27,7 +27,7 @@ describe manifest do - 'tcpka_time' => '30', - 'tcpka_probes' => '8', - 'tcpka_intvl' => '3', -- 'tcp_retries2' => '5', -+ 'tcp_retries2' => '7', - ) } - it { should contain_sysctl__value('net.core.netdev_max_backlog').with('value' => '261144') } - it { should contain_class('sysfs') } diff --git a/deploy/cloud/deployment.py b/deploy/cloud/deployment.py index 28bcfdf1d..7f791cbc6 100644 --- a/deploy/cloud/deployment.py +++ b/deploy/cloud/deployment.py @@ -24,6 +24,8 @@ LOG_FILE = '/var/log/puppet.log' GREP_LINES_OF_LEADING_CONTEXT = 100 GREP_LINES_OF_TRAILING_CONTEXT = 100 LIST_OF_CHAR_TO_BE_ESCAPED = ['[', ']', '"'] +ERROR_MSGS = ['Critical nodes are not available for deployment', + 'offline. Remove them from environment and try again.'] class DeployNotStart(Exception): @@ -112,8 +114,8 @@ class Deployment(object): ready = False timeout = False - attempts = 0 - while attempts < 3: + attempts = 5 + while attempts > 0: try: if time.time() > start + abort_after: timeout = True @@ -132,7 +134,7 @@ class Deployment(object): time.sleep(SLEEP_TIME) except (DeployNotStart, NodesGoOffline) as e: log(e) - attempts += 1 + attempts -= 1 deploy_id = None time.sleep(SLEEP_TIME * attempts) @@ -164,8 +166,7 @@ class Deployment(object): def _deployment_status(self, id): task = self._task_fields(id) if task['status'] == 'error': - if task['message'].endswith( - 'offline. Remove them from environment and try again.'): + if any(msg in task['message'] for msg in ERROR_MSGS): raise NodesGoOffline(task['message']) return task['status'], task['progress'], task['message'] @@ -190,7 +191,7 @@ class Deployment(object): exec_cmd('rm -f /var/log/remote/fuel-snapshot-*', False) exec_cmd('rm -f /root/deploy-*', False) log('Generating Fuel deploy snap-shot') - if exec_cmd('fuel snapshot < /dev/null &> snapshot.log', False)[1] <> 0: + if exec_cmd('fuel snapshot < /dev/null &> snapshot.log', False)[1] != 0: log('Could not create a Fuel snapshot') else: exec_cmd('mv /root/fuel-snapshot* /var/log/remote/', False) |