diff options
author | carey.xu <carey.xuhan@huawei.com> | 2016-01-21 16:19:08 +0800 |
---|---|---|
committer | Justin chi <chigang@huawei.com> | 2016-01-21 13:16:06 +0000 |
commit | 9e2cecc26fef35eb0afffed0075d33d83ff48868 (patch) | |
tree | 41427d1fa6b5aaa380946a9c0413214ab9caec4a | |
parent | a9a13f03f0aa0896d1504b54f220e9306249ee4c (diff) |
bugfix: installation sometimes hangs
Change-Id: Idd266cf986e0546b0b80d6486698d340c530ff37
Signed-off-by: carey.xu <carey.xuhan@huawei.com>
(cherry picked from commit 8b1206a666d2d825a8aed654e07e0b60c9470d93)
-rw-r--r-- | deploy/client.py | 73 |
1 files changed, 37 insertions, 36 deletions
diff --git a/deploy/client.py b/deploy/client.py index 62d12885..174356e0 100644 --- a/deploy/client.py +++ b/deploy/client.py @@ -835,48 +835,49 @@ class CompassClient(object): raise RuntimeError("redeploy cluster failed") def get_installing_progress(self, cluster_id): - """get intalling progress.""" - action_timeout = time.time() + 60 * float(CONF.action_timeout) - deployment_timeout = time.time() + 60 * float( - CONF.deployment_timeout) - - current_time = time.time - deployment_failed = True - while current_time() < deployment_timeout: - status, cluster_state = self.client.get_cluster_state(cluster_id) - if not self.is_ok(status): - raise RuntimeError("can not get cluster state") + def _get_installing_progress(): + """get intalling progress.""" + action_timeout = time.time() + 60 * float(CONF.action_timeout) + deployment_timeout = time.time() + 60 * float( + CONF.deployment_timeout) + + current_time = time.time + while current_time() < deployment_timeout: + status, cluster_state = self.client.get_cluster_state(cluster_id) + if not self.is_ok(status): + LOG.error("can not get cluster state") - if cluster_state['state'] in ['UNINITIALIZED', 'INITIALIZED']: - if current_time() >= action_timeout: - deployment_failed = True + # maybe a transient error? 
+ time.sleep(5) + status, cluster_state = self.client.get_cluster_state(cluster_id) + if not self.is_ok(status): + # OK, there's something wrong + raise RuntimeError("can not get cluster state") + + if cluster_state['state'] in ['UNINITIALIZED', 'INITIALIZED']: + if current_time() >= action_timeout: + raise RuntimeError("installation timeout") + else: + time.sleep(5) + continue + + elif cluster_state['state'] == 'SUCCESSFUL': LOG.info( 'get cluster %s state status %s: %s, successful', cluster_id, status, cluster_state ) break - else: - time.sleep(5) - continue - - elif cluster_state['state'] == 'SUCCESSFUL': - deployment_failed = False - LOG.info( - 'get cluster %s state status %s: %s, successful', - cluster_id, status, cluster_state - ) - break - elif cluster_state['state'] == 'ERROR': - deployment_failed = True - LOG.info( - 'get cluster %s state status %s: %s, error', - cluster_id, status, cluster_state - ) - break - - kill_print_proc() - if deployment_failed: - raise RuntimeError("deploy cluster failed") + elif cluster_state['state'] == 'ERROR': + raise RuntimeError( + 'get cluster %s state status %s: %s, error', + (cluster_id, status, cluster_state) + ) + try: + _get_installing_progress() + finally: + # do this twice, make sure process be killed + kill_print_proc() + kill_print_proc() def check_dashboard_links(self, cluster_id): dashboard_url = CONF.dashboard_url |