-rw-r--r--  environments/manila-cephfsnative-config.yaml               | 18
-rwxr-xr-x  extraconfig/tasks/major_upgrade_controller_pacemaker_1.sh  | 35
-rwxr-xr-x  extraconfig/tasks/major_upgrade_controller_pacemaker_2.sh  | 12
-rw-r--r--  extraconfig/tasks/major_upgrade_pacemaker_migrations.sh    | 40
-rw-r--r--  overcloud-resource-registry-puppet.yaml                     |  1
-rw-r--r--  puppet/services/database/mysql.yaml                         |  6
-rw-r--r--  puppet/services/keystone.yaml                               |  1
-rw-r--r--  puppet/services/manila-backend-cephfs.yaml                  | 61
-rw-r--r--  puppet/services/neutron-metadata.yaml                       | 13
-rw-r--r--  puppet/services/pacemaker/database/mysql.yaml               |  9
-rw-r--r--  roles_data.yaml                                             |  1
11 files changed, 167 insertions, 30 deletions
diff --git a/environments/manila-cephfsnative-config.yaml b/environments/manila-cephfsnative-config.yaml
new file mode 100644
index 00000000..4115d8b2
--- /dev/null
+++ b/environments/manila-cephfsnative-config.yaml
@@ -0,0 +1,18 @@
+# A Heat environment file which can be used to enable a
+# a Manila CephFS Native driver backend.
+resource_registry:
+  OS::Tripleo::Services::ManilaApi: ../puppet/services/manila-api.yaml
+  OS::Tripleo::Services::ManilaScheduler: ../puppet/services/manila-scheduler.yaml
+  # Only manila-share is pacemaker managed:
+  OS::Tripleo::Services::ManilaShare: ../puppet/services/pacemaker/manila-share.yaml
+  OS::Tripleo::Services::ManilaBackendCephFs: ../puppet/services/manila-backend-cephfs.yaml
+
+
+parameter_defaults:
+  ManilaCephFSNativeEnableBackend: true
+  ManilaCephFSNativeBackendName: cephfsnative
+  ManilaCephFSNativeDriverHandlesShareServers: false
+  ManilaCephFSNativeCephFSConfPath: '/etc/ceph/cephfs.conf'
+  ManilaCephFSNativeCephFSAuthId: 'manila'
+  ManilaCephFSNativeCephFSClusterName: 'ceph'
+  ManilaCephFSNativeCephFSEnableSnapshots: true
diff --git a/extraconfig/tasks/major_upgrade_controller_pacemaker_1.sh b/extraconfig/tasks/major_upgrade_controller_pacemaker_1.sh
index 2490ce27..0c590a42 100755
--- a/extraconfig/tasks/major_upgrade_controller_pacemaker_1.sh
+++ b/extraconfig/tasks/major_upgrade_controller_pacemaker_1.sh
@@ -19,7 +19,7 @@ STONITH_STATE=$(pcs property show stonith-enabled | grep "stonith-enabled" | awk
 pcs property set stonith-enabled=false
 
 # Migrate to HA NG
-if [ "$(hiera -c /etc/puppet/hiera.yaml bootstrap_nodeid)" = "$(facter hostname)" ]; then
+if [[ -n $(is_bootstrap_node) ]]; then
     migrate_full_to_ng_ha
 fi
 
@@ -29,9 +29,26 @@ fi
 # is going to take a long time because rabbit is down. By having the service stopped
 # systemctl try-restart is a noop
 
-for $service in $(services_to_migrate); do
+for service in $(services_to_migrate); do
     manage_systemd_service stop "${service%%-clone}"
-    check_resource_systemd "${service%%-clone}" stopped 600
+    # So the reason for not reusing check_resource_systemd is that
+    # I have observed systemctl is-active returning unknown with at least
+    # one service that was stopped (See LP 1627254)
+    timeout=600
+    tstart=$(date +%s)
+    tend=$(( $tstart + $timeout ))
+    check_interval=3
+    while (( $(date +%s) < $tend )); do
+      if [[ "$(systemctl is-active ${service%%-clone})" = "active" ]]; then
+        echo "$service still active, sleeping $check_interval seconds."
+        sleep $check_interval
+      else
+        # we do not care if it is inactive, unknown or failed as long as it is
+        # not running
+        break
+      fi
+
+    done
 done
 
 # In case the mysql package is updated, the database on disk must be
@@ -46,7 +63,7 @@ done
 # on mysql package versionning, but this can be overriden manually
 # to support specific upgrade scenario
 
-if [ "$(hiera -c /etc/puppet/hiera.yaml bootstrap_nodeid)" = "$(facter hostname)" ]; then
+if [[ -n $(is_bootstrap_node) ]]; then
     if [ $DO_MYSQL_UPGRADE -eq 1 ]; then
         mysqldump $backup_flags > "$MYSQL_BACKUP_DIR/openstack_database.sql"
         cp -rdp /etc/my.cnf* "$MYSQL_BACKUP_DIR"
@@ -68,7 +85,7 @@ if [ "$(hiera -c /etc/puppet/hiera.yaml bootstrap_nodeid)" = "$(facter hostname)
 fi
 
-# Swift isn't controled by pacemaker
+# Swift isn't controlled by pacemaker
 systemctl_swift stop
 
 tstart=$(date +%s)
@@ -151,5 +168,13 @@ fi
 # Pin messages sent to compute nodes to kilo, these will be upgraded later
 crudini --set /etc/nova/nova.conf upgrade_levels compute "$upgrade_level_nova_compute"
 
+# https://bugzilla.redhat.com/show_bug.cgi?id=1284047
+# Change-Id: Ib3f6c12ff5471e1f017f28b16b1e6496a4a4b435
+crudini --set /etc/ceilometer/ceilometer.conf DEFAULT rpc_backend rabbit
+# https://bugzilla.redhat.com/show_bug.cgi?id=1284058
+# Ifd1861e3df46fad0e44ff9b5cbd58711bbc87c97 Swift Ceilometer middleware no longer exists
+crudini --set /etc/swift/proxy-server.conf pipeline:main pipeline "catch_errors healthcheck cache ratelimit tempurl formpost authtoken keystone staticweb proxy-logging proxy-server"
+# LP: 1615035, required only for M/N upgrade.
+crudini --set /etc/nova/nova.conf DEFAULT scheduler_host_manager host_manager
 
 crudini --set /etc/sahara/sahara.conf DEFAULT plugins ambari,cdh,mapr,vanilla,spark,storm
diff --git a/extraconfig/tasks/major_upgrade_controller_pacemaker_2.sh b/extraconfig/tasks/major_upgrade_controller_pacemaker_2.sh
index 6bb2fa73..6055a3f9 100755
--- a/extraconfig/tasks/major_upgrade_controller_pacemaker_2.sh
+++ b/extraconfig/tasks/major_upgrade_controller_pacemaker_2.sh
@@ -32,6 +32,13 @@ fi
 start_or_enable_service galera
 check_resource galera started 600
+# We need mongod which is now a systemd service up and running before calling
+# ceilometer-dbsync. There is still a race here: mongod might not be up on all nodes
+# so ceilometer-dbsync will fail a couple of times before that. As it retries indefinitely
+# we should be good.
+# Due to LP Bug https://bugs.launchpad.net/tripleo/+bug/1627254 am using systemctl directly atm
+systemctl start mongod
+check_resource mongod started 600
 
 if [[ -n $(is_bootstrap_node) ]]; then
     tstart=$(date +%s)
@@ -53,6 +60,7 @@ if [[ -n $(is_bootstrap_node) ]]; then
     keystone-manage db_sync
     neutron-db-manage --config-file /etc/neutron/neutron.conf --config-file /etc/neutron/plugin.ini upgrade head
     nova-manage db sync
+    nova-manage api_db sync
     #TODO(marios):someone from sahara needs to check this:
     # sahara-db-manage --config-file /etc/sahara/sahara.conf upgrade head
 fi
@@ -68,7 +76,7 @@ systemctl_swift start
 # We need to start the systemd services we explicitely stopped at step _1.sh
 # FIXME: Should we let puppet during the convergence step do the service enabling or
 # should we add it here?
-for $service in $(services_to_migrate); do
-    manage_systemd_service stop "${service%%-clone}"
+for service in $(services_to_migrate); do
+    manage_systemd_service start "${service%%-clone}"
     check_resource_systemd "${service%%-clone}" started 600
 done
diff --git a/extraconfig/tasks/major_upgrade_pacemaker_migrations.sh b/extraconfig/tasks/major_upgrade_pacemaker_migrations.sh
index b8c5321b..d974bb79 100644
--- a/extraconfig/tasks/major_upgrade_pacemaker_migrations.sh
+++ b/extraconfig/tasks/major_upgrade_pacemaker_migrations.sh
@@ -109,10 +109,11 @@ function services_to_migrate {
 #    during the conversion
 # 2. Remove all the colocation constraints and then the ordering constraints, except the
 #    ones related to haproxy/VIPs which exist in Newton as well
-# 3. Remove all the resources that won't be managed by pacemaker in newton. Note that they
-#    will show up as ORPHANED but they will keep running normally via systemd. They will be
-#    enabled to start at boot by puppet during the converge step
-# 4. Take the cluster out of maintenance-mode and do a resource cleanup
+# 3. Take the cluster out of maintenance-mode and do a resource cleanup
+# 4. Remove all the resources that won't be managed by pacemaker in newton. The
+#    outcome will be
+#    that they are stopped and removed from pacemakers control
+# 5. Do a resource cleanup to make sure the cluster is in a clean state
 function migrate_full_to_ng_ha {
     if [[ -n $(pcmk_running) ]]; then
         pcs property set maintenance-mode=true
@@ -135,32 +136,35 @@ function migrate_full_to_ng_ha {
             log_debug "Deleting ordering constraint $constraint from CIB"
             pcs constraint remove "$constraint"
         done
+        # At this stage all the pacemaker resources are removed from the CIB.
+        # Once we remove the maintenance-mode those systemd resources will keep
+        # on running. They shall be systemd enabled via the puppet converge
+        # step later on
+        pcs property set maintenance-mode=false
         # At this stage there are no constraints whatsoever except the haproxy/ip ones
-        # which we want to keep. We now delete each resource that will move to systemd
-        # Note that the corresponding systemd resource will stay running, which means that
-        # later when we do the "yum update", things will be a bit slower because each
-        # "systemctl try-restart <service>" is not a no-op any longer because the service is up
-        # and running and it will be restarted with rabbitmq being down.
+        # which we want to keep. We now disable and then delete each resource
+        # that will move to systemd.
+        # We want the systemd resources be stopped before doing "yum update",
+        # that way "systemctl try-restart <service>" is no-op because the
+        # service was down already
         PCS_STATUS_OUTPUT="$(pcs status)"
         for resource in $(services_to_migrate) "delay-clone" "openstack-core-clone"; do
             if echo "$PCS_STATUS_OUTPUT" | grep "$resource"; then
                 log_debug "Deleting $resource from the CIB"
-
-                # We need to add --force because the cluster is in maintenance mode and the resource
-                # is unmanaged. The if serves to make this idempotent
+                if ! pcs resource disable "$resource" --wait=600; then
+                    echo_error "ERROR: resource $resource failed to be disabled"
+                    exit 1
+                fi
                 pcs resource delete --force "$resource"
             else
                 log_debug "Service $service not found as a pacemaker resource, not trying to delete."
             fi
         done
-        # At this stage all the pacemaker resources are removed from the CIB. Once we remove the
-        # maintenance-mode those systemd resources will keep on running. They shall be systemd enabled
-        # via the puppet converge step later on
-        pcs property set maintenance-mode=false
-        # We need to do a pcs resource cleanup here + crm_resource --wait to make sure the
-        # cluster is in a clean state before we stop everything, upgrade and restart everything
+        # We need to do a pcs resource cleanup here + crm_resource --wait to
+        # make sure the cluster is in a clean state before we stop everything,
+        # upgrade and restart everything
         pcs resource cleanup
         # We are making sure here that the cluster is stable before proceeding
         if ! timeout -k 10 600 crm_resource --wait; then
diff --git a/overcloud-resource-registry-puppet.yaml b/overcloud-resource-registry-puppet.yaml
index 505f033d..f0a6035a 100644
--- a/overcloud-resource-registry-puppet.yaml
+++ b/overcloud-resource-registry-puppet.yaml
@@ -220,6 +220,7 @@ resource_registry:
   OS::Tripleo::Services::ManilaShare: OS::Heat::None
   OS::Tripleo::Services::ManilaBackendGeneric: OS::Heat::None
   OS::Tripleo::Services::ManilaBackendNetapp: OS::Heat::None
+  OS::Tripleo::Services::ManilaBackendCephFs: OS::Heat::None
   OS::TripleO::Services::ComputeNeutronL3Agent: OS::Heat::None
   OS::TripleO::Services::ComputeNeutronMetadataAgent: OS::Heat::None
   OS::TripleO::Services::AodhApi: puppet/services/aodh-api.yaml
diff --git a/puppet/services/database/mysql.yaml b/puppet/services/database/mysql.yaml
index b0eea481..094a7c9f 100644
--- a/puppet/services/database/mysql.yaml
+++ b/puppet/services/database/mysql.yaml
@@ -74,5 +74,11 @@ outputs:
         # internal_api_uri -> [IP]
         # internal_api_subnet - > IP/CIDR
         mysql_bind_host: {get_param: [ServiceNetMap, MysqlNetwork]}
+        tripleo::profile::base::database::mysql::bind_address:
+          str_replace:
+            template:
+              '"%{::fqdn_$NETWORK}"'
+            params:
+              $NETWORK: {get_param: [ServiceNetMap, MysqlNetwork]}
       step_config: |
         include ::tripleo::profile::base::database::mysql
diff --git a/puppet/services/keystone.yaml b/puppet/services/keystone.yaml
index e358930b..b7a807fa 100644
--- a/puppet/services/keystone.yaml
+++ b/puppet/services/keystone.yaml
@@ -118,7 +118,6 @@ outputs:
       logging_groups:
         - keystone
       config_settings:
-        config_settings:
           map_merge:
             - get_attr: [ApacheServiceBase, role_data, config_settings]
             - keystone::database_connection:
diff --git a/puppet/services/manila-backend-cephfs.yaml b/puppet/services/manila-backend-cephfs.yaml
new file mode 100644
index 00000000..89a36d21
--- /dev/null
+++ b/puppet/services/manila-backend-cephfs.yaml
@@ -0,0 +1,61 @@
+heat_template_version: 2016-04-08
+
+description: >
+  Openstack Manila Cephfs backend
+
+parameters:
+  ServiceNetMap:
+    default: {}
+    description: Mapping of service_name -> network name. Typically set
+                 via parameter_defaults in the resource registry. This
+                 mapping overrides those in ServiceNetMapDefaults.
+    type: json
+  DefaultPasswords:
+    default: {}
+    type: json
+  EndpointMap:
+    default: {}
+    description: Mapping of service endpoint -> protocol. Typically set
+                 via parameter_defaults in the resource registry.
+    type: json
+  # CephFS Native backend params:
+  ManilaCephFSNativeEnableBackend:
+    type: boolean
+    default: false
+  ManilaCephFSNativeBackendName:
+    type: string
+    default: cephfsnative
+  ManilaCephFSNativeDriverHandlesShareServers:
+    type: boolean
+    default: false
+  ManilaCephFSNativeShareBackendName:
+    type: string
+    default: 'cephfs'
+  ManilaCephFSNativeCephFSConfPath:
+    type: string
+    default: '/etc/ceph/cephfs.conf'
+  ManilaCephFSNativeCephFSAuthId:
+    type: string
+    default: 'manila'
+  ManilaCephFSNativeCephFSClusterName:
+    type: string
+    default: 'ceph'
+  ManilaCephFSNativeCephFSEnableSnapshots:
+    type: boolean
+    default: true
+
+outputs:
+  role_data:
+    description: Role data for the Manila Cephfs backend.
+    value:
+      service_name: manila_backend_cephfs
+      config_settings:
+        manila::backend::cephfsnative::enable_backend: {get_param: ManilaCephFSNativeEnableBackend}
+        manila::backend::cephfsnative::title: {get_param: ManilaCephFSNativeBackendName}
+        manila::backend::cephfsnative::driver_handles_share_servers: {get_param: ManilaCephFSNativeDriverHandlesShareServers}
+        manila::backend::cephfsnative::share_backend_name: {get_param: ManilaCephFSNativeShareBackendName}
+        manila::backend::cephfsnative::cephfs_conf_path: {get_param: ManilaCephFSNativeCephFSConfPath}
+        manila::backend::cephfsnative::cephfs_auth_id: {get_param: ManilaCephFSNativeCephFSAuthId}
+        manila::backend::cephfsnative::cephfs_cluster_name: {get_param: ManilaCephFSNativeCephFSClusterName}
+        manila::backend::cephfsnative::cephfs_enable_snapshots: {get_param: ManilaCephFSNativeCephFSEnableSnapshots}
+      step_config:
diff --git a/puppet/services/neutron-metadata.yaml b/puppet/services/neutron-metadata.yaml
index a124d4a1..8be4c6d6 100644
--- a/puppet/services/neutron-metadata.yaml
+++ b/puppet/services/neutron-metadata.yaml
@@ -23,9 +23,16 @@ parameters:
     type: string
     hidden: true
   NeutronWorkers:
-    default: 0
-    description: Number of workers for Neutron service.
-    type: number
+    default: ''
+    description: |
+      Sets the number of worker processes for the neutron metadata agent. The
+      default value results in the configuration being left unset and a
+      system-dependent default will be chosen (usually the number of
+      processors). Please note that this can result in a large number of
+      processes and memory consumption on systems with a large core count. On
+      such systems it is recommended that a non-default value be selected that
+      matches the load requirements.
+    type: string
   NeutronPassword:
     description: The password for the neutron service and db account, used by neutron agents.
     type: string
diff --git a/puppet/services/pacemaker/database/mysql.yaml b/puppet/services/pacemaker/database/mysql.yaml
index d555ed0a..f6d4be20 100644
--- a/puppet/services/pacemaker/database/mysql.yaml
+++ b/puppet/services/pacemaker/database/mysql.yaml
@@ -35,6 +35,13 @@ outputs:
     value:
       service_name: mysql
       config_settings:
-        get_attr: [MysqlBase, role_data, config_settings]
+        map_merge:
+          - get_attr: [MysqlBase, role_data, config_settings]
+          - tripleo::profile::pacemaker::database::mysql::bind_address:
+              str_replace:
+                template:
+                  '"%{::fqdn_$NETWORK}"'
+                params:
+                  $NETWORK: {get_param: [ServiceNetMap, MysqlNetwork]}
       step_config: |
         include ::tripleo::profile::pacemaker::database::mysql
diff --git a/roles_data.yaml b/roles_data.yaml
index e052aeef..fe98d827 100644
--- a/roles_data.yaml
+++ b/roles_data.yaml
@@ -77,6 +77,7 @@
     - OS::Tripleo::Services::ManilaScheduler
     - OS::Tripleo::Services::ManilaBackendGeneric
     - OS::Tripleo::Services::ManilaBackendNetapp
+    - OS::Tripleo::Services::ManilaBackendCephFs
    - OS::Tripleo::Services::ManilaShare
     - OS::TripleO::Services::AodhApi
     - OS::TripleO::Services::AodhEvaluator
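
Usage note: the new environments/manila-cephfsnative-config.yaml file is consumed like any other Heat environment, by passing it to the deploy command with -e. A minimal sketch, assuming the templates live in the usual packaged location (/usr/share/openstack-tripleo-heat-templates) and that a CephFS-capable Ceph cluster is already available to the overcloud:

  openstack overcloud deploy --templates \
    -e /usr/share/openstack-tripleo-heat-templates/environments/manila-cephfsnative-config.yaml

The parameter_defaults in that file (backend name, cephx auth id, cluster name, snapshot support) can be overridden in a later -e file if the deployment needs values other than the defaults shown above.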