diff options
Diffstat (limited to 'extraconfig')
23 files changed, 890 insertions, 62 deletions
diff --git a/extraconfig/all_nodes/swap-partition.yaml b/extraconfig/all_nodes/swap-partition.yaml new file mode 100644 index 00000000..89a2adb0 --- /dev/null +++ b/extraconfig/all_nodes/swap-partition.yaml @@ -0,0 +1,90 @@ +heat_template_version: 2014-10-16 + +description: > + Extra config to add swap space to nodes. + +# Parameters passed from the parent template - note if you maintain +# out-of-tree templates they may require additional parameters if the +# in-tree templates add a new role. +parameters: + controller_servers: + type: json + compute_servers: + type: json + blockstorage_servers: + type: json + objectstorage_servers: + type: json + cephstorage_servers: + type: json + swap_partition_label: + type: string + description: Swap partition label + default: 'swap1' + + +resources: + + SwapConfig: + type: OS::Heat::SoftwareConfig + properties: + group: script + config: | + #!/bin/bash + set -eux + swap_partition=$(realpath /dev/disk/by-label/$swap_partition_label) + swapon $swap_partition + echo "$swap_partition swap swap defaults 0 0" >> /etc/fstab + inputs: + - name: swap_partition_label + description: Swap partition label + default: 'swap1' + + ControllerSwapDeployment: + type: OS::Heat::SoftwareDeploymentGroup + properties: + config: {get_resource: SwapConfig} + servers: {get_param: controller_servers} + input_values: + swap_partition_label: {get_param: swap_partition_label} + actions: ["CREATE"] + + ComputeSwapDeployment: + type: OS::Heat::SoftwareDeploymentGroup + properties: + config: {get_resource: SwapConfig} + servers: {get_param: compute_servers} + input_values: + swap_partition_label: {get_param: swap_partition_label} + actions: ["CREATE"] + + BlockStorageSwapDeployment: + type: OS::Heat::SoftwareDeploymentGroup + properties: + config: {get_resource: SwapConfig} + servers: {get_param: blockstorage_servers} + input_values: + swap_partition_label: {get_param: swap_partition_label} + actions: ["CREATE"] + + ObjectStorageSwapDeployment: + type: OS::Heat::SoftwareDeploymentGroup + properties: + config: {get_resource: SwapConfig} + servers: {get_param: objectstorage_servers} + input_values: + swap_partition_label: {get_param: swap_partition_label} + actions: ["CREATE"] + + CephStorageSwapDeployment: + type: OS::Heat::SoftwareDeploymentGroup + properties: + config: {get_resource: SwapConfig} + servers: {get_param: cephstorage_servers} + input_values: + swap_partition_label: {get_param: swap_partition_label} + actions: ["CREATE"] + +outputs: + config_identifier: + value: none diff --git a/extraconfig/all_nodes/swap.yaml b/extraconfig/all_nodes/swap.yaml new file mode 100644 index 00000000..374b1e5d --- /dev/null +++ b/extraconfig/all_nodes/swap.yaml @@ -0,0 +1,108 @@ +heat_template_version: 2014-10-16 + +description: > + Extra config to add swap space to nodes. + +# Parameters passed from the parent template - note if you maintain +# out-of-tree templates they may require additional parameters if the +# in-tree templates add a new role. +parameters: + controller_servers: + type: json + compute_servers: + type: json + blockstorage_servers: + type: json + objectstorage_servers: + type: json + cephstorage_servers: + type: json + swap_size_megabytes: + type: string + description: Amount of swap space to allocate in megabytes + default: '4096' + swap_path: + type: string + description: Full path to location of swap file + default: '/swap' + + +resources: + + SwapConfig: + type: OS::Heat::SoftwareConfig + properties: + group: script + config: | + #!/bin/bash + set -eux + if [ ! -f $swap_path ]; then + dd if=/dev/zero of=$swap_path count=$swap_size_megabytes bs=1M + chmod 0600 $swap_path + mkswap $swap_path + swapon $swap_path + else + echo "$swap_path already exists" + fi + echo "$swap_path swap swap defaults 0 0" >> /etc/fstab + inputs: + - name: swap_size_megabytes + description: Amount of swap space to allocate in megabytes + default: '4096' + - name: swap_path + description: Full path to location of swap file + default: '/swap' + + ControllerSwapDeployment: + type: OS::Heat::SoftwareDeploymentGroup + properties: + config: {get_resource: SwapConfig} + servers: {get_param: controller_servers} + input_values: + swap_size_megabytes: {get_param: swap_size_megabytes} + swap_path: {get_param: swap_path} + actions: ["CREATE"] + + ComputeSwapDeployment: + type: OS::Heat::SoftwareDeploymentGroup + properties: + config: {get_resource: SwapConfig} + servers: {get_param: compute_servers} + input_values: + swap_size_megabytes: {get_param: swap_size_megabytes} + swap_path: {get_param: swap_path} + actions: ["CREATE"] + + BlockStorageSwapDeployment: + type: OS::Heat::SoftwareDeploymentGroup + properties: + config: {get_resource: SwapConfig} + servers: {get_param: blockstorage_servers} + input_values: + swap_size_megabytes: {get_param: swap_size_megabytes} + swap_path: {get_param: swap_path} + actions: ["CREATE"] + + ObjectStorageSwapDeployment: + type: OS::Heat::SoftwareDeploymentGroup + properties: + config: {get_resource: SwapConfig} + servers: {get_param: objectstorage_servers} + input_values: + swap_size_megabytes: {get_param: swap_size_megabytes} + swap_path: {get_param: swap_path} + actions: ["CREATE"] + + CephStorageSwapDeployment: + type: OS::Heat::SoftwareDeploymentGroup + properties: + config: {get_resource: SwapConfig} + servers: {get_param: cephstorage_servers} + input_values: + swap_size_megabytes: {get_param: swap_size_megabytes} + swap_path: {get_param: swap_path} + actions: ["CREATE"] + +outputs: + config_identifier: + value: none diff --git a/extraconfig/pre_deploy/rhel-registration/environment-rhel-registration.yaml b/extraconfig/pre_deploy/rhel-registration/environment-rhel-registration.yaml index 70437a8a..c388358a 100644 --- a/extraconfig/pre_deploy/rhel-registration/environment-rhel-registration.yaml +++ b/extraconfig/pre_deploy/rhel-registration/environment-rhel-registration.yaml @@ -20,3 +20,4 @@ parameter_defaults: rhel_reg_user: "" rhel_reg_type: "" rhel_reg_method: "" + rhel_reg_sat_repo: "rhel-7-server-satellite-tools-6.1-rpms" diff --git a/extraconfig/pre_deploy/rhel-registration/rhel-registration.yaml b/extraconfig/pre_deploy/rhel-registration/rhel-registration.yaml index a884bdae..7c65bd8b 100644 --- a/extraconfig/pre_deploy/rhel-registration/rhel-registration.yaml +++ b/extraconfig/pre_deploy/rhel-registration/rhel-registration.yaml @@ -43,6 +43,8 @@ parameters: type: string rhel_reg_method: type: string + rhel_reg_sat_repo: + type: string resources: @@ -68,6 +70,7 @@ resources: - name: REG_USER - name: REG_TYPE - name: REG_METHOD + - name: REG_SAT_REPO config: {get_file: scripts/rhel-registration} RHELRegistrationDeployment: @@ -95,6 +98,7 @@ resources: REG_USER: {get_param: rhel_reg_user} REG_TYPE: {get_param: rhel_reg_type} REG_METHOD: {get_param: rhel_reg_method} + REG_SAT_REPO: {get_param: rhel_reg_sat_repo} RHELUnregistration: type: OS::Heat::SoftwareConfig diff --git a/extraconfig/pre_deploy/rhel-registration/scripts/rhel-registration b/extraconfig/pre_deploy/rhel-registration/scripts/rhel-registration index cbbd6a1d..1c9acd2b 100644 --- a/extraconfig/pre_deploy/rhel-registration/scripts/rhel-registration +++ b/extraconfig/pre_deploy/rhel-registration/scripts/rhel-registration @@ -13,8 +13,9 @@ fi opts= attach_opts= +sat5_opts= repos="repos --enable rhel-7-server-rpms" -satellite_repo="rhel-7-server-rh-common-rpms" +satellite_repo=${REG_SAT_REPO} if [ -n "${REG_AUTO_ATTACH:-}" ]; then opts="$opts --auto-attach" @@ -49,6 +50,7 @@ fi if [ -n "${REG_FORCE:-}" ]; then opts="$opts --force" + sat5_opts="$sat5_opts --force" fi if [ -n "${REG_SERVER_URL:-}" ]; then @@ -57,6 +59,7 @@ fi if [ -n "${REG_ACTIVATION_KEY:-}" ]; then opts="$opts --activationkey=$REG_ACTIVATION_KEY" + sat5_opts="$sat5_opts --activationkey=$REG_ACTIVATION_KEY" if [ -z "${REG_ORG:-}" ]; then echo "WARNING: REG_ACTIVATION_KEY set without REG_ORG." @@ -75,10 +78,12 @@ fi if [ -n "${REG_MACHINE_NAME:-}" ]; then opts="$opts --name $REG_MACHINE_NAME" + sat5_opts="$sat5_opts --profilename=$REG_MACHINE_NAME" fi if [ -n "${REG_ORG:-}" ]; then opts="$opts --org=$REG_ORG" + sat5_opts="$sat5_opts --systemorgid=$REG_ORG" fi if [ -n "${REG_REPOS:-}" ]; then @@ -91,6 +96,20 @@ if [ -n "${REG_TYPE:-}" ]; then opts="$opts --type=$REG_TYPE" fi +function detect_satellite_version { + ping_api=$REG_SAT_URL/katello/api/ping + if curl -L -k -s -D - -o /dev/null $ping_api | grep "200 OK"; then + echo Satellite 6 detected at $REG_SAT_URL + satellite_version=6 + elif curl -L -k -s -D - -o /dev/null $REG_SAT_URL/rhn/Login.do | grep "200 OK"; then + echo Satellite 5 detected at $REG_SAT_URL + satellite_version=5 + else + echo No Satellite detected at $REG_SAT_URL + exit 1 + fi +} + case "${REG_METHOD:-}" in portal) subscription-manager register $opts @@ -100,13 +119,22 @@ case "${REG_METHOD:-}" in subscription-manager $repos ;; satellite) - repos="$repos --enable ${satellite_repo}" - rpm -Uvh "$REG_SAT_URL/pub/katello-ca-consumer-latest.noarch.rpm" || true - subscription-manager register $opts - subscription-manager $repos - yum install -y katello-agent || true # needed for errata reporting to satellite6 - katello-package-upload - subscription-manager repos --disable ${satellite_repo} + detect_satellite_version + if [ "$satellite_version" = "6" ]; then + repos="$repos --enable ${satellite_repo}" + curl -L -k -O "$REG_SAT_URL/pub/katello-ca-consumer-latest.noarch.rpm" + rpm -Uvh katello-ca-consumer-latest.noarch.rpm || true + subscription-manager register $opts + subscription-manager $repos + yum install -y katello-agent || true # needed for errata reporting to satellite6 + katello-package-upload + subscription-manager repos --disable ${satellite_repo} + else + pushd /usr/share/rhn/ + curl -k -O $REG_SAT_URL/pub/RHN-ORG-TRUSTED-SSL-CERT + popd + rhnreg_ks --serverUrl=$REG_SAT_URL/XMLRPC $sat5_opts + fi ;; disable) echo "Disabling RHEL registration" diff --git a/extraconfig/pre_deploy/rhel-registration/scripts/rhel-unregistration b/extraconfig/pre_deploy/rhel-registration/scripts/rhel-unregistration index 1e72e0a6..916f97e3 100644 --- a/extraconfig/pre_deploy/rhel-registration/scripts/rhel-unregistration +++ b/extraconfig/pre_deploy/rhel-registration/scripts/rhel-unregistration @@ -7,6 +7,8 @@ case "${REG_METHOD:-}" in portal|satellite) # Allow unregistration to fail. # We don't want to fail stack deletes if unregistration fails. + # Note that this will be a no-op on satellite 5, which doesn't support + # unregistering from the cli. subscription-manager unregister || true subscription-manager clean || true ;; diff --git a/extraconfig/tasks/major_upgrade_block_storage.sh b/extraconfig/tasks/major_upgrade_block_storage.sh new file mode 100644 index 00000000..07666245 --- /dev/null +++ b/extraconfig/tasks/major_upgrade_block_storage.sh @@ -0,0 +1,8 @@ +#!/bin/bash +# +# This runs an upgrade of Cinder Block Storage nodes. +# +set -eu + +yum -y install python-zaqarclient # needed for os-collect-config +yum -y -q update diff --git a/extraconfig/tasks/major_upgrade_ceph_storage.sh b/extraconfig/tasks/major_upgrade_ceph_storage.sh new file mode 100644 index 00000000..de42b16d --- /dev/null +++ b/extraconfig/tasks/major_upgrade_ceph_storage.sh @@ -0,0 +1,35 @@ +#!/bin/bash +# +# This delivers the ceph-storage upgrade script to be invoked as part of the tripleo +# major upgrade workflow. +# +set -eu + +UPGRADE_SCRIPT=/root/tripleo_upgrade_node.sh + +cat > $UPGRADE_SCRIPT << ENDOFCAT +### DO NOT MODIFY THIS FILE +### This file is automatically delivered to the ceph-storage nodes as part of the +### tripleo upgrades workflow + + +function systemctl_ceph { + action=\$1 + systemctl \$action ceph +} + +# "so that mirrors aren't rebalanced as if the OSD died" - gfidente +ceph osd set noout + +systemctl_ceph stop +yum -y install python-zaqarclient # needed for os-collect-config +yum -y update +systemctl_ceph start + +ceph osd unset noout + +ENDOFCAT + +# ensure the permissions are OK +chmod 0755 $UPGRADE_SCRIPT + diff --git a/extraconfig/tasks/major_upgrade_compute.sh b/extraconfig/tasks/major_upgrade_compute.sh new file mode 100644 index 00000000..78628c8c --- /dev/null +++ b/extraconfig/tasks/major_upgrade_compute.sh @@ -0,0 +1,26 @@ +#!/bin/bash +# +# This delivers the compute upgrade script to be invoked as part of the tripleo +# major upgrade workflow. +# +set -eu + +UPGRADE_SCRIPT=/root/tripleo_upgrade_node.sh + +cat > $UPGRADE_SCRIPT << ENDOFCAT +### DO NOT MODIFY THIS FILE +### This file is automatically delivered to the compute nodes as part of the +### tripleo upgrades workflow + +# pin nova to kilo (messaging +-1) for the nova-compute service + +crudini --set /etc/nova/nova.conf upgrade_levels compute $upgrade_level_nova_compute + +yum -y install python-zaqarclient # needed for os-collect-config +yum -y update + +ENDOFCAT + +# ensure the permissions are OK +chmod 0755 $UPGRADE_SCRIPT + diff --git a/extraconfig/tasks/major_upgrade_controller_pacemaker_1.sh b/extraconfig/tasks/major_upgrade_controller_pacemaker_1.sh new file mode 100755 index 00000000..bf2ee330 --- /dev/null +++ b/extraconfig/tasks/major_upgrade_controller_pacemaker_1.sh @@ -0,0 +1,55 @@ +#!/bin/bash + +set -eu + +cluster_sync_timeout=600 + +if pcs status 2>&1 | grep -E '(cluster is not currently running)|(OFFLINE:)'; then + echo_error "ERROR: upgrade cannot start with some cluster nodes being offline" + exit 1 +fi + +if [ "$(hiera -c /etc/puppet/hiera.yaml bootstrap_nodeid)" = "$(facter hostname)" ]; then + pcs resource disable httpd + check_resource httpd stopped 1800 + if pcs status | grep openstack-keystone; then + pcs resource disable openstack-keystone + check_resource openstack-keystone stopped 1800 + fi + pcs resource disable redis + check_resource redis stopped 600 + pcs resource disable mongod + check_resource mongod stopped 600 + pcs resource disable rabbitmq + check_resource rabbitmq stopped 600 + pcs resource disable memcached + check_resource memcached stopped 600 + pcs resource disable galera + check_resource galera stopped 600 + pcs cluster stop --all +fi + +# Swift isn't controled by pacemaker +systemctl_swift stop + +tstart=$(date +%s) +while systemctl is-active pacemaker; do + sleep 5 + tnow=$(date +%s) + if (( tnow-tstart > cluster_sync_timeout )) ; then + echo_error "ERROR: cluster shutdown timed out" + exit 1 + fi +done + +yum -y install python-zaqarclient # needed for os-collect-config +yum -y -q update + +# Pin messages sent to compute nodes to kilo, these will be upgraded later +crudini --set /etc/nova/nova.conf upgrade_levels compute "$upgrade_level_nova_compute" +# https://bugzilla.redhat.com/show_bug.cgi?id=1284047 +# Change-Id: Ib3f6c12ff5471e1f017f28b16b1e6496a4a4b435 +crudini --set /etc/ceilometer/ceilometer.conf DEFAULT rpc_backend rabbit +# https://bugzilla.redhat.com/show_bug.cgi?id=1284058 +# Ifd1861e3df46fad0e44ff9b5cbd58711bbc87c97 Swift Ceilometer middleware no longer exists +crudini --set /etc/swift/proxy-server.conf pipeline:main pipeline "catch_errors healthcheck cache ratelimit tempurl formpost authtoken keystone staticweb proxy-logging proxy-server" diff --git a/extraconfig/tasks/major_upgrade_controller_pacemaker_2.sh b/extraconfig/tasks/major_upgrade_controller_pacemaker_2.sh new file mode 100755 index 00000000..10bea573 --- /dev/null +++ b/extraconfig/tasks/major_upgrade_controller_pacemaker_2.sh @@ -0,0 +1,67 @@ +#!/bin/bash + +set -eu + +cluster_form_timeout=600 +cluster_settle_timeout=600 +galera_sync_timeout=600 + +if [ "$(hiera -c /etc/puppet/hiera.yaml bootstrap_nodeid)" = "$(facter hostname)" ]; then + pcs cluster start --all + + tstart=$(date +%s) + while pcs status 2>&1 | grep -E '(cluster is not currently running)|(OFFLINE:)'; do + sleep 5 + tnow=$(date +%s) + if (( tnow-tstart > cluster_form_timeout )) ; then + echo_error "ERROR: timed out forming the cluster" + exit 1 + fi + done + + if ! timeout -k 10 $cluster_settle_timeout crm_resource --wait; then + echo_error "ERROR: timed out waiting for cluster to finish transition" + exit 1 + fi + + pcs resource enable galera + check_resource galera started 600 + pcs resource enable mongod + check_resource mongod started 600 + + tstart=$(date +%s) + while ! clustercheck; do + sleep 5 + tnow=$(date +%s) + if (( tnow-tstart > galera_sync_timeout )) ; then + echo_error "ERROR galera sync timed out" + exit 1 + fi + done + + # Run all the db syncs + # TODO: check if this can be triggered in puppet and removed from here + ceilometer-dbsync --config-file=/etc/ceilometer/ceilometer.conf + cinder-manage db sync + glance-manage --config-file=/etc/glance/glance-registry.conf db_sync + heat-manage --config-file /etc/heat/heat.conf db_sync + keystone-manage db_sync + neutron-db-manage --config-file /etc/neutron/neutron.conf --config-file /etc/neutron/plugin.ini upgrade head + nova-manage db sync + + pcs resource enable memcached + check_resource memcached started 600 + pcs resource enable rabbitmq + check_resource rabbitmq started 600 + pcs resource enable redis + check_resource redis started 600 + if pcs status | grep openstack-keystone; then + pcs resource enable openstack-keystone + check_resource openstack-keystone started 1800 + fi + pcs resource enable httpd + check_resource httpd started 1800 +fi + +# Swift isn't controled by heat +systemctl_swift start diff --git a/extraconfig/tasks/major_upgrade_object_storage.sh b/extraconfig/tasks/major_upgrade_object_storage.sh new file mode 100644 index 00000000..931f4f42 --- /dev/null +++ b/extraconfig/tasks/major_upgrade_object_storage.sh @@ -0,0 +1,39 @@ +#!/bin/bash +# +# This delivers the swift-storage upgrade script to be invoked as part of the tripleo +# major upgrade workflow. +# +set -eu + +UPGRADE_SCRIPT=/root/tripleo_upgrade_node.sh + +cat > $UPGRADE_SCRIPT << ENDOFCAT +### DO NOT MODIFY THIS FILE +### This file is automatically delivered to the swift-storage nodes as part of the +### tripleo upgrades workflow + + +function systemctl_swift { + action=\$1 + for S in openstack-swift-account-auditor openstack-swift-account-reaper openstack-swift-account-replicator openstack-swift-account \ + openstack-swift-container-auditor openstack-swift-container-replicator openstack-swift-container-updater openstack-swift-container \ + openstack-swift-object-auditor openstack-swift-object-replicator openstack-swift-object-updater openstack-swift-object; do + systemctl \$action \$S + done +} + + +systemctl_swift stop + +yum -y install python-zaqarclient # needed for os-collect-config +yum -y update + +systemctl_swift start + + + +ENDOFCAT + +# ensure the permissions are OK +chmod 0755 $UPGRADE_SCRIPT + diff --git a/extraconfig/tasks/major_upgrade_pacemaker.yaml b/extraconfig/tasks/major_upgrade_pacemaker.yaml new file mode 100644 index 00000000..4af3186c --- /dev/null +++ b/extraconfig/tasks/major_upgrade_pacemaker.yaml @@ -0,0 +1,85 @@ +heat_template_version: 2014-10-16 +description: 'Upgrade for Pacemaker deployments' + +parameters: + controller_servers: + type: json + compute_servers: + type: json + blockstorage_servers: + type: json + objectstorage_servers: + type: json + cephstorage_servers: + type: json + input_values: + type: json + description: input values for the software deployments + + UpgradeLevelNovaCompute: + type: string + description: Nova Compute upgrade level + default: '' + +resources: + # TODO(jistr): for Mitaka->Newton upgrades and further we can use + # map_merge with input_values instead of feeding params into scripts + # via str_replace on bash snippets + + ControllerPacemakerUpgradeConfig_Step1: + type: OS::Heat::SoftwareConfig + properties: + group: script + config: + list_join: + - '' + - - str_replace: + template: | + #!/bin/bash + upgrade_level_nova_compute='UPGRADE_LEVEL_NOVA_COMPUTE' + params: + UPGRADE_LEVEL_NOVA_COMPUTE: {get_param: UpgradeLevelNovaCompute} + - get_file: pacemaker_common_functions.sh + - get_file: major_upgrade_pacemaker_migrations.sh + - get_file: major_upgrade_controller_pacemaker_1.sh + + ControllerPacemakerUpgradeDeployment_Step1: + type: OS::Heat::SoftwareDeploymentGroup + properties: + servers: {get_param: controller_servers} + config: {get_resource: ControllerPacemakerUpgradeConfig_Step1} + input_values: {get_param: input_values} + + BlockStorageUpgradeConfig: + type: OS::Heat::SoftwareConfig + depends_on: ControllerPacemakerUpgradeDeployment_Step1 + properties: + group: script + config: {get_file: major_upgrade_block_storage.sh} + + BlockStorageUpgradeDeployment: + type: OS::Heat::SoftwareDeploymentGroup + properties: + servers: {get_param: blockstorage_servers} + config: {get_resource: BlockStorageUpgradeConfig} + input_values: {get_param: input_values} + + ControllerPacemakerUpgradeConfig_Step2: + type: OS::Heat::SoftwareConfig + properties: + group: script + config: + list_join: + - '' + - - get_file: pacemaker_common_functions.sh + - get_file: major_upgrade_pacemaker_migrations.sh + - get_file: major_upgrade_controller_pacemaker_2.sh + + ControllerPacemakerUpgradeDeployment_Step2: + type: OS::Heat::SoftwareDeploymentGroup + depends_on: BlockStorageUpgradeDeployment + properties: + servers: {get_param: controller_servers} + config: {get_resource: ControllerPacemakerUpgradeConfig_Step2} + input_values: {get_param: input_values} + diff --git a/extraconfig/tasks/major_upgrade_pacemaker_init.yaml b/extraconfig/tasks/major_upgrade_pacemaker_init.yaml new file mode 100644 index 00000000..623549a0 --- /dev/null +++ b/extraconfig/tasks/major_upgrade_pacemaker_init.yaml @@ -0,0 +1,131 @@ +heat_template_version: 2014-10-16 +description: 'Upgrade for Pacemaker deployments' + +parameters: + + controller_servers: + type: json + compute_servers: + type: json + blockstorage_servers: + type: json + objectstorage_servers: + type: json + cephstorage_servers: + type: json + input_values: + type: json + description: input values for the software deployments + + UpgradeInitCommand: + type: string + description: | + Command or script snippet to run on all overcloud nodes to + initialize the upgrade process. E.g. a repository switch. + default: '' + UpgradeLevelNovaCompute: + type: string + description: Nova Compute upgrade level + default: '' + +resources: + + # For the UpgradeInit also rename /etc/resolv.conf.save for +bug/1567004 + + UpgradeInitConfig: + type: OS::Heat::SoftwareConfig + properties: + group: script + config: + list_join: + - '' + - - "#!/bin/bash\n\n" + - "if [[ -f /etc/resolv.conf.save ]] ; then rm /etc/resolv.conf.save; fi\n\n" + - get_param: UpgradeInitCommand + + UpgradeInitControllerDeployment: + type: OS::Heat::SoftwareDeploymentGroup + properties: + servers: {get_param: controller_servers} + config: {get_resource: UpgradeInitConfig} + input_values: {get_param: input_values} + + UpgradeInitComputeDeployment: + type: OS::Heat::SoftwareDeploymentGroup + properties: + servers: {get_param: compute_servers} + config: {get_resource: UpgradeInitConfig} + input_values: {get_param: input_values} + + UpgradeInitBlockStorageDeployment: + type: OS::Heat::SoftwareDeploymentGroup + properties: + servers: {get_param: blockstorage_servers} + config: {get_resource: UpgradeInitConfig} + input_values: {get_param: input_values} + + UpgradeInitObjectStorageDeployment: + type: OS::Heat::SoftwareDeploymentGroup + properties: + servers: {get_param: objectstorage_servers} + config: {get_resource: UpgradeInitConfig} + input_values: {get_param: input_values} + + UpgradeInitCephStorageDeployment: + type: OS::Heat::SoftwareDeploymentGroup + properties: + servers: {get_param: cephstorage_servers} + config: {get_resource: UpgradeInitConfig} + input_values: {get_param: input_values} + + # TODO(jistr): for Mitaka->Newton upgrades and further we can use + # map_merge with input_values instead of feeding params into scripts + # via str_replace on bash snippets + + ComputeDeliverUpgradeScriptConfig: + type: OS::Heat::SoftwareConfig + properties: + group: script + config: + list_join: + - '' + - - str_replace: + template: | + #!/bin/bash + upgrade_level_nova_compute='UPGRADE_LEVEL_NOVA_COMPUTE' + params: + UPGRADE_LEVEL_NOVA_COMPUTE: {get_param: UpgradeLevelNovaCompute} + - get_file: major_upgrade_compute.sh + + ComputeDeliverUpgradeScriptDeployment: + type: OS::Heat::SoftwareDeploymentGroup + properties: + servers: {get_param: compute_servers} + config: {get_resource: ComputeDeliverUpgradeScriptConfig} + input_values: {get_param: input_values} + + ObjectStorageDeliverUpgradeScriptConfig: + type: OS::Heat::SoftwareConfig + properties: + group: script + config: {get_file: major_upgrade_object_storage.sh} + + ObjectStorageDeliverUpgradeScriptDeployment: + type: OS::Heat::SoftwareDeploymentGroup + properties: + servers: {get_param: objectstorage_servers} + config: {get_resource: ObjectStorageDeliverUpgradeScriptConfig} + input_values: {get_param: input_values} + + CephStorageDeliverUpgradeScriptConfig: + type: OS::Heat::SoftwareConfig + properties: + group: script + config: {get_file: major_upgrade_ceph_storage.sh} + + CephStorageDeliverUpgradeScriptDeployment: + type: OS::Heat::SoftwareDeploymentGroup + properties: + servers: {get_param: cephstorage_servers} + config: {get_resource: CephStorageDeliverUpgradeScriptConfig} + input_values: {get_param: input_values} diff --git a/extraconfig/tasks/major_upgrade_pacemaker_migrations.sh b/extraconfig/tasks/major_upgrade_pacemaker_migrations.sh new file mode 100644 index 00000000..b63198db --- /dev/null +++ b/extraconfig/tasks/major_upgrade_pacemaker_migrations.sh @@ -0,0 +1,75 @@ +#!/bin/bash + +# Special pieces of upgrade migration logic go into this +# file. E.g. Pacemaker cluster transitions for existing deployments, +# matching changes to overcloud_controller_pacemaker.pp (Puppet +# handles deployment, this file handles migrations). +# +# This file shouldn't execute any action on its own, all logic should +# be wrapped into bash functions. Upgrade scripts will source this +# file and call the functions defined in this file where appropriate. +# +# The migration functions should be idempotent. If the migration has +# been already applied, it should be possible to call the function +# again without damaging the deployment or failing the upgrade. + +function add_missing_openstack_core_constraints { + # The CIBs are saved under /root as they might contain sensitive data + CIB="/root/migration.cib" + CIB_BACKUP="/root/backup.cib" + CIB_PUSH_NEEDED=n + + rm -f "$CIB" "$CIB_BACKUP" || /bin/true + pcs cluster cib "$CIB" + cp "$CIB" "$CIB_BACKUP" + + if ! pcs -f "$CIB" constraint --full | grep 'start openstack-sahara-api-clone then start openstack-sahara-engine-clone'; then + pcs -f "$CIB" constraint order start openstack-sahara-api-clone then start openstack-sahara-engine-clone + CIB_PUSH_NEEDED=y + fi + + if ! pcs -f "$CIB" constraint --full | grep 'start openstack-core-clone then start openstack-ceilometer-notification-clone'; then + pcs -f "$CIB" constraint order start openstack-core-clone then start openstack-ceilometer-notification-clone + CIB_PUSH_NEEDED=y + fi + + if ! pcs -f "$CIB" constraint --full | grep 'start openstack-aodh-evaluator-clone then start openstack-aodh-listener-clone'; then + pcs -f "$CIB" constraint order start openstack-aodh-evaluator-clone then start openstack-aodh-listener-clone + CIB_PUSH_NEEDED=y + fi + + if pcs -f "$CIB" constraint --full | grep 'start openstack-core-clone then start openstack-heat-api-clone'; then + CID=$(pcs -f "$CIB" constraint --full | grep 'start openstack-core-clone then start openstack-heat-api-clone' | sed -e 's/.*id\://g' -e 's/)//g') + pcs -f "$CIB" constraint remove $CID + CIB_PUSH_NEEDED=y + fi + + if [ "$CIB_PUSH_NEEDED" = 'y' ]; then + pcs cluster cib-push "$CIB" + fi +} + +function remove_ceilometer_alarm { + if pcs status | grep openstack-ceilometer-alarm; then + # Disable pacemaker resources for ceilometer-alarms + pcs resource disable openstack-ceilometer-alarm-evaluator + check_resource openstack-ceilometer-alarm-evaluator stopped 600 + pcs resource delete openstack-ceilometer-alarm-evaluator + pcs resource disable openstack-ceilometer-alarm-notifier + check_resource openstack-ceilometer-alarm-notifier stopped 600 + pcs resource delete openstack-ceilometer-alarm-notifier + + # remove constraints + pcs constraint remove ceilometer-delay-then-ceilometer-alarm-evaluator-constraint + pcs constraint remove ceilometer-alarm-evaluator-with-ceilometer-delay-colocation + pcs constraint remove ceilometer-alarm-evaluator-then-ceilometer-alarm-notifier-constraint + pcs constraint remove ceilometer-alarm-notifier-with-ceilometer-alarm-evaluator-colocation + pcs constraint remove ceilometer-alarm-notifier-then-ceilometer-notification-constraint + pcs constraint remove ceilometer-notification-with-ceilometer-alarm-notifier-colocation + + fi + + # uninstall openstack-ceilometer-alarm package + yum -y remove openstack-ceilometer-alarm + +} diff --git a/extraconfig/tasks/noop.yaml b/extraconfig/tasks/noop.yaml deleted file mode 100644 index 0cff7469..00000000 --- a/extraconfig/tasks/noop.yaml +++ /dev/null @@ -1,10 +0,0 @@ -heat_template_version: 2014-10-16 -description: 'No-op task' - -parameters: - servers: - type: json - input_values: - type: json - default: {} - description: input values for the software deployments diff --git a/extraconfig/tasks/pacemaker_common_functions.sh b/extraconfig/tasks/pacemaker_common_functions.sh new file mode 100755 index 00000000..7d794c97 --- /dev/null +++ b/extraconfig/tasks/pacemaker_common_functions.sh @@ -0,0 +1,62 @@ +#!/bin/bash + +set -eu + +function check_resource { + + if [ "$#" -ne 3 ]; then + echo_error "ERROR: check_resource function expects 3 parameters, $# given" + exit 1 + fi + + service=$1 + state=$2 + timeout=$3 + + if [ "$state" = "stopped" ]; then + match_for_incomplete='Started' + else # started + match_for_incomplete='Stopped' + fi + + nodes_local=$(pcs status | grep ^Online | sed 's/.*\[ \(.*\) \]/\1/g' | sed 's/ /\|/g') + if timeout -k 10 $timeout crm_resource --wait; then + node_states=$(pcs status --full | grep "$service" | grep -v Clone | { egrep "$nodes_local" || true; } ) + if echo "$node_states" | grep -q "$match_for_incomplete"; then + echo_error "ERROR: cluster finished transition but $service was not in $state state, exiting." + exit 1 + else + echo "$service has $state" + fi + else + echo_error "ERROR: cluster remained unstable for more than $timeout seconds, exiting." + exit 1 + fi + +} + +function echo_error { + echo "$@" | tee /dev/fd2 +} + +function systemctl_swift { + services=( openstack-swift-account-auditor openstack-swift-account-reaper openstack-swift-account-replicator openstack-swift-account \ + openstack-swift-container-auditor openstack-swift-container-replicator openstack-swift-container-updater openstack-swift-container \ + openstack-swift-object-auditor openstack-swift-object-replicator openstack-swift-object-updater openstack-swift-object openstack-swift-proxy ) + action=$1 + case $action in + stop) + services=$(systemctl | grep swift | grep running | awk '{print $1}') + ;; + start) + enable_swift_storage=$(hiera -c /etc/puppet/hiera.yaml 'enable_swift_storage') + if [[ $enable_swift_storage != "true" ]]; then + services=( openstack-swift-proxy ) + fi + ;; + *) services=() ;; # for safetly, should never happen + esac + for S in ${services[@]}; do + systemctl $action $S + done +} diff --git a/extraconfig/tasks/pacemaker_maintenance_mode.sh b/extraconfig/tasks/pacemaker_maintenance_mode.sh new file mode 100755 index 00000000..ddc84ad2 --- /dev/null +++ b/extraconfig/tasks/pacemaker_maintenance_mode.sh @@ -0,0 +1,19 @@ +#!/bin/bash +set -x + +# On initial deployment, the pacemaker service is disabled and is-active exits +# 3 in that case, so allow this to fail gracefully. +pacemaker_status=$(systemctl is-active pacemaker || :) + +if [ "$pacemaker_status" = "active" ]; then + pcs property set maintenance-mode=true +fi + +# We need to reload haproxy in case the certificate changed because +# puppet doesn't know the contents of the cert file. We shouldn't +# reload it if it wasn't already active (such as if using external +# loadbalancer or on initial deployment). +haproxy_status=$(systemctl is-active haproxy || :) +if [ "$haproxy_status" = "active" ]; then + systemctl reload haproxy +fi diff --git a/extraconfig/tasks/pacemaker_resource_restart.sh b/extraconfig/tasks/pacemaker_resource_restart.sh index 12201097..b2bdc55a 100755 --- a/extraconfig/tasks/pacemaker_resource_restart.sh +++ b/extraconfig/tasks/pacemaker_resource_restart.sh @@ -3,38 +3,6 @@ set -eux pacemaker_status=$(systemctl is-active pacemaker) -check_interval=3 - -function check_resource { - - service=$1 - state=$2 - timeout=$3 - tstart=$(date +%s) - tend=$(( $tstart + $timeout )) - - if [ "$state" = "stopped" ]; then - match_for_incomplete='Started' - else # started - match_for_incomplete='Stopped' - fi - - while (( $(date +%s) < $tend )); do - node_states=$(pcs status --full | grep "$service" | grep -v Clone) - if echo "$node_states" | grep -q "$match_for_incomplete"; then - echo "$service not yet $state, sleeping $check_interval seconds." - sleep $check_interval - else - echo "$service has $state" - timeout -k 10 $timeout crm_resource --wait - return - fi - done - - echo "$service never $state after $timeout seconds" | tee /dev/fd/2 - exit 1 - -} # Run if pacemaker is running, we're the bootstrap node, # and we're updating the deployment (not creating). diff --git a/extraconfig/tasks/post_puppet_pacemaker.yaml b/extraconfig/tasks/post_puppet_pacemaker.yaml index 7de41d94..fbed9ce5 100644 --- a/extraconfig/tasks/post_puppet_pacemaker.yaml +++ b/extraconfig/tasks/post_puppet_pacemaker.yaml @@ -33,7 +33,11 @@ resources: type: OS::Heat::SoftwareConfig properties: group: script - config: {get_file: pacemaker_resource_restart.sh} + config: + list_join: + - '' + - - get_file: pacemaker_common_functions.sh + - get_file: pacemaker_resource_restart.sh ControllerPostPuppetRestartDeployment: type: OS::Heat::SoftwareDeployments diff --git a/extraconfig/tasks/pre_puppet_pacemaker.yaml b/extraconfig/tasks/pre_puppet_pacemaker.yaml index 2cfe92a7..82546588 100644 --- a/extraconfig/tasks/pre_puppet_pacemaker.yaml +++ b/extraconfig/tasks/pre_puppet_pacemaker.yaml @@ -14,13 +14,8 @@ resources: type: OS::Heat::SoftwareConfig properties: group: script - config: | - #!/bin/bash - pacemaker_status=$(systemctl is-active pacemaker) - - if [ "$pacemaker_status" = "active" ]; then - pcs property set maintenance-mode=true - fi + config: + get_file: pacemaker_maintenance_mode.sh ControllerPrePuppetMaintenanceModeDeployment: type: OS::Heat::SoftwareDeployments diff --git a/extraconfig/tasks/yum_update.sh b/extraconfig/tasks/yum_update.sh index 364d7343..f3c3b4bf 100755 --- a/extraconfig/tasks/yum_update.sh +++ b/extraconfig/tasks/yum_update.sh @@ -53,12 +53,13 @@ neutron-l3-agent neutron-metadata-agent neutron-openvswitch-agent neutron-server -openstack-ceilometer-alarm-evaluator -openstack-ceilometer-alarm-notifier openstack-ceilometer-api openstack-ceilometer-central openstack-ceilometer-collector openstack-ceilometer-notification +openstack-aodh-evaluator +openstack-aodh-notifier +openstack-aodh-listener openstack-cinder-api openstack-cinder-scheduler openstack-cinder-volume @@ -107,6 +108,9 @@ openstack-nova-scheduler" pcs -f $pacemaker_dumpfile constraint order promote redis-master then start openstack-ceilometer-central-clone require-all=false fi + if ! pcs constraint order show | grep "promote redis-master then start openstack-aodh-evaluator-clone"; then + pcs -f $pacemaker_dumpfile constraint order promote redis-master then start openstack-aodh-evaluator-clone require-all=false + fi # ensure neutron constraints https://review.openstack.org/#/c/229466 # remove ovs-cleanup after server and add openvswitch-agent instead if pcs constraint order show | grep "start neutron-server-clone then start neutron-ovs-cleanup-clone"; then @@ -128,6 +132,9 @@ openstack-nova-scheduler" # mongod start timeout is higher, setting only stop timeout pcs -f $pacemaker_dumpfile resource update mongod op start timeout=370s op stop timeout=200s + echo "Making sure rabbitmq has the notify=true meta parameter" + pcs -f $pacemaker_dumpfile resource update rabbitmq meta notify=true + echo "Applying new Pacemaker config" if ! pcs cluster cib-push $pacemaker_dumpfile; then echo "ERROR failed to apply new pacemaker config" @@ -151,14 +158,14 @@ openstack-nova-scheduler" kill $(ps ax | grep -e "radvd.*\.pid\.radvd" | awk '{print $1}') 2>/dev/null || : else echo "Upgrading openstack-puppet-modules" - yum -y update openstack-puppet-modules + yum -q -y update openstack-puppet-modules echo "Upgrading other packages is handled by config management tooling" echo -n "true" > $heat_outputs_path.update_managed_packages exit 0 fi command=${command:-update} -full_command="yum -y $command $command_arguments" +full_command="yum -q -y $command $command_arguments" echo "Running: $full_command" result=$($full_command) diff --git a/extraconfig/tasks/yum_update_noop.yaml b/extraconfig/tasks/yum_update_noop.yaml new file mode 100644 index 00000000..b759d9c5 --- /dev/null +++ b/extraconfig/tasks/yum_update_noop.yaml @@ -0,0 +1,29 @@ +heat_template_version: 2014-10-16 +description: 'No-op yum update task' + +resources: + + config: + type: OS::Heat::SoftwareConfig + properties: + group: script + config: | + #!/bin/bash + echo -n "false" > $heat_outputs_path.update_managed_packages + inputs: + - name: update_identifier + description: yum will only run for previously unused values of update_identifier + default: '' + - name: command + description: yum sub-command to run, defaults to "update" + default: update + - name: command_arguments + description: yum command arguments, defaults to "" + default: '' + outputs: + - name: update_managed_packages + description: boolean value indicating whether to upgrade managed packages + +outputs: + OS::stack_id: + value: {get_resource: config} |