LCOV - code coverage report
Current view: top level - pengine - unpack.c (source / functions) Hit Total Coverage
Test: Pacemaker code coverage Lines: 12 2073 0.6 %
Date: 2024-05-07 11:09:47 Functions: 1 82 1.2 %

          Line data    Source code
       1             : /*
       2             :  * Copyright 2004-2024 the Pacemaker project contributors
       3             :  *
       4             :  * The version control history for this file may have further details.
       5             :  *
       6             :  * This source code is licensed under the GNU Lesser General Public License
       7             :  * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
       8             :  */
       9             : 
      10             : #include <crm_internal.h>
      11             : 
      12             : #include <stdio.h>
      13             : #include <string.h>
      14             : #include <glib.h>
      15             : #include <time.h>
      16             : 
      17             : #include <crm/crm.h>
      18             : #include <crm/services.h>
      19             : #include <crm/common/xml.h>
      20             : #include <crm/common/xml_internal.h>
      21             : 
      22             : #include <crm/common/util.h>
      23             : #include <crm/pengine/rules.h>
      24             : #include <crm/pengine/internal.h>
      25             : #include <pe_status_private.h>
      26             : 
      27             : CRM_TRACE_INIT_DATA(pe_status);
      28             : 
      29             : // A resource action history entry, together with the fields parsed from it
      30             : struct action_history {
      31             :     pcmk_resource_t *rsc;       // Resource that history is for
      32             :     pcmk_node_t *node;        // Node that history is for
      33             :     xmlNode *xml;             // History entry XML (source of the parsed fields)
      34             : 
      35             :     // Parsed from entry XML
      36             :     const char *id;           // XML ID of history entry
      37             :     const char *key;          // Operation key of action
      38             :     const char *task;         // Action name
      39             :     const char *exit_reason;  // Exit reason given for result
      40             :     guint interval_ms;        // Action interval, in milliseconds
      41             :     int call_id;              // Call ID of action
      42             :     int expected_exit_status; // Exit status that was expected of action
      43             :     int exit_status;          // Actual exit status of action
      44             :     int execution_status;     // Execution status of action
      45             : };
      46             : 
      47             : /* Set or clear a scheduler flag per a boolean cluster option (unchanged if the
      48             :  * option has no value). This uses pcmk__set_flags_as()/pcmk__clear_flags_as()
      49             :  * directly, not pcmk__set_scheduler_flags()/pcmk__clear_scheduler_flags(), so
      50             :  * that the flag is stringified more readably in log messages. */
      51             : #define set_config_flag(scheduler, option, flag) do {                         \
      52             :         GHashTable *config_hash = (scheduler)->config_hash;                   \
      53             :         const char *scf_value = pcmk__cluster_option(config_hash, (option));  \
      54             :                                                                               \
      55             :         if (scf_value != NULL) {                                              \
      56             :             if (crm_is_true(scf_value)) {                                     \
      57             :                 (scheduler)->flags = pcmk__set_flags_as(__func__, __LINE__,   \
      58             :                                     LOG_TRACE, "Scheduler",                   \
      59             :                                     crm_system_name, (scheduler)->flags,      \
      60             :                                     (flag), #flag);                           \
      61             :             } else {                                                          \
      62             :                 (scheduler)->flags = pcmk__clear_flags_as(__func__, __LINE__, \
      63             :                                     LOG_TRACE, "Scheduler",                   \
      64             :                                     crm_system_name, (scheduler)->flags,      \
      65             :                                     (flag), #flag);                           \
      66             :             }                                                                 \
      67             :         }                                                                     \
      68             :     } while(0)
      69             : // Prototypes for file-local (static) helper functions
      70             : static void unpack_rsc_op(pcmk_resource_t *rsc, pcmk_node_t *node,
      71             :                           xmlNode *xml_op, xmlNode **last_failure,
      72             :                           enum action_fail_response *failed);
      73             : static void determine_remote_online_status(pcmk_scheduler_t *scheduler,
      74             :                                            pcmk_node_t *this_node);
      75             : static void add_node_attrs(const xmlNode *xml_obj, pcmk_node_t *node,
      76             :                            bool overwrite, pcmk_scheduler_t *scheduler);
      77             : static void determine_online_status(const xmlNode *node_state,
      78             :                                     pcmk_node_t *this_node,
      79             :                                     pcmk_scheduler_t *scheduler);
      80             : 
      81             : static void unpack_node_lrm(pcmk_node_t *node, const xmlNode *xml,
      82             :                             pcmk_scheduler_t *scheduler);
      83             : 
      84             : 
      85             : static gboolean
      86           0 : is_dangling_guest_node(pcmk_node_t *node)
      87             : {
      88             :     /* Return TRUE for a Pacemaker Remote node whose connection resource has no
      89             :      * container and is flagged pcmk_rsc_removed_filler: a guest node whose
      90             :      * container has vanished from both the config and the status section. */
      91           0 :     if (pcmk__is_pacemaker_remote_node(node)
      92           0 :         && (node->details->remote_rsc != NULL)
      93           0 :         && (node->details->remote_rsc->container == NULL)
      94           0 :         && pcmk_is_set(node->details->remote_rsc->flags,
      95             :                        pcmk_rsc_removed_filler)) {
      96           0 :         return TRUE;
      97             :     }
      98             : 
      99           0 :     return FALSE;
     100             : }
     101             : 
     102             : /*!
     103             :  * \brief Mark a node as needing to be fenced, in the way its node type requires
     104             :  *
     105             :  * \param[in,out] scheduler       Scheduler data
     106             :  * \param[in,out] node            Node to fence (function returns early if NULL)
     107             :  * \param[in]     reason          Text description of why fencing is needed
     108             :  * \param[in]     priority_delay  Whether to consider \c PCMK_OPT_PRIORITY_FENCING_DELAY
     109             :  *                                (passed to pe_fence_op() for cluster nodes only)
     110             :  */
     111             : void
     112           0 : pe_fence_node(pcmk_scheduler_t *scheduler, pcmk_node_t *node,
     113             :               const char *reason, bool priority_delay)
     114             : {
     115           0 :     CRM_CHECK(node, return);
     116             : 
     117             :     /* A guest node is fenced by marking its container as failed */
     118           0 :     if (pcmk__is_guest_or_bundle_node(node)) {
     119           0 :         pcmk_resource_t *rsc = node->details->remote_rsc->container;
     120             : 
     121           0 :         if (!pcmk_is_set(rsc->flags, pcmk_rsc_failed)) { // Skip if container is already failed
     122           0 :             if (!pcmk_is_set(rsc->flags, pcmk_rsc_managed)) {
     123           0 :                 crm_notice("Not fencing guest node %s "
     124             :                            "(otherwise would because %s): "
     125             :                            "its guest resource %s is unmanaged",
     126             :                            pcmk__node_name(node), reason, rsc->id);
     127             :             } else {
     128           0 :                 pcmk__sched_warn("Guest node %s will be fenced "
     129             :                                  "(by recovering its guest resource %s): %s",
     130             :                                  pcmk__node_name(node), rsc->id, reason);
     131             : 
     132             :                 /* We don't mark the node as unclean because that would prevent the
     133             :                  * node from running resources. We want to allow it to run resources
     134             :                  * in this transition if the recovery succeeds.
     135             :                  */
     136           0 :                 node->details->remote_requires_reset = TRUE;
     137           0 :                 pcmk__set_rsc_flags(rsc,
     138             :                                     pcmk_rsc_failed|pcmk_rsc_stop_if_failed);
     139             :             }
     140             :         }
     141             : 
     142           0 :     } else if (is_dangling_guest_node(node)) { // Only the connection resource is left to fail
     143           0 :         crm_info("Cleaning up dangling connection for guest node %s: "
     144             :                  "fencing was already done because %s, "
     145             :                  "and guest resource no longer exists",
     146             :                  pcmk__node_name(node), reason);
     147           0 :         pcmk__set_rsc_flags(node->details->remote_rsc,
     148             :                             pcmk_rsc_failed|pcmk_rsc_stop_if_failed);
     149             : 
     150           0 :     } else if (pcmk__is_remote_node(node)) {
     151           0 :         pcmk_resource_t *rsc = node->details->remote_rsc;
     152             : 
     153           0 :         if ((rsc != NULL) && !pcmk_is_set(rsc->flags, pcmk_rsc_managed)) {
     154           0 :             crm_notice("Not fencing remote node %s "
     155             :                        "(otherwise would because %s): connection is unmanaged",
     156             :                        pcmk__node_name(node), reason);
     157           0 :         } else if(node->details->remote_requires_reset == FALSE) { // Warn only the first time
     158           0 :             node->details->remote_requires_reset = TRUE;
     159           0 :             pcmk__sched_warn("Remote node %s %s: %s",
     160             :                              pcmk__node_name(node),
     161             :                              pe_can_fence(scheduler, node)? "will be fenced" : "is unclean",
     162             :                              reason);
     163             :         }
     164           0 :         node->details->unclean = TRUE; // NOTE(review): also reached when the connection is unmanaged -- confirm intended
     165             :         // No need to apply PCMK_OPT_PRIORITY_FENCING_DELAY for remote nodes
     166           0 :         pe_fence_op(node, NULL, TRUE, reason, FALSE, scheduler);
     167             : 
     168           0 :     } else if (node->details->unclean) { // Already marked unclean: only trace the additional reason
     169           0 :         crm_trace("Cluster node %s %s because %s",
     170             :                   pcmk__node_name(node),
     171             :                   pe_can_fence(scheduler, node)? "would also be fenced" : "also is unclean",
     172             :                   reason);
     173             : 
     174             :     } else { // Node matched none of the above and is not yet marked unclean
     175           0 :         pcmk__sched_warn("Cluster node %s %s: %s",
     176             :                          pcmk__node_name(node),
     177             :                          pe_can_fence(scheduler, node)? "will be fenced" : "is unclean",
     178             :                          reason);
     179           0 :         node->details->unclean = TRUE;
     180           0 :         pe_fence_op(node, NULL, TRUE, reason, priority_delay, scheduler);
     181             :     }
     182             : }
     183             : 
     184             : // @TODO xpaths can't handle templates, rules, or id-refs
     185             : 
     186             : // XPath step: nvpair named PCMK_STONITH_PROVIDES or PCMK_META_REQUIRES whose value is PCMK_VALUE_UNFENCING
     187             : #define XPATH_UNFENCING_NVPAIR PCMK_XE_NVPAIR           \
     188             :     "[(@" PCMK_XA_NAME "='" PCMK_STONITH_PROVIDES "'"   \
     189             :     "or @" PCMK_XA_NAME "='" PCMK_META_REQUIRES "') "   \
     190             :     "and @" PCMK_XA_VALUE "='" PCMK_VALUE_UNFENCING "']"
     191             : 
     192             : // XPath union: an unfencing nvpair in meta-attributes anywhere beneath the resources section, or directly in rsc_defaults
     193             : #define XPATH_ENABLE_UNFENCING \
     194             :     "/" PCMK_XE_CIB "/" PCMK_XE_CONFIGURATION "/" PCMK_XE_RESOURCES     \
     195             :     "//" PCMK_XE_META_ATTRIBUTES "/" XPATH_UNFENCING_NVPAIR             \
     196             :     "|/" PCMK_XE_CIB "/" PCMK_XE_CONFIGURATION "/" PCMK_XE_RSC_DEFAULTS \
     197             :     "/" PCMK_XE_META_ATTRIBUTES "/" XPATH_UNFENCING_NVPAIR
     198             : // Set a scheduler flag if an XPath expression matches anything in scheduler->input
     199             : static void
     200           0 : set_if_xpath(uint64_t flag, const char *xpath, pcmk_scheduler_t *scheduler)
     201             : {
     202           0 :     xmlXPathObjectPtr result = NULL;
     203             : 
     204           0 :     if (!pcmk_is_set(scheduler->flags, flag)) { // Skip the search if the flag is already set
     205           0 :         result = xpath_search(scheduler->input, xpath);
     206           0 :         if (result && (numXpathResults(result) > 0)) {
     207           0 :             pcmk__set_scheduler_flags(scheduler, flag);
     208             :         }
     209           0 :         freeXpathObject(result);
     210             :     }
     211           0 : }
     212             : // Unpack cluster-wide options from config into scheduler->config_hash, flags, and settings (always returns TRUE)
     213             : gboolean
     214           0 : unpack_config(xmlNode *config, pcmk_scheduler_t *scheduler)
     215             : {
     216           0 :     const char *value = NULL;
     217           0 :     guint interval_ms = 0U;
     218           0 :     GHashTable *config_hash = pcmk__strkey_table(free, free);
     219             : 
     220           0 :     pe_rule_eval_data_t rule_data = {
     221             :         .node_hash = NULL,
     222           0 :         .now = scheduler->now,
     223             :         .match_data = NULL,
     224             :         .rsc_data = NULL,
     225             :         .op_data = NULL
     226             :     };
     227             : 
     228           0 :     scheduler->config_hash = config_hash;
     229             : 
     230           0 :     pe__unpack_dataset_nvpairs(config, PCMK_XE_CLUSTER_PROPERTY_SET, &rule_data,
     231             :                                config_hash, PCMK_VALUE_CIB_BOOTSTRAP_OPTIONS,
     232             :                                FALSE, scheduler);
     233             : 
     234           0 :     pcmk__validate_cluster_options(config_hash);
     235             : 
     236           0 :     set_config_flag(scheduler, PCMK_OPT_ENABLE_STARTUP_PROBES,
     237             :                     pcmk_sched_probe_resources);
     238           0 :     if (!pcmk_is_set(scheduler->flags, pcmk_sched_probe_resources)) {
     239           0 :         crm_info("Startup probes: disabled (dangerous)");
     240             :     }
     241             : 
     242           0 :     value = pcmk__cluster_option(config_hash, PCMK_OPT_HAVE_WATCHDOG);
     243           0 :     if (value && crm_is_true(value)) {
     244           0 :         crm_info("Watchdog-based self-fencing will be performed via SBD if "
     245             :                  "fencing is required and " PCMK_OPT_STONITH_WATCHDOG_TIMEOUT
     246             :                  " is nonzero");
     247           0 :         pcmk__set_scheduler_flags(scheduler, pcmk_sched_have_fencing);
     248             :     }
     249             : 
     250             :     /* Set certain flags via xpath here, so they can be used before the relevant
     251             :      * configuration sections are unpacked.
     252             :      */
     253           0 :     set_if_xpath(pcmk_sched_enable_unfencing, XPATH_ENABLE_UNFENCING,
     254             :                  scheduler);
     255             : 
     256           0 :     value = pcmk__cluster_option(config_hash, PCMK_OPT_STONITH_TIMEOUT);
     257           0 :     pcmk_parse_interval_spec(value, &interval_ms);
     258             :     // Store the timeout (milliseconds) as an int, clamping at INT_MAX
     259           0 :     if (interval_ms >= INT_MAX) {
     260           0 :         scheduler->stonith_timeout = INT_MAX;
     261             :     } else {
     262           0 :         scheduler->stonith_timeout = (int) interval_ms;
     263             :     }
     264           0 :     crm_debug("STONITH timeout: %d", scheduler->stonith_timeout);
     265             : 
     266           0 :     set_config_flag(scheduler, PCMK_OPT_STONITH_ENABLED,
     267             :                     pcmk_sched_fencing_enabled);
     268           0 :     if (pcmk_is_set(scheduler->flags, pcmk_sched_fencing_enabled)) {
     269           0 :         crm_debug("STONITH of failed nodes is enabled");
     270             :     } else {
     271           0 :         crm_debug("STONITH of failed nodes is disabled");
     272             :     }
     273             : 
     274           0 :     scheduler->stonith_action = pcmk__cluster_option(config_hash,
     275             :                                                      PCMK_OPT_STONITH_ACTION);
     276           0 :     if (!strcmp(scheduler->stonith_action, PCMK__ACTION_POWEROFF)) {
     277           0 :         pcmk__warn_once(pcmk__wo_poweroff,
     278             :                         "Support for " PCMK_OPT_STONITH_ACTION " of "
     279             :                         "'" PCMK__ACTION_POWEROFF "' is deprecated and will be "
     280             :                         "removed in a future release "
     281             :                         "(use '" PCMK_ACTION_OFF "' instead)");
     282           0 :         scheduler->stonith_action = PCMK_ACTION_OFF;
     283             :     }
     284           0 :     crm_trace("STONITH will %s nodes", scheduler->stonith_action);
     285             : 
     286           0 :     set_config_flag(scheduler, PCMK_OPT_CONCURRENT_FENCING,
     287             :                     pcmk_sched_concurrent_fencing);
     288           0 :     if (pcmk_is_set(scheduler->flags, pcmk_sched_concurrent_fencing)) {
     289           0 :         crm_debug("Concurrent fencing is enabled");
     290             :     } else {
     291           0 :         crm_debug("Concurrent fencing is disabled");
     292             :     }
     293             : 
     294           0 :     value = pcmk__cluster_option(config_hash, PCMK_OPT_PRIORITY_FENCING_DELAY);
     295           0 :     if (value) {
     296           0 :         pcmk_parse_interval_spec(value, &interval_ms);
     297           0 :         scheduler->priority_fencing_delay = (int) (interval_ms / 1000); // ms -> s
     298           0 :         crm_trace("Priority fencing delay is %ds",
     299             :                   scheduler->priority_fencing_delay);
     300             :     }
     301             : 
     302           0 :     set_config_flag(scheduler, PCMK_OPT_STOP_ALL_RESOURCES,
     303             :                     pcmk_sched_stop_all);
     304           0 :     crm_debug("Stop all active resources: %s",
     305             :               pcmk__flag_text(scheduler->flags, pcmk_sched_stop_all));
     306             : 
     307           0 :     set_config_flag(scheduler, PCMK_OPT_SYMMETRIC_CLUSTER,
     308             :                     pcmk_sched_symmetric_cluster);
     309           0 :     if (pcmk_is_set(scheduler->flags, pcmk_sched_symmetric_cluster)) {
     310           0 :         crm_debug("Cluster is symmetric" " - resources can run anywhere by default");
     311             :     }
     312             : 
     313           0 :     value = pcmk__cluster_option(config_hash, PCMK_OPT_NO_QUORUM_POLICY);
     314             :     // Any value not matched below falls back to pcmk_no_quorum_stop
     315           0 :     if (pcmk__str_eq(value, PCMK_VALUE_IGNORE, pcmk__str_casei)) {
     316           0 :         scheduler->no_quorum_policy = pcmk_no_quorum_ignore;
     317             : 
     318           0 :     } else if (pcmk__str_eq(value, PCMK_VALUE_FREEZE, pcmk__str_casei)) {
     319           0 :         scheduler->no_quorum_policy = pcmk_no_quorum_freeze;
     320             : 
     321           0 :     } else if (pcmk__str_eq(value, PCMK_VALUE_DEMOTE, pcmk__str_casei)) {
     322           0 :         scheduler->no_quorum_policy = pcmk_no_quorum_demote;
     323             : 
     324           0 :     } else if (pcmk__str_eq(value, PCMK_VALUE_FENCE_LEGACY, pcmk__str_casei)) {
     325           0 :         if (pcmk_is_set(scheduler->flags, pcmk_sched_fencing_enabled)) {
     326           0 :             int do_panic = 0;
     327             : 
     328           0 :             crm_element_value_int(scheduler->input, PCMK_XA_NO_QUORUM_PANIC,
     329             :                                   &do_panic);
     330           0 :             if (do_panic || pcmk_is_set(scheduler->flags, pcmk_sched_quorate)) {
     331           0 :                 scheduler->no_quorum_policy = pcmk_no_quorum_fence;
     332             :             } else {
     333           0 :                 crm_notice("Resetting " PCMK_OPT_NO_QUORUM_POLICY
     334             :                            " to 'stop': cluster has never had quorum");
     335           0 :                 scheduler->no_quorum_policy = pcmk_no_quorum_stop;
     336             :             }
     337             :         } else {
     338           0 :             pcmk__config_err("Resetting " PCMK_OPT_NO_QUORUM_POLICY
     339             :                              " to 'stop' because fencing is disabled");
     340           0 :             scheduler->no_quorum_policy = pcmk_no_quorum_stop;
     341             :         }
     342             : 
     343             :     } else {
     344           0 :         scheduler->no_quorum_policy = pcmk_no_quorum_stop;
     345             :     }
     346             : 
     347           0 :     switch (scheduler->no_quorum_policy) {
     348           0 :         case pcmk_no_quorum_freeze:
     349           0 :             crm_debug("On loss of quorum: Freeze resources");
     350           0 :             break;
     351           0 :         case pcmk_no_quorum_stop:
     352           0 :             crm_debug("On loss of quorum: Stop ALL resources");
     353           0 :             break;
     354           0 :         case pcmk_no_quorum_demote:
     355           0 :             crm_debug("On loss of quorum: "
     356             :                       "Demote promotable resources and stop other resources");
     357           0 :             break;
     358           0 :         case pcmk_no_quorum_fence:
     359           0 :             crm_notice("On loss of quorum: Fence all remaining nodes");
     360           0 :             break;
     361           0 :         case pcmk_no_quorum_ignore:
     362           0 :             crm_notice("On loss of quorum: Ignore");
     363           0 :             break;
     364             :     }
     365             : 
     366           0 :     set_config_flag(scheduler, PCMK_OPT_STOP_ORPHAN_RESOURCES,
     367             :                     pcmk_sched_stop_removed_resources);
     368           0 :     if (pcmk_is_set(scheduler->flags, pcmk_sched_stop_removed_resources)) {
     369           0 :         crm_trace("Orphan resources are stopped");
     370             :     } else {
     371           0 :         crm_trace("Orphan resources are ignored");
     372             :     }
     373             : 
     374           0 :     set_config_flag(scheduler, PCMK_OPT_STOP_ORPHAN_ACTIONS,
     375             :                     pcmk_sched_cancel_removed_actions);
     376           0 :     if (pcmk_is_set(scheduler->flags, pcmk_sched_cancel_removed_actions)) {
     377           0 :         crm_trace("Orphan resource actions are stopped");
     378             :     } else {
     379           0 :         crm_trace("Orphan resource actions are ignored");
     380             :     }
     381             : 
     382           0 :     value = pcmk__cluster_option(config_hash, PCMK__OPT_REMOVE_AFTER_STOP);
     383           0 :     if (value != NULL) {
     384           0 :         if (crm_is_true(value)) {
     385           0 :             pcmk__set_scheduler_flags(scheduler, pcmk_sched_remove_after_stop);
     386           0 :             pcmk__warn_once(pcmk__wo_remove_after,
     387             :                             "Support for the " PCMK__OPT_REMOVE_AFTER_STOP
     388             :                             " cluster property is deprecated and will be "
     389             :                             "removed in a future release");
     390             :         } else {
     391           0 :             pcmk__clear_scheduler_flags(scheduler,
     392             :                                         pcmk_sched_remove_after_stop);
     393             :         }
     394             :     }
     395             : 
     396           0 :     set_config_flag(scheduler, PCMK_OPT_MAINTENANCE_MODE,
     397             :                     pcmk_sched_in_maintenance);
     398           0 :     crm_trace("Maintenance mode: %s",
     399             :               pcmk__flag_text(scheduler->flags, pcmk_sched_in_maintenance));
     400             : 
     401           0 :     set_config_flag(scheduler, PCMK_OPT_START_FAILURE_IS_FATAL,
     402             :                     pcmk_sched_start_failure_fatal);
     403           0 :     if (pcmk_is_set(scheduler->flags, pcmk_sched_start_failure_fatal)) {
     404           0 :         crm_trace("Start failures are always fatal");
     405             :     } else {
     406           0 :         crm_trace("Start failures are handled by failcount");
     407             :     }
     408             :     // The startup fencing option is consulted only when fencing is enabled
     409           0 :     if (pcmk_is_set(scheduler->flags, pcmk_sched_fencing_enabled)) {
     410           0 :         set_config_flag(scheduler, PCMK_OPT_STARTUP_FENCING,
     411             :                         pcmk_sched_startup_fencing);
     412             :     }
     413           0 :     if (pcmk_is_set(scheduler->flags, pcmk_sched_startup_fencing)) {
     414           0 :         crm_trace("Unseen nodes will be fenced");
     415             :     } else {
     416           0 :         pcmk__warn_once(pcmk__wo_blind,
     417             :                         "Blind faith: not fencing unseen nodes");
     418             :     }
     419             : 
     420           0 :     pe__unpack_node_health_scores(scheduler);
     421             : 
     422           0 :     scheduler->placement_strategy =
     423           0 :         pcmk__cluster_option(config_hash, PCMK_OPT_PLACEMENT_STRATEGY);
     424           0 :     crm_trace("Placement strategy: %s", scheduler->placement_strategy);
     425             : 
     426           0 :     set_config_flag(scheduler, PCMK_OPT_SHUTDOWN_LOCK,
     427             :                     pcmk_sched_shutdown_lock);
     428           0 :     if (pcmk_is_set(scheduler->flags, pcmk_sched_shutdown_lock)) {
     429           0 :         value = pcmk__cluster_option(config_hash, PCMK_OPT_SHUTDOWN_LOCK_LIMIT);
     430           0 :         pcmk_parse_interval_spec(value, &(scheduler->shutdown_lock));
     431           0 :         scheduler->shutdown_lock /= 1000; // ms -> s; kept in seconds from here on
     432           0 :         crm_trace("Resources will be locked to nodes that were cleanly "
     433             :                   "shut down (locks expire after %s)",
     434             :                   pcmk__readable_interval(scheduler->shutdown_lock * 1000)); // back to ms for display, as for node_pending_timeout below
     435             :     } else {
     436           0 :         crm_trace("Resources will not be locked to nodes that were cleanly "
     437             :                   "shut down");
     438             :     }
     439             : 
     440           0 :     value = pcmk__cluster_option(config_hash, PCMK_OPT_NODE_PENDING_TIMEOUT);
     441           0 :     pcmk_parse_interval_spec(value, &(scheduler->node_pending_timeout));
     442           0 :     scheduler->node_pending_timeout /= 1000; // ms -> s
     443           0 :     if (scheduler->node_pending_timeout == 0) {
     444           0 :         crm_trace("Do not fence pending nodes");
     445             :     } else {
     446           0 :         crm_trace("Fence pending nodes after %s",
     447             :                   pcmk__readable_interval(scheduler->node_pending_timeout
     448             :                                           * 1000));
     449             :     }
     450             : 
     451           0 :     return TRUE;
     452             : }
     453             : 
     454             : pcmk_node_t *
     455           0 : pe_create_node(const char *id, const char *uname, const char *type,
     456             :                const char *score, pcmk_scheduler_t *scheduler)
     457             : {
     458           0 :     pcmk_node_t *new_node = NULL;
     459             : 
     460           0 :     if (pcmk_find_node(scheduler, uname) != NULL) {
     461           0 :         pcmk__config_warn("More than one node entry has name '%s'", uname);
     462             :     }
     463             : 
     464           0 :     new_node = calloc(1, sizeof(pcmk_node_t));
     465           0 :     if (new_node == NULL) {
     466           0 :         pcmk__sched_err("Could not allocate memory for node %s", uname);
     467           0 :         return NULL;
     468             :     }
     469             : 
     470           0 :     new_node->weight = char2score(score);
     471           0 :     new_node->details = calloc(1, sizeof(struct pe_node_shared_s));
     472             : 
     473           0 :     if (new_node->details == NULL) {
     474           0 :         free(new_node);
     475           0 :         pcmk__sched_err("Could not allocate memory for node %s", uname);
     476           0 :         return NULL;
     477             :     }
     478             : 
     479           0 :     crm_trace("Creating node for entry %s/%s", uname, id);
     480           0 :     new_node->details->id = id;
     481           0 :     new_node->details->uname = uname;
     482           0 :     new_node->details->online = FALSE;
     483           0 :     new_node->details->shutdown = FALSE;
     484           0 :     new_node->details->rsc_discovery_enabled = TRUE;
     485           0 :     new_node->details->running_rsc = NULL;
     486           0 :     new_node->details->data_set = scheduler;
     487             : 
     488           0 :     if (pcmk__str_eq(type, PCMK_VALUE_MEMBER,
     489             :                      pcmk__str_null_matches|pcmk__str_casei)) {
     490           0 :         new_node->details->type = pcmk_node_variant_cluster;
     491             : 
     492           0 :     } else if (pcmk__str_eq(type, PCMK_VALUE_REMOTE, pcmk__str_casei)) {
     493           0 :         new_node->details->type = pcmk_node_variant_remote;
     494           0 :         pcmk__set_scheduler_flags(scheduler, pcmk_sched_have_remote_nodes);
     495             : 
     496             :     } else {
     497             :         /* @COMPAT 'ping' is the default for backward compatibility, but it
     498             :          * should be changed to 'member' at a compatibility break
     499             :          */
     500           0 :         if (!pcmk__str_eq(type, PCMK__VALUE_PING, pcmk__str_casei)) {
     501           0 :             pcmk__config_warn("Node %s has unrecognized type '%s', "
     502             :                               "assuming '" PCMK__VALUE_PING "'",
     503             :                               pcmk__s(uname, "without name"), type);
     504             :         }
     505           0 :         pcmk__warn_once(pcmk__wo_ping_node,
     506             :                         "Support for nodes of type '" PCMK__VALUE_PING "' "
     507             :                         "(such as %s) is deprecated and will be removed in a "
     508             :                         "future release",
     509             :                         pcmk__s(uname, "unnamed node"));
     510           0 :         new_node->details->type = node_ping;
     511             :     }
     512             : 
     513           0 :     new_node->details->attrs = pcmk__strkey_table(free, free);
     514             : 
     515           0 :     if (pcmk__is_pacemaker_remote_node(new_node)) {
     516           0 :         pcmk__insert_dup(new_node->details->attrs, CRM_ATTR_KIND, "remote");
     517             :     } else {
     518           0 :         pcmk__insert_dup(new_node->details->attrs, CRM_ATTR_KIND, "cluster");
     519             :     }
     520             : 
     521           0 :     new_node->details->utilization = pcmk__strkey_table(free, free);
     522           0 :     new_node->details->digest_cache = pcmk__strkey_table(free,
     523             :                                                           pe__free_digests);
     524             : 
     525           0 :     scheduler->nodes = g_list_insert_sorted(scheduler->nodes, new_node,
     526             :                                             pe__cmp_node_name);
     527           0 :     return new_node;
     528             : }
     529             : 
     530             : static const char * // Scan a resource's meta-attributes for a guest node definition and, if found, create its connection resource XML
     531           0 : expand_remote_rsc_meta(xmlNode *xml_obj, xmlNode *parent, pcmk_scheduler_t *data) // xml_obj: resource XML to scan; parent: handed to pe_create_remote_xml(); data: scheduler data (its resource list is searched)
     532             : { // Returns the PCMK_META_REMOTE_NODE value, or NULL if it is unset or a resource with that ID already exists
     533           0 :     xmlNode *attr_set = NULL;
     534           0 :     xmlNode *attr = NULL;
     535             : 
     536           0 :     const char *container_id = pcmk__xe_id(xml_obj); // ID of the scanned resource, passed on as the container ID
     537           0 :     const char *remote_name = NULL;
     538           0 :     const char *remote_server = NULL;
     539           0 :     const char *remote_port = NULL;
     540           0 :     const char *connect_timeout = "60s"; // Kept unless a PCMK_META_REMOTE_CONNECT_TIMEOUT entry replaces it below
     541           0 :     const char *remote_allow_migrate=NULL;
     542           0 :     const char *is_managed = NULL;
     543             : 
     544           0 :     for (attr_set = pcmk__xe_first_child(xml_obj, NULL, NULL, NULL);
     545           0 :          attr_set != NULL; attr_set = pcmk__xe_next(attr_set)) {
     546             : 
     547           0 :         if (!pcmk__xe_is(attr_set, PCMK_XE_META_ATTRIBUTES)) { // Only meta-attribute blocks are examined
     548           0 :             continue;
     549             :         }
     550             : 
     551           0 :         for (attr = pcmk__xe_first_child(attr_set, NULL, NULL, NULL);
     552           0 :              attr != NULL; attr = pcmk__xe_next(attr)) {
     553             : 
     554           0 :             const char *value = crm_element_value(attr, PCMK_XA_VALUE); // NOTE(review): presumably NULL when the attribute is absent; such a value would still overwrite the defaults above
     555           0 :             const char *name = crm_element_value(attr, PCMK_XA_NAME);
     556             : 
     557           0 :             if (name == NULL) { // Sanity
     558           0 :                 continue;
     559             :             }
     560             : 
     561           0 :             if (strcmp(name, PCMK_META_REMOTE_NODE) == 0) { // Names are matched case-sensitively; a later entry overrides an earlier one
     562           0 :                 remote_name = value;
     563             : 
     564           0 :             } else if (strcmp(name, PCMK_META_REMOTE_ADDR) == 0) {
     565           0 :                 remote_server = value;
     566             : 
     567           0 :             } else if (strcmp(name, PCMK_META_REMOTE_PORT) == 0) {
     568           0 :                 remote_port = value;
     569             : 
     570           0 :             } else if (strcmp(name, PCMK_META_REMOTE_CONNECT_TIMEOUT) == 0) {
     571           0 :                 connect_timeout = value;
     572             : 
     573           0 :             } else if (strcmp(name, PCMK_META_REMOTE_ALLOW_MIGRATE) == 0) {
     574           0 :                 remote_allow_migrate = value;
     575             : 
     576           0 :             } else if (strcmp(name, PCMK_META_IS_MANAGED) == 0) {
     577           0 :                 is_managed = value;
     578             :             }
     579             :         }
     580             :     }
     581             : 
     582           0 :     if (remote_name == NULL) { // No remote node name configured, so there is nothing to create
     583           0 :         return NULL;
     584             :     }
     585             : 
     586           0 :     if (pe_find_resource(data->resources, remote_name) != NULL) { // A resource with this ID is already known; do not create another
     587           0 :         return NULL;
     588             :     }
     589             : 
     590           0 :     pe_create_remote_xml(parent, remote_name, container_id,
     591             :                          remote_allow_migrate, is_managed,
     592             :                          connect_timeout, remote_server, remote_port);
     593           0 :     return remote_name; // Points at the XML attribute value fetched above, not a copy
     594             : }
     595             : 
     596             : static void // Set the initial unclean/unseen state of a newly created node
     597           0 : handle_startup_fencing(pcmk_scheduler_t *scheduler, pcmk_node_t *new_node) // scheduler: supplies the startup-fencing flag; new_node: node whose details are updated
     598             : {
     599           0 :     if ((new_node->details->type == pcmk_node_variant_remote)
     600           0 :         && (new_node->details->remote_rsc == NULL)) {
     601             :         /* Ignore fencing for remote nodes that don't have a connection resource
     602             :          * associated with them. This happens when remote node entries get left
     603             :          * in the nodes section after the connection resource is removed.
     604             :          */
     605           0 :         return; // Neither unclean nor unseen is touched in this case
     606             :     }
     607             : 
     608           0 :     if (pcmk_is_set(scheduler->flags, pcmk_sched_startup_fencing)) {
     609             :         // All nodes are unclean until we've seen their status entry
     610           0 :         new_node->details->unclean = TRUE;
     611             : 
     612             :     } else {
     613             :         // Startup fencing is disabled: take it on faith that the node is clean
     614           0 :         new_node->details->unclean = FALSE;
     615             :     }
     616             : 
     617             :     /* We need to be able to determine whether a node's status section
     618             :      * exists or not, separately from whether the node is unclean. */
     619           0 :     new_node->details->unseen = TRUE;
     620             : }
     621             : 
     622             : gboolean // Create a node object for every PCMK_XE_NODE child of the given XML
     623           0 : unpack_nodes(xmlNode *xml_nodes, pcmk_scheduler_t *scheduler) // xml_nodes: parent of the node entries; scheduler: scheduler data that receives the new nodes
     624             : { // Returns FALSE as soon as pe_create_node() fails for an entry, otherwise TRUE
     625           0 :     xmlNode *xml_obj = NULL;
     626           0 :     pcmk_node_t *new_node = NULL;
     627           0 :     const char *id = NULL;
     628           0 :     const char *uname = NULL;
     629           0 :     const char *type = NULL;
     630           0 :     const char *score = NULL;
     631             : 
     632           0 :     for (xml_obj = pcmk__xe_first_child(xml_nodes, NULL, NULL, NULL);
     633           0 :          xml_obj != NULL; xml_obj = pcmk__xe_next(xml_obj)) {
     634             : 
     635           0 :         if (pcmk__xe_is(xml_obj, PCMK_XE_NODE)) { // Children of any other element type are silently skipped
     636           0 :             new_node = NULL;
     637             : 
     638           0 :             id = crm_element_value(xml_obj, PCMK_XA_ID);
     639           0 :             uname = crm_element_value(xml_obj, PCMK_XA_UNAME);
     640           0 :             type = crm_element_value(xml_obj, PCMK_XA_TYPE);
     641           0 :             score = crm_element_value(xml_obj, PCMK_XA_SCORE);
     642           0 :             crm_trace("Processing node %s/%s", uname, id);
     643             : 
     644           0 :             if (id == NULL) { // An entry without an ID is reported and skipped; it does not abort unpacking
     645           0 :                 pcmk__config_err("Ignoring <" PCMK_XE_NODE
     646             :                                  "> entry in configuration without id");
     647           0 :                 continue;
     648             :             }
     649           0 :             new_node = pe_create_node(id, uname, type, score, scheduler);
     650             : 
     651           0 :             if (new_node == NULL) {
     652           0 :                 return FALSE;
     653             :             }
     654             : 
     655           0 :             handle_startup_fencing(scheduler, new_node);
     656             : 
     657           0 :             add_node_attrs(xml_obj, new_node, FALSE, scheduler);
     658             : 
     659           0 :             crm_trace("Done with node %s",
     660             :                       crm_element_value(xml_obj, PCMK_XA_UNAME));
     661             :         }
     662             :     }
     663             : 
     664           0 :     if (scheduler->localhost
     665           0 :         && (pcmk_find_node(scheduler, scheduler->localhost) == NULL)) { // A local host name is set but no configured node matches it
     666           0 :         crm_info("Creating a fake local node");
     667           0 :         pe_create_node(scheduler->localhost, scheduler->localhost, NULL, NULL, // No type and no score string: use NULL, not integer 0, for the pointer argument
     668             :                        scheduler);
     669             :     }
     670             : 
     671           0 :     return TRUE;
     672             : }
     673             : 
     674             : static void // Link a resource to the container resource named by its PCMK__META_CONTAINER meta-attribute
     675           0 : setup_container(pcmk_resource_t *rsc, pcmk_scheduler_t *scheduler) // rsc: resource to process; scheduler: scheduler data whose resource list is searched
     676             : {
     677           0 :     const char *container_id = NULL;
     678             : 
     679           0 :     if (rsc->children) { // A resource with children is not linked itself; each child is processed instead
     680           0 :         g_list_foreach(rsc->children, (GFunc) setup_container, scheduler);
     681           0 :         return;
     682             :     }
     683             : 
     684           0 :     container_id = g_hash_table_lookup(rsc->meta, PCMK__META_CONTAINER);
     685           0 :     if (container_id && !pcmk__str_eq(container_id, rsc->id, pcmk__str_casei)) { // Skip a resource that names itself (compared case-insensitively)
     686           0 :         pcmk_resource_t *container = pe_find_resource(scheduler->resources,
     687             :                                                       container_id);
     688             : 
     689           0 :         if (container) {
     690           0 :             rsc->container = container; // Link in both directions: resource -> container, container -> fillers
     691           0 :             pcmk__set_rsc_flags(container, pcmk_rsc_has_filler);
     692           0 :             container->fillers = g_list_append(container->fillers, rsc);
     693           0 :             pcmk__rsc_trace(rsc, "Resource %s's container is %s",
     694             :                             rsc->id, container_id);
     695             :         } else {
     696           0 :             pcmk__config_err("Resource %s: Unknown resource container (%s)",
     697             :                              rsc->id, container_id);
     698             :         }
     699             :     }
     700             : }
     701             : 
     702             : gboolean // Create node objects for remote and guest nodes defined in the resource configuration
     703           0 : unpack_remote_nodes(xmlNode *xml_resources, pcmk_scheduler_t *scheduler) // xml_resources: top of the resource XML (guest connection XML is added under it); scheduler: scheduler data
     704             : { // Always returns TRUE; the results of pe_create_node() are not checked here
     705           0 :     xmlNode *xml_obj = NULL;
     706             : 
     707             :     /* Create remote nodes and guest nodes from the resource configuration
     708             :      * before unpacking resources.
     709             :      */
     710           0 :     for (xml_obj = pcmk__xe_first_child(xml_resources, NULL, NULL, NULL);
     711           0 :          xml_obj != NULL; xml_obj = pcmk__xe_next(xml_obj)) {
     712             : 
     713           0 :         const char *new_node_id = NULL;
     714             : 
     715             :         /* Check for remote nodes, which are defined by ocf:pacemaker:remote
     716             :          * primitives.
     717             :          */
     718           0 :         if (xml_contains_remote_node(xml_obj)) {
     719           0 :             new_node_id = pcmk__xe_id(xml_obj); // The connection resource's ID doubles as the node's ID and name
     720             :             /* The pcmk_find_node() check ensures we don't iterate over an
     721             :              * expanded node that has already been added to the node list
     722             :              */
     723           0 :             if (new_node_id
     724           0 :                 && (pcmk_find_node(scheduler, new_node_id) == NULL)) {
     725           0 :                 crm_trace("Found remote node %s defined by resource %s",
     726             :                           new_node_id, pcmk__xe_id(xml_obj));
     727           0 :                 pe_create_node(new_node_id, new_node_id, PCMK_VALUE_REMOTE,
     728             :                                NULL, scheduler);
     729             :             }
     730           0 :             continue;
     731             :         }
     732             : 
     733             :         /* Check for guest nodes, which are defined by special meta-attributes
     734             :          * of a primitive of any type (for example, VirtualDomain or Xen).
     735             :          */
     736           0 :         if (pcmk__xe_is(xml_obj, PCMK_XE_PRIMITIVE)) {
     737             :             /* This will add an ocf:pacemaker:remote primitive to the
     738             :              * configuration for the guest node's connection, to be unpacked
     739             :              * later.
     740             :              */
     741           0 :             new_node_id = expand_remote_rsc_meta(xml_obj, xml_resources,
     742             :                                                  scheduler);
     743           0 :             if (new_node_id
     744           0 :                 && (pcmk_find_node(scheduler, new_node_id) == NULL)) {
     745           0 :                 crm_trace("Found guest node %s in resource %s",
     746             :                           new_node_id, pcmk__xe_id(xml_obj));
     747           0 :                 pe_create_node(new_node_id, new_node_id, PCMK_VALUE_REMOTE, // Guest nodes are created with the same node type as remote nodes
     748             :                                NULL, scheduler);
     749             :             }
     750           0 :             continue;
     751             :         }
     752             : 
     753             :         /* Check for guest nodes inside a group. Clones are currently not
     754             :          * supported as guest nodes.
     755             :          */
     756           0 :         if (pcmk__xe_is(xml_obj, PCMK_XE_GROUP)) {
     757           0 :             xmlNode *xml_obj2 = NULL;
     758           0 :             for (xml_obj2 = pcmk__xe_first_child(xml_obj, NULL, NULL, NULL);
     759           0 :                  xml_obj2 != NULL; xml_obj2 = pcmk__xe_next(xml_obj2)) { // Every child of the group is scanned; its element type is not checked first
     760             : 
     761           0 :                 new_node_id = expand_remote_rsc_meta(xml_obj2, xml_resources,
     762             :                                                      scheduler);
     763             : 
     764           0 :                 if (new_node_id
     765           0 :                     && (pcmk_find_node(scheduler, new_node_id) == NULL)) {
     766           0 :                     crm_trace("Found guest node %s in resource %s inside group %s",
     767             :                               new_node_id, pcmk__xe_id(xml_obj2),
     768             :                               pcmk__xe_id(xml_obj));
     769           0 :                     pe_create_node(new_node_id, new_node_id, PCMK_VALUE_REMOTE,
     770             :                                    NULL, scheduler);
     771             :                 }
     772             :             }
     773             :         }
     774             :     }
     775           0 :     return TRUE;
     776             : }
     777             : 
     778             : /* Call this after all the nodes and resources have been
     779             :  * unpacked, but before the status section is read.
     780             :  *
     781             :  * A remote node's online status is reflected by the state
     782             :  * of the remote node's connection resource. We need to link
     783             :  * the remote node to this connection resource so we can have
     784             :  * easy access to the connection resource during the scheduler calculations.
     785             :  */
     786             : static void
     787           0 : link_rsc2remotenode(pcmk_scheduler_t *scheduler, pcmk_resource_t *new_rsc) // scheduler: scheduler data used for the node lookup; new_rsc: candidate connection resource
     788             : {
     789           0 :     pcmk_node_t *remote_node = NULL;
     790             : 
     791           0 :     if (new_rsc->is_remote_node == FALSE) { // Not a connection resource, so there is no node to link
     792           0 :         return;
     793             :     }
     794             : 
     795           0 :     if (pcmk_is_set(scheduler->flags, pcmk_sched_location_only)) {
     796             :         /* Remote nodes and their connection resources are not linked in quick location calculations */
     797           0 :         return;
     798             :     }
     799             : 
     800           0 :     remote_node = pcmk_find_node(scheduler, new_rsc->id); // The node is looked up by the connection resource's ID
     801           0 :     CRM_CHECK(remote_node != NULL, return);
     802             : 
     803           0 :     pcmk__rsc_trace(new_rsc, "Linking remote connection resource %s to %s",
     804             :                     new_rsc->id, pcmk__node_name(remote_node));
     805           0 :     remote_node->details->remote_rsc = new_rsc;
     806             : 
     807           0 :     if (new_rsc->container == NULL) {
     808             :         /* Handle start-up fencing for remote nodes (as opposed to guest nodes)
     809             :          * the same as is done for cluster nodes.
     810             :          */
     811           0 :         handle_startup_fencing(scheduler, remote_node);
     812             : 
     813             :     } else {
     814             :         /* pe_create_node() marks the new node as "remote" or "cluster"; now
     815             :          * that we know the node is a guest node, update it correctly.
     816             :          */
     817           0 :         pcmk__insert_dup(remote_node->details->attrs,
     818             :                          CRM_ATTR_KIND, "container");
     819             :     }
     820             : }
     821             : 
     822             : static void // Free a pcmk_tag_t; passed as the value destructor to pcmk__strkey_table() in this file
     823           0 : destroy_tag(gpointer data) // data: pcmk_tag_t to free (NULL is accepted and ignored)
     824             : {
     825           0 :     pcmk_tag_t *tag = data;
     826             : 
     827           0 :     if (tag) {
     828           0 :         free(tag->id);
     829           0 :         g_list_free_full(tag->refs, free); // Frees each list element's data with free(), then the list itself
     830           0 :         free(tag);
     831             :     }
     832           0 : }
     833             : 
     834             : /*!
     835             :  * \internal
     836             :  * \brief Parse configuration XML for resource information
     837             :  *
     838             :  * \param[in]     xml_resources  Top of resource configuration XML
     839             :  * \param[in,out] scheduler      Scheduler data
     840             :  *
     841             :  * \return TRUE
     842             :  *
     843             :  * \note unpack_remote_nodes() MUST be called before this, so that the nodes can
     844             :  *       be used when pe__unpack_resource() calls resource_location()
     845             :  */
     846             : gboolean
     847           0 : unpack_resources(const xmlNode *xml_resources, pcmk_scheduler_t *scheduler)
     848             : {
     849           0 :     xmlNode *xml_obj = NULL;
     850           0 :     GList *gIter = NULL;
     851             : 
     852           0 :     scheduler->template_rsc_sets = pcmk__strkey_table(free, destroy_tag);
     853             : 
     854           0 :     for (xml_obj = pcmk__xe_first_child(xml_resources, NULL, NULL, NULL);
     855           0 :          xml_obj != NULL; xml_obj = pcmk__xe_next(xml_obj)) {
     856             : 
     857           0 :         pcmk_resource_t *new_rsc = NULL;
     858           0 :         const char *id = pcmk__xe_id(xml_obj);
     859             : 
     860           0 :         if (pcmk__str_empty(id)) { // An entry without a usable ID is reported and skipped
     861           0 :             pcmk__config_err("Ignoring <%s> resource without ID",
     862             :                              xml_obj->name);
     863           0 :             continue;
     864             :         }
     865             : 
     866           0 :         if (pcmk__xe_is(xml_obj, PCMK_XE_TEMPLATE)) { // Templates are only recorded here, never unpacked as resources
     867           0 :             if (g_hash_table_lookup_extended(scheduler->template_rsc_sets, id,
     868             :                                              NULL, NULL) == FALSE) {
     869             :                 /* Record the template's ID (with no value) so that its existence is known. */
     870           0 :                 pcmk__insert_dup(scheduler->template_rsc_sets, id, NULL);
     871             :             }
     872           0 :             continue;
     873             :         }
     874             : 
     875           0 :         crm_trace("Unpacking <%s " PCMK_XA_ID "='%s'>", xml_obj->name, id);
     876           0 :         if (pe__unpack_resource(xml_obj, &new_rsc, NULL,
     877             :                                 scheduler) == pcmk_rc_ok) {
     878           0 :             scheduler->resources = g_list_append(scheduler->resources, new_rsc);
     879           0 :             pcmk__rsc_trace(new_rsc, "Added resource %s", new_rsc->id);
     880             : 
     881             :         } else {
     882           0 :             pcmk__config_err("Ignoring <%s> resource '%s' "
     883             :                              "because configuration is invalid",
     884             :                              xml_obj->name, id);
     885             :         }
     886             :     }
     887             : 
     888           0 :     for (gIter = scheduler->resources; gIter != NULL; gIter = gIter->next) { // Second pass: link containers and remote nodes once every resource exists
     889           0 :         pcmk_resource_t *rsc = (pcmk_resource_t *) gIter->data;
     890             : 
     891           0 :         setup_container(rsc, scheduler);
     892           0 :         link_rsc2remotenode(scheduler, rsc);
     893             :     }
     894             : 
     895           0 :     scheduler->resources = g_list_sort(scheduler->resources,
     896             :                                       pe__cmp_rsc_priority);
     897           0 :     if (pcmk_is_set(scheduler->flags, pcmk_sched_location_only)) {
     898             :         /* Skip the fencing configuration check for location-only calculations */
     899             : 
     900           0 :     } else if (pcmk_is_set(scheduler->flags, pcmk_sched_fencing_enabled)
     901           0 :                && !pcmk_is_set(scheduler->flags, pcmk_sched_have_fencing)) { // Fencing is enabled, but the have-fencing flag was never set
     902             : 
     903           0 :         pcmk__config_err("Resource start-up disabled since no STONITH resources have been defined");
     904           0 :         pcmk__config_err("Either configure some or disable STONITH with the "
     905             :                          PCMK_OPT_STONITH_ENABLED " option");
     906           0 :         pcmk__config_err("NOTE: Clusters with shared data need STONITH to ensure data integrity");
     907             :     }
     908             : 
     909           0 :     return TRUE;
     910             : }
     911             : 
     912             : gboolean // Build the scheduler's tag table from the PCMK_XE_TAG children of the given XML
     913           0 : unpack_tags(xmlNode *xml_tags, pcmk_scheduler_t *scheduler) // xml_tags: parent of the tag entries; scheduler: scheduler data whose tags table is (re)created
     914             : { // Returns FALSE as soon as add_tag_ref() fails, otherwise TRUE
     915           0 :     xmlNode *xml_tag = NULL;
     916             : 
     917           0 :     scheduler->tags = pcmk__strkey_table(free, destroy_tag);
     918             : 
     919           0 :     for (xml_tag = pcmk__xe_first_child(xml_tags, NULL, NULL, NULL);
     920           0 :          xml_tag != NULL; xml_tag = pcmk__xe_next(xml_tag)) {
     921             : 
     922           0 :         xmlNode *xml_obj_ref = NULL;
     923           0 :         const char *tag_id = pcmk__xe_id(xml_tag);
     924             : 
     925           0 :         if (!pcmk__xe_is(xml_tag, PCMK_XE_TAG)) { // Elements of other types are skipped without comment
     926           0 :             continue;
     927             :         }
     928             : 
     929           0 :         if (tag_id == NULL) {
     930           0 :             pcmk__config_err("Ignoring <%s> without " PCMK_XA_ID,
     931             :                              (const char *) xml_tag->name);
     932           0 :             continue;
     933             :         }
     934             : 
     935           0 :         for (xml_obj_ref = pcmk__xe_first_child(xml_tag, NULL, NULL, NULL);
     936           0 :              xml_obj_ref != NULL; xml_obj_ref = pcmk__xe_next(xml_obj_ref)) {
     937             : 
     938           0 :             const char *obj_ref = pcmk__xe_id(xml_obj_ref); // The referenced object's ID is the reference element's own ID
     939             : 
     940           0 :             if (!pcmk__xe_is(xml_obj_ref, PCMK_XE_OBJ_REF)) {
     941           0 :                 continue;
     942             :             }
     943             : 
     944           0 :             if (obj_ref == NULL) {
     945           0 :                 pcmk__config_err("Ignoring <%s> for tag '%s' without " PCMK_XA_ID,
     946             :                                  xml_obj_ref->name, tag_id);
     947           0 :                 continue;
     948             :             }
     949             : 
     950           0 :             if (add_tag_ref(scheduler->tags, tag_id, obj_ref) == FALSE) { // Tags and references already added stay in the table
     951           0 :                 return FALSE;
     952             :             }
     953             :         }
     954             :     }
     955             : 
     956           0 :     return TRUE;
     957             : }
     958             : 
     959             : /* The ticket state section:
     960             :  * "/cib/status/tickets/ticket_state" */
     961             : static gboolean // Load one ticket state entry into the scheduler's ticket table
     962           0 : unpack_ticket_state(xmlNode *xml_ticket, pcmk_scheduler_t *scheduler) // xml_ticket: ticket state element; scheduler: scheduler data holding the tickets table
     963             : { // Returns FALSE if the entry has no ID or a ticket could not be created, otherwise TRUE
     964           0 :     const char *ticket_id = NULL;
     965           0 :     const char *granted = NULL;
     966           0 :     const char *last_granted = NULL;
     967           0 :     const char *standby = NULL;
     968           0 :     xmlAttrPtr xIter = NULL;
     969             : 
     970           0 :     pcmk_ticket_t *ticket = NULL;
     971             : 
     972           0 :     ticket_id = pcmk__xe_id(xml_ticket);
     973           0 :     if (pcmk__str_empty(ticket_id)) {
     974           0 :         return FALSE;
     975             :     }
     976             : 
     977           0 :     crm_trace("Processing ticket state for %s", ticket_id);
     978             : 
     979           0 :     ticket = g_hash_table_lookup(scheduler->tickets, ticket_id);
     980           0 :     if (ticket == NULL) { // First time this ticket is seen: create it
     981           0 :         ticket = ticket_new(ticket_id, scheduler);
     982           0 :         if (ticket == NULL) {
     983           0 :             return FALSE;
     984             :         }
     985             :     }
     986             : 
     987           0 :     for (xIter = xml_ticket->properties; xIter; xIter = xIter->next) { // Copy every XML attribute except the ID into the ticket's state table
     988           0 :         const char *prop_name = (const char *)xIter->name;
     989           0 :         const char *prop_value = pcmk__xml_attr_value(xIter);
     990             : 
     991           0 :         if (pcmk__str_eq(prop_name, PCMK_XA_ID, pcmk__str_none)) {
     992           0 :             continue;
     993             :         }
     994           0 :         pcmk__insert_dup(ticket->state, prop_name, prop_value);
     995             :     }
     996             : 
     997           0 :     granted = g_hash_table_lookup(ticket->state, PCMK__XA_GRANTED);
     998           0 :     if (granted && crm_is_true(granted)) { // A missing or non-true value means the ticket is not granted
     999           0 :         ticket->granted = TRUE;
    1000           0 :         crm_info("We have ticket '%s'", ticket->id);
    1001             :     } else {
    1002           0 :         ticket->granted = FALSE;
    1003           0 :         crm_info("We do not have ticket '%s'", ticket->id);
    1004             :     }
    1005             : 
    1006           0 :     last_granted = g_hash_table_lookup(ticket->state, PCMK_XA_LAST_GRANTED);
    1007           0 :     if (last_granted) { // ticket->last_granted is left untouched when the attribute is absent
    1008             :         long long last_granted_ll;
    1009             : 
    1010           0 :         pcmk__scan_ll(last_granted, &last_granted_ll, 0LL); // NOTE(review): 0LL is presumably the fallback when parsing fails; the return value is not checked
    1011           0 :         ticket->last_granted = (time_t) last_granted_ll;
    1012             :     }
    1013             : 
    1014           0 :     standby = g_hash_table_lookup(ticket->state, PCMK_XA_STANDBY);
    1015           0 :     if (standby && crm_is_true(standby)) {
    1016           0 :         ticket->standby = TRUE;
    1017           0 :         if (ticket->granted) { // Standby is only logged for a ticket that is currently granted
    1018           0 :             crm_info("Granted ticket '%s' is in standby-mode", ticket->id);
    1019             :         }
    1020             :     } else {
    1021           0 :         ticket->standby = FALSE;
    1022             :     }
    1023             : 
    1024           0 :     crm_trace("Done with ticket state for %s", ticket_id);
    1025             : 
    1026           0 :     return TRUE;
    1027             : }
    1028             : 
    1029             : static gboolean // Unpack every PCMK__XE_TICKET_STATE child of the given XML
    1030           0 : unpack_tickets_state(xmlNode *xml_tickets, pcmk_scheduler_t *scheduler) // xml_tickets: parent of the ticket state entries; scheduler: scheduler data
    1031             : { // Always returns TRUE
    1032           0 :     xmlNode *xml_obj = NULL;
    1033             : 
    1034           0 :     for (xml_obj = pcmk__xe_first_child(xml_tickets, NULL, NULL, NULL);
    1035           0 :          xml_obj != NULL; xml_obj = pcmk__xe_next(xml_obj)) {
    1036             : 
    1037           0 :         if (!pcmk__xe_is(xml_obj, PCMK__XE_TICKET_STATE)) { // Children of any other element type are skipped
    1038           0 :             continue;
    1039             :         }
    1040           0 :         unpack_ticket_state(xml_obj, scheduler); // The result is not checked, so one bad entry does not stop the loop
    1041             :     }
    1042             : 
    1043           0 :     return TRUE;
    1044             : }
    1045             : // Unpack node state attributes and derived flags for a Pacemaker Remote node
    1046             : static void
    1047           0 : unpack_handle_remote_attrs(pcmk_node_t *this_node, const xmlNode *state,
    1048             :                            pcmk_scheduler_t *scheduler)
    1049             : {
    1050           0 :     const char *discovery = NULL;
    1051           0 :     const xmlNode *attrs = NULL;
    1052           0 :     pcmk_resource_t *rsc = NULL;
    1053             :     // Do nothing unless given a node state element and a Pacemaker Remote node
    1054           0 :     if (!pcmk__xe_is(state, PCMK__XE_NODE_STATE)) {
    1055           0 :         return;
    1056             :     }
    1057             : 
    1058           0 :     if ((this_node == NULL) || !pcmk__is_pacemaker_remote_node(this_node)) {
    1059           0 :         return;
    1060             :     }
    1061           0 :     crm_trace("Processing Pacemaker Remote node %s",
    1062             :               pcmk__node_name(this_node));
    1063             :     // Scan the node state's in-maintenance attribute into remote_maintenance
    1064           0 :     pcmk__scan_min_int(crm_element_value(state, PCMK__XA_NODE_IN_MAINTENANCE),
    1065           0 :                        &(this_node->details->remote_maintenance), 0);
    1066             :     // Unless a reset is required, the node is no longer unclean or unseen
    1067           0 :     rsc = this_node->details->remote_rsc;
    1068           0 :     if (this_node->details->remote_requires_reset == FALSE) {
    1069           0 :         this_node->details->unclean = FALSE;
    1070           0 :         this_node->details->unseen = FALSE;
    1071             :     }
    1072           0 :     attrs = pcmk__xe_first_child(state, PCMK__XE_TRANSIENT_ATTRIBUTES, NULL,
    1073             :                                  NULL);
    1074           0 :     add_node_attrs(attrs, this_node, TRUE, scheduler);
    1075             :     // Set the shutdown, standby, and maintenance flags when the checks below say so
    1076           0 :     if (pe__shutdown_requested(this_node)) {
    1077           0 :         crm_info("%s is shutting down", pcmk__node_name(this_node));
    1078           0 :         this_node->details->shutdown = TRUE;
    1079             :     }
    1080             : 
    1081           0 :     if (crm_is_true(pcmk__node_attr(this_node, PCMK_NODE_ATTR_STANDBY, NULL,
    1082             :                                     pcmk__rsc_node_current))) {
    1083           0 :         crm_info("%s is in standby mode", pcmk__node_name(this_node));
    1084           0 :         this_node->details->standby = TRUE;
    1085             :     }
    1086             :     // Maintenance also applies when the node's remote_rsc is not managed
    1087           0 :     if (crm_is_true(pcmk__node_attr(this_node, PCMK_NODE_ATTR_MAINTENANCE, NULL,
    1088             :                                     pcmk__rsc_node_current))
    1089           0 :         || ((rsc != NULL) && !pcmk_is_set(rsc->flags, pcmk_rsc_managed))) {
    1090           0 :         crm_info("%s is in maintenance mode", pcmk__node_name(this_node));
    1091           0 :         this_node->details->maintenance = TRUE;
    1092             :     }
    1093             :     // A non-true discovery attribute is honored except for remote nodes without fencing
    1094           0 :     discovery = pcmk__node_attr(this_node,
    1095             :                                 PCMK__NODE_ATTR_RESOURCE_DISCOVERY_ENABLED,
    1096             :                                 NULL, pcmk__rsc_node_current);
    1097           0 :     if ((discovery != NULL) && !crm_is_true(discovery)) {
    1098           0 :         pcmk__warn_once(pcmk__wo_rdisc_enabled,
    1099             :                         "Support for the "
    1100             :                         PCMK__NODE_ATTR_RESOURCE_DISCOVERY_ENABLED
    1101             :                         " node attribute is deprecated and will be removed"
    1102             :                         " (and behave as 'true') in a future release.");
    1103             : 
    1104           0 :         if (pcmk__is_remote_node(this_node)
    1105           0 :             && !pcmk_is_set(scheduler->flags, pcmk_sched_fencing_enabled)) {
    1106           0 :             pcmk__config_warn("Ignoring "
    1107             :                               PCMK__NODE_ATTR_RESOURCE_DISCOVERY_ENABLED
    1108             :                               " attribute on Pacemaker Remote node %s"
    1109             :                               " because fencing is disabled",
    1110             :                               pcmk__node_name(this_node));
    1111             :         } else {
    1112             :             /* This is either a remote node with fencing enabled, or a guest
    1113             :              * node. We don't care whether fencing is enabled when fencing guest
    1114             :              * nodes, because they are "fenced" by recovering their containing
    1115             :              * resource.
    1116             :              */
    1117           0 :             crm_info("%s has resource discovery disabled",
    1118             :                      pcmk__node_name(this_node));
    1119           0 :             this_node->details->rsc_discovery_enabled = FALSE;
    1120             :         }
    1121             :     }
    1122             : }
    1123             : 
    1124             : /*!
    1125             :  * \internal
    1126             :  * \brief Unpack a cluster node's transient attributes
    1127             :  * (also sets the node's standby and maintenance flags when those are true)
    1128             :  * \param[in]     state      CIB node state XML
    1129             :  * \param[in,out] node       Cluster node whose attributes are being unpacked
    1130             :  * \param[in,out] scheduler  Scheduler data
    1131             :  */
    1132             : static void
    1133           0 : unpack_transient_attributes(const xmlNode *state, pcmk_node_t *node,
    1134             :                             pcmk_scheduler_t *scheduler)
    1135             : {
    1136           0 :     const char *discovery = NULL;
    1137           0 :     const xmlNode *attrs = pcmk__xe_first_child(state,
    1138             :                                                 PCMK__XE_TRANSIENT_ATTRIBUTES,
    1139             :                                                 NULL, NULL);
    1140             : 
    1141           0 :     add_node_attrs(attrs, node, TRUE, scheduler);
    1142             : 
    1143           0 :     if (crm_is_true(pcmk__node_attr(node, PCMK_NODE_ATTR_STANDBY, NULL,
    1144             :                                     pcmk__rsc_node_current))) {
    1145           0 :         crm_info("%s is in standby mode", pcmk__node_name(node));
    1146           0 :         node->details->standby = TRUE;
    1147             :     }
    1148             : 
    1149           0 :     if (crm_is_true(pcmk__node_attr(node, PCMK_NODE_ATTR_MAINTENANCE, NULL,
    1150             :                                     pcmk__rsc_node_current))) {
    1151           0 :         crm_info("%s is in maintenance mode", pcmk__node_name(node));
    1152           0 :         node->details->maintenance = TRUE;
    1153             :     }
    1154             :     // For cluster nodes, a non-true discovery attribute only produces a warning
    1155           0 :     discovery = pcmk__node_attr(node,
    1156             :                                 PCMK__NODE_ATTR_RESOURCE_DISCOVERY_ENABLED,
    1157             :                                 NULL, pcmk__rsc_node_current);
    1158           0 :     if ((discovery != NULL) && !crm_is_true(discovery)) {
    1159           0 :         pcmk__config_warn("Ignoring "
    1160             :                           PCMK__NODE_ATTR_RESOURCE_DISCOVERY_ENABLED
    1161             :                           " attribute for %s because disabling resource"
    1162             :                           " discovery is not allowed for cluster nodes",
    1163             :                           pcmk__node_name(node));
    1164             :     }
    1165           0 : }
    1166             : 
    1167             : /*!
    1168             :  * \internal
    1169             :  * \brief Unpack a node state entry (first pass)
    1170             :  *
    1171             :  * Unpack one node state entry from status. This unpacks information from the
    1172             :  * \c PCMK__XE_NODE_STATE element itself and node attributes inside it, but not
    1173             :  * the resource history inside it. Multiple passes through the status are needed
    1174             :  * to fully unpack everything.
    1175             :  *
    1176             :  * \param[in]     state      CIB node state XML
    1177             :  * \param[in,out] scheduler  Scheduler data
    1178             :  */
    1179             : static void
    1180           0 : unpack_node_state(const xmlNode *state, pcmk_scheduler_t *scheduler)
    1181             : {
    1182           0 :     const char *id = NULL;
    1183           0 :     const char *uname = NULL;
    1184           0 :     pcmk_node_t *this_node = NULL;
    1185             : 
    1186           0 :     id = crm_element_value(state, PCMK_XA_ID);
    1187           0 :     if (id == NULL) {
    1188           0 :         pcmk__config_err("Ignoring invalid " PCMK__XE_NODE_STATE " entry without "
    1189             :                          PCMK_XA_ID);
    1190           0 :         crm_log_xml_info(state, "missing-id");
    1191           0 :         return;
    1192             :     }
    1193             : 
    1194           0 :     uname = crm_element_value(state, PCMK_XA_UNAME);
    1195           0 :     if (uname == NULL) {
    1196             :         /* If a joining peer makes the cluster acquire the quorum from corosync
    1197             :          * meanwhile it has not joined CPG membership of pacemaker-controld yet,
    1198             :          * it's possible that the created PCMK__XE_NODE_STATE entry doesn't have
    1199             :          * a PCMK_XA_UNAME yet. We should recognize the node as `pending` and
    1200             :          * wait for it to join CPG.
    1201             :          */
    1202           0 :         crm_trace("Handling " PCMK__XE_NODE_STATE " entry with id=\"%s\" "
    1203             :                   "without " PCMK_XA_UNAME,
    1204             :                   id);
    1205             :     }
    1206             : 
    1207           0 :     this_node = pe_find_node_any(scheduler->nodes, id, uname);
    1208           0 :     if (this_node == NULL) {
    1209           0 :         crm_notice("Ignoring recorded state for removed node with name %s and "
    1210             :                    PCMK_XA_ID " %s", pcmk__s(uname, "unknown"), id);
    1211           0 :         return;
    1212             :     }
    1213             : 
    1214           0 :     if (pcmk__is_pacemaker_remote_node(this_node)) {
    1215             :         /* We can't determine the online status of Pacemaker Remote nodes until
    1216             :          * after all resource history has been unpacked. In this first pass, we
    1217             :          * do need to mark whether the node has been fenced, as this plays a
    1218             :          * role during unpacking cluster node resource state.
    1219             :          */
    1220           0 :         pcmk__scan_min_int(crm_element_value(state, PCMK__XA_NODE_FENCED),
    1221           0 :                            &(this_node->details->remote_was_fenced), 0);
    1222           0 :         return;
    1223             :     }
    1224             : 
    1225           0 :     unpack_transient_attributes(state, this_node, scheduler);
    1226             : 
    1227             :     /* Provisionally mark this cluster node as clean. We have at least seen it
    1228             :      * in the current cluster's lifetime.
    1229             :      */
    1230           0 :     this_node->details->unclean = FALSE;
    1231           0 :     this_node->details->unseen = FALSE;
    1232             : 
    1233           0 :     crm_trace("Determining online status of cluster node %s (id %s)",
    1234             :               pcmk__node_name(this_node), id);
    1235           0 :     determine_online_status(state, this_node, scheduler);
    1236             :     // Fence online nodes if the cluster lacks quorum and the policy is to fence
    1237           0 :     if (!pcmk_is_set(scheduler->flags, pcmk_sched_quorate)
    1238           0 :         && this_node->details->online
    1239           0 :         && (scheduler->no_quorum_policy == pcmk_no_quorum_fence)) {
    1240             :         /* Everything else should flow from this automatically
    1241             :          * (at least until the scheduler becomes able to migrate off
    1242             :          * healthy resources)
    1243             :          */
    1244           0 :         pe_fence_node(scheduler, this_node, "cluster does not have quorum",
    1245             :                       FALSE);
    1246             :     }
    1247             : }
    1248             : 
    1249             : /*!
    1250             :  * \internal
    1251             :  * \brief Unpack nodes' resource history as much as possible
    1252             :  *
    1253             :  * Unpack as many nodes' resource history as possible in one pass through the
    1254             :  * status. We need to process Pacemaker Remote nodes' connections/containers
    1255             :  * before unpacking their history; the connection/container history will be
    1256             :  * in another node's history, so it might take multiple passes to unpack
    1257             :  * everything.
    1258             :  *
    1259             :  * \param[in]     status     CIB XML status section
    1260             :  * \param[in]     fence      If true, treat any not-yet-unpacked nodes as unseen
    1261             :  * \param[in,out] scheduler  Scheduler data
    1262             :  *
    1263             :  * \return Standard Pacemaker return code (specifically pcmk_rc_ok if done,
    1264             :  *         or EAGAIN if more unpacking remains to be done)
    1265             :  */
    1266             : static int
    1267           0 : unpack_node_history(const xmlNode *status, bool fence,
    1268             :                     pcmk_scheduler_t *scheduler)
    1269             : {
    1270           0 :     int rc = pcmk_rc_ok;
    1271             : 
    1272             :     // Loop through all PCMK__XE_NODE_STATE entries in CIB status
    1273           0 :     for (const xmlNode *state = pcmk__xe_first_child(status,
    1274             :                                                      PCMK__XE_NODE_STATE, NULL,
    1275             :                                                      NULL);
    1276           0 :          state != NULL; state = pcmk__xe_next_same(state)) {
    1277             : 
    1278           0 :         const char *id = pcmk__xe_id(state);
    1279           0 :         const char *uname = crm_element_value(state, PCMK_XA_UNAME);
    1280           0 :         pcmk_node_t *this_node = NULL;
    1281             : 
    1282           0 :         if ((id == NULL) || (uname == NULL)) {
    1283             :             // Missing ID was reported in the first pass (missing uname only traced)
    1284           0 :             crm_trace("Not unpacking resource history from malformed "
    1285             :                       PCMK__XE_NODE_STATE " without id and/or uname");
    1286           0 :             continue;
    1287             :         }
    1288             : 
    1289           0 :         this_node = pe_find_node_any(scheduler->nodes, id, uname);
    1290           0 :         if (this_node == NULL) {
    1291             :             // Warning already logged in first pass through status section
    1292           0 :             crm_trace("Not unpacking resource history for node %s because "
    1293             :                       "no longer in configuration", id);
    1294           0 :             continue;
    1295             :         }
    1296             : 
    1297           0 :         if (this_node->details->unpacked) {
    1298           0 :             crm_trace("Not unpacking resource history for node %s because "
    1299             :                       "already unpacked", id);
    1300           0 :             continue;
    1301             :         }
    1302             : 
    1303           0 :         if (fence) {
    1304             :             // We're processing all remaining nodes
    1305             : 
    1306           0 :         } else if (pcmk__is_guest_or_bundle_node(this_node)) {
    1307             :             /* We can unpack a guest node's history only after we've unpacked
    1308             :              * other resource history to the point that we know that the node's
    1309             :              * connection and containing resource are both up.
    1310             :              */
    1311           0 :             pcmk_resource_t *rsc = this_node->details->remote_rsc;
    1312             : 
    1313           0 :             if ((rsc == NULL) || (rsc->role != pcmk_role_started)
    1314           0 :                 || (rsc->container->role != pcmk_role_started)) {
    1315           0 :                 crm_trace("Not unpacking resource history for guest node %s "
    1316             :                           "because container and connection are not known to "
    1317             :                           "be up", id);
    1318           0 :                 continue;
    1319             :             }
    1320             : 
    1321           0 :         } else if (pcmk__is_remote_node(this_node)) {
    1322             :             /* We can unpack a remote node's history only after we've unpacked
    1323             :              * other resource history to the point that we know that the node's
    1324             :              * connection is up, with the exception of when shutdown locks are
    1325             :              * in use.
    1326             :              */
    1327           0 :             pcmk_resource_t *rsc = this_node->details->remote_rsc;
    1328             : 
    1329           0 :             if ((rsc == NULL)
    1330           0 :                 || (!pcmk_is_set(scheduler->flags, pcmk_sched_shutdown_lock)
    1331           0 :                     && (rsc->role != pcmk_role_started))) {
    1332           0 :                 crm_trace("Not unpacking resource history for remote node %s "
    1333             :                           "because connection is not known to be up", id);
    1334           0 :                 continue;
    1335             :             }
    1336             : 
    1337             :         /* If fencing and shutdown locks are disabled and we're not processing
    1338             :          * unseen nodes, then we don't want to unpack offline nodes until online
    1339             :          * nodes have been unpacked. This allows us to number active clone
    1340             :          * instances first.
    1341             :          */
    1342           0 :         } else if (!pcmk_any_flags_set(scheduler->flags,
    1343             :                                        pcmk_sched_fencing_enabled
    1344             :                                        |pcmk_sched_shutdown_lock)
    1345           0 :                    && !this_node->details->online) {
    1346           0 :             crm_trace("Not unpacking resource history for offline "
    1347             :                       "cluster node %s", id);
    1348           0 :             continue;
    1349             :         }
    1350             :         // Pacemaker Remote nodes get their online status and attributes here
    1351           0 :         if (pcmk__is_pacemaker_remote_node(this_node)) {
    1352           0 :             determine_remote_online_status(scheduler, this_node);
    1353           0 :             unpack_handle_remote_attrs(this_node, state, scheduler);
    1354             :         }
    1355             : 
    1356           0 :         crm_trace("Unpacking resource history for %snode %s",
    1357             :                   (fence? "unseen " : ""), id);
    1358             : 
    1359           0 :         this_node->details->unpacked = TRUE;
    1360           0 :         unpack_node_lrm(this_node, state, scheduler);
    1361             : 
    1362           0 :         rc = EAGAIN; // Other node histories might depend on this one
    1363             :     }
    1364           0 :     return rc; // EAGAIN if any node was unpacked in this pass, else pcmk_rc_ok
    1365             : }
    1366             : 
    1367             : /* Unpack the CIB status section: ticket and node state entries first, then */
    1368             : /* node resource histories (repeating passes while EAGAIN is returned), then */
    1369             : /* pending container stops and Pacemaker Remote node follow-up; returns TRUE */
    1370             : gboolean
    1371           0 : unpack_status(xmlNode *status, pcmk_scheduler_t *scheduler)
    1372             : {
    1373           0 :     xmlNode *state = NULL;
    1374             : 
    1375           0 :     crm_trace("Beginning unpack");
    1376             :     // Create the ticket table only if the scheduler does not have one yet
    1377           0 :     if (scheduler->tickets == NULL) {
    1378           0 :         scheduler->tickets = pcmk__strkey_table(free, destroy_ticket);
    1379             :     }
    1380             :     // First pass: ticket states and per-node state entries
    1381           0 :     for (state = pcmk__xe_first_child(status, NULL, NULL, NULL); state != NULL;
    1382           0 :          state = pcmk__xe_next(state)) {
    1383             : 
    1384           0 :         if (pcmk__xe_is(state, PCMK_XE_TICKETS)) {
    1385           0 :             unpack_tickets_state((xmlNode *) state, scheduler);
    1386             : 
    1387           0 :         } else if (pcmk__xe_is(state, PCMK__XE_NODE_STATE)) {
    1388           0 :             unpack_node_state(state, scheduler);
    1389             :         }
    1390             :     }
    1391             :     // Repeat history passes for as long as a pass unpacks at least one node
    1392           0 :     while (unpack_node_history(status, FALSE, scheduler) == EAGAIN) {
    1393           0 :         crm_trace("Another pass through node resource histories is needed");
    1394             :     }
    1395             : 
    1396             :     // Now catch any nodes we didn't see
    1397           0 :     unpack_node_history(status,
    1398           0 :                         pcmk_is_set(scheduler->flags,
    1399             :                                     pcmk_sched_fencing_enabled),
    1400             :                         scheduler);
    1401             : 
    1402             :     /* Now that we know where resources are, we can schedule stops of containers
    1403             :      * with failed bundle connections
    1404             :      */
    1405           0 :     if (scheduler->stop_needed != NULL) {
    1406           0 :         for (GList *item = scheduler->stop_needed; item; item = item->next) {
    1407           0 :             pcmk_resource_t *container = item->data;
    1408           0 :             pcmk_node_t *node = pcmk__current_node(container);
    1409             : 
    1410           0 :             if (node) {
    1411           0 :                 stop_action(container, node, FALSE);
    1412             :             }
    1413             :         }
    1414           0 :         g_list_free(scheduler->stop_needed);
    1415           0 :         scheduler->stop_needed = NULL;
    1416             :     }
    1417             : 
    1418             :     /* Now that we know status of all Pacemaker Remote connections and nodes,
    1419             :      * we can stop connections for node shutdowns, and check the online status
    1420             :      * of remote/guest nodes that didn't have any node history to unpack.
    1421             :      */
    1422           0 :     for (GList *gIter = scheduler->nodes; gIter != NULL; gIter = gIter->next) {
    1423           0 :         pcmk_node_t *this_node = gIter->data;
    1424             : 
    1425           0 :         if (!pcmk__is_pacemaker_remote_node(this_node)) {
    1426           0 :             continue;
    1427             :         }
    1428           0 :         if (this_node->details->shutdown
    1429           0 :             && (this_node->details->remote_rsc != NULL)) {
    1430           0 :             pe__set_next_role(this_node->details->remote_rsc, pcmk_role_stopped,
    1431             :                               "remote shutdown");
    1432             :         }
    1433           0 :         if (!this_node->details->unpacked) {
    1434           0 :             determine_remote_online_status(scheduler, this_node);
    1435             :         }
    1436             :     }
    1437             : 
    1438           0 :     return TRUE;
    1439             : }
    1440             : 
    1441             : /*!
    1442             :  * \internal
    1443             :  * \brief Unpack node's time when it became a member at the cluster layer
    1444             :  *
    1445             :  * \param[in]     node_state  Node's \c PCMK__XE_NODE_STATE entry
    1446             :  * \param[in,out] scheduler   Scheduler data
    1447             :  *
    1448             :  * \return Epoch time when node became a cluster member
    1449             :  *         (or scheduler effective time for legacy entries) if a member,
    1450             :  *         0 if not a member, or -1 if no valid information available
    1451             :  */
    1452             : static long long
    1453           0 : unpack_node_member(const xmlNode *node_state, pcmk_scheduler_t *scheduler)
    1454             : {
    1455           0 :     const char *member_time = crm_element_value(node_state, PCMK__XA_IN_CCM);
    1456           0 :     int member = 0;
    1457             :     // A missing attribute means no valid membership information
    1458           0 :     if (member_time == NULL) {
    1459           0 :         return -1LL;
    1460             : 
    1461           0 :     } else if (crm_str_to_boolean(member_time, &member) == 1) {
    1462             :         /* If in_ccm=0, we'll return 0 here. If in_ccm=1, either the entry was
    1463             :          * recorded as a boolean for a DC < 2.1.7, or the node is pending
    1464             :          * shutdown and has left the CPG, in which case it was set to 1 to avoid
    1465             :          * fencing for PCMK_OPT_NODE_PENDING_TIMEOUT.
    1466             :          *
    1467             :          * We return the effective time for in_ccm=1 because what's important to
    1468             :          * avoid fencing is that effective time minus this value is less than
    1469             :          * the pending node timeout.
    1470             :          */
    1471           0 :         return member? (long long) get_effective_time(scheduler) : 0LL;
    1472             : 
    1473             :     } else {
    1474           0 :         long long when_member = 0LL;
    1475             :         // Otherwise the value must scan as a non-negative number
    1476           0 :         if ((pcmk__scan_ll(member_time, &when_member,
    1477           0 :                            0LL) != pcmk_rc_ok) || (when_member < 0LL)) {
    1478           0 :             crm_warn("Unrecognized value '%s' for " PCMK__XA_IN_CCM
    1479             :                      " in " PCMK__XE_NODE_STATE " entry", member_time);
    1480           0 :             return -1LL;
    1481             :         }
    1482           0 :         return when_member;
    1483             :     }
    1484             : }
    1485             : 
    1486             : /*!
    1487             :  * \internal
    1488             :  * \brief Unpack node's time when it became online in process group
    1489             :  *
    1490             :  * \param[in] node_state  Node's \c PCMK__XE_NODE_STATE entry
    1491             :  *
    1492             :  * \return Epoch time when node became online in process group (or 0 if not
    1493             :  *         online, or 1 for legacy online entries)
    1494             :  */
    1495             : static long long
    1496           0 : unpack_node_online(const xmlNode *node_state)
    1497             : {
    1498           0 :     const char *peer_time = crm_element_value(node_state, PCMK_XA_CRMD);
    1499             :     // NOTE(review): pcmk__str_null_matches presumably makes a missing value count as offline
    1500             :     // @COMPAT Entries recorded for DCs < 2.1.7 have "online" or "offline"
    1501           0 :     if (pcmk__str_eq(peer_time, PCMK_VALUE_OFFLINE,
    1502             :                      pcmk__str_casei|pcmk__str_null_matches)) {
    1503           0 :         return 0LL;
    1504             : 
    1505           0 :     } else if (pcmk__str_eq(peer_time, PCMK_VALUE_ONLINE, pcmk__str_casei)) {
    1506           0 :         return 1LL;
    1507             : 
    1508             :     } else {
    1509           0 :         long long when_online = 0LL;
    1510             :         // Anything else must scan as a non-negative number, or offline is assumed
    1511           0 :         if ((pcmk__scan_ll(peer_time, &when_online, 0LL) != pcmk_rc_ok)
    1512           0 :             || (when_online < 0)) {
    1513           0 :             crm_warn("Unrecognized value '%s' for " PCMK_XA_CRMD " in "
    1514             :                      PCMK__XE_NODE_STATE " entry, assuming offline", peer_time);
    1515           0 :             return 0LL;
    1516             :         }
    1517           0 :         return when_online;
    1518             :     }
    1519             : }
    1520             : 
    1521             : /*!
    1522             :  * \internal
    1523             :  * \brief Unpack node attribute for user-requested fencing
    1524             :  *
    1525             :  * \param[in] node        Node to check
    1526             :  * \param[in] node_state  Node's \c PCMK__XE_NODE_STATE entry (currently unused)
    1527             :  *
    1528             :  * \return \c true if fencing has been requested for \p node, otherwise \c false
    1529             :  */
    1530             : static bool
    1531           0 : unpack_node_terminate(const pcmk_node_t *node, const xmlNode *node_state)
    1532             : {
    1533           0 :     long long value = 0LL;
    1534           0 :     int value_i = 0;
    1535           0 :     const char *value_s = pcmk__node_attr(node, PCMK_NODE_ATTR_TERMINATE,
    1536             :                                           NULL, pcmk__rsc_node_current);
    1537             : 
    1538             :     // Value may be boolean or an epoch time
    1539           0 :     if (crm_str_to_boolean(value_s, &value_i) == 1) {
    1540           0 :         return (value_i != 0);
    1541             :     }
    1542           0 :     if (pcmk__scan_ll(value_s, &value, 0LL) == pcmk_rc_ok) {
    1543           0 :         return (value > 0); // Only a positive number requests fencing
    1544             :     }
    1545           0 :     crm_warn("Ignoring unrecognized value '%s' for " PCMK_NODE_ATTR_TERMINATE
    1546             :              " node attribute for %s", value_s, pcmk__node_name(node));
    1547           0 :     return false;
    1548             : }
    1549             : // Decide online status from member/controller/join state; fences if unexpectedly down
    1550             : static gboolean
    1551           0 : determine_online_status_no_fencing(pcmk_scheduler_t *scheduler,
    1552             :                                    const xmlNode *node_state,
    1553             :                                    pcmk_node_t *this_node)
    1554             : {
    1555           0 :     gboolean online = FALSE;
    1556           0 :     const char *join = crm_element_value(node_state, PCMK__XA_JOIN);
    1557           0 :     const char *exp_state = crm_element_value(node_state, PCMK_XA_EXPECTED);
    1558           0 :     long long when_member = unpack_node_member(node_state, scheduler);
    1559           0 :     long long when_online = unpack_node_online(node_state);
    1560             :     // Not a member (0) or no valid membership information (negative): stay offline
    1561           0 :     if (when_member <= 0) {
    1562           0 :         crm_trace("Node %s is %sdown", pcmk__node_name(this_node),
    1563             :                   ((when_member < 0)? "presumed " : ""));
    1564             : 
    1565           0 :     } else if (when_online > 0) {
    1566           0 :         if (pcmk__str_eq(join, CRMD_JOINSTATE_MEMBER, pcmk__str_casei)) {
    1567           0 :             online = TRUE;
    1568             :         } else { // join may be NULL here, so never pass it to %s unguarded
    1569           0 :             crm_debug("Node %s is not ready to run resources: %s",
    1570             :                       pcmk__node_name(this_node), pcmk__s(join, "<null>"));
    1571             :         }
    1572             : 
    1573           0 :     } else if (this_node->details->expected_up == FALSE) {
    1574           0 :         crm_trace("Node %s controller is down: "
    1575             :                   "member@%lld online@%lld join=%s expected=%s",
    1576             :                   pcmk__node_name(this_node), when_member, when_online,
    1577             :                   pcmk__s(join, "<null>"), pcmk__s(exp_state, "<null>"));
    1578             : 
    1579             :     } else {
    1580             :         /* mark it unclean */
    1581           0 :         pe_fence_node(scheduler, this_node, "peer is unexpectedly down", FALSE);
    1582           0 :         crm_info("Node %s member@%lld online@%lld join=%s expected=%s",
    1583             :                  pcmk__node_name(this_node), when_member, when_online,
    1584             :                  pcmk__s(join, "<null>"), pcmk__s(exp_state, "<null>"));
    1585             :     }
    1586           0 :     return online;
    1587             : }
    1588             : 
    1589             : /*!
    1590             :  * \internal
    1591             :  * \brief Check whether a node has taken too long to join controller group
    1592             :  *
    1593             :  * \param[in,out] scheduler    Scheduler data
    1594             :  * \param[in]     node         Node to check
    1595             :  * \param[in]     when_member  Epoch time when node became a cluster member
    1596             :  * \param[in]     when_online  Epoch time when node joined controller group
    1597             :  *
    1598             :  * \return true if node has been pending (on the way up) longer than
    1599             :  *         \c PCMK_OPT_NODE_PENDING_TIMEOUT, otherwise false
    1600             :  * \note This will also update the cluster's recheck time if appropriate.
    1601             :  */
    1602             : static inline bool
    1603           0 : pending_too_long(pcmk_scheduler_t *scheduler, const pcmk_node_t *node,
    1604             :                  long long when_member, long long when_online)
    1605             : {
    1606           0 :     if ((scheduler->node_pending_timeout > 0)
    1607           0 :         && (when_member > 0) && (when_online <= 0)) {
    1608             :         // There is a timeout on pending nodes, and node is pending
    1609             : 
    1610           0 :         time_t timeout = when_member + scheduler->node_pending_timeout;
    1611             : 
    1612           0 :         if (get_effective_time(node->details->data_set) >= timeout) {
    1613           0 :             return true; // Node has timed out
    1614             :         }
    1615             : 
    1616             :         // Node is pending, but still has time
    1617           0 :         pe__update_recheck_time(timeout, scheduler, "pending node timeout");
    1618             :     }
    1619           0 :     return false;
    1620             : }
    1621             : 
/*!
 * \internal
 * \brief Determine whether a cluster node is online when fencing is enabled
 *
 * Depending on the node's membership time, controller-online time, join state,
 * expected state, and any termination request, this may schedule fencing of
 * the node (via pe_fence_node()) or mark it as standby and pending.
 *
 * \param[in,out] scheduler   Scheduler data
 * \param[in]     node_state  Node state XML
 * \param[in,out] this_node   Node to check
 *
 * \return For a node that is shutting down, whether its controller-online time
 *         is positive; false if the node has a negative membership time or was
 *         already fenced as requested; otherwise whether its membership time is
 *         positive
 */
static bool
determine_online_status_fencing(pcmk_scheduler_t *scheduler,
                                const xmlNode *node_state,
                                pcmk_node_t *this_node)
{
    bool termination_requested = unpack_node_terminate(this_node, node_state);
    const char *join = crm_element_value(node_state, PCMK__XA_JOIN);
    const char *exp_state = crm_element_value(node_state, PCMK_XA_EXPECTED);
    long long when_member = unpack_node_member(node_state, scheduler);
    long long when_online = unpack_node_online(node_state);

/*
  - PCMK__XA_JOIN          ::= member|down|pending|banned
  - PCMK_XA_EXPECTED       ::= member|down

  @COMPAT with entries recorded for DCs < 2.1.7
  - PCMK__XA_IN_CCM        ::= true|false
  - PCMK_XA_CRMD           ::= online|offline

  Since crm_feature_set 3.18.0 (pacemaker-2.1.7):
  - PCMK__XA_IN_CCM        ::= <timestamp>|0
  Since when node has been a cluster member. A value of 0 means the node is not
  a cluster member.

  - PCMK_XA_CRMD           ::= <timestamp>|0
  Since when peer has been online in CPG. A value 0 means the peer is offline
  in CPG.
*/

    crm_trace("Node %s member@%lld online@%lld join=%s expected=%s%s",
              pcmk__node_name(this_node), when_member, when_online,
              pcmk__s(join, "<null>"), pcmk__s(exp_state, "<null>"),
              (termination_requested? " (termination requested)" : ""));

    if (this_node->details->shutdown) {
        crm_debug("%s is shutting down", pcmk__node_name(this_node));

        /* Slightly different criteria since we can't shut down a dead peer */
        return (when_online > 0);
    }

    // A negative membership time is treated as "never seen"
    if (when_member < 0) {
        pe_fence_node(scheduler, this_node,
                      "peer has not been seen by the cluster", FALSE);
        return false;
    }

    /* From here on when_member is >= 0. The first matching condition in the
     * chain below decides what (if anything) happens to the node.
     */
    if (pcmk__str_eq(join, CRMD_JOINSTATE_NACK, pcmk__str_none)) {
        pe_fence_node(scheduler, this_node,
                      "peer failed Pacemaker membership criteria", FALSE);

    } else if (termination_requested) {
        // If the node is already fully down, the request has been satisfied
        if ((when_member <= 0) && (when_online <= 0)
            && pcmk__str_eq(join, CRMD_JOINSTATE_DOWN, pcmk__str_none)) {
            crm_info("%s was fenced as requested", pcmk__node_name(this_node));
            return false;
        }
        pe_fence_node(scheduler, this_node, "fencing was requested", false);

    } else if (pcmk__str_eq(exp_state, CRMD_JOINSTATE_DOWN,
                            pcmk__str_null_matches)) {
        // Expected state is "down" or unset

        if (pending_too_long(scheduler, this_node, when_member, when_online)) {
            pe_fence_node(scheduler, this_node,
                          "peer pending timed out on joining the process group",
                          FALSE);

        } else if ((when_member > 0) || (when_online > 0)) {
            // Partially up: keep resources off the node for now
            crm_info("- %s is not ready to run resources",
                     pcmk__node_name(this_node));
            this_node->details->standby = TRUE;
            this_node->details->pending = TRUE;

        } else {
            crm_trace("%s is down or still coming up",
                      pcmk__node_name(this_node));
        }

    } else if (when_member <= 0) {
        // Consider PCMK_OPT_PRIORITY_FENCING_DELAY for lost nodes
        pe_fence_node(scheduler, this_node,
                      "peer is no longer part of the cluster", TRUE);

    } else if (when_online <= 0) {
        pe_fence_node(scheduler, this_node,
                      "peer process is no longer available", FALSE);

        /* Everything is running at this point, now check join state */

    } else if (pcmk__str_eq(join, CRMD_JOINSTATE_MEMBER, pcmk__str_none)) {
        crm_info("%s is active", pcmk__node_name(this_node));

    } else if (pcmk__str_any_of(join, CRMD_JOINSTATE_PENDING,
                                CRMD_JOINSTATE_DOWN, NULL)) {
        crm_info("%s is not ready to run resources",
                 pcmk__node_name(this_node));
        this_node->details->standby = TRUE;
        this_node->details->pending = TRUE;

    } else {
        pe_fence_node(scheduler, this_node, "peer was in an unknown state",
                      FALSE);
    }

    // Reported as online whenever it is a cluster member, even if fenced above
    return (when_member > 0);
}
    1728             : 
    1729             : static void
    1730           0 : determine_remote_online_status(pcmk_scheduler_t *scheduler,
    1731             :                                pcmk_node_t *this_node)
    1732             : {
    1733           0 :     pcmk_resource_t *rsc = this_node->details->remote_rsc;
    1734           0 :     pcmk_resource_t *container = NULL;
    1735           0 :     pcmk_node_t *host = NULL;
    1736             : 
    1737             :     /* If there is a node state entry for a (former) Pacemaker Remote node
    1738             :      * but no resource creating that node, the node's connection resource will
    1739             :      * be NULL. Consider it an offline remote node in that case.
    1740             :      */
    1741           0 :     if (rsc == NULL) {
    1742           0 :         this_node->details->online = FALSE;
    1743           0 :         goto remote_online_done;
    1744             :     }
    1745             : 
    1746           0 :     container = rsc->container;
    1747             : 
    1748           0 :     if (container && pcmk__list_of_1(rsc->running_on)) {
    1749           0 :         host = rsc->running_on->data;
    1750             :     }
    1751             : 
    1752             :     /* If the resource is currently started, mark it online. */
    1753           0 :     if (rsc->role == pcmk_role_started) {
    1754           0 :         crm_trace("%s node %s presumed ONLINE because connection resource is started",
    1755             :                   (container? "Guest" : "Remote"), this_node->details->id);
    1756           0 :         this_node->details->online = TRUE;
    1757             :     }
    1758             : 
    1759             :     /* consider this node shutting down if transitioning start->stop */
    1760           0 :     if ((rsc->role == pcmk_role_started)
    1761           0 :         && (rsc->next_role == pcmk_role_stopped)) {
    1762             : 
    1763           0 :         crm_trace("%s node %s shutting down because connection resource is stopping",
    1764             :                   (container? "Guest" : "Remote"), this_node->details->id);
    1765           0 :         this_node->details->shutdown = TRUE;
    1766             :     }
    1767             : 
    1768             :     /* Now check all the failure conditions. */
    1769           0 :     if(container && pcmk_is_set(container->flags, pcmk_rsc_failed)) {
    1770           0 :         crm_trace("Guest node %s UNCLEAN because guest resource failed",
    1771             :                   this_node->details->id);
    1772           0 :         this_node->details->online = FALSE;
    1773           0 :         this_node->details->remote_requires_reset = TRUE;
    1774             : 
    1775           0 :     } else if (pcmk_is_set(rsc->flags, pcmk_rsc_failed)) {
    1776           0 :         crm_trace("%s node %s OFFLINE because connection resource failed",
    1777             :                   (container? "Guest" : "Remote"), this_node->details->id);
    1778           0 :         this_node->details->online = FALSE;
    1779             : 
    1780           0 :     } else if ((rsc->role == pcmk_role_stopped)
    1781           0 :                || ((container != NULL)
    1782           0 :                    && (container->role == pcmk_role_stopped))) {
    1783             : 
    1784           0 :         crm_trace("%s node %s OFFLINE because its resource is stopped",
    1785             :                   (container? "Guest" : "Remote"), this_node->details->id);
    1786           0 :         this_node->details->online = FALSE;
    1787           0 :         this_node->details->remote_requires_reset = FALSE;
    1788             : 
    1789           0 :     } else if (host && (host->details->online == FALSE)
    1790           0 :                && host->details->unclean) {
    1791           0 :         crm_trace("Guest node %s UNCLEAN because host is unclean",
    1792             :                   this_node->details->id);
    1793           0 :         this_node->details->online = FALSE;
    1794           0 :         this_node->details->remote_requires_reset = TRUE;
    1795             :     }
    1796             : 
    1797           0 : remote_online_done:
    1798           0 :     crm_trace("Remote node %s online=%s",
    1799             :         this_node->details->id, this_node->details->online ? "TRUE" : "FALSE");
    1800           0 : }
    1801             : 
    1802             : static void
    1803           0 : determine_online_status(const xmlNode *node_state, pcmk_node_t *this_node,
    1804             :                         pcmk_scheduler_t *scheduler)
    1805             : {
    1806           0 :     gboolean online = FALSE;
    1807           0 :     const char *exp_state = crm_element_value(node_state, PCMK_XA_EXPECTED);
    1808             : 
    1809           0 :     CRM_CHECK(this_node != NULL, return);
    1810             : 
    1811           0 :     this_node->details->shutdown = FALSE;
    1812           0 :     this_node->details->expected_up = FALSE;
    1813             : 
    1814           0 :     if (pe__shutdown_requested(this_node)) {
    1815           0 :         this_node->details->shutdown = TRUE;
    1816             : 
    1817           0 :     } else if (pcmk__str_eq(exp_state, CRMD_JOINSTATE_MEMBER, pcmk__str_casei)) {
    1818           0 :         this_node->details->expected_up = TRUE;
    1819             :     }
    1820             : 
    1821           0 :     if (this_node->details->type == node_ping) {
    1822           0 :         this_node->details->unclean = FALSE;
    1823           0 :         online = FALSE;         /* As far as resource management is concerned,
    1824             :                                  * the node is safely offline.
    1825             :                                  * Anyone caught abusing this logic will be shot
    1826             :                                  */
    1827             : 
    1828           0 :     } else if (!pcmk_is_set(scheduler->flags, pcmk_sched_fencing_enabled)) {
    1829           0 :         online = determine_online_status_no_fencing(scheduler, node_state,
    1830             :                                                     this_node);
    1831             : 
    1832             :     } else {
    1833           0 :         online = determine_online_status_fencing(scheduler, node_state,
    1834             :                                                  this_node);
    1835             :     }
    1836             : 
    1837           0 :     if (online) {
    1838           0 :         this_node->details->online = TRUE;
    1839             : 
    1840             :     } else {
    1841             :         /* remove node from contention */
    1842           0 :         this_node->fixed = TRUE; // @COMPAT deprecated and unused
    1843           0 :         this_node->weight = -PCMK_SCORE_INFINITY;
    1844             :     }
    1845             : 
    1846           0 :     if (online && this_node->details->shutdown) {
    1847             :         /* don't run resources here */
    1848           0 :         this_node->fixed = TRUE; // @COMPAT deprecated and unused
    1849           0 :         this_node->weight = -PCMK_SCORE_INFINITY;
    1850             :     }
    1851             : 
    1852           0 :     if (this_node->details->type == node_ping) {
    1853           0 :         crm_info("%s is not a Pacemaker node", pcmk__node_name(this_node));
    1854             : 
    1855           0 :     } else if (this_node->details->unclean) {
    1856           0 :         pcmk__sched_warn("%s is unclean", pcmk__node_name(this_node));
    1857             : 
    1858           0 :     } else if (this_node->details->online) {
    1859           0 :         crm_info("%s is %s", pcmk__node_name(this_node),
    1860             :                  this_node->details->shutdown ? "shutting down" :
    1861             :                  this_node->details->pending ? "pending" :
    1862             :                  this_node->details->standby ? "standby" :
    1863             :                  this_node->details->maintenance ? "maintenance" : "online");
    1864             : 
    1865             :     } else {
    1866           0 :         crm_trace("%s is offline", pcmk__node_name(this_node));
    1867             :     }
    1868             : }
    1869             : 
    1870             : /*!
    1871             :  * \internal
    1872             :  * \brief Find the end of a resource's name, excluding any clone suffix
    1873             :  *
    1874             :  * \param[in] id  Resource ID to check
    1875             :  *
    1876             :  * \return Pointer to last character of resource's base name
    1877             :  */
    1878             : const char *
    1879         112 : pe_base_name_end(const char *id)
    1880             : {
    1881         112 :     if (!pcmk__str_empty(id)) {
    1882         110 :         const char *end = id + strlen(id) - 1;
    1883             : 
    1884         173 :         for (const char *s = end; s > id; --s) {
    1885         173 :             switch (*s) {
    1886          63 :                 case '0':
    1887             :                 case '1':
    1888             :                 case '2':
    1889             :                 case '3':
    1890             :                 case '4':
    1891             :                 case '5':
    1892             :                 case '6':
    1893             :                 case '7':
    1894             :                 case '8':
    1895             :                 case '9':
    1896          63 :                     break;
    1897          60 :                 case ':':
    1898          60 :                     return (s == end)? s : (s - 1);
    1899          50 :                 default:
    1900          50 :                     return end;
    1901             :             }
    1902             :         }
    1903           0 :         return end;
    1904             :     }
    1905           2 :     return NULL;
    1906             : }
    1907             : 
    1908             : /*!
    1909             :  * \internal
    1910             :  * \brief Get a resource name excluding any clone suffix
    1911             :  *
    1912             :  * \param[in] last_rsc_id  Resource ID to check
    1913             :  *
    1914             :  * \return Pointer to newly allocated string with resource's base name
    1915             :  * \note It is the caller's responsibility to free() the result.
    1916             :  *       This asserts on error, so callers can assume result is not NULL.
    1917             :  */
    1918             : char *
    1919           0 : clone_strip(const char *last_rsc_id)
    1920             : {
    1921           0 :     const char *end = pe_base_name_end(last_rsc_id);
    1922           0 :     char *basename = NULL;
    1923             : 
    1924           0 :     CRM_ASSERT(end);
    1925           0 :     basename = strndup(last_rsc_id, end - last_rsc_id + 1);
    1926           0 :     CRM_ASSERT(basename);
    1927           0 :     return basename;
    1928             : }
    1929             : 
    1930             : /*!
    1931             :  * \internal
    1932             :  * \brief Get the name of the first instance of a cloned resource
    1933             :  *
    1934             :  * \param[in] last_rsc_id  Resource ID to check
    1935             :  *
    1936             :  * \return Pointer to newly allocated string with resource's base name plus :0
    1937             :  * \note It is the caller's responsibility to free() the result.
    1938             :  *       This asserts on error, so callers can assume result is not NULL.
    1939             :  */
    1940             : char *
    1941           0 : clone_zero(const char *last_rsc_id)
    1942             : {
    1943           0 :     const char *end = pe_base_name_end(last_rsc_id);
    1944           0 :     size_t base_name_len = end - last_rsc_id + 1;
    1945           0 :     char *zero = NULL;
    1946             : 
    1947           0 :     CRM_ASSERT(end);
    1948           0 :     zero = pcmk__assert_alloc(base_name_len + 3, sizeof(char));
    1949           0 :     memcpy(zero, last_rsc_id, base_name_len);
    1950           0 :     zero[base_name_len] = ':';
    1951           0 :     zero[base_name_len + 1] = '0';
    1952           0 :     return zero;
    1953             : }
    1954             : 
    1955             : static pcmk_resource_t *
    1956           0 : create_fake_resource(const char *rsc_id, const xmlNode *rsc_entry,
    1957             :                      pcmk_scheduler_t *scheduler)
    1958             : {
    1959           0 :     pcmk_resource_t *rsc = NULL;
    1960           0 :     xmlNode *xml_rsc = pcmk__xe_create(NULL, PCMK_XE_PRIMITIVE);
    1961             : 
    1962           0 :     pcmk__xe_copy_attrs(xml_rsc, rsc_entry, pcmk__xaf_none);
    1963           0 :     crm_xml_add(xml_rsc, PCMK_XA_ID, rsc_id);
    1964           0 :     crm_log_xml_debug(xml_rsc, "Orphan resource");
    1965             : 
    1966           0 :     if (pe__unpack_resource(xml_rsc, &rsc, NULL, scheduler) != pcmk_rc_ok) {
    1967           0 :         return NULL;
    1968             :     }
    1969             : 
    1970           0 :     if (xml_contains_remote_node(xml_rsc)) {
    1971             :         pcmk_node_t *node;
    1972             : 
    1973           0 :         crm_debug("Detected orphaned remote node %s", rsc_id);
    1974           0 :         node = pcmk_find_node(scheduler, rsc_id);
    1975           0 :         if (node == NULL) {
    1976           0 :             node = pe_create_node(rsc_id, rsc_id, PCMK_VALUE_REMOTE, NULL,
    1977             :                                   scheduler);
    1978             :         }
    1979           0 :         link_rsc2remotenode(scheduler, rsc);
    1980             : 
    1981           0 :         if (node) {
    1982           0 :             crm_trace("Setting node %s as shutting down due to orphaned connection resource", rsc_id);
    1983           0 :             node->details->shutdown = TRUE;
    1984             :         }
    1985             :     }
    1986             : 
    1987           0 :     if (crm_element_value(rsc_entry, PCMK__META_CONTAINER)) {
    1988             :         /* This orphaned rsc needs to be mapped to a container. */
    1989           0 :         crm_trace("Detected orphaned container filler %s", rsc_id);
    1990           0 :         pcmk__set_rsc_flags(rsc, pcmk_rsc_removed_filler);
    1991             :     }
    1992           0 :     pcmk__set_rsc_flags(rsc, pcmk_rsc_removed);
    1993           0 :     scheduler->resources = g_list_append(scheduler->resources, rsc);
    1994           0 :     return rsc;
    1995             : }
    1996             : 
    1997             : /*!
    1998             :  * \internal
    1999             :  * \brief Create orphan instance for anonymous clone resource history
    2000             :  *
    2001             :  * \param[in,out] parent     Clone resource that orphan will be added to
    2002             :  * \param[in]     rsc_id     Orphan's resource ID
    2003             :  * \param[in]     node       Where orphan is active (for logging only)
    2004             :  * \param[in,out] scheduler  Scheduler data
    2005             :  *
    2006             :  * \return Newly added orphaned instance of \p parent
    2007             :  */
    2008             : static pcmk_resource_t *
    2009           0 : create_anonymous_orphan(pcmk_resource_t *parent, const char *rsc_id,
    2010             :                         const pcmk_node_t *node, pcmk_scheduler_t *scheduler)
    2011             : {
    2012           0 :     pcmk_resource_t *top = pe__create_clone_child(parent, scheduler);
    2013             : 
    2014             :     // find_rsc() because we might be a cloned group
    2015           0 :     pcmk_resource_t *orphan = top->fns->find_rsc(top, rsc_id, NULL,
    2016             :                                                pcmk_rsc_match_clone_only);
    2017             : 
    2018           0 :     pcmk__rsc_debug(parent, "Created orphan %s for %s: %s on %s",
    2019             :                     top->id, parent->id, rsc_id, pcmk__node_name(node));
    2020           0 :     return orphan;
    2021             : }
    2022             : 
    2023             : /*!
    2024             :  * \internal
    2025             :  * \brief Check a node for an instance of an anonymous clone
    2026             :  *
    2027             :  * Return a child instance of the specified anonymous clone, in order of
    2028             :  * preference: (1) the instance running on the specified node, if any;
    2029             :  * (2) an inactive instance (i.e. within the total of \c PCMK_META_CLONE_MAX
    2030             :  * instances); (3) a newly created orphan (that is, \c PCMK_META_CLONE_MAX
    2031             :  * instances are already active).
    2032             :  *
    2033             :  * \param[in,out] scheduler  Scheduler data
    2034             :  * \param[in]     node       Node on which to check for instance
    2035             :  * \param[in,out] parent     Clone to check
    2036             :  * \param[in]     rsc_id     Name of cloned resource in history (no instance)
    2037             :  */
    2038             : static pcmk_resource_t *
    2039           0 : find_anonymous_clone(pcmk_scheduler_t *scheduler, const pcmk_node_t *node,
    2040             :                      pcmk_resource_t *parent, const char *rsc_id)
    2041             : {
    2042           0 :     GList *rIter = NULL;
    2043           0 :     pcmk_resource_t *rsc = NULL;
    2044           0 :     pcmk_resource_t *inactive_instance = NULL;
    2045           0 :     gboolean skip_inactive = FALSE;
    2046             : 
    2047           0 :     CRM_ASSERT(parent != NULL);
    2048           0 :     CRM_ASSERT(pcmk__is_clone(parent));
    2049           0 :     CRM_ASSERT(!pcmk_is_set(parent->flags, pcmk_rsc_unique));
    2050             : 
    2051             :     // Check for active (or partially active, for cloned groups) instance
    2052           0 :     pcmk__rsc_trace(parent, "Looking for %s on %s in %s",
    2053             :                     rsc_id, pcmk__node_name(node), parent->id);
    2054           0 :     for (rIter = parent->children; rsc == NULL && rIter; rIter = rIter->next) {
    2055           0 :         GList *locations = NULL;
    2056           0 :         pcmk_resource_t *child = rIter->data;
    2057             : 
    2058             :         /* Check whether this instance is already known to be active or pending
    2059             :          * anywhere, at this stage of unpacking. Because this function is called
    2060             :          * for a resource before the resource's individual operation history
    2061             :          * entries are unpacked, locations will generally not contain the
    2062             :          * desired node.
    2063             :          *
    2064             :          * However, there are three exceptions:
    2065             :          * (1) when child is a cloned group and we have already unpacked the
    2066             :          *     history of another member of the group on the same node;
    2067             :          * (2) when we've already unpacked the history of another numbered
    2068             :          *     instance on the same node (which can happen if
    2069             :          *     PCMK_META_GLOBALLY_UNIQUE was flipped from true to false); and
    2070             :          * (3) when we re-run calculations on the same scheduler data as part of
    2071             :          *     a simulation.
    2072             :          */
    2073           0 :         child->fns->location(child, &locations, 2);
    2074           0 :         if (locations) {
    2075             :             /* We should never associate the same numbered anonymous clone
    2076             :              * instance with multiple nodes, and clone instances can't migrate,
    2077             :              * so there must be only one location, regardless of history.
    2078             :              */
    2079           0 :             CRM_LOG_ASSERT(locations->next == NULL);
    2080             : 
    2081           0 :             if (pcmk__same_node((pcmk_node_t *) locations->data, node)) {
    2082             :                 /* This child instance is active on the requested node, so check
    2083             :                  * for a corresponding configured resource. We use find_rsc()
    2084             :                  * instead of child because child may be a cloned group, and we
    2085             :                  * need the particular member corresponding to rsc_id.
    2086             :                  *
    2087             :                  * If the history entry is orphaned, rsc will be NULL.
    2088             :                  */
    2089           0 :                 rsc = parent->fns->find_rsc(child, rsc_id, NULL,
    2090             :                                             pcmk_rsc_match_clone_only);
    2091           0 :                 if (rsc) {
    2092             :                     /* If there are multiple instance history entries for an
    2093             :                      * anonymous clone in a single node's history (which can
    2094             :                      * happen if PCMK_META_GLOBALLY_UNIQUE is switched from true
    2095             :                      * to false), we want to consider the instances beyond the
    2096             :                      * first as orphans, even if there are inactive instance
    2097             :                      * numbers available.
    2098             :                      */
    2099           0 :                     if (rsc->running_on) {
    2100           0 :                         crm_notice("Active (now-)anonymous clone %s has "
    2101             :                                    "multiple (orphan) instance histories on %s",
    2102             :                                    parent->id, pcmk__node_name(node));
    2103           0 :                         skip_inactive = TRUE;
    2104           0 :                         rsc = NULL;
    2105             :                     } else {
    2106           0 :                         pcmk__rsc_trace(parent, "Resource %s, active", rsc->id);
    2107             :                     }
    2108             :                 }
    2109             :             }
    2110           0 :             g_list_free(locations);
    2111             : 
    2112             :         } else {
    2113           0 :             pcmk__rsc_trace(parent, "Resource %s, skip inactive", child->id);
    2114           0 :             if (!skip_inactive && !inactive_instance
    2115           0 :                 && !pcmk_is_set(child->flags, pcmk_rsc_blocked)) {
    2116             :                 // Remember one inactive instance in case we don't find active
    2117           0 :                 inactive_instance = parent->fns->find_rsc(child, rsc_id, NULL,
    2118             :                                                           pcmk_rsc_match_clone_only);
    2119             : 
    2120             :                 /* ... but don't use it if it was already associated with a
    2121             :                  * pending action on another node
    2122             :                  */
    2123           0 :                 if ((inactive_instance != NULL) &&
    2124           0 :                     (inactive_instance->pending_node != NULL) &&
    2125           0 :                     !pcmk__same_node(inactive_instance->pending_node, node)) {
    2126           0 :                     inactive_instance = NULL;
    2127             :                 }
    2128             :             }
    2129             :         }
    2130             :     }
    2131             : 
    2132           0 :     if ((rsc == NULL) && !skip_inactive && (inactive_instance != NULL)) {
    2133           0 :         pcmk__rsc_trace(parent, "Resource %s, empty slot",
    2134             :                         inactive_instance->id);
    2135           0 :         rsc = inactive_instance;
    2136             :     }
    2137             : 
    2138             :     /* If the resource has PCMK_META_REQUIRES set to PCMK_VALUE_QUORUM or
    2139             :      * PCMK_VALUE_NOTHING, and we don't have a clone instance for every node, we
    2140             :      * don't want to consume a valid instance number for unclean nodes. Such
    2141             :      * instances may appear to be active according to the history, but should be
    2142             :      * considered inactive, so we can start an instance elsewhere. Treat such
    2143             :      * instances as orphans.
    2144             :      *
    2145             :      * An exception is instances running on guest nodes -- since guest node
    2146             :      * "fencing" is actually just a resource stop, requires shouldn't apply.
    2147             :      *
    2148             :      * @TODO Ideally, we'd use an inactive instance number if it is not needed
    2149             :      * for any clean instances. However, we don't know that at this point.
    2150             :      */
    2151           0 :     if ((rsc != NULL) && !pcmk_is_set(rsc->flags, pcmk_rsc_needs_fencing)
    2152           0 :         && (!node->details->online || node->details->unclean)
    2153           0 :         && !pcmk__is_guest_or_bundle_node(node)
    2154           0 :         && !pe__is_universal_clone(parent, scheduler)) {
    2155             : 
    2156           0 :         rsc = NULL;
    2157             :     }
    2158             : 
    2159           0 :     if (rsc == NULL) {
    2160           0 :         rsc = create_anonymous_orphan(parent, rsc_id, node, scheduler);
    2161           0 :         pcmk__rsc_trace(parent, "Resource %s, orphan", rsc->id);
    2162             :     }
    2163           0 :     return rsc;
    2164             : }
    2165             : 
    2166             : static pcmk_resource_t *
    2167           0 : unpack_find_resource(pcmk_scheduler_t *scheduler, const pcmk_node_t *node,
    2168             :                      const char *rsc_id)
    2169             : {
    2170           0 :     pcmk_resource_t *rsc = NULL;
    2171           0 :     pcmk_resource_t *parent = NULL;
    2172             : 
    2173           0 :     crm_trace("looking for %s", rsc_id);
    2174           0 :     rsc = pe_find_resource(scheduler->resources, rsc_id);
    2175             : 
    2176           0 :     if (rsc == NULL) {
    2177             :         /* If we didn't find the resource by its name in the operation history,
    2178             :          * check it again as a clone instance. Even when PCMK_META_CLONE_MAX=0,
    2179             :          * we create a single :0 orphan to match against here.
    2180             :          */
    2181           0 :         char *clone0_id = clone_zero(rsc_id);
    2182           0 :         pcmk_resource_t *clone0 = pe_find_resource(scheduler->resources,
    2183             :                                                    clone0_id);
    2184             : 
    2185           0 :         if (clone0 && !pcmk_is_set(clone0->flags, pcmk_rsc_unique)) {
    2186           0 :             rsc = clone0;
    2187           0 :             parent = uber_parent(clone0);
    2188           0 :             crm_trace("%s found as %s (%s)", rsc_id, clone0_id, parent->id);
    2189             :         } else {
    2190           0 :             crm_trace("%s is not known as %s either (orphan)",
    2191             :                       rsc_id, clone0_id);
    2192             :         }
    2193           0 :         free(clone0_id);
    2194             : 
    2195           0 :     } else if (rsc->variant > pcmk_rsc_variant_primitive) {
    2196           0 :         crm_trace("Resource history for %s is orphaned because it is no longer primitive",
    2197             :                   rsc_id);
    2198           0 :         return NULL;
    2199             : 
    2200             :     } else {
    2201           0 :         parent = uber_parent(rsc);
    2202             :     }
    2203             : 
    2204           0 :     if (pcmk__is_anonymous_clone(parent)) {
    2205             : 
    2206           0 :         if (pcmk__is_bundled(parent)) {
    2207           0 :             rsc = pe__find_bundle_replica(parent->parent, node);
    2208             :         } else {
    2209           0 :             char *base = clone_strip(rsc_id);
    2210             : 
    2211           0 :             rsc = find_anonymous_clone(scheduler, node, parent, base);
    2212           0 :             free(base);
    2213           0 :             CRM_ASSERT(rsc != NULL);
    2214             :         }
    2215             :     }
    2216             : 
    2217           0 :     if (rsc && !pcmk__str_eq(rsc_id, rsc->id, pcmk__str_casei)
    2218           0 :         && !pcmk__str_eq(rsc_id, rsc->clone_name, pcmk__str_casei)) {
    2219             : 
    2220           0 :         pcmk__str_update(&rsc->clone_name, rsc_id);
    2221           0 :         pcmk__rsc_debug(rsc, "Internally renamed %s on %s to %s%s",
    2222             :                         rsc_id, pcmk__node_name(node), rsc->id,
    2223             :                         pcmk_is_set(rsc->flags, pcmk_rsc_removed)? " (ORPHAN)" : "");
    2224             :     }
    2225           0 :     return rsc;
    2226             : }
    2227             : 
    2228             : static pcmk_resource_t *
    2229           0 : process_orphan_resource(const xmlNode *rsc_entry, const pcmk_node_t *node,
    2230             :                         pcmk_scheduler_t *scheduler)
    2231             : {
    2232           0 :     pcmk_resource_t *rsc = NULL;
    2233           0 :     const char *rsc_id = crm_element_value(rsc_entry, PCMK_XA_ID);
    2234             : 
    2235           0 :     crm_debug("Detected orphan resource %s on %s",
    2236             :               rsc_id, pcmk__node_name(node));
    2237           0 :     rsc = create_fake_resource(rsc_id, rsc_entry, scheduler);
    2238           0 :     if (rsc == NULL) {
    2239           0 :         return NULL;
    2240             :     }
    2241             : 
    2242           0 :     if (!pcmk_is_set(scheduler->flags, pcmk_sched_stop_removed_resources)) {
    2243           0 :         pcmk__clear_rsc_flags(rsc, pcmk_rsc_managed);
    2244             : 
    2245             :     } else {
    2246           0 :         CRM_CHECK(rsc != NULL, return NULL);
    2247           0 :         pcmk__rsc_trace(rsc, "Added orphan %s", rsc->id);
    2248           0 :         resource_location(rsc, NULL, -PCMK_SCORE_INFINITY,
    2249             :                           "__orphan_do_not_run__", scheduler);
    2250             :     }
    2251           0 :     return rsc;
    2252             : }
    2253             : 
    2254             : static void
    2255           0 : process_rsc_state(pcmk_resource_t *rsc, pcmk_node_t *node,
    2256             :                   enum action_fail_response on_fail)
    2257             : {
    2258           0 :     pcmk_node_t *tmpnode = NULL;
    2259           0 :     char *reason = NULL;
    2260           0 :     enum action_fail_response save_on_fail = pcmk_on_fail_ignore;
    2261             : 
    2262           0 :     CRM_ASSERT(rsc);
    2263           0 :     pcmk__rsc_trace(rsc, "Resource %s is %s on %s: on_fail=%s",
    2264             :                     rsc->id, pcmk_role_text(rsc->role), pcmk__node_name(node),
    2265             :                     pcmk_on_fail_text(on_fail));
    2266             : 
    2267             :     /* process current state */
    2268           0 :     if (rsc->role != pcmk_role_unknown) {
    2269           0 :         pcmk_resource_t *iter = rsc;
    2270             : 
    2271           0 :         while (iter) {
    2272           0 :             if (g_hash_table_lookup(iter->known_on, node->details->id) == NULL) {
    2273           0 :                 pcmk_node_t *n = pe__copy_node(node);
    2274             : 
    2275           0 :                 pcmk__rsc_trace(rsc, "%s%s%s known on %s",
    2276             :                                 rsc->id,
    2277             :                                 ((rsc->clone_name == NULL)? "" : " also known as "),
    2278             :                                 ((rsc->clone_name == NULL)? "" : rsc->clone_name),
    2279             :                                 pcmk__node_name(n));
    2280           0 :                 g_hash_table_insert(iter->known_on, (gpointer) n->details->id, n);
    2281             :             }
    2282           0 :             if (pcmk_is_set(iter->flags, pcmk_rsc_unique)) {
    2283           0 :                 break;
    2284             :             }
    2285           0 :             iter = iter->parent;
    2286             :         }
    2287             :     }
    2288             : 
    2289             :     /* If a managed resource is believed to be running, but node is down ... */
    2290           0 :     if ((rsc->role > pcmk_role_stopped)
    2291           0 :         && node->details->online == FALSE
    2292           0 :         && node->details->maintenance == FALSE
    2293           0 :         && pcmk_is_set(rsc->flags, pcmk_rsc_managed)) {
    2294             : 
    2295           0 :         gboolean should_fence = FALSE;
    2296             : 
    2297             :         /* If this is a guest node, fence it (regardless of whether fencing is
    2298             :          * enabled, because guest node fencing is done by recovery of the
    2299             :          * container resource rather than by the fencer). Mark the resource
    2300             :          * we're processing as failed. When the guest comes back up, its
    2301             :          * operation history in the CIB will be cleared, freeing the affected
    2302             :          * resource to run again once we are sure we know its state.
    2303             :          */
    2304           0 :         if (pcmk__is_guest_or_bundle_node(node)) {
    2305           0 :             pcmk__set_rsc_flags(rsc, pcmk_rsc_failed|pcmk_rsc_stop_if_failed);
    2306           0 :             should_fence = TRUE;
    2307             : 
    2308           0 :         } else if (pcmk_is_set(rsc->cluster->flags,
    2309             :                                pcmk_sched_fencing_enabled)) {
    2310           0 :             if (pcmk__is_remote_node(node)
    2311           0 :                 && (node->details->remote_rsc != NULL)
    2312           0 :                 && !pcmk_is_set(node->details->remote_rsc->flags,
    2313             :                                 pcmk_rsc_failed)) {
    2314             : 
    2315             :                 /* Setting unseen means that fencing of the remote node will
    2316             :                  * occur only if the connection resource is not going to start
    2317             :                  * somewhere. This allows connection resources on a failed
    2318             :                  * cluster node to move to another node without requiring the
    2319             :                  * remote nodes to be fenced as well.
    2320             :                  */
    2321           0 :                 node->details->unseen = TRUE;
    2322           0 :                 reason = crm_strdup_printf("%s is active there (fencing will be"
    2323             :                                            " revoked if remote connection can "
    2324             :                                            "be re-established elsewhere)",
    2325             :                                            rsc->id);
    2326             :             }
    2327           0 :             should_fence = TRUE;
    2328             :         }
    2329             : 
    2330           0 :         if (should_fence) {
    2331           0 :             if (reason == NULL) {
    2332           0 :                reason = crm_strdup_printf("%s is thought to be active there", rsc->id);
    2333             :             }
    2334           0 :             pe_fence_node(rsc->cluster, node, reason, FALSE);
    2335             :         }
    2336           0 :         free(reason);
    2337             :     }
    2338             : 
    2339             :     /* In order to calculate priority_fencing_delay correctly, save the failure information and pass it to native_add_running(). */
    2340           0 :     save_on_fail = on_fail;
    2341             : 
    2342           0 :     if (node->details->unclean) {
    2343             :         /* No extra processing needed
    2344             :          * Also allows resources to be started again after a node is shot
    2345             :          */
    2346           0 :         on_fail = pcmk_on_fail_ignore;
    2347             :     }
    2348             : 
    2349           0 :     switch (on_fail) {
    2350           0 :         case pcmk_on_fail_ignore:
    2351             :             /* nothing to do */
    2352           0 :             break;
    2353             : 
    2354           0 :         case pcmk_on_fail_demote:
    2355           0 :             pcmk__set_rsc_flags(rsc, pcmk_rsc_failed);
    2356           0 :             demote_action(rsc, node, FALSE);
    2357           0 :             break;
    2358             : 
    2359           0 :         case pcmk_on_fail_fence_node:
    2360             :             /* treat it as if it is still running
    2361             :              * but also mark the node as unclean
    2362             :              */
    2363           0 :             reason = crm_strdup_printf("%s failed there", rsc->id);
    2364           0 :             pe_fence_node(rsc->cluster, node, reason, FALSE);
    2365           0 :             free(reason);
    2366           0 :             break;
    2367             : 
    2368           0 :         case pcmk_on_fail_standby_node:
    2369           0 :             node->details->standby = TRUE;
    2370           0 :             node->details->standby_onfail = TRUE;
    2371           0 :             break;
    2372             : 
    2373           0 :         case pcmk_on_fail_block:
    2374             :             /* is_managed == FALSE will prevent any
    2375             :              * actions being sent for the resource
    2376             :              */
    2377           0 :             pcmk__clear_rsc_flags(rsc, pcmk_rsc_managed);
    2378           0 :             pcmk__set_rsc_flags(rsc, pcmk_rsc_blocked);
    2379           0 :             break;
    2380             : 
    2381           0 :         case pcmk_on_fail_ban:
    2382             :             /* make sure it comes up somewhere else
    2383             :              * or not at all
    2384             :              */
    2385           0 :             resource_location(rsc, node, -PCMK_SCORE_INFINITY,
    2386             :                               "__action_migration_auto__", rsc->cluster);
    2387           0 :             break;
    2388             : 
    2389           0 :         case pcmk_on_fail_stop:
    2390           0 :             pe__set_next_role(rsc, pcmk_role_stopped,
    2391             :                               PCMK_META_ON_FAIL "=" PCMK_VALUE_STOP);
    2392           0 :             break;
    2393             : 
    2394           0 :         case pcmk_on_fail_restart:
    2395           0 :             if ((rsc->role != pcmk_role_stopped)
    2396           0 :                 && (rsc->role != pcmk_role_unknown)) {
    2397           0 :                 pcmk__set_rsc_flags(rsc,
    2398             :                                     pcmk_rsc_failed|pcmk_rsc_stop_if_failed);
    2399           0 :                 stop_action(rsc, node, FALSE);
    2400             :             }
    2401           0 :             break;
    2402             : 
    2403           0 :         case pcmk_on_fail_restart_container:
    2404           0 :             pcmk__set_rsc_flags(rsc, pcmk_rsc_failed|pcmk_rsc_stop_if_failed);
    2405           0 :             if ((rsc->container != NULL) && pcmk__is_bundled(rsc)) {
    2406             :                 /* A bundle's remote connection can run on a different node than
    2407             :                  * the bundle's container. We don't necessarily know where the
    2408             :                  * container is running yet, so remember it and add a stop
    2409             :                  * action for it later.
    2410             :                  */
    2411           0 :                 rsc->cluster->stop_needed =
    2412           0 :                     g_list_prepend(rsc->cluster->stop_needed, rsc->container);
    2413           0 :             } else if (rsc->container) {
    2414           0 :                 stop_action(rsc->container, node, FALSE);
    2415           0 :             } else if ((rsc->role != pcmk_role_stopped)
    2416           0 :                        && (rsc->role != pcmk_role_unknown)) {
    2417           0 :                 stop_action(rsc, node, FALSE);
    2418             :             }
    2419           0 :             break;
    2420             : 
    2421           0 :         case pcmk_on_fail_reset_remote:
    2422           0 :             pcmk__set_rsc_flags(rsc, pcmk_rsc_failed|pcmk_rsc_stop_if_failed);
    2423           0 :             if (pcmk_is_set(rsc->cluster->flags, pcmk_sched_fencing_enabled)) {
    2424           0 :                 tmpnode = NULL;
    2425           0 :                 if (rsc->is_remote_node) {
    2426           0 :                     tmpnode = pcmk_find_node(rsc->cluster, rsc->id);
    2427             :                 }
    2428           0 :                 if (pcmk__is_remote_node(tmpnode)
    2429           0 :                     && !(tmpnode->details->remote_was_fenced)) {
    2430             :                     /* The remote connection resource failed in a way that
    2431             :                      * should result in fencing the remote node.
    2432             :                      */
    2433           0 :                     pe_fence_node(rsc->cluster, tmpnode,
    2434             :                                   "remote connection is unrecoverable", FALSE);
    2435             :                 }
    2436             :             }
    2437             : 
    2438             :             /* require the stop action regardless if fencing is occurring or not. */
    2439           0 :             if (rsc->role > pcmk_role_stopped) {
    2440           0 :                 stop_action(rsc, node, FALSE);
    2441             :             }
    2442             : 
    2443             :             /* if reconnect delay is in use, prevent the connection from exiting the
    2444             :              * "STOPPED" role until the failure is cleared by the delay timeout. */
    2445           0 :             if (rsc->remote_reconnect_ms) {
    2446           0 :                 pe__set_next_role(rsc, pcmk_role_stopped, "remote reset");
    2447             :             }
    2448           0 :             break;
    2449             :     }
    2450             : 
    2451             :     /* ensure a remote-node connection failure forces an unclean remote-node
    2452             :      * to be fenced. By setting unseen = FALSE, the remote-node failure will
    2453             :      * result in a fencing operation regardless if we're going to attempt to 
    2454             :      * reconnect to the remote-node in this transition or not. */
    2455           0 :     if (pcmk_is_set(rsc->flags, pcmk_rsc_failed) && rsc->is_remote_node) {
    2456           0 :         tmpnode = pcmk_find_node(rsc->cluster, rsc->id);
    2457           0 :         if (tmpnode && tmpnode->details->unclean) {
    2458           0 :             tmpnode->details->unseen = FALSE;
    2459             :         }
    2460             :     }
    2461             : 
    2462           0 :     if ((rsc->role != pcmk_role_stopped)
    2463           0 :         && (rsc->role != pcmk_role_unknown)) {
    2464           0 :         if (pcmk_is_set(rsc->flags, pcmk_rsc_removed)) {
    2465           0 :             if (pcmk_is_set(rsc->flags, pcmk_rsc_managed)) {
    2466           0 :                 crm_notice("Removed resource %s is active on %s and will be "
    2467             :                            "stopped when possible",
    2468             :                            rsc->id, pcmk__node_name(node));
    2469             :             } else {
    2470           0 :                 crm_notice("Removed resource %s must be stopped manually on %s "
    2471             :                            "because " PCMK_OPT_STOP_ORPHAN_RESOURCES
    2472             :                            " is set to false", rsc->id, pcmk__node_name(node));
    2473             :             }
    2474             :         }
    2475             : 
    2476           0 :         native_add_running(rsc, node, rsc->cluster,
    2477             :                            (save_on_fail != pcmk_on_fail_ignore));
    2478           0 :         switch (on_fail) {
    2479           0 :             case pcmk_on_fail_ignore:
    2480           0 :                 break;
    2481           0 :             case pcmk_on_fail_demote:
    2482             :             case pcmk_on_fail_block:
    2483           0 :                 pcmk__set_rsc_flags(rsc, pcmk_rsc_failed);
    2484           0 :                 break;
    2485           0 :             default:
    2486           0 :                 pcmk__set_rsc_flags(rsc,
    2487             :                                     pcmk_rsc_failed|pcmk_rsc_stop_if_failed);
    2488           0 :                 break;
    2489             :         }
    2490             : 
    2491           0 :     } else if (rsc->clone_name && strchr(rsc->clone_name, ':') != NULL) {
    2492             :         /* Only do this for older status sections that included instance numbers
    2493             :          * Otherwise stopped instances will appear as orphans
    2494             :          */
    2495           0 :         pcmk__rsc_trace(rsc, "Resetting clone_name %s for %s (stopped)",
    2496             :                         rsc->clone_name, rsc->id);
    2497           0 :         free(rsc->clone_name);
    2498           0 :         rsc->clone_name = NULL;
    2499             : 
    2500             :     } else {
    2501           0 :         GList *possible_matches = pe__resource_actions(rsc, node,
    2502             :                                                        PCMK_ACTION_STOP, FALSE);
    2503           0 :         GList *gIter = possible_matches;
    2504             : 
    2505           0 :         for (; gIter != NULL; gIter = gIter->next) {
    2506           0 :             pcmk_action_t *stop = (pcmk_action_t *) gIter->data;
    2507             : 
    2508           0 :             pcmk__set_action_flags(stop, pcmk_action_optional);
    2509             :         }
    2510             : 
    2511           0 :         g_list_free(possible_matches);
    2512             :     }
    2513             : 
    2514             :     /* A successful stop after migrate_to on the migration source doesn't make
    2515             :      * the partially migrated resource stopped on the migration target.
    2516             :      */
    2517           0 :     if ((rsc->role == pcmk_role_stopped)
    2518           0 :         && rsc->partial_migration_source
    2519           0 :         && rsc->partial_migration_source->details == node->details
    2520           0 :         && rsc->partial_migration_target
    2521           0 :         && rsc->running_on) {
    2522             : 
    2523           0 :         rsc->role = pcmk_role_started;
    2524             :     }
    2525           0 : }
    2526             : 
    2527             : /* create active recurring operations as optional */
    2528             : static void
    2529           0 : process_recurring(pcmk_node_t *node, pcmk_resource_t *rsc,
    2530             :                   int start_index, int stop_index,
    2531             :                   GList *sorted_op_list, pcmk_scheduler_t *scheduler)
    2532             : {
    2533           0 :     int counter = -1;
    2534           0 :     const char *task = NULL;
    2535           0 :     const char *status = NULL;
    2536           0 :     GList *gIter = sorted_op_list;
    2537             : 
    2538           0 :     CRM_ASSERT(rsc);
    2539           0 :     pcmk__rsc_trace(rsc, "%s: Start index %d, stop index = %d",
    2540             :                     rsc->id, start_index, stop_index);
    2541             : 
    2542           0 :     for (; gIter != NULL; gIter = gIter->next) {
    2543           0 :         xmlNode *rsc_op = (xmlNode *) gIter->data;
    2544             : 
    2545           0 :         guint interval_ms = 0;
    2546           0 :         char *key = NULL;
    2547           0 :         const char *id = pcmk__xe_id(rsc_op);
    2548             : 
    2549           0 :         counter++;
    2550             : 
    2551           0 :         if (node->details->online == FALSE) {
    2552           0 :             pcmk__rsc_trace(rsc, "Skipping %s on %s: node is offline",
    2553             :                             rsc->id, pcmk__node_name(node));
    2554           0 :             break;
    2555             : 
    2556             :             /* Need to check if there's a monitor for role="Stopped" */
    2557           0 :         } else if (start_index < stop_index && counter <= stop_index) {
    2558           0 :             pcmk__rsc_trace(rsc, "Skipping %s on %s: resource is not active",
    2559             :                             id, pcmk__node_name(node));
    2560           0 :             continue;
    2561             : 
    2562           0 :         } else if (counter < start_index) {
    2563           0 :             pcmk__rsc_trace(rsc, "Skipping %s on %s: old %d",
    2564             :                             id, pcmk__node_name(node), counter);
    2565           0 :             continue;
    2566             :         }
    2567             : 
    2568           0 :         crm_element_value_ms(rsc_op, PCMK_META_INTERVAL, &interval_ms);
    2569           0 :         if (interval_ms == 0) {
    2570           0 :             pcmk__rsc_trace(rsc, "Skipping %s on %s: non-recurring",
    2571             :                             id, pcmk__node_name(node));
    2572           0 :             continue;
    2573             :         }
    2574             : 
    2575           0 :         status = crm_element_value(rsc_op, PCMK__XA_OP_STATUS);
    2576           0 :         if (pcmk__str_eq(status, "-1", pcmk__str_casei)) {
    2577           0 :             pcmk__rsc_trace(rsc, "Skipping %s on %s: status",
    2578             :                             id, pcmk__node_name(node));
    2579           0 :             continue;
    2580             :         }
    2581           0 :         task = crm_element_value(rsc_op, PCMK_XA_OPERATION);
    2582             :         /* create the action */
    2583           0 :         key = pcmk__op_key(rsc->id, task, interval_ms);
    2584           0 :         pcmk__rsc_trace(rsc, "Creating %s on %s", key, pcmk__node_name(node));
    2585           0 :         custom_action(rsc, key, task, node, TRUE, scheduler);
    2586             :     }
    2587           0 : }
    2588             : 
    2589             : void
    2590           0 : calculate_active_ops(const GList *sorted_op_list, int *start_index,
    2591             :                      int *stop_index)
    2592             : {
    2593           0 :     int counter = -1;
    2594           0 :     int implied_monitor_start = -1;
    2595           0 :     int implied_clone_start = -1;
    2596           0 :     const char *task = NULL;
    2597           0 :     const char *status = NULL;
    2598             : 
    2599           0 :     *stop_index = -1;
    2600           0 :     *start_index = -1;
    2601             : 
    2602           0 :     for (const GList *iter = sorted_op_list; iter != NULL; iter = iter->next) {
    2603           0 :         const xmlNode *rsc_op = (const xmlNode *) iter->data;
    2604             : 
    2605           0 :         counter++;
    2606             : 
    2607           0 :         task = crm_element_value(rsc_op, PCMK_XA_OPERATION);
    2608           0 :         status = crm_element_value(rsc_op, PCMK__XA_OP_STATUS);
    2609             : 
    2610           0 :         if (pcmk__str_eq(task, PCMK_ACTION_STOP, pcmk__str_casei)
    2611           0 :             && pcmk__str_eq(status, "0", pcmk__str_casei)) {
    2612           0 :             *stop_index = counter;
    2613             : 
    2614           0 :         } else if (pcmk__strcase_any_of(task, PCMK_ACTION_START,
    2615             :                                         PCMK_ACTION_MIGRATE_FROM, NULL)) {
    2616           0 :             *start_index = counter;
    2617             : 
    2618           0 :         } else if ((implied_monitor_start <= *stop_index)
    2619           0 :                    && pcmk__str_eq(task, PCMK_ACTION_MONITOR,
    2620           0 :                                    pcmk__str_casei)) {
    2621           0 :             const char *rc = crm_element_value(rsc_op, PCMK__XA_RC_CODE);
    2622             : 
    2623           0 :             if (pcmk__strcase_any_of(rc, "0", "8", NULL)) {
    2624           0 :                 implied_monitor_start = counter;
    2625             :             }
    2626           0 :         } else if (pcmk__strcase_any_of(task, PCMK_ACTION_PROMOTE,
    2627             :                                         PCMK_ACTION_DEMOTE, NULL)) {
    2628           0 :             implied_clone_start = counter;
    2629             :         }
    2630             :     }
    2631             : 
    2632           0 :     if (*start_index == -1) {
    2633           0 :         if (implied_clone_start != -1) {
    2634           0 :             *start_index = implied_clone_start;
    2635           0 :         } else if (implied_monitor_start != -1) {
    2636           0 :             *start_index = implied_monitor_start;
    2637             :         }
    2638             :     }
    2639           0 : }
    2640             : 
    2641             : // If resource history entry has shutdown lock, remember lock node and time
    2642             : static void
    2643           0 : unpack_shutdown_lock(const xmlNode *rsc_entry, pcmk_resource_t *rsc,
    2644             :                      const pcmk_node_t *node, pcmk_scheduler_t *scheduler)
    2645             : {
    2646           0 :     time_t lock_time = 0;   // When lock started (i.e. node shutdown time)
    2647             : 
    2648           0 :     if ((crm_element_value_epoch(rsc_entry, PCMK_OPT_SHUTDOWN_LOCK,
    2649           0 :                                  &lock_time) == pcmk_ok) && (lock_time != 0)) {
    2650             : 
    2651           0 :         if ((scheduler->shutdown_lock > 0)
    2652           0 :             && (get_effective_time(scheduler)
    2653           0 :                 > (lock_time + scheduler->shutdown_lock))) {
    2654           0 :             pcmk__rsc_info(rsc, "Shutdown lock for %s on %s expired",
    2655             :                            rsc->id, pcmk__node_name(node));
    2656           0 :             pe__clear_resource_history(rsc, node);
    2657             :         } else {
    2658             :             /* @COMPAT I don't like breaking const signatures, but
    2659             :              * rsc->lock_node should really be const -- we just can't change it
    2660             :              * until the next API compatibility break.
    2661             :              */
    2662           0 :             rsc->lock_node = (pcmk_node_t *) node;
    2663           0 :             rsc->lock_time = lock_time;
    2664             :         }
    2665             :     }
    2666           0 : }
    2667             : 
    2668             : /*!
    2669             :  * \internal
    2670             :  * \brief Unpack one \c PCMK__XE_LRM_RESOURCE entry from a node's CIB status
    2671             :  *
    2672             :  * \param[in,out] node       Node whose status is being unpacked
    2673             :  * \param[in]     rsc_entry  \c PCMK__XE_LRM_RESOURCE XML being unpacked
    2674             :  * \param[in,out] scheduler  Scheduler data
    2675             :  *
    2676             :  * \return Resource corresponding to the entry, or NULL if no operation history
    2677             :  */
    2678             : static pcmk_resource_t *
    2679           0 : unpack_lrm_resource(pcmk_node_t *node, const xmlNode *lrm_resource,
    2680             :                     pcmk_scheduler_t *scheduler)
    2681             : {
    2682           0 :     GList *gIter = NULL;
    2683           0 :     int stop_index = -1;
    2684           0 :     int start_index = -1;
    2685           0 :     enum rsc_role_e req_role = pcmk_role_unknown;
    2686             : 
    2687           0 :     const char *rsc_id = pcmk__xe_id(lrm_resource);
    2688             : 
    2689           0 :     pcmk_resource_t *rsc = NULL;
    2690           0 :     GList *op_list = NULL;
    2691           0 :     GList *sorted_op_list = NULL;
    2692             : 
    2693           0 :     xmlNode *rsc_op = NULL;
    2694           0 :     xmlNode *last_failure = NULL;
    2695             : 
    2696           0 :     enum action_fail_response on_fail = pcmk_on_fail_ignore;
    2697           0 :     enum rsc_role_e saved_role = pcmk_role_unknown;
    2698             : 
    2699           0 :     if (rsc_id == NULL) {
    2700           0 :         pcmk__config_err("Ignoring invalid " PCMK__XE_LRM_RESOURCE
    2701             :                          " entry: No " PCMK_XA_ID);
    2702           0 :         crm_log_xml_info(lrm_resource, "missing-id");
    2703           0 :         return NULL;
    2704             :     }
    2705           0 :     crm_trace("Unpacking " PCMK__XE_LRM_RESOURCE " for %s on %s",
    2706             :               rsc_id, pcmk__node_name(node));
    2707             : 
    2708             :     /* Build a list of individual PCMK__XE_LRM_RSC_OP entries, so we can sort
    2709             :      * them
    2710             :      */
    2711           0 :     for (rsc_op = pcmk__xe_first_child(lrm_resource, PCMK__XE_LRM_RSC_OP, NULL,
    2712             :                                        NULL);
    2713           0 :          rsc_op != NULL; rsc_op = pcmk__xe_next_same(rsc_op)) {
    2714             : 
    2715           0 :         op_list = g_list_prepend(op_list, rsc_op);
    2716             :     }
    2717             : 
    2718           0 :     if (!pcmk_is_set(scheduler->flags, pcmk_sched_shutdown_lock)) {
    2719           0 :         if (op_list == NULL) {
    2720             :             // If there are no operations, there is nothing to do
    2721           0 :             return NULL;
    2722             :         }
    2723             :     }
    2724             : 
    2725             :     /* find the resource */
    2726           0 :     rsc = unpack_find_resource(scheduler, node, rsc_id);
    2727           0 :     if (rsc == NULL) {
    2728           0 :         if (op_list == NULL) {
    2729             :             // If there are no operations, there is nothing to do
    2730           0 :             return NULL;
    2731             :         } else {
    2732           0 :             rsc = process_orphan_resource(lrm_resource, node, scheduler);
    2733             :         }
    2734             :     }
    2735           0 :     CRM_ASSERT(rsc != NULL);
    2736             : 
    2737             :     // Check whether the resource is "shutdown-locked" to this node
    2738           0 :     if (pcmk_is_set(scheduler->flags, pcmk_sched_shutdown_lock)) {
    2739           0 :         unpack_shutdown_lock(lrm_resource, rsc, node, scheduler);
    2740             :     }
    2741             : 
    2742             :     /* process operations */
    2743           0 :     saved_role = rsc->role;
    2744           0 :     rsc->role = pcmk_role_unknown;
    2745           0 :     sorted_op_list = g_list_sort(op_list, sort_op_by_callid);
    2746             : 
    2747           0 :     for (gIter = sorted_op_list; gIter != NULL; gIter = gIter->next) {
    2748           0 :         xmlNode *rsc_op = (xmlNode *) gIter->data;
    2749             : 
    2750           0 :         unpack_rsc_op(rsc, node, rsc_op, &last_failure, &on_fail);
    2751             :     }
    2752             : 
    2753             :     /* create active recurring operations as optional */
    2754           0 :     calculate_active_ops(sorted_op_list, &start_index, &stop_index);
    2755           0 :     process_recurring(node, rsc, start_index, stop_index, sorted_op_list,
    2756             :                       scheduler);
    2757             : 
    2758             :     /* no need to free the contents */
    2759           0 :     g_list_free(sorted_op_list);
    2760             : 
    2761           0 :     process_rsc_state(rsc, node, on_fail);
    2762             : 
    2763           0 :     if (get_target_role(rsc, &req_role)) {
    2764           0 :         if ((rsc->next_role == pcmk_role_unknown)
    2765           0 :             || (req_role < rsc->next_role)) {
    2766             : 
    2767           0 :             pe__set_next_role(rsc, req_role, PCMK_META_TARGET_ROLE);
    2768             : 
    2769           0 :         } else if (req_role > rsc->next_role) {
    2770           0 :             pcmk__rsc_info(rsc,
    2771             :                            "%s: Not overwriting calculated next role %s"
    2772             :                            " with requested next role %s",
    2773             :                            rsc->id, pcmk_role_text(rsc->next_role),
    2774             :                            pcmk_role_text(req_role));
    2775             :         }
    2776             :     }
    2777             : 
    2778           0 :     if (saved_role > rsc->role) {
    2779           0 :         rsc->role = saved_role;
    2780             :     }
    2781             : 
    2782           0 :     return rsc;
    2783             : }
    2784             : 
    2785             : static void
    2786           0 : handle_orphaned_container_fillers(const xmlNode *lrm_rsc_list,
    2787             :                                   pcmk_scheduler_t *scheduler)
    2788             : {
    2789           0 :     for (const xmlNode *rsc_entry = pcmk__xe_first_child(lrm_rsc_list, NULL,
    2790             :                                                          NULL, NULL);
    2791           0 :          rsc_entry != NULL; rsc_entry = pcmk__xe_next(rsc_entry)) {
    2792             : 
    2793             :         pcmk_resource_t *rsc;
    2794             :         pcmk_resource_t *container;
    2795             :         const char *rsc_id;
    2796             :         const char *container_id;
    2797             : 
    2798           0 :         if (!pcmk__xe_is(rsc_entry, PCMK__XE_LRM_RESOURCE)) {
    2799           0 :             continue;
    2800             :         }
    2801             : 
    2802           0 :         container_id = crm_element_value(rsc_entry, PCMK__META_CONTAINER);
    2803           0 :         rsc_id = crm_element_value(rsc_entry, PCMK_XA_ID);
    2804           0 :         if (container_id == NULL || rsc_id == NULL) {
    2805           0 :             continue;
    2806             :         }
    2807             : 
    2808           0 :         container = pe_find_resource(scheduler->resources, container_id);
    2809           0 :         if (container == NULL) {
    2810           0 :             continue;
    2811             :         }
    2812             : 
    2813           0 :         rsc = pe_find_resource(scheduler->resources, rsc_id);
    2814           0 :         if ((rsc == NULL) || (rsc->container != NULL)
    2815           0 :             || !pcmk_is_set(rsc->flags, pcmk_rsc_removed_filler)) {
    2816           0 :             continue;
    2817             :         }
    2818             : 
    2819           0 :         pcmk__rsc_trace(rsc, "Mapped container of orphaned resource %s to %s",
    2820             :                         rsc->id, container_id);
    2821           0 :         rsc->container = container;
    2822           0 :         container->fillers = g_list_append(container->fillers, rsc);
    2823             :     }
    2824           0 : }
    2825             : 
    2826             : /*!
    2827             :  * \internal
    2828             :  * \brief Unpack one node's lrm status section
    2829             :  *
    2830             :  * \param[in,out] node       Node whose status is being unpacked
    2831             :  * \param[in]     xml        CIB node state XML
    2832             :  * \param[in,out] scheduler  Scheduler data
    2833             :  */
    2834             : static void
    2835           0 : unpack_node_lrm(pcmk_node_t *node, const xmlNode *xml,
    2836             :                 pcmk_scheduler_t *scheduler)
    2837             : {
    2838           0 :     bool found_orphaned_container_filler = false;
    2839             : 
    2840             :     // Drill down to PCMK__XE_LRM_RESOURCES section
    2841           0 :     xml = pcmk__xe_first_child(xml, PCMK__XE_LRM, NULL, NULL);
    2842           0 :     if (xml == NULL) {
    2843           0 :         return;
    2844             :     }
    2845           0 :     xml = pcmk__xe_first_child(xml, PCMK__XE_LRM_RESOURCES, NULL, NULL);
    2846           0 :     if (xml == NULL) {
    2847           0 :         return;
    2848             :     }
    2849             : 
    2850             :     // Unpack each PCMK__XE_LRM_RESOURCE entry
    2851           0 :     for (const xmlNode *rsc_entry = pcmk__xe_first_child(xml,
    2852             :                                                          PCMK__XE_LRM_RESOURCE,
    2853             :                                                          NULL, NULL);
    2854           0 :          rsc_entry != NULL; rsc_entry = pcmk__xe_next_same(rsc_entry)) {
    2855             : 
    2856           0 :         pcmk_resource_t *rsc = unpack_lrm_resource(node, rsc_entry, scheduler);
    2857             : 
    2858           0 :         if ((rsc != NULL)
    2859           0 :             && pcmk_is_set(rsc->flags, pcmk_rsc_removed_filler)) {
    2860           0 :             found_orphaned_container_filler = true;
    2861             :         }
    2862             :     }
    2863             : 
    2864             :     /* Now that all resource state has been unpacked for this node, map any
    2865             :      * orphaned container fillers to their container resource.
    2866             :      */
    2867           0 :     if (found_orphaned_container_filler) {
    2868           0 :         handle_orphaned_container_fillers(xml, scheduler);
    2869             :     }
    2870             : }
    2871             : 
    2872             : static void
    2873           0 : set_active(pcmk_resource_t *rsc)
    2874             : {
    2875           0 :     const pcmk_resource_t *top = pe__const_top_resource(rsc, false);
    2876             : 
    2877           0 :     if (top && pcmk_is_set(top->flags, pcmk_rsc_promotable)) {
    2878           0 :         rsc->role = pcmk_role_unpromoted;
    2879             :     } else {
    2880           0 :         rsc->role = pcmk_role_started;
    2881             :     }
    2882           0 : }
    2883             : 
    2884             : static void
    2885           0 : set_node_score(gpointer key, gpointer value, gpointer user_data)
    2886             : {
    2887           0 :     pcmk_node_t *node = value;
    2888           0 :     int *score = user_data;
    2889             : 
    2890           0 :     node->weight = *score;
    2891           0 : }
    2892             : 
/* XPath fragments used below to search the scheduler input XML for resource
 * history. Each is meant to be followed by an attribute predicate (such as
 * "[@id='...']") before the next fragment is appended.
 */

// Absolute path to the node state elements in the status section
#define XPATH_NODE_STATE "/" PCMK_XE_CIB "/" PCMK_XE_STATUS \
                         "/" PCMK__XE_NODE_STATE

// Path from a node state element down to its resource history entries
#define SUB_XPATH_LRM_RESOURCE "/" PCMK__XE_LRM             \
                               "/" PCMK__XE_LRM_RESOURCES   \
                               "/" PCMK__XE_LRM_RESOURCE

// Path from a resource history entry down to its operation entries
#define SUB_XPATH_LRM_RSC_OP "/" PCMK__XE_LRM_RSC_OP
    2899             : 
    2900             : static xmlNode *
    2901           0 : find_lrm_op(const char *resource, const char *op, const char *node, const char *source,
    2902             :             int target_rc, pcmk_scheduler_t *scheduler)
    2903             : {
    2904           0 :     GString *xpath = NULL;
    2905           0 :     xmlNode *xml = NULL;
    2906             : 
    2907           0 :     CRM_CHECK((resource != NULL) && (op != NULL) && (node != NULL),
    2908             :               return NULL);
    2909             : 
    2910           0 :     xpath = g_string_sized_new(256);
    2911           0 :     pcmk__g_strcat(xpath,
    2912             :                    XPATH_NODE_STATE "[@" PCMK_XA_UNAME "='", node, "']"
    2913             :                    SUB_XPATH_LRM_RESOURCE "[@" PCMK_XA_ID "='", resource, "']"
    2914             :                    SUB_XPATH_LRM_RSC_OP "[@" PCMK_XA_OPERATION "='", op, "'",
    2915             :                    NULL);
    2916             : 
    2917             :     /* Need to check against transition_magic too? */
    2918           0 :     if ((source != NULL) && (strcmp(op, PCMK_ACTION_MIGRATE_TO) == 0)) {
    2919           0 :         pcmk__g_strcat(xpath,
    2920             :                        " and @" PCMK__META_MIGRATE_TARGET "='", source, "']",
    2921             :                        NULL);
    2922             : 
    2923           0 :     } else if ((source != NULL)
    2924           0 :                && (strcmp(op, PCMK_ACTION_MIGRATE_FROM) == 0)) {
    2925           0 :         pcmk__g_strcat(xpath,
    2926             :                        " and @" PCMK__META_MIGRATE_SOURCE "='", source, "']",
    2927             :                        NULL);
    2928             :     } else {
    2929             :         g_string_append_c(xpath, ']');
    2930             :     }
    2931             : 
    2932           0 :     xml = get_xpath_object((const char *) xpath->str, scheduler->input,
    2933             :                            LOG_DEBUG);
    2934           0 :     g_string_free(xpath, TRUE);
    2935             : 
    2936           0 :     if (xml && target_rc >= 0) {
    2937           0 :         int rc = PCMK_OCF_UNKNOWN_ERROR;
    2938           0 :         int status = PCMK_EXEC_ERROR;
    2939             : 
    2940           0 :         crm_element_value_int(xml, PCMK__XA_RC_CODE, &rc);
    2941           0 :         crm_element_value_int(xml, PCMK__XA_OP_STATUS, &status);
    2942           0 :         if ((rc != target_rc) || (status != PCMK_EXEC_DONE)) {
    2943           0 :             return NULL;
    2944             :         }
    2945             :     }
    2946           0 :     return xml;
    2947             : }
    2948             : 
    2949             : static xmlNode *
    2950           0 : find_lrm_resource(const char *rsc_id, const char *node_name,
    2951             :                   pcmk_scheduler_t *scheduler)
    2952             : {
    2953           0 :     GString *xpath = NULL;
    2954           0 :     xmlNode *xml = NULL;
    2955             : 
    2956           0 :     CRM_CHECK((rsc_id != NULL) && (node_name != NULL), return NULL);
    2957             : 
    2958           0 :     xpath = g_string_sized_new(256);
    2959           0 :     pcmk__g_strcat(xpath,
    2960             :                    XPATH_NODE_STATE "[@" PCMK_XA_UNAME "='", node_name, "']"
    2961             :                    SUB_XPATH_LRM_RESOURCE "[@" PCMK_XA_ID "='", rsc_id, "']",
    2962             :                    NULL);
    2963             : 
    2964           0 :     xml = get_xpath_object((const char *) xpath->str, scheduler->input,
    2965             :                            LOG_DEBUG);
    2966             : 
    2967           0 :     g_string_free(xpath, TRUE);
    2968           0 :     return xml;
    2969             : }
    2970             : 
    2971             : /*!
    2972             :  * \internal
    2973             :  * \brief Check whether a resource has no completed action history on a node
    2974             :  *
    2975             :  * \param[in,out] rsc        Resource to check
    2976             :  * \param[in]     node_name  Node to check
    2977             :  *
    2978             :  * \return true if \p rsc_id is unknown on \p node_name, otherwise false
    2979             :  */
    2980             : static bool
    2981           0 : unknown_on_node(pcmk_resource_t *rsc, const char *node_name)
    2982             : {
    2983           0 :     bool result = false;
    2984             :     xmlXPathObjectPtr search;
    2985           0 :     char *xpath = NULL;
    2986             : 
    2987           0 :     xpath = crm_strdup_printf(XPATH_NODE_STATE "[@" PCMK_XA_UNAME "='%s']"
    2988             :                               SUB_XPATH_LRM_RESOURCE "[@" PCMK_XA_ID "='%s']"
    2989             :                               SUB_XPATH_LRM_RSC_OP
    2990             :                               "[@" PCMK__XA_RC_CODE "!='%d']",
    2991             :                               node_name, rsc->id, PCMK_OCF_UNKNOWN);
    2992             : 
    2993           0 :     search = xpath_search(rsc->cluster->input, xpath);
    2994           0 :     result = (numXpathResults(search) == 0);
    2995           0 :     freeXpathObject(search);
    2996           0 :     free(xpath);
    2997           0 :     return result;
    2998             : }
    2999             : 
    3000             : /*!
    3001             :  * \brief Check whether a probe/monitor indicating the resource was not running
    3002             :  * on a node happened after some event
    3003             :  *
    3004             :  * \param[in]     rsc_id     Resource being checked
    3005             :  * \param[in]     node_name  Node being checked
    3006             :  * \param[in]     xml_op     Event that monitor is being compared to
    3007             :  * \param[in]     same_node  Whether the operations are on the same node
    3008             :  * \param[in,out] scheduler  Scheduler data
    3009             :  *
    3010             :  * \return true if such a monitor happened after event, false otherwise
    3011             :  */
    3012             : static bool
    3013           0 : monitor_not_running_after(const char *rsc_id, const char *node_name,
    3014             :                           const xmlNode *xml_op, bool same_node,
    3015             :                           pcmk_scheduler_t *scheduler)
    3016             : {
    3017             :     /* Any probe/monitor operation on the node indicating it was not running
    3018             :      * there
    3019             :      */
    3020           0 :     xmlNode *monitor = find_lrm_op(rsc_id, PCMK_ACTION_MONITOR, node_name,
    3021             :                                    NULL, PCMK_OCF_NOT_RUNNING, scheduler);
    3022             : 
    3023           0 :     return (monitor && pe__is_newer_op(monitor, xml_op, same_node) > 0);
    3024             : }
    3025             : 
    3026             : /*!
    3027             :  * \brief Check whether any non-monitor operation on a node happened after some
    3028             :  * event
    3029             :  *
    3030             :  * \param[in]     rsc_id     Resource being checked
    3031             :  * \param[in]     node_name  Node being checked
    3032             :  * \param[in]     xml_op     Event that non-monitor is being compared to
    3033             :  * \param[in]     same_node  Whether the operations are on the same node
    3034             :  * \param[in,out] scheduler  Scheduler data
    3035             :  *
    3036             :  * \return true if such a operation happened after event, false otherwise
    3037             :  */
    3038             : static bool
    3039           0 : non_monitor_after(const char *rsc_id, const char *node_name,
    3040             :                   const xmlNode *xml_op, bool same_node,
    3041             :                   pcmk_scheduler_t *scheduler)
    3042             : {
    3043           0 :     xmlNode *lrm_resource = NULL;
    3044             : 
    3045           0 :     lrm_resource = find_lrm_resource(rsc_id, node_name, scheduler);
    3046           0 :     if (lrm_resource == NULL) {
    3047           0 :         return false;
    3048             :     }
    3049             : 
    3050           0 :     for (xmlNode *op = pcmk__xe_first_child(lrm_resource, PCMK__XE_LRM_RSC_OP,
    3051             :                                             NULL, NULL);
    3052           0 :          op != NULL; op = pcmk__xe_next_same(op)) {
    3053             : 
    3054           0 :         const char * task = NULL;
    3055             : 
    3056           0 :         if (op == xml_op) {
    3057           0 :             continue;
    3058             :         }
    3059             : 
    3060           0 :         task = crm_element_value(op, PCMK_XA_OPERATION);
    3061             : 
    3062           0 :         if (pcmk__str_any_of(task, PCMK_ACTION_START, PCMK_ACTION_STOP,
    3063             :                              PCMK_ACTION_MIGRATE_TO, PCMK_ACTION_MIGRATE_FROM,
    3064             :                              NULL)
    3065           0 :             && pe__is_newer_op(op, xml_op, same_node) > 0) {
    3066           0 :             return true;
    3067             :         }
    3068             :     }
    3069             : 
    3070           0 :     return false;
    3071             : }
    3072             : 
    3073             : /*!
    3074             :  * \brief Check whether the resource has newer state on a node after a migration
    3075             :  * attempt
    3076             :  *
    3077             :  * \param[in]     rsc_id        Resource being checked
    3078             :  * \param[in]     node_name     Node being checked
    3079             :  * \param[in]     migrate_to    Any migrate_to event that is being compared to
    3080             :  * \param[in]     migrate_from  Any migrate_from event that is being compared to
    3081             :  * \param[in,out] scheduler     Scheduler data
    3082             :  *
    3083             :  * \return true if such a operation happened after event, false otherwise
    3084             :  */
    3085             : static bool
    3086           0 : newer_state_after_migrate(const char *rsc_id, const char *node_name,
    3087             :                           const xmlNode *migrate_to,
    3088             :                           const xmlNode *migrate_from,
    3089             :                           pcmk_scheduler_t *scheduler)
    3090             : {
    3091           0 :     const xmlNode *xml_op = migrate_to;
    3092           0 :     const char *source = NULL;
    3093           0 :     const char *target = NULL;
    3094           0 :     bool same_node = false;
    3095             : 
    3096           0 :     if (migrate_from) {
    3097           0 :         xml_op = migrate_from;
    3098             :     }
    3099             : 
    3100           0 :     source = crm_element_value(xml_op, PCMK__META_MIGRATE_SOURCE);
    3101           0 :     target = crm_element_value(xml_op, PCMK__META_MIGRATE_TARGET);
    3102             : 
    3103             :     /* It's preferred to compare to the migrate event on the same node if
    3104             :      * existing, since call ids are more reliable.
    3105             :      */
    3106           0 :     if (pcmk__str_eq(node_name, target, pcmk__str_casei)) {
    3107           0 :         if (migrate_from) {
    3108           0 :            xml_op = migrate_from;
    3109           0 :            same_node = true;
    3110             : 
    3111             :         } else {
    3112           0 :            xml_op = migrate_to;
    3113             :         }
    3114             : 
    3115           0 :     } else if (pcmk__str_eq(node_name, source, pcmk__str_casei)) {
    3116           0 :         if (migrate_to) {
    3117           0 :            xml_op = migrate_to;
    3118           0 :            same_node = true;
    3119             : 
    3120             :         } else {
    3121           0 :            xml_op = migrate_from;
    3122             :         }
    3123             :     }
    3124             : 
    3125             :     /* If there's any newer non-monitor operation on the node, or any newer
    3126             :      * probe/monitor operation on the node indicating it was not running there,
    3127             :      * the migration events potentially no longer matter for the node.
    3128             :      */
    3129           0 :     return non_monitor_after(rsc_id, node_name, xml_op, same_node, scheduler)
    3130           0 :            || monitor_not_running_after(rsc_id, node_name, xml_op, same_node,
    3131             :                                         scheduler);
    3132             : }
    3133             : 
    3134             : /*!
    3135             :  * \internal
    3136             :  * \brief Parse migration source and target node names from history entry
    3137             :  *
    3138             :  * \param[in]  entry        Resource history entry for a migration action
    3139             :  * \param[in]  source_node  If not NULL, source must match this node
    3140             :  * \param[in]  target_node  If not NULL, target must match this node
    3141             :  * \param[out] source_name  Where to store migration source node name
    3142             :  * \param[out] target_name  Where to store migration target node name
    3143             :  *
    3144             :  * \return Standard Pacemaker return code
    3145             :  */
    3146             : static int
    3147           0 : get_migration_node_names(const xmlNode *entry, const pcmk_node_t *source_node,
    3148             :                          const pcmk_node_t *target_node,
    3149             :                          const char **source_name, const char **target_name)
    3150             : {
                     :     /* Read both names from the history entry. The output pointers are
                     :      * assigned before any validation, so they are set even when an error
                     :      * is returned.
                     :      */
    3151           0 :     *source_name = crm_element_value(entry, PCMK__META_MIGRATE_SOURCE);
    3152           0 :     *target_name = crm_element_value(entry, PCMK__META_MIGRATE_TARGET);
                     :     // Both attributes are required; an entry missing either is rejected
    3153           0 :     if ((*source_name == NULL) || (*target_name == NULL)) {
    3154           0 :         pcmk__config_err("Ignoring resource history entry %s without "
    3155             :                          PCMK__META_MIGRATE_SOURCE " and "
    3156             :                          PCMK__META_MIGRATE_TARGET, pcmk__xe_id(entry));
    3157           0 :         return pcmk_rc_unpack_error;
    3158             :     }
    3159             : 
                     :     // If the caller supplied a source node, the recorded name must match it
    3160           0 :     if ((source_node != NULL)
    3161           0 :         && !pcmk__str_eq(*source_name, source_node->details->uname,
    3162             :                          pcmk__str_casei|pcmk__str_null_matches)) {
    3163           0 :         pcmk__config_err("Ignoring resource history entry %s because "
    3164             :                          PCMK__META_MIGRATE_SOURCE "='%s' does not match %s",
    3165             :                          pcmk__xe_id(entry), *source_name,
    3166             :                          pcmk__node_name(source_node));
    3167           0 :         return pcmk_rc_unpack_error;
    3168             :     }
    3169             : 
                     :     // Same check for the target node, when the caller supplied one
    3170           0 :     if ((target_node != NULL)
    3171           0 :         && !pcmk__str_eq(*target_name, target_node->details->uname,
    3172             :                          pcmk__str_casei|pcmk__str_null_matches)) {
    3173           0 :         pcmk__config_err("Ignoring resource history entry %s because "
    3174             :                          PCMK__META_MIGRATE_TARGET "='%s' does not match %s",
    3175             :                          pcmk__xe_id(entry), *target_name,
    3176             :                          pcmk__node_name(target_node));
    3177           0 :         return pcmk_rc_unpack_error;
    3178             :     }
    3179             : 
    3180           0 :     return pcmk_rc_ok;
    3181             : }
    3182             : 
    3183             : /*!
    3184             :  * \internal
    3185             :  * \brief Add a migration source to a resource's list of dangling migrations
    3186             :  *
    3187             :  * If the migrate_to and migrate_from actions in a live migration both
    3188             :  * succeeded, but there is no stop on the source, the migration is considered
    3189             :  * "dangling." Add the source to the resource's dangling migration list, which
    3190             :  * will be used to schedule a stop on the source without affecting the target.
    3191             :  *
    3192             :  * \param[in,out] rsc   Resource involved in migration
    3193             :  * \param[in]     node  Migration source
                     :  *
                     :  * \note This also sets the resource's role to stopped.
    3194             :  */
    3195             : static void
    3196           0 : add_dangling_migration(pcmk_resource_t *rsc, const pcmk_node_t *node)
    3197             : {
    3198           0 :     pcmk__rsc_trace(rsc, "Dangling migration of %s requires stop on %s",
    3199             :                     rsc->id, pcmk__node_name(node));
    3200           0 :     rsc->role = pcmk_role_stopped;
                     :     // The list stores generic pointers, so the const qualifier is cast away
    3201           0 :     rsc->dangling_migrations = g_list_prepend(rsc->dangling_migrations,
    3202             :                                               (gpointer) node);
    3203           0 : }
    3204             : 
    3205             : /*!
    3206             :  * \internal
    3207             :  * \brief Update resource role etc. after a successful migrate_to action
    3208             :  *
    3209             :  * \param[in,out] history  Parsed action result history
    3210             :  */
    3211             : static void
    3212           0 : unpack_migrate_to_success(struct action_history *history)
    3213             : {
    3214             :     /* A complete migration sequence is:
    3215             :      * 1. migrate_to on source node (which succeeded if we get to this function)
    3216             :      * 2. migrate_from on target node
    3217             :      * 3. stop on source node
    3218             :      *
    3219             :      * If no migrate_from has happened, the migration is considered to be
    3220             :      * "partial". If the migrate_from succeeded but no stop has happened, the
    3221             :      * migration is considered to be "dangling".
    3222             :      *
    3223             :      * If a successful migrate_to and stop have happened on the source node, we
    3224             :      * still need to check for a partial migration, due to scenarios (easier to
    3225             :      * produce with batch-limit=1) like:
    3226             :      *
    3227             :      * - A resource is migrating from node1 to node2, and a migrate_to is
    3228             :      *   initiated for it on node1.
    3229             :      *
    3230             :      * - node2 goes into standby mode while the migrate_to is pending, which
    3231             :      *   aborts the transition.
    3232             :      *
    3233             :      * - Upon completion of the migrate_to, a new transition schedules a stop
    3234             :      *   on both nodes and a start on node1.
    3235             :      *
    3236             :      * - If the new transition is aborted for any reason while the resource is
    3237             :      *   stopping on node1, the transition after that stop completes will see
    3238             :      *   the migrate_to and stop on the source, but it's still a partial
    3239             :      *   migration, and the resource must be stopped on node2 because it is
    3240             :      *   potentially active there due to the migrate_to.
    3241             :      *
    3242             :      *   We also need to take into account that either node's history may be
    3243             :      *   cleared at any point in the migration process.
    3244             :      */
                     : 
                     :     /* These two defaults are kept when no migrate_from entry is found below;
                     :      * from_status then stays PCMK_EXEC_PENDING, so neither the dangling check
                     :      * nor the "migrate_from failed" branch is taken.
                     :      */
    3245           0 :     int from_rc = PCMK_OCF_OK;
    3246           0 :     int from_status = PCMK_EXEC_PENDING;
    3247           0 :     pcmk_node_t *target_node = NULL;
    3248           0 :     xmlNode *migrate_from = NULL;
    3249           0 :     const char *source = NULL;
    3250           0 :     const char *target = NULL;
    3251           0 :     bool source_newer_op = false;
    3252           0 :     bool target_newer_state = false;
    3253           0 :     bool active_on_target = false;
    3254             : 
    3255             :     // Get source and target node names from XML
                     :     // (history->node is the source; no target node is passed for checking)
    3256           0 :     if (get_migration_node_names(history->xml, history->node, NULL, &source,
    3257             :                                  &target) != pcmk_rc_ok) {
    3258           0 :         return;
    3259             :     }
    3260             : 
    3261             :     // Check for newer state on the source
    3262           0 :     source_newer_op = non_monitor_after(history->rsc->id, source, history->xml,
    3263           0 :                                         true, history->rsc->cluster);
    3264             : 
    3265             :     // Check for a migrate_from action from this source on the target
    3266           0 :     migrate_from = find_lrm_op(history->rsc->id, PCMK_ACTION_MIGRATE_FROM,
    3267           0 :                                target, source, -1, history->rsc->cluster);
    3268           0 :     if (migrate_from != NULL) {
    3269           0 :         if (source_newer_op) {
    3270             :             /* There's a newer non-monitor operation on the source and a
    3271             :              * migrate_from on the target, so this migrate_to is irrelevant to
    3272             :              * the resource's state.
    3273             :              */
    3274           0 :             return;
    3275             :         }
    3276           0 :         crm_element_value_int(migrate_from, PCMK__XA_RC_CODE, &from_rc);
    3277           0 :         crm_element_value_int(migrate_from, PCMK__XA_OP_STATUS, &from_status);
    3278             :     }
    3279             : 
    3280             :     /* If the resource has newer state on both the source and target after the
    3281             :      * migration events, this migrate_to is irrelevant to the resource's state.
    3282             :      */
    3283           0 :     target_newer_state = newer_state_after_migrate(history->rsc->id, target,
    3284           0 :                                                    history->xml, migrate_from,
    3285           0 :                                                    history->rsc->cluster);
    3286           0 :     if (source_newer_op && target_newer_state) {
    3287           0 :         return;
    3288             :     }
    3289             : 
    3290             :     /* Check for dangling migration (migrate_from succeeded but stop not done).
    3291             :      * We know there's no stop because we already returned if the target has a
    3292             :      * migrate_from and the source has any newer non-monitor operation.
    3293             :      */
    3294           0 :     if ((from_rc == PCMK_OCF_OK) && (from_status == PCMK_EXEC_DONE)) {
    3295           0 :         add_dangling_migration(history->rsc, history->node);
    3296           0 :         return;
    3297             :     }
    3298             : 
    3299             :     /* Without newer state, this migrate_to implies the resource is active.
    3300             :      * (Clones are not allowed to migrate, so role can't be promoted.)
    3301             :      */
    3302           0 :     history->rsc->role = pcmk_role_started;
    3303             : 
                     :     // The target only counts as active if it is known, online, and has no
                     :     // newer state recorded for the resource
    3304           0 :     target_node = pcmk_find_node(history->rsc->cluster, target);
    3305           0 :     active_on_target = !target_newer_state && (target_node != NULL)
    3306           0 :                        && target_node->details->online;
    3307             : 
    3308           0 :     if (from_status != PCMK_EXEC_PENDING) { // migrate_from failed on target
    3309           0 :         if (active_on_target) {
    3310           0 :             native_add_running(history->rsc, target_node, history->rsc->cluster,
    3311             :                                TRUE);
    3312             :         } else {
    3313             :             // Mark resource as failed, require recovery, and prevent migration
    3314           0 :             pcmk__set_rsc_flags(history->rsc,
    3315             :                                 pcmk_rsc_failed|pcmk_rsc_stop_if_failed);
    3316           0 :             pcmk__clear_rsc_flags(history->rsc, pcmk_rsc_migratable);
    3317             :         }
    3318           0 :         return;
    3319             :     }
    3320             : 
    3321             :     // The migrate_from is pending, complete but erased, or to be scheduled
    3322             : 
    3323             :     /* If there is no history at all for the resource on an online target, then
    3324             :      * it was likely cleaned. Just return, and we'll schedule a probe. Once we
    3325             :      * have the probe result, it will be reflected in target_newer_state.
    3326             :      */
    3327           0 :     if ((target_node != NULL) && target_node->details->online
    3328           0 :         && unknown_on_node(history->rsc, target)) {
    3329           0 :         return;
    3330             :     }
    3331             : 
    3332           0 :     if (active_on_target) {
    3333           0 :         pcmk_node_t *source_node = pcmk_find_node(history->rsc->cluster,
    3334             :                                                   source);
    3335             : 
    3336           0 :         native_add_running(history->rsc, target_node, history->rsc->cluster,
    3337             :                            FALSE);
    3338           0 :         if ((source_node != NULL) && source_node->details->online) {
    3339             :             /* This is a partial migration: the migrate_to completed
    3340             :              * successfully on the source, but the migrate_from has not
    3341             :              * completed. Remember the source and target; if the newly
    3342             :              * chosen target remains the same when we schedule actions
    3343             :              * later, we may continue with the migration.
    3344             :              */
    3345           0 :             history->rsc->partial_migration_target = target_node;
    3346           0 :             history->rsc->partial_migration_source = source_node;
    3347             :         }
    3348             : 
    3349           0 :     } else if (!source_newer_op) {
    3350             :         // Mark resource as failed, require recovery, and prevent migration
    3351           0 :         pcmk__set_rsc_flags(history->rsc,
    3352             :                             pcmk_rsc_failed|pcmk_rsc_stop_if_failed);
    3353           0 :         pcmk__clear_rsc_flags(history->rsc, pcmk_rsc_migratable);
    3354             :     }
    3355             : }
    3356             : 
    3357             : /*!
    3358             :  * \internal
    3359             :  * \brief Update resource role etc. after a failed migrate_to action
    3360             :  *
    3361             :  * \param[in,out] history  Parsed action result history
    3362             :  */
    3363             : static void
    3364           0 : unpack_migrate_to_failure(struct action_history *history)
    3365             : {
    3366           0 :     xmlNode *target_migrate_from = NULL;
    3367           0 :     const char *source = NULL;
    3368           0 :     const char *target = NULL;
    3369             : 
    3370             :     // Get source and target node names from XML
                     :     // (history->node is the source; no target node is passed for checking)
    3371           0 :     if (get_migration_node_names(history->xml, history->node, NULL, &source,
    3372             :                                  &target) != pcmk_rc_ok) {
    3373           0 :         return;
    3374             :     }
    3375             : 
    3376             :     /* If a migration failed, we have to assume the resource is active. Clones
    3377             :      * are not allowed to migrate, so role can't be promoted.
    3378             :      */
    3379           0 :     history->rsc->role = pcmk_role_started;
    3380             : 
    3381             :     // Check for migrate_from on the target
    3382           0 :     target_migrate_from = find_lrm_op(history->rsc->id,
    3383             :                                       PCMK_ACTION_MIGRATE_FROM, target, source,
    3384           0 :                                       PCMK_OCF_OK, history->rsc->cluster);
    3385             : 
    3386           0 :     if (/* If the resource state is unknown on the target, it will likely be
    3387             :          * probed there.
    3388             :          * Don't just consider it running there. We will get back here anyway in
    3389             :          * case the probe detects it's running there.
    3390             :          */
    3391           0 :         !unknown_on_node(history->rsc, target)
    3392             :         /* If the resource has newer state on the target after the migration
    3393             :          * events, this migrate_to no longer matters for the target.
    3394             :          */
    3395           0 :         && !newer_state_after_migrate(history->rsc->id, target, history->xml,
    3396             :                                       target_migrate_from,
    3397           0 :                                       history->rsc->cluster)) {
    3398             :         /* The resource has no newer state on the target, so assume it's still
    3399             :          * active there (if it is up): only an online target node is recorded
    3400             :          * as running the resource.
    3401             :          */
    3402           0 :         pcmk_node_t *target_node = pcmk_find_node(history->rsc->cluster,
    3403             :                                                   target);
    3404             : 
    3405           0 :         if (target_node && target_node->details->online) {
    3406           0 :             native_add_running(history->rsc, target_node, history->rsc->cluster,
    3407             :                                FALSE);
    3408             :         }
    3409             : 
    3410           0 :     } else if (!non_monitor_after(history->rsc->id, source, history->xml, true,
    3411           0 :                                   history->rsc->cluster)) {
    3412             :         /* We know the resource has newer state on the target, but this
    3413             :          * migrate_to still matters for the source as long as there's no newer
    3414             :          * non-monitor operation there.
    3415             :          */
    3416             : 
    3417             :         // Mark node as having dangling migration so we can force a stop later
                     :         // (unlike add_dangling_migration(), this does not change rsc->role)
    3418           0 :         history->rsc->dangling_migrations =
    3419           0 :             g_list_prepend(history->rsc->dangling_migrations,
    3420           0 :                            (gpointer) history->node);
    3421             :     }
    3422             : }
    3423             : 
    3424             : /*!
    3425             :  * \internal
    3426             :  * \brief Update resource role etc. after a failed migrate_from action
    3427             :  *
    3428             :  * \param[in,out] history  Parsed action result history
    3429             :  */
    3430             : static void
    3431           0 : unpack_migrate_from_failure(struct action_history *history)
    3432             : {
    3433           0 :     xmlNode *source_migrate_to = NULL;
    3434           0 :     const char *source = NULL;
    3435           0 :     const char *target = NULL;
    3436             : 
    3437             :     // Get source and target node names from XML
                     :     // (history->node is the target here; no source node is passed for checking)
    3438           0 :     if (get_migration_node_names(history->xml, NULL, history->node, &source,
    3439             :                                  &target) != pcmk_rc_ok) {
    3440           0 :         return;
    3441             :     }
    3442             : 
    3443             :     /* If a migration failed, we have to assume the resource is active. Clones
    3444             :      * are not allowed to migrate, so role can't be promoted.
    3445             :      */
    3446           0 :     history->rsc->role = pcmk_role_started;
    3447             : 
    3448             :     // Check for a migrate_to on the source
    3449           0 :     source_migrate_to = find_lrm_op(history->rsc->id, PCMK_ACTION_MIGRATE_TO,
    3450             :                                     source, target, PCMK_OCF_OK,
    3451           0 :                                     history->rsc->cluster);
    3452             : 
    3453           0 :     if (/* If the resource state is unknown on the source, it will likely be
    3454             :          * probed there.
    3455             :          * Don't just consider it running there. We will get back here anyway in
    3456             :          * case the probe detects it's running there.
    3457             :          */
    3458           0 :         !unknown_on_node(history->rsc, source)
    3459             :         /* If the resource has newer state on the source after the migration
    3460             :          * events, this migrate_from no longer matters for the source.
                     :          * (The migrate_to entry found above is passed first; this function's
                     :          * own history entry is the migrate_from.)
    3461             :          */
    3462           0 :         && !newer_state_after_migrate(history->rsc->id, source,
    3463           0 :                                       source_migrate_to, history->xml,
    3464           0 :                                       history->rsc->cluster)) {
    3465             :         /* The resource has no newer state on the source, so assume it's still
    3466             :          * active there (if it is up).
    3467             :          */
    3468           0 :         pcmk_node_t *source_node = pcmk_find_node(history->rsc->cluster,
    3469             :                                                   source);
    3470             : 
    3471           0 :         if (source_node && source_node->details->online) {
    3472           0 :             native_add_running(history->rsc, source_node, history->rsc->cluster,
    3473             :                                TRUE);
    3474             :         }
    3475             :     }
    3476             : }
    3477             : 
    3478             : /*!
    3479             :  * \internal
    3480             :  * \brief Add an action to cluster's list of failed actions
    3481             :  *
    3482             :  * \param[in,out] history  Parsed action result history
                     :  *
                     :  * \note Nothing is recorded if the history's node is not online, or if the
                     :  *       failed list already has an entry with the same history key and node
                     :  *       name. Otherwise the history XML itself gets PCMK_XA_UNAME and
                     :  *       PCMK__XA_RSC_ID attributes added before it is copied into the list.
    3483             :  */
    3484             : static void
    3485           0 : record_failed_op(struct action_history *history)
    3486             : {
    3487           0 :     if (!(history->node->details->online)) {
    3488           0 :         return;
    3489             :     }
    3490             : 
                     :     // Look for an already-recorded entry with the same key on the same node
    3491           0 :     for (const xmlNode *xIter = history->rsc->cluster->failed->children;
    3492           0 :          xIter != NULL; xIter = xIter->next) {
    3493             : 
    3494           0 :         const char *key = pcmk__xe_history_key(xIter);
    3495           0 :         const char *uname = crm_element_value(xIter, PCMK_XA_UNAME);
    3496             : 
    3497           0 :         if (pcmk__str_eq(history->key, key, pcmk__str_none)
    3498           0 :             && pcmk__str_eq(uname, history->node->details->uname,
    3499             :                             pcmk__str_casei)) {
    3500           0 :             crm_trace("Skipping duplicate entry %s on %s",
    3501             :                       history->key, pcmk__node_name(history->node));
    3502           0 :             return;
    3503             :         }
    3504             :     }
    3505             : 
    3506           0 :     crm_trace("Adding entry for %s on %s to failed action list",
    3507             :               history->key, pcmk__node_name(history->node));
                     :     // Tag the entry with node name and resource ID (the attributes the
                     :     // duplicate check above reads the node name from), then copy it
    3508           0 :     crm_xml_add(history->xml, PCMK_XA_UNAME, history->node->details->uname);
    3509           0 :     crm_xml_add(history->xml, PCMK__XA_RSC_ID, history->rsc->id);
    3510           0 :     pcmk__xml_copy(history->rsc->cluster->failed, history->xml);
    3511             : }
    3512             : 
                     : /*!
                     :  * \internal
                     :  * \brief Get a printable form of an action result's last change time
                     :  *
                     :  * \param[in] xml_op  XML to read PCMK_XA_LAST_RC_CHANGE from
                     :  *
                     :  * \return The formatted time with everything up to and including its first
                     :  *         space removed, or "unknown_time" if the attribute cannot be read
                     :  *         or nothing follows the first space
                     :  * \note The result is a copy made by pcmk__str_copy(); presumably the caller
                     :  *       is responsible for freeing it (TODO confirm).
                     :  */
    3513             : static char *
    3514           0 : last_change_str(const xmlNode *xml_op)
    3515             : {
    3516             :     time_t when;
    3517           0 :     char *result = NULL;
    3518             : 
    3519           0 :     if (crm_element_value_epoch(xml_op, PCMK_XA_LAST_RC_CHANGE,
    3520             :                                 &when) == pcmk_ok) {
                     :         /* NOTE(review): when_s is handed to strchr() without a NULL check;
                     :          * confirm that pcmk__epoch2str() cannot return NULL here.
                     :          */
    3521           0 :         char *when_s = pcmk__epoch2str(&when, 0);
    3522           0 :         const char *p = strchr(when_s, ' ');
    3523             : 
    3524             :         // Skip day of week to make message shorter
    3525           0 :         if ((p != NULL) && (*(++p) != '\0')) {
    3526           0 :             result = pcmk__str_copy(p);
    3527             :         }
    3528           0 :         free(when_s);
    3529             :     }
    3530             : 
                     :     // Fall back to a placeholder so callers always get a usable string
    3531           0 :     if (result == NULL) {
    3532           0 :         result = pcmk__str_copy("unknown_time");
    3533             :     }
    3534             : 
    3535           0 :     return result;
    3536             : }
    3537             : 
    3538             : /*!
    3539             :  * \internal
    3540             :  * \brief Compare two on-fail values
    3541             :  *
    3542             :  * \param[in] first   One on-fail value to compare
    3543             :  * \param[in] second  The other on-fail value to compare
    3544             :  *
    3545             :  * \return A negative number if second is more severe than first, zero if they
    3546             :  *         are equal, or a positive number if first is more severe than second.
    3547             :  * \note This is only needed until the action_fail_response values can be
    3548             :  *       renumbered at the next API compatibility break.
                     :  * \note Three values are ordered explicitly rather than numerically:
                     :  *       demote ranks above only ignore; reset_remote ranks above only
                     :  *       ignore, demote, and restart; restart_container ranks above only
                     :  *       those and reset_remote. Every other pair is compared by the
                     :  *       difference of the enum values.
    3549             :  */
    3550             : static int
    3551           0 : cmp_on_fail(enum action_fail_response first, enum action_fail_response second)
    3552             : {
                     :     // Handle the cases where first is one of the explicitly ordered values
    3553           0 :     switch (first) {
    3554           0 :         case pcmk_on_fail_demote:
    3555           0 :             switch (second) {
    3556           0 :                 case pcmk_on_fail_ignore:
    3557           0 :                     return 1;
    3558           0 :                 case pcmk_on_fail_demote:
    3559           0 :                     return 0;
    3560           0 :                 default:
    3561           0 :                     return -1;
    3562             :             }
                     :             // Not reached: every case of the inner switch returns
    3563             :             break;
    3564             : 
    3565           0 :         case pcmk_on_fail_reset_remote:
    3566           0 :             switch (second) {
    3567           0 :                 case pcmk_on_fail_ignore:
    3568             :                 case pcmk_on_fail_demote:
    3569             :                 case pcmk_on_fail_restart:
    3570           0 :                     return 1;
    3571           0 :                 case pcmk_on_fail_reset_remote:
    3572           0 :                     return 0;
    3573           0 :                 default:
    3574           0 :                     return -1;
    3575             :             }
    3576             :             break;
    3577             : 
    3578           0 :         case pcmk_on_fail_restart_container:
    3579           0 :             switch (second) {
    3580           0 :                 case pcmk_on_fail_ignore:
    3581             :                 case pcmk_on_fail_demote:
    3582             :                 case pcmk_on_fail_restart:
    3583             :                 case pcmk_on_fail_reset_remote:
    3584           0 :                     return 1;
    3585           0 :                 case pcmk_on_fail_restart_container:
    3586           0 :                     return 0;
    3587           0 :                 default:
    3588           0 :                     return -1;
    3589             :             }
    3590             :             break;
    3591             : 
    3592           0 :         default:
    3593           0 :             break;
    3594             :     }
                     :     /* Getting here means first is none of the three values handled above, so
                     :      * now handle the cases where second is one of them.
                     :      */
    3595           0 :     switch (second) {
    3596           0 :         case pcmk_on_fail_demote:
    3597           0 :             return (first == pcmk_on_fail_ignore)? -1 : 1;
    3598             : 
    3599           0 :         case pcmk_on_fail_reset_remote:
    3600           0 :             switch (first) {
    3601           0 :                 case pcmk_on_fail_ignore:
    3602             :                 case pcmk_on_fail_demote:
    3603             :                 case pcmk_on_fail_restart:
    3604           0 :                     return -1;
    3605           0 :                 default:
    3606           0 :                     return 1;
    3607             :             }
    3608             :             break;
    3609             : 
    3610           0 :         case pcmk_on_fail_restart_container:
    3611           0 :             switch (first) {
    3612           0 :                 case pcmk_on_fail_ignore:
    3613             :                 case pcmk_on_fail_demote:
    3614             :                 case pcmk_on_fail_restart:
    3615             :                 case pcmk_on_fail_reset_remote:
    3616           0 :                     return -1;
    3617           0 :                 default:
    3618           0 :                     return 1;
    3619             :             }
    3620             :             break;
    3621             : 
    3622           0 :         default:
    3623           0 :             break;
    3624             :     }
                     :     // Neither value is explicitly ordered, so compare the raw enum values
    3625           0 :     return first - second;
    3626             : }
    3627             : 
    3628             : /*!
    3629             :  * \internal
    3630             :  * \brief Ban a resource (or its clone if an anonymous instance) from all nodes
    3631             :  *
    3632             :  * \param[in,out] rsc  Resource to ban
                     :  *
                     :  * \note Any existing allowed-nodes table of the banned resource is destroyed
                     :  *       and replaced by one built from the cluster's node list.
    3633             :  */
    3634             : static void
    3635           0 : ban_from_all_nodes(pcmk_resource_t *rsc)
    3636             : {
    3637           0 :     int score = -PCMK_SCORE_INFINITY;
    3638           0 :     pcmk_resource_t *fail_rsc = rsc;
    3639             : 
    3640           0 :     if (fail_rsc->parent != NULL) {
    3641           0 :         pcmk_resource_t *parent = uber_parent(fail_rsc);
    3642             : 
    3643           0 :         if (pcmk__is_anonymous_clone(parent)) {
    3644             :             /* For anonymous clones, if an operation with
    3645             :              * PCMK_META_ON_FAIL=PCMK_VALUE_STOP fails for any instance, the
    3646             :              * entire clone must stop.
    3647             :              */
    3648           0 :             fail_rsc = parent;
    3649             :         }
    3650             :     }
    3651             : 
    3652             :     // Ban the resource from all nodes
    3653           0 :     crm_notice("%s will not be started under current conditions", fail_rsc->id);
    3654           0 :     if (fail_rsc->allowed_nodes != NULL) {
    3655           0 :         g_hash_table_destroy(fail_rsc->allowed_nodes);
    3656             :     }
                     :     // Rebuild the table from every cluster node, then pass the negative
                     :     // infinity score to set_node_score for each entry
    3657           0 :     fail_rsc->allowed_nodes = pe__node_list2table(rsc->cluster->nodes);
    3658           0 :     g_hash_table_foreach(fail_rsc->allowed_nodes, set_node_score, &score);
    3659           0 : }
    3660             : 
    3661             : /*!
    3662             :  * \internal
    3663             :  * \brief Get configured failure handling and role after failure for an action
    3664             :  *
    3665             :  * \param[in,out] history    Unpacked action history entry
    3666             :  * \param[out]    on_fail    Where to set configured failure handling
    3667             :  * \param[out]    fail_role  Where to set to role after failure
    3668             :  */
    3669             : static void
    3670           0 : unpack_failure_handling(struct action_history *history,
    3671             :                         enum action_fail_response *on_fail,
    3672             :                         enum rsc_role_e *fail_role)
    3673             : {
                     :     // Find the action's configuration for this task and interval
    3674           0 :     xmlNode *config = pcmk__find_action_config(history->rsc, history->task,
    3675             :                                                history->interval_ms, true);
    3676             : 
                     :     // Temporary table of the action's meta-attributes; destroyed below
    3677           0 :     GHashTable *meta = pcmk__unpack_action_meta(history->rsc, history->node,
    3678             :                                                 history->task,
    3679             :                                                 history->interval_ms, config);
    3680             : 
                     :     // The looked-up value belongs to meta, so it is only used before meta
                     :     // is destroyed
    3681           0 :     const char *on_fail_str = g_hash_table_lookup(meta, PCMK_META_ON_FAIL);
    3682             : 
    3683           0 :     *on_fail = pcmk__parse_on_fail(history->rsc, history->task,
    3684             :                                    history->interval_ms, on_fail_str);
    3685           0 :     *fail_role = pcmk__role_after_failure(history->rsc, history->task, *on_fail,
    3686             :                                           meta);
    3687           0 :     g_hash_table_destroy(meta);
    3688           0 : }
    3689             : 
    3690             : /*!
    3691             :  * \internal
    3692             :  * \brief Update resource role, failure handling, etc., after a failed action
    3693             :  *
    3694             :  * \param[in,out] history         Parsed action result history
    3695             :  * \param[in]     config_on_fail  Action failure handling from configuration
    3696             :  * \param[in]     fail_role       Resource's role after failure of this action
    3697             :  * \param[out]    last_failure    This will be set to the history XML
    3698             :  * \param[in,out] on_fail         Actual handling of action result
    3699             :  */
    3700             : static void
    3701           0 : unpack_rsc_op_failure(struct action_history *history,
    3702             :                       enum action_fail_response config_on_fail,
    3703             :                       enum rsc_role_e fail_role, xmlNode **last_failure,
    3704             :                       enum action_fail_response *on_fail)
    3705             : {
    3706           0 :     bool is_probe = false;
    3707           0 :     char *last_change_s = NULL;
    3708             : 
    3709           0 :     *last_failure = history->xml;
    3710             : 
    3711           0 :     is_probe = pcmk_xe_is_probe(history->xml);
    3712           0 :     last_change_s = last_change_str(history->xml);
    3713             : 
    3714           0 :     if (!pcmk_is_set(history->rsc->cluster->flags, pcmk_sched_symmetric_cluster)
    3715           0 :         && (history->exit_status == PCMK_OCF_NOT_INSTALLED)) {
    3716           0 :         crm_trace("Unexpected result (%s%s%s) was recorded for "
    3717             :                   "%s of %s on %s at %s " CRM_XS " exit-status=%d id=%s",
    3718             :                   services_ocf_exitcode_str(history->exit_status),
    3719             :                   (pcmk__str_empty(history->exit_reason)? "" : ": "),
    3720             :                   pcmk__s(history->exit_reason, ""),
    3721             :                   (is_probe? "probe" : history->task), history->rsc->id,
    3722             :                   pcmk__node_name(history->node), last_change_s,
    3723             :                   history->exit_status, history->id);
    3724             :     } else {
    3725           0 :         pcmk__sched_warn("Unexpected result (%s%s%s) was recorded for %s of "
    3726             :                          "%s on %s at %s " CRM_XS " exit-status=%d id=%s",
    3727             :                          services_ocf_exitcode_str(history->exit_status),
    3728             :                          (pcmk__str_empty(history->exit_reason)? "" : ": "),
    3729             :                          pcmk__s(history->exit_reason, ""),
    3730             :                          (is_probe? "probe" : history->task), history->rsc->id,
    3731             :                          pcmk__node_name(history->node), last_change_s,
    3732             :                          history->exit_status, history->id);
    3733             : 
    3734           0 :         if (is_probe && (history->exit_status != PCMK_OCF_OK)
    3735           0 :             && (history->exit_status != PCMK_OCF_NOT_RUNNING)
    3736           0 :             && (history->exit_status != PCMK_OCF_RUNNING_PROMOTED)) {
    3737             : 
    3738             :             /* A failed (not just unexpected) probe result could mean the user
    3739             :              * didn't know resources will be probed even where they can't run.
    3740             :              */
    3741           0 :             crm_notice("If it is not possible for %s to run on %s, see "
    3742             :                        "the " PCMK_XA_RESOURCE_DISCOVERY " option for location "
    3743             :                        "constraints",
    3744             :                        history->rsc->id, pcmk__node_name(history->node));
    3745             :         }
    3746             : 
    3747           0 :         record_failed_op(history);
    3748             :     }
    3749             : 
    3750           0 :     free(last_change_s);
    3751             : 
    3752           0 :     if (cmp_on_fail(*on_fail, config_on_fail) < 0) {
    3753           0 :         pcmk__rsc_trace(history->rsc, "on-fail %s -> %s for %s",
    3754             :                         pcmk_on_fail_text(*on_fail),
    3755             :                         pcmk_on_fail_text(config_on_fail), history->key);
    3756           0 :         *on_fail = config_on_fail;
    3757             :     }
    3758             : 
    3759           0 :     if (strcmp(history->task, PCMK_ACTION_STOP) == 0) {
    3760           0 :         resource_location(history->rsc, history->node, -PCMK_SCORE_INFINITY,
    3761           0 :                           "__stop_fail__", history->rsc->cluster);
    3762             : 
    3763           0 :     } else if (strcmp(history->task, PCMK_ACTION_MIGRATE_TO) == 0) {
    3764           0 :         unpack_migrate_to_failure(history);
    3765             : 
    3766           0 :     } else if (strcmp(history->task, PCMK_ACTION_MIGRATE_FROM) == 0) {
    3767           0 :         unpack_migrate_from_failure(history);
    3768             : 
    3769           0 :     } else if (strcmp(history->task, PCMK_ACTION_PROMOTE) == 0) {
    3770           0 :         history->rsc->role = pcmk_role_promoted;
    3771             : 
    3772           0 :     } else if (strcmp(history->task, PCMK_ACTION_DEMOTE) == 0) {
    3773           0 :         if (config_on_fail == pcmk_on_fail_block) {
    3774           0 :             history->rsc->role = pcmk_role_promoted;
    3775           0 :             pe__set_next_role(history->rsc, pcmk_role_stopped,
    3776             :                               "demote with " PCMK_META_ON_FAIL "=block");
    3777             : 
    3778           0 :         } else if (history->exit_status == PCMK_OCF_NOT_RUNNING) {
    3779           0 :             history->rsc->role = pcmk_role_stopped;
    3780             : 
    3781             :         } else {
    3782             :             /* Staying in the promoted role would put the scheduler and
    3783             :              * controller into a loop. Setting the role to unpromoted is not
    3784             :              * dangerous because the resource will be stopped as part of
    3785             :              * recovery, and any promotion will be ordered after that stop.
    3786             :              */
    3787           0 :             history->rsc->role = pcmk_role_unpromoted;
    3788             :         }
    3789             :     }
    3790             : 
    3791           0 :     if (is_probe && (history->exit_status == PCMK_OCF_NOT_INSTALLED)) {
    3792             :         /* leave stopped */
    3793           0 :         pcmk__rsc_trace(history->rsc, "Leaving %s stopped", history->rsc->id);
    3794           0 :         history->rsc->role = pcmk_role_stopped;
    3795             : 
    3796           0 :     } else if (history->rsc->role < pcmk_role_started) {
    3797           0 :         pcmk__rsc_trace(history->rsc, "Setting %s active", history->rsc->id);
    3798           0 :         set_active(history->rsc);
    3799             :     }
    3800             : 
    3801           0 :     pcmk__rsc_trace(history->rsc,
    3802             :                     "Resource %s: role=%s unclean=%s on_fail=%s fail_role=%s",
    3803             :                     history->rsc->id, pcmk_role_text(history->rsc->role),
    3804             :                     pcmk__btoa(history->node->details->unclean),
    3805             :                     pcmk_on_fail_text(config_on_fail),
    3806             :                     pcmk_role_text(fail_role));
    3807             : 
    3808           0 :     if ((fail_role != pcmk_role_started)
    3809           0 :         && (history->rsc->next_role < fail_role)) {
    3810           0 :         pe__set_next_role(history->rsc, fail_role, "failure");
    3811             :     }
    3812             : 
    3813           0 :     if (fail_role == pcmk_role_stopped) {
    3814           0 :         ban_from_all_nodes(history->rsc);
    3815             :     }
    3816           0 : }
    3817             : 
    3818             : /*!
    3819             :  * \internal
    3820             :  * \brief Block a resource with a failed action if it cannot be recovered
    3821             :  *
    3822             :  * If resource action is a failed stop and fencing is not possible, mark the
    3823             :  * resource as unmanaged and blocked, since recovery cannot be done.
    3824             :  *
    3825             :  * \param[in,out] history  Parsed action history entry
    3826             :  */
    3827             : static void
    3828           0 : block_if_unrecoverable(struct action_history *history)
    3829             : {
    3830           0 :     char *last_change_s = NULL;
    3831             : 
    3832           0 :     if (strcmp(history->task, PCMK_ACTION_STOP) != 0) {
    3833           0 :         return; // All actions besides stop are always recoverable
    3834             :     }
    3835           0 :     if (pe_can_fence(history->node->details->data_set, history->node)) {
    3836           0 :         return; // Failed stops are recoverable via fencing
    3837             :     }
    3838             : 
    3839           0 :     last_change_s = last_change_str(history->xml);
    3840           0 :     pcmk__sched_err("No further recovery can be attempted for %s "
    3841             :                     "because %s on %s failed (%s%s%s) at %s "
    3842             :                     CRM_XS " rc=%d id=%s",
    3843             :                     history->rsc->id, history->task,
    3844             :                     pcmk__node_name(history->node),
    3845             :                     services_ocf_exitcode_str(history->exit_status),
    3846             :                     (pcmk__str_empty(history->exit_reason)? "" : ": "),
    3847             :                     pcmk__s(history->exit_reason, ""),
    3848             :                     last_change_s, history->exit_status, history->id);
    3849             : 
    3850           0 :     free(last_change_s);
    3851             : 
    3852           0 :     pcmk__clear_rsc_flags(history->rsc, pcmk_rsc_managed);
    3853           0 :     pcmk__set_rsc_flags(history->rsc, pcmk_rsc_blocked);
    3854             : }
    3855             : 
    3856             : /*!
    3857             :  * \internal
    3858             :  * \brief Update action history's execution status and why
    3859             :  *
    3860             :  * \param[in,out] history  Parsed action history entry
    3861             :  * \param[out]    why      Where to store reason for update
    3862             :  * \param[in]     value    New value
    3863             :  * \param[in]     reason   Description of why value was changed
    3864             :  */
    3865             : static inline void
    3866           0 : remap_because(struct action_history *history, const char **why, int value,
    3867             :               const char *reason)
    3868             : {
    3869           0 :     if (history->execution_status != value) {
    3870           0 :         history->execution_status = value;
    3871           0 :         *why = reason;
    3872             :     }
    3873           0 : }
    3874             : 
    3875             : /*!
    3876             :  * \internal
    3877             :  * \brief Remap informational monitor results and operation status
    3878             :  *
    3879             :  * For the monitor results, certain OCF codes are for providing extended information
    3880             :  * to the user about services that aren't yet failed but not entirely healthy either.
    3881             :  * These must be treated as the "normal" result by Pacemaker.
    3882             :  *
    3883             :  * For operation status, the action result can be used to determine an appropriate
    3884             :  * status for the purposes of responding to the action.  The status provided by the
    3885             :  * executor is not directly usable since the executor does not know what was expected.
    3886             :  *
    3887             :  * \param[in,out] history  Parsed action history entry
    3888             :  * \param[in,out] on_fail  What should be done about the result
    3889             :  * \param[in]     expired  Whether result is expired
    3890             :  *
    3891             :  * \note If the result is remapped and the node is not shutting down or failed,
    3892             :  *       the operation will be recorded in the scheduler data's list of failed
    3893             :  *       operations to highlight it for the user.
    3894             :  *
    3895             :  * \note This may update the resource's current and next role.
    3896             :  */
    3897             : static void
    3898           0 : remap_operation(struct action_history *history,
    3899             :                 enum action_fail_response *on_fail, bool expired)
    3900             : {
    3901           0 :     bool is_probe = false;
    3902           0 :     int orig_exit_status = history->exit_status;
    3903           0 :     int orig_exec_status = history->execution_status;
    3904           0 :     const char *why = NULL;
    3905           0 :     const char *task = history->task;
    3906             : 
    3907             :     // Remap degraded results to their successful counterparts
    3908           0 :     history->exit_status = pcmk__effective_rc(history->exit_status);
    3909           0 :     if (history->exit_status != orig_exit_status) {
    3910           0 :         why = "degraded result";
    3911           0 :         if (!expired && (!history->node->details->shutdown
    3912           0 :                          || history->node->details->online)) {
    3913           0 :             record_failed_op(history);
    3914             :         }
    3915             :     }
    3916             : 
    3917           0 :     if (!pcmk__is_bundled(history->rsc)
    3918           0 :         && pcmk_xe_mask_probe_failure(history->xml)
    3919           0 :         && ((history->execution_status != PCMK_EXEC_DONE)
    3920           0 :             || (history->exit_status != PCMK_OCF_NOT_RUNNING))) {
    3921           0 :         history->execution_status = PCMK_EXEC_DONE;
    3922           0 :         history->exit_status = PCMK_OCF_NOT_RUNNING;
    3923           0 :         why = "equivalent probe result";
    3924             :     }
    3925             : 
    3926             :     /* If the executor reported an execution status of anything but done or
    3927             :      * error, consider that final. But for done or error, we know better whether
    3928             :      * it should be treated as a failure or not, because we know the expected
    3929             :      * result.
    3930             :      */
    3931           0 :     switch (history->execution_status) {
    3932           0 :         case PCMK_EXEC_DONE:
    3933             :         case PCMK_EXEC_ERROR:
    3934           0 :             break;
    3935             : 
    3936             :         // These should be treated as node-fatal
    3937           0 :         case PCMK_EXEC_NO_FENCE_DEVICE:
    3938             :         case PCMK_EXEC_NO_SECRETS:
    3939           0 :             remap_because(history, &why, PCMK_EXEC_ERROR_HARD,
    3940             :                           "node-fatal error");
    3941           0 :             goto remap_done;
    3942             : 
    3943           0 :         default:
    3944           0 :             goto remap_done;
    3945             :     }
    3946             : 
    3947           0 :     is_probe = pcmk_xe_is_probe(history->xml);
    3948           0 :     if (is_probe) {
    3949           0 :         task = "probe";
    3950             :     }
    3951             : 
    3952           0 :     if (history->expected_exit_status < 0) {
    3953             :         /* Pre-1.0 Pacemaker versions, and Pacemaker 1.1.6 or earlier with
    3954             :          * Heartbeat 2.0.7 or earlier as the cluster layer, did not include the
    3955             :          * expected exit status in the transition key, which (along with the
    3956             :          * similar case of a corrupted transition key in the CIB) will be
    3957             :          * reported to this function as -1. Pacemaker 2.0+ does not support
    3958             :          * rolling upgrades from those versions or processing of saved CIB files
    3959             :          * from those versions, so we do not need to care much about this case.
    3960             :          */
    3961           0 :         remap_because(history, &why, PCMK_EXEC_ERROR,
    3962             :                       "obsolete history format");
    3963           0 :         pcmk__config_warn("Expected result not found for %s on %s "
    3964             :                           "(corrupt or obsolete CIB?)",
    3965             :                           history->key, pcmk__node_name(history->node));
    3966             : 
    3967           0 :     } else if (history->exit_status == history->expected_exit_status) {
    3968           0 :         remap_because(history, &why, PCMK_EXEC_DONE, "expected result");
    3969             : 
    3970             :     } else {
    3971           0 :         remap_because(history, &why, PCMK_EXEC_ERROR, "unexpected result");
    3972           0 :         pcmk__rsc_debug(history->rsc,
    3973             :                         "%s on %s: expected %d (%s), got %d (%s%s%s)",
    3974             :                         history->key, pcmk__node_name(history->node),
    3975             :                         history->expected_exit_status,
    3976             :                         services_ocf_exitcode_str(history->expected_exit_status),
    3977             :                         history->exit_status,
    3978             :                         services_ocf_exitcode_str(history->exit_status),
    3979             :                         (pcmk__str_empty(history->exit_reason)? "" : ": "),
    3980             :                         pcmk__s(history->exit_reason, ""));
    3981             :     }
    3982             : 
    3983           0 :     switch (history->exit_status) {
    3984           0 :         case PCMK_OCF_OK:
    3985           0 :             if (is_probe
    3986           0 :                 && (history->expected_exit_status == PCMK_OCF_NOT_RUNNING)) {
    3987           0 :                 char *last_change_s = last_change_str(history->xml);
    3988             : 
    3989           0 :                 remap_because(history, &why, PCMK_EXEC_DONE, "probe");
    3990           0 :                 pcmk__rsc_info(history->rsc,
    3991             :                                "Probe found %s active on %s at %s",
    3992             :                                history->rsc->id, pcmk__node_name(history->node),
    3993             :                                last_change_s);
    3994           0 :                 free(last_change_s);
    3995             :             }
    3996           0 :             break;
    3997             : 
    3998           0 :         case PCMK_OCF_NOT_RUNNING:
    3999           0 :             if (is_probe
    4000           0 :                 || (history->expected_exit_status == history->exit_status)
    4001           0 :                 || !pcmk_is_set(history->rsc->flags, pcmk_rsc_managed)) {
    4002             : 
    4003             :                 /* For probes, recurring monitors for the Stopped role, and
    4004             :                  * unmanaged resources, "not running" is not considered a
    4005             :                  * failure.
    4006             :                  */
    4007           0 :                 remap_because(history, &why, PCMK_EXEC_DONE, "exit status");
    4008           0 :                 history->rsc->role = pcmk_role_stopped;
    4009           0 :                 *on_fail = pcmk_on_fail_ignore;
    4010           0 :                 pe__set_next_role(history->rsc, pcmk_role_unknown,
    4011             :                                   "not running");
    4012             :             }
    4013           0 :             break;
    4014             : 
    4015           0 :         case PCMK_OCF_RUNNING_PROMOTED:
    4016           0 :             if (is_probe
    4017           0 :                 && (history->exit_status != history->expected_exit_status)) {
    4018           0 :                 char *last_change_s = last_change_str(history->xml);
    4019             : 
    4020           0 :                 remap_because(history, &why, PCMK_EXEC_DONE, "probe");
    4021           0 :                 pcmk__rsc_info(history->rsc,
    4022             :                                "Probe found %s active and promoted on %s at %s",
    4023             :                                 history->rsc->id,
    4024             :                                 pcmk__node_name(history->node), last_change_s);
    4025           0 :                 free(last_change_s);
    4026             :             }
    4027           0 :             if (!expired
    4028           0 :                 || (history->exit_status == history->expected_exit_status)) {
    4029           0 :                 history->rsc->role = pcmk_role_promoted;
    4030             :             }
    4031           0 :             break;
    4032             : 
    4033           0 :         case PCMK_OCF_FAILED_PROMOTED:
    4034           0 :             if (!expired) {
    4035           0 :                 history->rsc->role = pcmk_role_promoted;
    4036             :             }
    4037           0 :             remap_because(history, &why, PCMK_EXEC_ERROR, "exit status");
    4038           0 :             break;
    4039             : 
    4040           0 :         case PCMK_OCF_NOT_CONFIGURED:
    4041           0 :             remap_because(history, &why, PCMK_EXEC_ERROR_FATAL, "exit status");
    4042           0 :             break;
    4043             : 
    4044           0 :         case PCMK_OCF_UNIMPLEMENT_FEATURE:
    4045             :             {
    4046           0 :                 guint interval_ms = 0;
    4047           0 :                 crm_element_value_ms(history->xml, PCMK_META_INTERVAL,
    4048             :                                      &interval_ms);
    4049             : 
    4050           0 :                 if (interval_ms == 0) {
    4051           0 :                     if (!expired) {
    4052           0 :                         block_if_unrecoverable(history);
    4053             :                     }
    4054           0 :                     remap_because(history, &why, PCMK_EXEC_ERROR_HARD,
    4055             :                                   "exit status");
    4056             :                 } else {
    4057           0 :                     remap_because(history, &why, PCMK_EXEC_NOT_SUPPORTED,
    4058             :                                   "exit status");
    4059             :                 }
    4060             :             }
    4061           0 :             break;
    4062             : 
    4063           0 :         case PCMK_OCF_NOT_INSTALLED:
    4064             :         case PCMK_OCF_INVALID_PARAM:
    4065             :         case PCMK_OCF_INSUFFICIENT_PRIV:
    4066           0 :             if (!expired) {
    4067           0 :                 block_if_unrecoverable(history);
    4068             :             }
    4069           0 :             remap_because(history, &why, PCMK_EXEC_ERROR_HARD, "exit status");
    4070           0 :             break;
    4071             : 
    4072           0 :         default:
    4073           0 :             if (history->execution_status == PCMK_EXEC_DONE) {
    4074           0 :                 char *last_change_s = last_change_str(history->xml);
    4075             : 
    4076           0 :                 crm_info("Treating unknown exit status %d from %s of %s "
    4077             :                          "on %s at %s as failure",
    4078             :                          history->exit_status, task, history->rsc->id,
    4079             :                          pcmk__node_name(history->node), last_change_s);
    4080           0 :                 remap_because(history, &why, PCMK_EXEC_ERROR,
    4081             :                               "unknown exit status");
    4082           0 :                 free(last_change_s);
    4083             :             }
    4084           0 :             break;
    4085             :     }
    4086             : 
    4087           0 : remap_done:
    4088           0 :     if (why != NULL) {
    4089           0 :         pcmk__rsc_trace(history->rsc,
    4090             :                         "Remapped %s result from [%s: %s] to [%s: %s] "
    4091             :                         "because of %s",
    4092             :                         history->key, pcmk_exec_status_str(orig_exec_status),
    4093             :                         crm_exit_str(orig_exit_status),
    4094             :                         pcmk_exec_status_str(history->execution_status),
    4095             :                         crm_exit_str(history->exit_status), why);
    4096             :     }
    4097           0 : }
    4098             : 
    4099             : // return TRUE if start or monitor last failure but parameters changed
    4100             : static bool
    4101           0 : should_clear_for_param_change(const xmlNode *xml_op, const char *task,
    4102             :                               pcmk_resource_t *rsc, pcmk_node_t *node)
    4103             : {
    4104           0 :     if (pcmk__str_any_of(task, PCMK_ACTION_START, PCMK_ACTION_MONITOR, NULL)) {
    4105           0 :         if (pe__bundle_needs_remote_name(rsc)) {
    4106             :             /* We haven't allocated resources yet, so we can't reliably
    4107             :              * substitute addr parameters for the REMOTE_CONTAINER_HACK.
    4108             :              * When that's needed, defer the check until later.
    4109             :              */
    4110           0 :             pe__add_param_check(xml_op, rsc, node, pcmk__check_last_failure,
    4111             :                                 rsc->cluster);
    4112             : 
    4113             :         } else {
    4114           0 :             pcmk__op_digest_t *digest_data = NULL;
    4115             : 
    4116           0 :             digest_data = rsc_action_digest_cmp(rsc, xml_op, node,
    4117             :                                                 rsc->cluster);
    4118           0 :             switch (digest_data->rc) {
    4119           0 :                 case pcmk__digest_unknown:
    4120           0 :                     crm_trace("Resource %s history entry %s on %s"
    4121             :                               " has no digest to compare",
    4122             :                               rsc->id, pcmk__xe_history_key(xml_op),
    4123             :                               node->details->id);
    4124           0 :                     break;
    4125           0 :                 case pcmk__digest_match:
    4126           0 :                     break;
    4127           0 :                 default:
    4128           0 :                     return TRUE;
    4129             :             }
    4130             :         }
    4131             :     }
    4132           0 :     return FALSE;
    4133             : }
    4134             : 
    4135             : // Order action after fencing of remote node, given connection rsc
    4136             : static void
    4137           0 : order_after_remote_fencing(pcmk_action_t *action, pcmk_resource_t *remote_conn,
    4138             :                            pcmk_scheduler_t *scheduler)
    4139             : {
    4140           0 :     pcmk_node_t *remote_node = pcmk_find_node(scheduler, remote_conn->id);
    4141             : 
    4142           0 :     if (remote_node) {
    4143           0 :         pcmk_action_t *fence = pe_fence_op(remote_node, NULL, TRUE, NULL,
    4144             :                                            FALSE, scheduler);
    4145             : 
    4146           0 :         order_actions(fence, action, pcmk__ar_first_implies_then);
    4147             :     }
    4148           0 : }
    4149             : 
    4150             : static bool
    4151           0 : should_ignore_failure_timeout(const pcmk_resource_t *rsc, const char *task,
    4152             :                               guint interval_ms, bool is_last_failure)
    4153             : {
    4154             :     /* Clearing failures of recurring monitors has special concerns. The
    4155             :      * executor reports only changes in the monitor result, so if the
    4156             :      * monitor is still active and still getting the same failure result,
    4157             :      * that will go undetected after the failure is cleared.
    4158             :      *
    4159             :      * Also, the operation history will have the time when the recurring
    4160             :      * monitor result changed to the given code, not the time when the
    4161             :      * result last happened.
    4162             :      *
    4163             :      * @TODO We probably should clear such failures only when the failure
    4164             :      * timeout has passed since the last occurrence of the failed result.
    4165             :      * However we don't record that information. We could maybe approximate
    4166             :      * that by clearing only if there is a more recent successful monitor or
    4167             :      * stop result, but we don't even have that information at this point
    4168             :      * since we are still unpacking the resource's operation history.
    4169             :      *
    4170             :      * This is especially important for remote connection resources with a
    4171             :      * reconnect interval, so in that case, we skip clearing failures
    4172             :      * if the remote node hasn't been fenced.
    4173             :      */
    4174           0 :     if (rsc->remote_reconnect_ms
    4175           0 :         && pcmk_is_set(rsc->cluster->flags, pcmk_sched_fencing_enabled)
    4176           0 :         && (interval_ms != 0)
    4177           0 :         && pcmk__str_eq(task, PCMK_ACTION_MONITOR, pcmk__str_casei)) {
    4178             : 
    4179           0 :         pcmk_node_t *remote_node = pcmk_find_node(rsc->cluster, rsc->id);
    4180             : 
    4181           0 :         if (remote_node && !remote_node->details->remote_was_fenced) {
    4182           0 :             if (is_last_failure) {
    4183           0 :                 crm_info("Waiting to clear monitor failure for remote node %s"
    4184             :                          " until fencing has occurred", rsc->id);
    4185             :             }
    4186           0 :             return TRUE;
    4187             :         }
    4188             :     }
    4189           0 :     return FALSE;
    4190             : }
    4191             : 
    4192             : /*!
    4193             :  * \internal
    4194             :  * \brief Check operation age and schedule failure clearing when appropriate
    4195             :  *
    4196             :  * This function has two distinct purposes. The first is to check whether an
    4197             :  * operation history entry is expired (i.e. the resource has a failure timeout,
    4198             :  * the entry is older than the timeout, and the resource either has no fail
    4199             :  * count or its fail count is entirely older than the timeout). The second is to
    4200             :  * schedule fail count clearing when appropriate (i.e. the operation is expired
    4201             :  * and either the resource has an expired fail count or the operation is a
    4202             :  * last_failure for a remote connection resource with a reconnect interval,
    4203             :  * or the operation is a last_failure for a start or monitor operation and the
    4204             :  * resource's parameters have changed since the operation).
    4205             :  *
    4206             :  * \param[in,out] history  Parsed action result history
    4207             :  *
    4208             :  * \return true if operation history entry is expired, otherwise false (never true for a "Not Installed" execution status, nor for a probe whose exit status is one of those listed in the switch at the end of this function)
    4209             :  */
    4210             : static bool
    4211           0 : check_operation_expiry(struct action_history *history)
    4212             : {
    4213           0 :     bool expired = false;
    4214           0 :     bool is_last_failure = pcmk__ends_with(history->id, "_last_failure_0");
    4215           0 :     time_t last_run = 0; // Entry's PCMK_XA_LAST_RC_CHANGE timestamp, if it has one
    4216           0 :     int unexpired_fail_count = 0; // Fail count taking the failure timeout into account
    4217           0 :     const char *clear_reason = NULL; // Set only if fail count clearing should be scheduled
    4218             : 
    4219           0 :     if (history->execution_status == PCMK_EXEC_NOT_INSTALLED) {
    4220           0 :         pcmk__rsc_trace(history->rsc,
    4221             :                         "Resource history entry %s on %s is not expired: "
    4222             :                         "Not Installed does not expire",
    4223             :                         history->id, pcmk__node_name(history->node));
    4224           0 :         return false; // "Not installed" must always be cleared manually
    4225             :     }
    4226             : 
    4227           0 :     if ((history->rsc->failure_timeout > 0)
    4228           0 :         && (crm_element_value_epoch(history->xml, PCMK_XA_LAST_RC_CHANGE,
    4229             :                                     &last_run) == 0)) {
    4230             : 
    4231             :         /* Resource has a PCMK_META_FAILURE_TIMEOUT and history entry has a
    4232             :          * timestamp
    4233             :          */
    4234             : 
    4235           0 :         time_t now = get_effective_time(history->rsc->cluster);
    4236           0 :         time_t last_failure = 0;
    4237             : 
    4238             :         // Is this particular operation history older than the failure timeout?
    4239           0 :         if ((now >= (last_run + history->rsc->failure_timeout))
    4240           0 :             && !should_ignore_failure_timeout(history->rsc, history->task,
    4241             :                                               history->interval_ms,
    4242             :                                               is_last_failure)) {
    4243           0 :             expired = true;
    4244             :         }
    4245             : 
    4246             :         // Does the resource as a whole have an unexpired fail count?
    4247           0 :         unexpired_fail_count = pe_get_failcount(history->node, history->rsc,
    4248             :                                                 &last_failure,
    4249             :                                                 pcmk__fc_effective,
    4250           0 :                                                 history->xml);
    4251             : 
    4252             :         // Update scheduler recheck time according to *last* failure
    4253           0 :         crm_trace("%s@%lld is %sexpired @%lld with unexpired_failures=%d timeout=%ds"
    4254             :                   " last-failure@%lld",
    4255             :                   history->id, (long long) last_run, (expired? "" : "not "),
    4256             :                   (long long) now, unexpired_fail_count,
    4257             :                   history->rsc->failure_timeout, (long long) last_failure);
    4258           0 :         last_failure += history->rsc->failure_timeout + 1; // Now one second past the end of the timeout counted from the last failure
    4259           0 :         if (unexpired_fail_count && (now < last_failure)) {
    4260           0 :             pe__update_recheck_time(last_failure, history->rsc->cluster,
    4261             :                                     "fail count expiration");
    4262             :         }
    4263             :     }
    4264             : 
    4265           0 :     if (expired) {
    4266           0 :         if (pe_get_failcount(history->node, history->rsc, NULL,
    4267           0 :                              pcmk__fc_default, history->xml)) {
    4268             :             // There is a fail count ignoring timeout
    4269             : 
    4270           0 :             if (unexpired_fail_count == 0) {
    4271             :                 // There is no fail count considering timeout
    4272           0 :                 clear_reason = "it expired";
    4273             : 
    4274             :             } else {
    4275             :                 /* This operation is old, but there is an unexpired fail count.
    4276             :                  * In a properly functioning cluster, this should only be
    4277             :                  * possible if this operation is not a failure (otherwise the
    4278             :                  * fail count should be expired too), so this is really just a
    4279             :                  * failsafe.
    4280             :                  */
    4281           0 :                 pcmk__rsc_trace(history->rsc,
    4282             :                                 "Resource history entry %s on %s is not "
    4283             :                                 "expired: Unexpired fail count",
    4284             :                                 history->id, pcmk__node_name(history->node));
    4285           0 :                 expired = false;
    4286             :             }
    4287             : 
    4288           0 :         } else if (is_last_failure
    4289           0 :                    && (history->rsc->remote_reconnect_ms != 0)) {
    4290             :             /* Clear any expired last failure when reconnect interval is set,
    4291             :              * even if there is no fail count.
    4292             :              */
    4293           0 :             clear_reason = "reconnect interval is set";
    4294             :         }
    4295             :     }
    4296             : 
    4297           0 :     if (!expired && is_last_failure
    4298           0 :         && should_clear_for_param_change(history->xml, history->task,
    4299             :                                          history->rsc, history->node)) {
    4300           0 :         clear_reason = "resource parameters have changed";
    4301             :     }
    4302             : 
    4303           0 :     if (clear_reason != NULL) {
    4304           0 :         pcmk_action_t *clear_op = NULL;
    4305             : 
    4306             :         // Schedule clearing of the fail count
    4307           0 :         clear_op = pe__clear_failcount(history->rsc, history->node,
    4308           0 :                                        clear_reason, history->rsc->cluster);
    4309             : 
    4310           0 :         if (pcmk_is_set(history->rsc->cluster->flags,
    4311             :                         pcmk_sched_fencing_enabled)
    4312           0 :             && (history->rsc->remote_reconnect_ms != 0)) {
    4313             :             /* If we're clearing a remote connection due to a reconnect
    4314             :              * interval, we want to wait until any scheduled fencing
    4315             :              * completes.
    4316             :              *
    4317             :              * We could limit this to remote_node->details->unclean, but at
    4318             :              * this point, that's always true (it won't be reliable until
    4319             :              * after unpack_node_history() is done).
    4320             :              */
    4321           0 :             crm_info("Clearing %s failure will wait until any scheduled "
    4322             :                      "fencing of %s completes",
    4323             :                      history->task, history->rsc->id);
    4324           0 :             order_after_remote_fencing(clear_op, history->rsc,
    4325           0 :                                        history->rsc->cluster);
    4326             :         }
    4327             :     }
    4328             : 
    4329           0 :     if (expired && (history->interval_ms == 0)
    4330           0 :         && pcmk__str_eq(history->task, PCMK_ACTION_MONITOR, pcmk__str_none)) {
    4331           0 :         switch (history->exit_status) {
    4332           0 :             case PCMK_OCF_OK:
    4333             :             case PCMK_OCF_NOT_RUNNING:
    4334             :             case PCMK_OCF_RUNNING_PROMOTED:
    4335             :             case PCMK_OCF_DEGRADED:
    4336             :             case PCMK_OCF_DEGRADED_PROMOTED:
    4337             :                 // Don't expire probes that return these values
    4338           0 :                 pcmk__rsc_trace(history->rsc,
    4339             :                                 "Resource history entry %s on %s is not "
    4340             :                                 "expired: Probe result",
    4341             :                              history->id, pcmk__node_name(history->node));
    4342           0 :                 expired = false;
    4343           0 :                 break;
    4344             :         } // No default case: any other exit status leaves "expired" unchanged
    4345             :     }
    4346             : 
    4347           0 :     return expired;
    4348             : }
    4349             : // Get the target (expected) return code recorded in a history entry's PCMK__XA_TRANSITION_KEY attribute; returns -1 if the entry has no such attribute
    4350             : int
    4351           0 : pe__target_rc_from_xml(const xmlNode *xml_op)
    4352             : {
    4353           0 :     int target_rc = 0;
    4354           0 :     const char *key = crm_element_value(xml_op, PCMK__XA_TRANSITION_KEY);
    4355             : 
    4356           0 :     if (key == NULL) {
    4357           0 :         return -1; // No transition key to decode
    4358             :     }
    4359           0 :     decode_transition_key(key, NULL, NULL, NULL, &target_rc); // NOTE(review): return value is not checked; result for a malformed key depends on what decode_transition_key() leaves in target_rc -- confirm
    4360           0 :     return target_rc;
    4361             : }
    4362             : 
    4363             : /*!
    4364             :  * \internal
    4365             :  * \brief Update a resource's state (its role and, when a past failure is cleared, its failure handling and next role) for an action result
    4366             :  *
    4367             :  * \param[in,out] history       Parsed action history entry
    4368             :  * \param[in]     exit_status   Exit status to base new state on
    4369             :  * \param[in]     last_failure  Resource's last_failure entry, if known
    4370             :  * \param[in,out] on_fail       Resource's current failure handling
    4371             :  */
    4372             : static void
    4373           0 : update_resource_state(struct action_history *history, int exit_status,
    4374             :                       const xmlNode *last_failure,
    4375             :                       enum action_fail_response *on_fail)
    4376             : {
    4377           0 :     bool clear_past_failure = false; // Whether *on_fail is reconsidered in the switch at the end
    4378             : 
    4379           0 :     if ((exit_status == PCMK_OCF_NOT_INSTALLED)
    4380           0 :         || (!pcmk__is_bundled(history->rsc)
    4381           0 :             && pcmk_xe_mask_probe_failure(history->xml))) {
    4382           0 :         history->rsc->role = pcmk_role_stopped;
    4383             : 
    4384           0 :     } else if (exit_status == PCMK_OCF_NOT_RUNNING) {
    4385           0 :         clear_past_failure = true;
    4386             : 
    4387           0 :     } else if (pcmk__str_eq(history->task, PCMK_ACTION_MONITOR,
    4388             :                             pcmk__str_none)) {
    4389           0 :         if ((last_failure != NULL)
    4390           0 :             && pcmk__str_eq(history->key, pcmk__xe_history_key(last_failure),
    4391             :                             pcmk__str_none)) {
    4392           0 :             clear_past_failure = true; // This monitor has the same history key as the last failure
    4393             :         }
    4394           0 :         if (history->rsc->role < pcmk_role_started) {
    4395           0 :             set_active(history->rsc);
    4396             :         }
    4397             : 
    4398           0 :     } else if (pcmk__str_eq(history->task, PCMK_ACTION_START, pcmk__str_none)) {
    4399           0 :         history->rsc->role = pcmk_role_started;
    4400           0 :         clear_past_failure = true;
    4401             : 
    4402           0 :     } else if (pcmk__str_eq(history->task, PCMK_ACTION_STOP, pcmk__str_none)) {
    4403           0 :         history->rsc->role = pcmk_role_stopped;
    4404           0 :         clear_past_failure = true;
    4405             : 
    4406           0 :     } else if (pcmk__str_eq(history->task, PCMK_ACTION_PROMOTE,
    4407             :                             pcmk__str_none)) {
    4408           0 :         history->rsc->role = pcmk_role_promoted;
    4409           0 :         clear_past_failure = true;
    4410             : 
    4411           0 :     } else if (pcmk__str_eq(history->task, PCMK_ACTION_DEMOTE,
    4412             :                             pcmk__str_none)) {
    4413           0 :         if (*on_fail == pcmk_on_fail_demote) {
    4414             :             /* Demote clears an error only if
    4415             :              * PCMK_META_ON_FAIL=PCMK_VALUE_DEMOTE
    4416             :              */
    4417           0 :             clear_past_failure = true;
    4418             :         }
    4419           0 :         history->rsc->role = pcmk_role_unpromoted;
    4420             : 
    4421           0 :     } else if (pcmk__str_eq(history->task, PCMK_ACTION_MIGRATE_FROM,
    4422             :                             pcmk__str_none)) {
    4423           0 :         history->rsc->role = pcmk_role_started;
    4424           0 :         clear_past_failure = true;
    4425             : 
    4426           0 :     } else if (pcmk__str_eq(history->task, PCMK_ACTION_MIGRATE_TO,
    4427             :                             pcmk__str_none)) {
    4428           0 :         unpack_migrate_to_success(history);
    4429             : 
    4430           0 :     } else if (history->rsc->role < pcmk_role_started) {
    4431           0 :         pcmk__rsc_trace(history->rsc, "%s active on %s",
    4432             :                         history->rsc->id, pcmk__node_name(history->node));
    4433           0 :         set_active(history->rsc);
    4434             :     }
    4435             : 
    4436           0 :     if (!clear_past_failure) {
    4437           0 :         return; // Leave *on_fail and the next role untouched
    4438             :     }
    4439             : 
    4440           0 :     switch (*on_fail) {
    4441           0 :         case pcmk_on_fail_stop:
    4442             :         case pcmk_on_fail_ban:
    4443             :         case pcmk_on_fail_standby_node:
    4444             :         case pcmk_on_fail_fence_node:
    4445           0 :             pcmk__rsc_trace(history->rsc,
    4446             :                             "%s (%s) is not cleared by a completed %s",
    4447             :                             history->rsc->id, pcmk_on_fail_text(*on_fail),
    4448             :                             history->task);
    4449           0 :             break;
    4450             : 
    4451           0 :         case pcmk_on_fail_block:
    4452             :         case pcmk_on_fail_ignore:
    4453             :         case pcmk_on_fail_demote:
    4454             :         case pcmk_on_fail_restart:
    4455             :         case pcmk_on_fail_restart_container:
    4456           0 :             *on_fail = pcmk_on_fail_ignore; // These handlings are reset by the completed action
    4457           0 :             pe__set_next_role(history->rsc, pcmk_role_unknown,
    4458             :                               "clear past failures");
    4459           0 :             break;
    4460             : 
    4461           0 :         case pcmk_on_fail_reset_remote:
    4462           0 :             if (history->rsc->remote_reconnect_ms == 0) {
    4463             :                 /* With no reconnect interval, the connection is allowed to
    4464             :                  * start again after the remote node is fenced and
    4465             :                  * completely stopped. (With a reconnect interval, we wait
    4466             :                  * for the failure to be cleared entirely before attempting
    4467             :                  * to reconnect.)
    4468             :                  */
    4469           0 :                 *on_fail = pcmk_on_fail_ignore;
    4470           0 :                 pe__set_next_role(history->rsc, pcmk_role_unknown,
    4471             :                                   "clear past failures and reset remote");
    4472             :             }
    4473           0 :             break;
    4474             :     }
    4475             : }
    4476             : 
    4477             : /*!
    4478             :  * \internal
    4479             :  * \brief Check whether a given history entry matters for resource state
    4480             :  *
    4481             :  * \param[in] history  Parsed action history entry
    4482             :  *
    4483             :  * \return true if action can affect resource state (currently, any task other than PCMK_ACTION_NOTIFY and PCMK_ACTION_META_DATA), otherwise false
    4484             :  */
    4485             : static inline bool
    4486           0 : can_affect_state(struct action_history *history)
    4487             : {
    4488             : #if 0 // Disabled alternative: explicit list of the actions of interest
    4489             :     /* @COMPAT It might be better to parse only actions we know we're interested
    4490             :      * in, rather than exclude a couple we don't. However that would be a
    4491             :      * behavioral change that should be done at a major or minor series release.
    4492             :      * Currently, unknown operations can affect whether a resource is considered
    4493             :      * active and/or failed.
    4494             :      */
    4495             :      return pcmk__str_any_of(history->task, PCMK_ACTION_MONITOR,
    4496             :                              PCMK_ACTION_START, PCMK_ACTION_STOP,
    4497             :                              PCMK_ACTION_PROMOTE, PCMK_ACTION_DEMOTE,
    4498             :                              PCMK_ACTION_MIGRATE_TO, PCMK_ACTION_MIGRATE_FROM,
    4499             :                              "asyncmon", NULL);
    4500             : #else // Compiled implementation: exclude only the two actions named below
    4501           0 :      return !pcmk__str_any_of(history->task, PCMK_ACTION_NOTIFY,
    4502           0 :                               PCMK_ACTION_META_DATA, NULL);
    4503             : #endif
    4504             : }
    4505             : 
    4506             : /*!
    4507             :  * \internal
    4508             :  * \brief Unpack execution/exit status and exit reason from a history entry
    4509             :  *
    4510             :  * \param[in,out] history  Action history entry to unpack
    4511             :  *
    4512             :  * \return Standard Pacemaker return code (pcmk_rc_unpack_error if the execution status cannot be read or is not acceptable, otherwise pcmk_rc_ok; an unusable exit status is replaced with CRM_EX_ERROR rather than rejected)
    4513             :  */
    4514             : static int
    4515           0 : unpack_action_result(struct action_history *history)
    4516             : {
    4517           0 :     if ((crm_element_value_int(history->xml, PCMK__XA_OP_STATUS,
    4518             :                                &(history->execution_status)) < 0)
    4519           0 :         || (history->execution_status < PCMK_EXEC_PENDING)
    4520           0 :         || (history->execution_status > PCMK_EXEC_MAX)
    4521           0 :         || (history->execution_status == PCMK_EXEC_CANCELLED)) { // Cancelled is rejected even though it is within range
    4522           0 :         pcmk__config_err("Ignoring resource history entry %s for %s on %s "
    4523             :                          "with invalid " PCMK__XA_OP_STATUS " '%s'",
    4524             :                          history->id, history->rsc->id,
    4525             :                          pcmk__node_name(history->node),
    4526             :                          pcmk__s(crm_element_value(history->xml,
    4527             :                                                    PCMK__XA_OP_STATUS),
    4528             :                                  ""));
    4529           0 :         return pcmk_rc_unpack_error;
    4530             :     }
    4531           0 :     if ((crm_element_value_int(history->xml, PCMK__XA_RC_CODE,
    4532             :                                &(history->exit_status)) < 0)
    4533           0 :         || (history->exit_status < 0) || (history->exit_status > CRM_EX_MAX)) {
    4534             : #if 0 // Disabled: stricter handling deferred to a future release (see below)
    4535             :         /* @COMPAT We should ignore malformed entries, but since that would
    4536             :          * change behavior, it should be done at a major or minor series
    4537             :          * release.
    4538             :          */
    4539             :         pcmk__config_err("Ignoring resource history entry %s for %s on %s "
    4540             :                          "with invalid " PCMK__XA_RC_CODE " '%s'",
    4541             :                          history->id, history->rsc->id,
    4542             :                          pcmk__node_name(history->node),
    4543             :                          pcmk__s(crm_element_value(history->xml,
    4544             :                                                    PCMK__XA_RC_CODE),
    4545             :                                  ""));
    4546             :         return pcmk_rc_unpack_error;
    4547             : #else
    4548           0 :         history->exit_status = CRM_EX_ERROR; // Exit status could not be read or is out of range
    4549             : #endif
    4550             :     }
    4551           0 :     history->exit_reason = crm_element_value(history->xml, PCMK_XA_EXIT_REASON);
    4552           0 :     return pcmk_rc_ok;
    4553             : }
    4554             : 
    4555             : /*!
    4556             :  * \internal
    4557             :  * \brief Process an action history entry whose result expired
    4558             :  *
    4559             :  * \param[in,out] history           Parsed action history entry
    4560             :  * \param[in]     orig_exit_status  Action exit status before remapping
    4561             :  *
    4562             :  * \return Standard Pacemaker return code (in particular, pcmk_rc_ok means the
    4563             :  *         entry needs no further processing; every other path returns pcmk_rc_undetermined)
    4564             :  */
    4565             : static int
    4566           0 : process_expired_result(struct action_history *history, int orig_exit_status)
    4567             : {
    4568           0 :     if (!pcmk__is_bundled(history->rsc)
    4569           0 :         && pcmk_xe_mask_probe_failure(history->xml)
    4570           0 :         && (orig_exit_status != history->expected_exit_status)) {
    4571             : 
    4572           0 :         if (history->rsc->role <= pcmk_role_stopped) {
    4573           0 :             history->rsc->role = pcmk_role_unknown;
    4574             :         }
    4575           0 :         crm_trace("Ignoring resource history entry %s for probe of %s on %s: "
    4576             :                   "Masked failure expired",
    4577             :                   history->id, history->rsc->id,
    4578             :                   pcmk__node_name(history->node));
    4579           0 :         return pcmk_rc_ok;
    4580             :     }
    4581             : 
    4582           0 :     if (history->exit_status == history->expected_exit_status) {
    4583           0 :         return pcmk_rc_undetermined; // Only failures expire
    4584             :     }
    4585             : 
    4586           0 :     if (history->interval_ms == 0) { // Failed action with a zero interval: just ignore the entry
    4587           0 :         crm_notice("Ignoring resource history entry %s for %s of %s on %s: "
    4588             :                    "Expired failure",
    4589             :                    history->id, history->task, history->rsc->id,
    4590             :                    pcmk__node_name(history->node));
    4591           0 :         return pcmk_rc_ok;
    4592             :     }
    4593             : 
    4594           0 :     if (history->node->details->online && !history->node->details->unclean) {
    4595             :         /* Reschedule the recurring action. schedule_cancel() won't work at
    4596             :          * this stage, so as a hacky workaround, forcibly change the restart
    4597             :          * digest so pcmk__check_action_config() does what we want later.
    4598             :          *
    4599             :          * @TODO We should skip this if there is a newer successful monitor.
    4600             :          *       Also, this causes rescheduling only if the history entry
    4601             :          *       has a PCMK__XA_OP_DIGEST (which the expire-non-blocked-failure
    4602             :          *       scheduler regression test doesn't, but that may not be a
    4603             :          *       realistic scenario in production).
    4604             :          */
    4605           0 :         crm_notice("Rescheduling %s-interval %s of %s on %s "
    4606             :                    "after failure expired",
    4607             :                    pcmk__readable_interval(history->interval_ms), history->task,
    4608             :                    history->rsc->id, pcmk__node_name(history->node));
    4609           0 :         crm_xml_add(history->xml, PCMK__XA_OP_RESTART_DIGEST,
    4610             :                     "calculated-failure-timeout");
    4611           0 :         return pcmk_rc_ok;
    4612             :     }
    4613             : 
    4614           0 :     return pcmk_rc_undetermined; // Failed recurring action on a node that is offline or unclean
    4615             : }
    4616             : 
    4617             : /*!
    4618             :  * \internal
    4619             :  * \brief Process a masked probe failure (update state as for the expected result, record the failed operation, and add a -INFINITY "masked-probe-failure" location entry for the node)
    4620             :  *
    4621             :  * \param[in,out] history           Parsed action history entry
    4622             :  * \param[in]     orig_exit_status  Action exit status before remapping
    4623             :  * \param[in]     last_failure      Resource's last_failure entry, if known
    4624             :  * \param[in,out] on_fail           Resource's current failure handling
    4625             :  */
    4626             : static void
    4627           0 : mask_probe_failure(struct action_history *history, int orig_exit_status,
    4628             :                    const xmlNode *last_failure,
    4629             :                    enum action_fail_response *on_fail)
    4630             : {
    4631           0 :     pcmk_resource_t *ban_rsc = history->rsc;
    4632             : 
    4633           0 :     if (!pcmk_is_set(history->rsc->flags, pcmk_rsc_unique)) {
    4634           0 :         ban_rsc = uber_parent(history->rsc); // Without the unique flag, target whatever uber_parent() returns instead of this resource
    4635             :     }
    4636             : 
    4637           0 :     crm_notice("Treating probe result '%s' for %s on %s as 'not running'",
    4638             :                services_ocf_exitcode_str(orig_exit_status), history->rsc->id,
    4639             :                pcmk__node_name(history->node));
    4640           0 :     update_resource_state(history, history->expected_exit_status, last_failure,
    4641             :                           on_fail); // State is updated as if the probe had returned its expected status
    4642           0 :     crm_xml_add(history->xml, PCMK_XA_UNAME, history->node->details->uname);
    4643             : 
    4644           0 :     record_failed_op(history);
    4645           0 :     resource_location(ban_rsc, history->node, -PCMK_SCORE_INFINITY,
    4646           0 :                       "masked-probe-failure", history->rsc->cluster);
    4647           0 : }
    4648             : 
    4649             : /*!
    4650             :  * \internal \brief Check whether a given failure is for a given pending action
    4651             :  *
    4652             :  * \param[in] history       Parsed history entry for pending action
    4653             :  * \param[in] last_failure  Resource's last_failure entry, if known
    4654             :  *
    4655             :  * \return true if \p last_failure is failure of pending action in \p history
    4656             :  *         (same task and interval, and a PCMK_XA_LAST_RC_CHANGE value that is not older), otherwise false
    4657             :  * \note Both \p history and \p last_failure must come from the same
    4658             :  *       \c PCMK__XE_LRM_RESOURCE block, as node and resource are assumed to be
    4659             :  *       the same.
    4660             :  */
    4661             : static bool
    4662           0 : failure_is_newer(const struct action_history *history,
    4663             :                  const xmlNode *last_failure)
    4664             : {
    4665           0 :     guint failure_interval_ms = 0U;
    4666           0 :     long long failure_change = 0LL; // PCMK_XA_LAST_RC_CHANGE of last_failure
    4667           0 :     long long this_change = 0LL; // PCMK_XA_LAST_RC_CHANGE of the pending entry
    4668             : 
    4669           0 :     if (last_failure == NULL) {
    4670           0 :         return false; // Resource has no last_failure entry
    4671             :     }
    4672             : 
    4673           0 :     if (!pcmk__str_eq(history->task,
    4674             :                       crm_element_value(last_failure, PCMK_XA_OPERATION),
    4675             :                       pcmk__str_none)) {
    4676           0 :         return false; // last_failure is for different action
    4677             :     }
    4678             : 
    4679           0 :     if ((crm_element_value_ms(last_failure, PCMK_META_INTERVAL,
    4680             :                               &failure_interval_ms) != pcmk_ok)
    4681           0 :         || (history->interval_ms != failure_interval_ms)) {
    4682           0 :         return false; // last_failure is for action with different interval
    4683             :     }
    4684             : 
    4685           0 :     if ((pcmk__scan_ll(crm_element_value(history->xml, PCMK_XA_LAST_RC_CHANGE),
    4686             :                        &this_change, 0LL) != pcmk_rc_ok)
    4687           0 :         || (pcmk__scan_ll(crm_element_value(last_failure,
    4688             :                                             PCMK_XA_LAST_RC_CHANGE),
    4689             :                           &failure_change, 0LL) != pcmk_rc_ok)
    4690           0 :         || (failure_change < this_change)) {
    4691           0 :         return false; // Failure is not known to be newer
    4692             :     }
    4693             : 
    4694           0 :     return true; // Includes the case where both timestamps are equal
    4695             : }
    4696             : 
    4697             : /*!
    4698             :  * \internal
    4699             :  * \brief Update a resource's role etc. for a pending action
    4700             :  *
                     :  * Nothing is changed if failure_is_newer() reports that \p last_failure
                     :  * is a newer failure of this same action. Otherwise:
                     :  * - a pending start sets pcmk_rsc_start_pending on the resource and calls
                     :  *   set_active() for it
                     :  * - a pending promote sets the resource's role to pcmk_role_promoted
                     :  * - a pending migrate_to on an unclean node calls stop_action() for the
                     :  *   migration target (read from PCMK__META_MIGRATE_TARGET), if that node
                     :  *   can be found
                     :  *
                     :  * Unless the resource already has a pending task recorded, or the action
                     :  * is a probe, the task name (a strdup() copy) and node are then stored in
                     :  * the resource's pending_task and pending_node.
                     :  *
    4701             :  * \param[in,out] history       Parsed history entry for pending action
    4702             :  * \param[in]     last_failure  Resource's last_failure entry, if known
                     :  *
                     :  * \note NOTE(review): the strdup() result is not checked, so on allocation
                     :  *       failure pending_task is left NULL while pending_node is still set.
    4703             :  */
    4704             : static void
    4705           0 : process_pending_action(struct action_history *history,
    4706             :                        const xmlNode *last_failure)
    4707             : {
    4708             :     /* For recurring monitors, a failure is recorded only in RSC_last_failure_0,
    4709             :      * and there might be a RSC_monitor_INTERVAL entry with the last successful
    4710             :      * or pending result.
    4711             :      *
    4712             :      * If last_failure contains the failure of the pending recurring monitor
    4713             :      * we're processing here, and is newer, the action is no longer pending.
    4714             :      * (Pending results have call ID -1, which sorts last, so the last failure
    4715             :      * if any should be known.)
    4716             :      */
    4717           0 :     if (failure_is_newer(history, last_failure)) {
    4718           0 :         return;
    4719             :     }
    4720             : 
    4721           0 :     if (strcmp(history->task, PCMK_ACTION_START) == 0) {
    4722           0 :         pcmk__set_rsc_flags(history->rsc, pcmk_rsc_start_pending);
    4723           0 :         set_active(history->rsc);
    4724             : 
    4725           0 :     } else if (strcmp(history->task, PCMK_ACTION_PROMOTE) == 0) {
    4726           0 :         history->rsc->role = pcmk_role_promoted;
    4727             : 
    4728           0 :     } else if ((strcmp(history->task, PCMK_ACTION_MIGRATE_TO) == 0)
    4729           0 :                && history->node->details->unclean) {
    4730             :         /* A migrate_to action is pending on a unclean source, so force a stop
    4731             :          * on the target.
    4732             :          */
    4733           0 :         const char *migrate_target = NULL;
    4734           0 :         pcmk_node_t *target = NULL;
    4735             : 
    4736           0 :         migrate_target = crm_element_value(history->xml,
    4737             :                                            PCMK__META_MIGRATE_TARGET);
    4738           0 :         target = pcmk_find_node(history->rsc->cluster, migrate_target);
    4739           0 :         if (target != NULL) {
    4740           0 :             stop_action(history->rsc, target, FALSE);
    4741             :         }
    4742             :     }
    4743             : 
    4744           0 :     if (history->rsc->pending_task != NULL) {
    4745             :         /* There should never be multiple pending actions, but as a failsafe,
    4746             :          * just remember the first one processed for display purposes.
    4747             :          */
    4748           0 :         return;
    4749             :     }
    4750             : 
    4751           0 :     if (pcmk_is_probe(history->task, history->interval_ms)) {
    4752             :         /* Pending probes are currently never displayed, even if pending
    4753             :          * operations are requested. If we ever want to change that,
    4754             :          * enable the below and the corresponding part of
    4755             :          * native.c:native_pending_task().
    4756             :          */
    4757             : #if 0
    4758             :         history->rsc->pending_task = strdup("probe");
    4759             :         history->rsc->pending_node = history->node;
    4760             : #endif
    4761             :     } else {
    4762           0 :         history->rsc->pending_task = strdup(history->task);
    4763           0 :         history->rsc->pending_node = history->node;
    4764             :     }
    4765             : }
    4766             : /*!
                     :  * \internal
                     :  * \brief Unpack one resource history entry and update state from it
                     :  *
                     :  * The entry is ignored (with a configuration error logged) if it has no
                     :  * XML ID or no PCMK_XA_OPERATION, and is also ignored if
                     :  * can_affect_state() rejects it or unpack_action_result() fails.
                     :  *
                     :  * Otherwise, after check_operation_expiry() and remap_operation(), and
                     :  * unless the result is handled as expired or as a masked probe failure,
                     :  * the entry is dispatched on its execution status:
                     :  * - pending: process_pending_action()
                     :  * - done: update_resource_state() with the actual exit status
                     :  * - not installed: the node is banned (-PCMK_SCORE_INFINITY, "hard-error")
                     :  *   and unpack_rsc_op_failure() is called
                     :  * - anything else: unpack_failure_handling() decides whether the failure
                     :  *   is treated as a success (update_resource_state() with the expected
                     :  *   exit status) or handled by unpack_rsc_op_failure(); in the latter
                     :  *   case PCMK_EXEC_ERROR_HARD bans the node and PCMK_EXEC_ERROR_FATAL
                     :  *   passes a NULL node to resource_location() ("anywhere" per the log)
                     :  *
                     :  * \param[in,out] rsc           Resource that history entry is for
                     :  * \param[in,out] node          Node that history entry is for
                     :  * \param[in,out] xml_op        History entry XML (PCMK_XA_UNAME is added to
                     :  *                              it when a failure is treated as success)
                     :  * \param[in,out] last_failure  Address of the resource's last failure
                     :  *                              entry; the pointed-to value is read here,
                     :  *                              and the address itself is passed to
                     :  *                              unpack_rsc_op_failure()
                     :  * \param[in,out] on_fail       Failure handling for the resource; set here
                     :  *                              to pcmk_on_fail_ban or to the unpacked
                     :  *                              failure strategy in some cases, and
                     :  *                              passed on to the helpers called
                     :  */
    4767             : static void
    4768           0 : unpack_rsc_op(pcmk_resource_t *rsc, pcmk_node_t *node, xmlNode *xml_op,
    4769             :               xmlNode **last_failure, enum action_fail_response *on_fail)
    4770             : {
    4771           0 :     int old_rc = 0;
    4772           0 :     bool expired = false;
    4773           0 :     pcmk_resource_t *parent = rsc;
    4774           0 :     enum rsc_role_e fail_role = pcmk_role_unknown;
    4775           0 :     enum action_fail_response failure_strategy = pcmk_on_fail_restart;
    4776             : 
    4777           0 :     struct action_history history = {
    4778             :         .rsc = rsc,
    4779             :         .node = node,
    4780             :         .xml = xml_op,
    4781             :         .execution_status = PCMK_EXEC_UNKNOWN,
    4782             :     };
    4783             : 
    4784           0 :     CRM_CHECK(rsc && node && xml_op, return);
    4785             : 
    4786           0 :     history.id = pcmk__xe_id(xml_op);
    4787           0 :     if (history.id == NULL) {
    4788           0 :         pcmk__config_err("Ignoring resource history entry for %s on %s "
    4789             :                          "without ID", rsc->id, pcmk__node_name(node));
    4790           0 :         return;
    4791             :     }
    4792             : 
    4793             :     // Task and interval
    4794           0 :     history.task = crm_element_value(xml_op, PCMK_XA_OPERATION);
    4795           0 :     if (history.task == NULL) {
    4796           0 :         pcmk__config_err("Ignoring resource history entry %s for %s on %s "
    4797             :                          "without " PCMK_XA_OPERATION,
    4798             :                          history.id, rsc->id, pcmk__node_name(node));
    4799           0 :         return;
    4800             :     }
    4801           0 :     crm_element_value_ms(xml_op, PCMK_META_INTERVAL, &(history.interval_ms));
    4802           0 :     if (!can_affect_state(&history)) {
    4803           0 :         pcmk__rsc_trace(rsc,
    4804             :                         "Ignoring resource history entry %s for %s on %s "
    4805             :                         "with irrelevant action '%s'",
    4806             :                         history.id, rsc->id, pcmk__node_name(node),
    4807             :                         history.task);
    4808           0 :         return;
    4809             :     }
    4810             : 
    4811           0 :     if (unpack_action_result(&history) != pcmk_rc_ok) {
    4812           0 :         return; // Error already logged
    4813             :     }
    4814             : 
    4815           0 :     history.expected_exit_status = pe__target_rc_from_xml(xml_op);
    4816           0 :     history.key = pcmk__xe_history_key(xml_op);
    4817           0 :     crm_element_value_int(xml_op, PCMK__XA_CALL_ID, &(history.call_id));
    4818             : 
    4819           0 :     pcmk__rsc_trace(rsc, "Unpacking %s (%s call %d on %s): %s (%s)",
    4820             :                     history.id, history.task, history.call_id,
    4821             :                     pcmk__node_name(node),
    4822             :                     pcmk_exec_status_str(history.execution_status),
    4823             :                     crm_exit_str(history.exit_status));
    4824             : 
    4825           0 :     if (node->details->unclean) {
    4826           0 :         pcmk__rsc_trace(rsc,
    4827             :                         "%s is running on %s, which is unclean (further action "
    4828             :                         "depends on value of stop's on-fail attribute)",
    4829             :                         rsc->id, pcmk__node_name(node));
    4830             :     }
    4831             :     // Save the exit status as recorded, before remap_operation() is called
    4832           0 :     expired = check_operation_expiry(&history);
    4833           0 :     old_rc = history.exit_status;
    4834             : 
    4835           0 :     remap_operation(&history, on_fail, expired);
    4836             : 
    4837           0 :     if (expired && (process_expired_result(&history, old_rc) == pcmk_rc_ok)) {
    4838           0 :         goto done;
    4839             :     }
    4840             : 
    4841           0 :     if (!pcmk__is_bundled(rsc) && pcmk_xe_mask_probe_failure(xml_op)) {
    4842           0 :         mask_probe_failure(&history, old_rc, *last_failure, on_fail);
    4843           0 :         goto done;
    4844             :     }
    4845             :     // 'parent' (target of bans below) stays rsc if pcmk_rsc_unique is set
    4846           0 :     if (!pcmk_is_set(rsc->flags, pcmk_rsc_unique)) {
    4847           0 :         parent = uber_parent(rsc);
    4848             :     }
    4849             : 
    4850           0 :     switch (history.execution_status) {
    4851           0 :         case PCMK_EXEC_PENDING:
    4852           0 :             process_pending_action(&history, *last_failure);
    4853           0 :             goto done;
    4854             : 
    4855           0 :         case PCMK_EXEC_DONE:
    4856           0 :             update_resource_state(&history, history.exit_status, *last_failure,
    4857             :                                   on_fail);
    4858           0 :             goto done;
    4859             : 
    4860           0 :         case PCMK_EXEC_NOT_INSTALLED:
    4861           0 :             unpack_failure_handling(&history, &failure_strategy, &fail_role);
    4862           0 :             if (failure_strategy == pcmk_on_fail_ignore) {
    4863           0 :                 crm_warn("Cannot ignore failed %s of %s on %s: "
    4864             :                          "Resource agent doesn't exist "
    4865             :                          CRM_XS " status=%d rc=%d id=%s",
    4866             :                          history.task, rsc->id, pcmk__node_name(node),
    4867             :                          history.execution_status, history.exit_status,
    4868             :                          history.id);
    4869             :                 /* Also for printing it as "FAILED" by marking it as
    4870             :                  * pcmk_rsc_failed later
    4871             :                  */
    4872           0 :                 *on_fail = pcmk_on_fail_ban;
    4873             :             }
    4874           0 :             resource_location(parent, node, -PCMK_SCORE_INFINITY,
    4875             :                               "hard-error", rsc->cluster);
    4876           0 :             unpack_rsc_op_failure(&history, failure_strategy, fail_role,
    4877             :                                   last_failure, on_fail);
    4878           0 :             goto done;
    4879             : 
    4880           0 :         case PCMK_EXEC_NOT_CONNECTED:
    4881           0 :             if (pcmk__is_pacemaker_remote_node(node)
    4882           0 :                 && pcmk_is_set(node->details->remote_rsc->flags,
    4883             :                                pcmk_rsc_managed)) {
    4884             :                 /* We should never get into a situation where a managed remote
    4885             :                  * connection resource is considered OK but a resource action
    4886             :                  * behind the connection gets a "not connected" status. But as a
    4887             :                  * fail-safe in case a bug or unusual circumstances do lead to
    4888             :                  * that, ensure the remote connection is considered failed.
    4889             :                  */
    4890           0 :                 pcmk__set_rsc_flags(node->details->remote_rsc,
    4891             :                                     pcmk_rsc_failed|pcmk_rsc_stop_if_failed);
    4892             :             }
    4893           0 :             break; // Not done, do error handling
    4894             : 
    4895           0 :         case PCMK_EXEC_ERROR:
    4896             :         case PCMK_EXEC_ERROR_HARD:
    4897             :         case PCMK_EXEC_ERROR_FATAL:
    4898             :         case PCMK_EXEC_TIMEOUT:
    4899             :         case PCMK_EXEC_NOT_SUPPORTED:
    4900             :         case PCMK_EXEC_INVALID:
    4901           0 :             break; // Not done, do error handling
    4902             : 
    4903           0 :         default: // No other value should be possible at this point
    4904           0 :             break;
    4905             :     }
    4906             :     // Only the cases that 'break' above get here; the rest jumped to done
    4907           0 :     unpack_failure_handling(&history, &failure_strategy, &fail_role);
    4908           0 :     if ((failure_strategy == pcmk_on_fail_ignore)
    4909           0 :         || ((failure_strategy == pcmk_on_fail_restart_container)
    4910           0 :             && (strcmp(history.task, PCMK_ACTION_STOP) == 0))) {
    4911             : 
    4912           0 :         char *last_change_s = last_change_str(xml_op);
    4913             : 
    4914           0 :         crm_warn("Pretending failed %s (%s%s%s) of %s on %s at %s succeeded "
    4915             :                  CRM_XS " %s",
    4916             :                  history.task, services_ocf_exitcode_str(history.exit_status),
    4917             :                  (pcmk__str_empty(history.exit_reason)? "" : ": "),
    4918             :                  pcmk__s(history.exit_reason, ""), rsc->id,
    4919             :                  pcmk__node_name(node), last_change_s, history.id);
    4920           0 :         free(last_change_s);
    4921             : 
    4922           0 :         update_resource_state(&history, history.expected_exit_status,
    4923             :                               *last_failure, on_fail);
    4924           0 :         crm_xml_add(xml_op, PCMK_XA_UNAME, node->details->uname);
    4925           0 :         pcmk__set_rsc_flags(rsc, pcmk_rsc_ignore_failure);
    4926             : 
    4927           0 :         record_failed_op(&history);
    4928             : 
    4929           0 :         if ((failure_strategy == pcmk_on_fail_restart_container)
    4930           0 :             && cmp_on_fail(*on_fail, pcmk_on_fail_restart) <= 0) {
    4931           0 :             *on_fail = failure_strategy;
    4932             :         }
    4933             : 
    4934             :     } else {
    4935           0 :         unpack_rsc_op_failure(&history, failure_strategy, fail_role,
    4936             :                               last_failure, on_fail);
    4937             : 
    4938           0 :         if (history.execution_status == PCMK_EXEC_ERROR_HARD) {
    4939           0 :             uint8_t log_level = LOG_ERR;
    4940             : 
    4941           0 :             if (history.exit_status == PCMK_OCF_NOT_INSTALLED) {
    4942           0 :                 log_level = LOG_NOTICE;
    4943             :             }
    4944           0 :             do_crm_log(log_level,
    4945             :                        "Preventing %s from restarting on %s because "
    4946             :                        "of hard failure (%s%s%s) " CRM_XS " %s",
    4947             :                        parent->id, pcmk__node_name(node),
    4948             :                        services_ocf_exitcode_str(history.exit_status),
    4949             :                        (pcmk__str_empty(history.exit_reason)? "" : ": "),
    4950             :                        pcmk__s(history.exit_reason, ""), history.id);
    4951           0 :             resource_location(parent, node, -PCMK_SCORE_INFINITY,
    4952             :                               "hard-error", rsc->cluster);
    4953             : 
    4954           0 :         } else if (history.execution_status == PCMK_EXEC_ERROR_FATAL) {
    4955           0 :             pcmk__sched_err("Preventing %s from restarting anywhere because "
    4956             :                             "of fatal failure (%s%s%s) " CRM_XS " %s",
    4957             :                             parent->id,
    4958             :                             services_ocf_exitcode_str(history.exit_status),
    4959             :                             (pcmk__str_empty(history.exit_reason)? "" : ": "),
    4960             :                             pcmk__s(history.exit_reason, ""), history.id);
    4961           0 :             resource_location(parent, NULL, -PCMK_SCORE_INFINITY,
    4962             :                               "fatal-error", rsc->cluster);
    4963             :         }
    4964             :     }
    4965             : 
    4966           0 : done:
    4967           0 :     pcmk__rsc_trace(rsc, "%s role on %s after %s is %s (next %s)",
    4968             :                     rsc->id, pcmk__node_name(node), history.id,
    4969             :                     pcmk_role_text(rsc->role),
    4970             :                     pcmk_role_text(rsc->next_role));
    4971             : }
    4972             : /*!
                     :  * \internal
                     :  * \brief Populate a node's attribute and utilization tables
                     :  *
                     :  * Inserts CRM_ATTR_UNAME, CRM_ATTR_ID and CRM_ATTR_IS_DC into the node's
                     :  * attribute table; if the node's ID matches scheduler->dc_uuid, the node
                     :  * is also recorded as the DC (scheduler->dc_node and details->is_dc).
                     :  * CRM_ATTR_CLUSTER_NAME is inserted if PCMK_OPT_CLUSTER_NAME is found in
                     :  * scheduler->config_hash.
                     :  *
                     :  * The PCMK_XE_INSTANCE_ATTRIBUTES and PCMK_XE_UTILIZATION blocks of
                     :  * \p xml_obj are then unpacked into node->details->attrs and
                     :  * node->details->utilization respectively.
                     :  *
                     :  * Finally, if CRM_ATTR_SITE_NAME is still unset, it is set from the
                     :  * node's "site-name" attribute if there is one, otherwise from the
                     :  * cluster name if there is one.
                     :  *
                     :  * \param[in]     xml_obj    XML to unpack attributes and utilization from
                     :  * \param[in,out] node       Node whose tables should be populated
                     :  * \param[in]     overwrite  Passed to pe__unpack_dataset_nvpairs() for the
                     :  *                           instance attributes (utilization always
                     :  *                           passes FALSE)
                     :  * \param[in,out] scheduler  Scheduler data (supplies now, dc_uuid and
                     :  *                           config_hash; dc_node may be set)
                     :  */
    4973             : static void
    4974           0 : add_node_attrs(const xmlNode *xml_obj, pcmk_node_t *node, bool overwrite,
    4975             :                pcmk_scheduler_t *scheduler)
    4976             : {
    4977           0 :     const char *cluster_name = NULL;
    4978             : 
    4979           0 :     pe_rule_eval_data_t rule_data = {
    4980             :         .node_hash = NULL,
    4981           0 :         .now = scheduler->now,
    4982             :         .match_data = NULL,
    4983             :         .rsc_data = NULL,
    4984             :         .op_data = NULL
    4985             :     };
    4986             : 
    4987           0 :     pcmk__insert_dup(node->details->attrs,
    4988           0 :                      CRM_ATTR_UNAME, node->details->uname);
    4989             : 
    4990           0 :     pcmk__insert_dup(node->details->attrs, CRM_ATTR_ID, node->details->id);
    4991           0 :     if (pcmk__str_eq(node->details->id, scheduler->dc_uuid, pcmk__str_casei)) {
    4992           0 :         scheduler->dc_node = node;
    4993           0 :         node->details->is_dc = TRUE;
    4994           0 :         pcmk__insert_dup(node->details->attrs,
    4995             :                          CRM_ATTR_IS_DC, PCMK_VALUE_TRUE);
    4996             :     } else {
    4997           0 :         pcmk__insert_dup(node->details->attrs,
    4998             :                          CRM_ATTR_IS_DC, PCMK_VALUE_FALSE);
    4999             :     }
    5000             : 
    5001           0 :     cluster_name = g_hash_table_lookup(scheduler->config_hash,
    5002             :                                        PCMK_OPT_CLUSTER_NAME);
    5003           0 :     if (cluster_name) {
    5004           0 :         pcmk__insert_dup(node->details->attrs, CRM_ATTR_CLUSTER_NAME,
    5005             :                          cluster_name);
    5006             :     }
    5007             : 
    5008           0 :     pe__unpack_dataset_nvpairs(xml_obj, PCMK_XE_INSTANCE_ATTRIBUTES, &rule_data,
    5009           0 :                                node->details->attrs, NULL, overwrite,
    5010             :                                scheduler);
    5011             : 
    5012           0 :     pe__unpack_dataset_nvpairs(xml_obj, PCMK_XE_UTILIZATION, &rule_data,
    5013           0 :                                node->details->utilization, NULL,
    5014             :                                FALSE, scheduler);
    5015             : 
    5016           0 :     if (pcmk__node_attr(node, CRM_ATTR_SITE_NAME, NULL,
    5017             :                         pcmk__rsc_node_current) == NULL) {
    5018           0 :         const char *site_name = pcmk__node_attr(node, "site-name", NULL,
    5019             :                                                 pcmk__rsc_node_current);
    5020             : 
    5021           0 :         if (site_name) {
    5022           0 :             pcmk__insert_dup(node->details->attrs,
    5023             :                              CRM_ATTR_SITE_NAME, site_name);
    5024             : 
    5025           0 :         } else if (cluster_name) {
    5026             :             /* Default to cluster-name if unset */
    5027           0 :             pcmk__insert_dup(node->details->attrs,
    5028             :                              CRM_ATTR_SITE_NAME, cluster_name);
    5029             :         }
    5030             :     }
    5031           0 : }
    5032             : /*!
                     :  * \internal
                     :  * \brief Collect a resource's action history entries from one node
                     :  *
                     :  * Each PCMK__XE_LRM_RSC_OP child of \p rsc_entry has PCMK_XA_RESOURCE and
                     :  * PCMK_XA_UNAME set on it (from \p rsc and \p node) and is added to a
                     :  * list, which is then sorted with sort_op_by_callid().
                     :  *
                     :  * \param[in]     node           Node name to record in each entry
                     :  * \param[in]     rsc            Resource ID to record in each entry
                     :  * \param[in,out] rsc_entry      XML element whose children are examined
                     :  * \param[in]     active_filter  If FALSE, return all entries; otherwise
                     :  *                               use calculate_active_ops() and return only
                     :  *                               entries at or after the start index it
                     :  *                               reports (none at all if that start index
                     :  *                               is less than the stop index)
                     :  *
                     :  * \return List of the selected child elements of \p rsc_entry, or NULL if
                     :  *         there are none
                     :  *
                     :  * \note The list holds the XML nodes themselves, not copies, so presumably
                     :  *       callers should free only the list (confirm against callers).
                     :  * \note NOTE(review): in the filtering loop, the "not active" test compares
                     :  *       values that do not change between iterations and its trace
                     :  *       message logs the ID of \p rsc_entry; the loop-local rsc_op also
                     :  *       shadows the function-level variable of the same name.
                     :  */
    5033             : static GList *
    5034           0 : extract_operations(const char *node, const char *rsc, xmlNode * rsc_entry, gboolean active_filter)
    5035             : {
    5036           0 :     int counter = -1;
    5037           0 :     int stop_index = -1;
    5038           0 :     int start_index = -1;
    5039             : 
    5040           0 :     xmlNode *rsc_op = NULL;
    5041             : 
    5042           0 :     GList *gIter = NULL;
    5043           0 :     GList *op_list = NULL;
    5044           0 :     GList *sorted_op_list = NULL;
    5045             : 
    5046             :     /* extract operations */
    5047           0 :     op_list = NULL;
    5048           0 :     sorted_op_list = NULL;
    5049             : 
    5050           0 :     for (rsc_op = pcmk__xe_first_child(rsc_entry, NULL, NULL, NULL);
    5051           0 :          rsc_op != NULL; rsc_op = pcmk__xe_next(rsc_op)) {
    5052             : 
    5053           0 :         if (pcmk__xe_is(rsc_op, PCMK__XE_LRM_RSC_OP)) {
    5054           0 :             crm_xml_add(rsc_op, PCMK_XA_RESOURCE, rsc);
    5055           0 :             crm_xml_add(rsc_op, PCMK_XA_UNAME, node);
    5056           0 :             op_list = g_list_prepend(op_list, rsc_op);
    5057             :         }
    5058             :     }
    5059             : 
    5060           0 :     if (op_list == NULL) {
    5061             :         /* if there are no operations, there is nothing to do */
    5062           0 :         return NULL;
    5063             :     }
    5064             : 
    5065           0 :     sorted_op_list = g_list_sort(op_list, sort_op_by_callid);
    5066             : 
    5067             :     /* create active recurring operations as optional */
    5068           0 :     if (active_filter == FALSE) {
    5069           0 :         return sorted_op_list;
    5070             :     }
    5071             :     // Rebuild op_list from scratch with only the entries that pass the filter
    5072           0 :     op_list = NULL;
    5073             : 
    5074           0 :     calculate_active_ops(sorted_op_list, &start_index, &stop_index);
    5075             : 
    5076           0 :     for (gIter = sorted_op_list; gIter != NULL; gIter = gIter->next) {
    5077           0 :         xmlNode *rsc_op = (xmlNode *) gIter->data;
    5078             : 
    5079           0 :         counter++;
    5080             : 
    5081           0 :         if (start_index < stop_index) {
    5082           0 :             crm_trace("Skipping %s: not active", pcmk__xe_id(rsc_entry));
    5083           0 :             break;
    5084             : 
    5085           0 :         } else if (counter < start_index) {
    5086           0 :             crm_trace("Skipping %s: old", pcmk__xe_id(rsc_op));
    5087           0 :             continue;
    5088             :         }
    5089           0 :         op_list = g_list_append(op_list, rsc_op);
    5090             :     }
    5091             : 
    5092           0 :     g_list_free(sorted_op_list);
    5093           0 :     return op_list;
    5094             : }
    5095             : /*!
                     :  * \brief Find action history entries in the scheduler input's status
                     :  *
                     :  * Walks the PCMK__XE_NODE_STATE children of the PCMK_XE_STATUS section of
                     :  * scheduler->input. For each one whose node can be found, the node's
                     :  * online status is determined, and if the node is online or
                     :  * pcmk_sched_fencing_enabled is set, extract_operations() is called for
                     :  * each of its PCMK__XE_LRM_RESOURCE entries and the results are
                     :  * concatenated.
                     :  *
                     :  * \param[in]     rsc            If not NULL, only include history for the
                     :  *                               resource with this ID
                     :  * \param[in]     node           If not NULL, only consider node state
                     :  *                               entries with this node name
                     :  * \param[in]     active_filter  Passed to extract_operations()
                     :  * \param[in,out] scheduler      Scheduler data
                     :  *
                     :  * \return List of history entry XML nodes (NULL if there is no status
                     :  *         section or nothing matched)
                     :  *
                     :  * \note Both filters are compared using pcmk__str_casei.
                     :  * \note As side effects, determine_online_status() or
                     :  *       determine_remote_online_status() is called for every node
                     :  *       considered, and extract_operations() adds attributes to the
                     :  *       history XML it returns.
                     :  */
    5096             : GList *
    5097           0 : find_operations(const char *rsc, const char *node, gboolean active_filter,
    5098             :                 pcmk_scheduler_t *scheduler)
    5099             : {
    5100           0 :     GList *output = NULL;
    5101           0 :     GList *intermediate = NULL;
    5102             : 
    5103           0 :     xmlNode *tmp = NULL;
    5104           0 :     xmlNode *status = pcmk__xe_first_child(scheduler->input, PCMK_XE_STATUS,
    5105             :                                            NULL, NULL);
    5106             : 
    5107           0 :     pcmk_node_t *this_node = NULL;
    5108             : 
    5109           0 :     xmlNode *node_state = NULL;
    5110             : 
    5111           0 :     CRM_CHECK(status != NULL, return NULL);
    5112             : 
    5113           0 :     for (node_state = pcmk__xe_first_child(status, NULL, NULL, NULL);
    5114           0 :          node_state != NULL; node_state = pcmk__xe_next(node_state)) {
    5115             : 
    5116           0 :         if (pcmk__xe_is(node_state, PCMK__XE_NODE_STATE)) {
    5117           0 :             const char *uname = crm_element_value(node_state, PCMK_XA_UNAME);
    5118             : 
    5119           0 :             if (node != NULL && !pcmk__str_eq(uname, node, pcmk__str_casei)) {
    5120           0 :                 continue;
    5121             :             }
    5122             : 
    5123           0 :             this_node = pcmk_find_node(scheduler, uname);
    5124           0 :             if(this_node == NULL) {
    5125           0 :                 CRM_LOG_ASSERT(this_node != NULL);
    5126           0 :                 continue;
    5127             : 
    5128           0 :             } else if (pcmk__is_pacemaker_remote_node(this_node)) {
    5129           0 :                 determine_remote_online_status(scheduler, this_node);
    5130             : 
    5131             :             } else {
    5132           0 :                 determine_online_status(node_state, this_node, scheduler);
    5133             :             }
    5134             : 
    5135           0 :             if (this_node->details->online
    5136           0 :                 || pcmk_is_set(scheduler->flags, pcmk_sched_fencing_enabled)) {
    5137             :                 /* offline nodes run no resources...
    5138             :                  * unless stonith is enabled in which case we need to
    5139             :                  *   make sure rsc start events happen after the stonith
    5140             :                  */
    5141           0 :                 xmlNode *lrm_rsc = NULL;
    5142             : 
    5143           0 :                 tmp = pcmk__xe_first_child(node_state, PCMK__XE_LRM, NULL,
    5144             :                                            NULL);
    5145           0 :                 tmp = pcmk__xe_first_child(tmp, PCMK__XE_LRM_RESOURCES, NULL,
    5146             :                                            NULL);
    5147             : 
    5148           0 :                 for (lrm_rsc = pcmk__xe_first_child(tmp, NULL, NULL, NULL);
    5149           0 :                      lrm_rsc != NULL; lrm_rsc = pcmk__xe_next(lrm_rsc)) {
    5150             : 
    5151           0 :                     if (pcmk__xe_is(lrm_rsc, PCMK__XE_LRM_RESOURCE)) {
    5152           0 :                         const char *rsc_id = crm_element_value(lrm_rsc,
    5153             :                                                                PCMK_XA_ID);
    5154             : 
    5155           0 :                         if (rsc != NULL && !pcmk__str_eq(rsc_id, rsc, pcmk__str_casei)) {
    5156           0 :                             continue;
    5157             :                         }
    5158             : 
    5159           0 :                         intermediate = extract_operations(uname, rsc_id, lrm_rsc, active_filter);
    5160           0 :                         output = g_list_concat(output, intermediate);
    5161             :                     }
    5162             :                 }
    5163             :             }
    5164             :         }
    5165             :     }
    5166             : 
    5167           0 :     return output;
    5168             : }

Generated by: LCOV version 1.14