Enable check for status after member starts up
[integration/test.git] / csit / libraries / ClusterManagement.robot
index 719a1c1d7466eb154435cb7d7964cfa794363ace..13e87b6cdef1b5d4ac28fbbedadfdb174f388294 100644 (file)
@@ -44,16 +44,23 @@ ${GC_LOG_PATH}    ${KARAF_HOME}/data/log
 ${JAVA_HOME}      ${EMPTY}    # releng/builder scripts should provide correct value
 ${JOLOKIA_CONF_SHARD_MANAGER_URI}    jolokia/read/org.opendaylight.controller:Category=ShardManager,name=shard-manager-config,type=DistributedConfigDatastore
 ${JOLOKIA_OPER_SHARD_MANAGER_URI}    jolokia/read/org.opendaylight.controller:Category=ShardManager,name=shard-manager-operational,type=DistributedOperationalDatastore
+${JOLOKIA_CONFIG_LOCAL_SHARDS_URI}    jolokia/read/org.opendaylight.controller:type=DistributedConfigDatastore,Category=ShardManager,name=shard-manager-config/LocalShards
+${JOLOKIA_OPER_LOCAL_SHARDS_URI}    jolokia/read/org.opendaylight.controller:type=DistributedOperationalDatastore,Category=ShardManager,name=shard-manager-operational/LocalShards
 ${JOLOKIA_READ_URI}    jolokia/read/org.opendaylight.controller
 # Bug 9044 workaround: delete etc/host.key before restart.
 @{ODL_DEFAULT_DATA_PATHS}    tmp/    data/    cache/    snapshots/    journal/    etc/opendaylight/current/    etc/host.key
 ${RESTCONF_MODULES_DIR}    ${CURDIR}/../variables/restconf/modules
-${SINGLETON_NETCONF_DEVICE_ID_PREFIX}    /odl-general-entity:entity[odl-general-entity:name='KeyedInstanceIdentifier{targetType=interface org.opendaylight.yang.gen.v1.urn.tbd.params.xml.ns.yang.network.topology.rev131021.network.topology.topology.Node, path=[org.opendaylight.yang.gen.v1.urn.tbd.params.xml.ns.yang.network.topology.rev131021.NetworkTopology, org.opendaylight.yang.gen.v1.urn.tbd.params.xml.ns.yang.network.topology.rev131021.network.topology.Topology[key=TopologyKey [_topologyId=Uri [_value=topology-netconf]]], org.opendaylight.yang.gen.v1.urn.tbd.params.xml.ns.yang.network.topology.rev131021.network.topology.topology.Node[key=NodeKey [_nodeId=Uri [_value=
-${SINGLETON_NETCONF_DEVICE_ID_SUFFIX}    ]]]]}']
+${SINGLETON_NETCONF_DEVICE_ID_PREFIX_OLD}    /odl-general-entity:entity[odl-general-entity:name='KeyedInstanceIdentifier{targetType=interface org.opendaylight.yang.gen.v1.urn.tbd.params.xml.ns.yang.network.topology.rev131021.network.topology.topology.Node, path=[org.opendaylight.yang.gen.v1.urn.tbd.params.xml.ns.yang.network.topology.rev131021.NetworkTopology, org.opendaylight.yang.gen.v1.urn.tbd.params.xml.ns.yang.network.topology.rev131021.network.topology.Topology[key=TopologyKey [_topologyId=Uri [_value=topology-netconf]]], org.opendaylight.yang.gen.v1.urn.tbd.params.xml.ns.yang.network.topology.rev131021.network.topology.topology.Node[key=NodeKey [_nodeId=Uri [_value=
+${SINGLETON_NETCONF_DEVICE_ID_SUFFIX_OLD}    ]]]]}']
+${SINGLETON_NETCONF_DEVICE_ID_PREFIX}    /odl-general-entity:entity[odl-general-entity:name='KeyedInstanceIdentifier{targetType=interface org.opendaylight.yang.gen.v1.urn.tbd.params.xml.ns.yang.network.topology.rev131021.network.topology.topology.Node, path=[org.opendaylight.yang.gen.v1.urn.tbd.params.xml.ns.yang.network.topology.rev131021.NetworkTopology, org.opendaylight.yang.gen.v1.urn.tbd.params.xml.ns.yang.network.topology.rev131021.network.topology.Topology[key=TopologyKey{_topologyId=Uri{_value=topology-netconf}}], org.opendaylight.yang.gen.v1.urn.tbd.params.xml.ns.yang.network.topology.rev131021.network.topology.topology.Node[key=NodeKey{_nodeId=Uri{_value=
+${SINGLETON_NETCONF_DEVICE_ID_SUFFIX}    }}]]}']
 ${SINGLETON_BGPCEP_DEVICE_ID_PREFIX}    /odl-general-entity:entity[odl-general-entity:name='
 ${SINGLETON_BGPCEP_DEVICE_ID_SUFFIX}    -service-group']
+${SINGLETON_SXP_DEVICE_ID_PREFIX}    /odl-general-entity:entity[odl-general-entity:name='
+${SINGLETON_SXP_DEVICE_ID_SUFFIX}    ']
 ${SINGLETON_ELECTION_ENTITY_TYPE}    org.opendaylight.mdsal.ServiceEntityType
 ${SINGLETON_CHANGE_OWNERSHIP_ENTITY_TYPE}    org.opendaylight.mdsal.AsyncServiceCloseEntityType
+${NODE_ROLE_INDEX_START}    1
 ${NODE_START_COMMAND}    ${KARAF_HOME}/bin/start
 ${NODE_STOP_COMMAND}    ${KARAF_HOME}/bin/stop
 ${NODE_KARAF_COUNT_COMMAND}    ps axf | grep org.apache.karaf | grep -v grep | wc -l
@@ -151,7 +158,8 @@ Get_Raft_Property_From_Shard_Member
     # TODO: Does the used URI tend to generate large data which floods log.html?
     BuiltIn.Run_Keyword_If    ${verify_restconf}    TemplatedRequests.Get_As_Json_Templated    session=${session}    folder=${RESTCONF_MODULES_DIR}    verify=False    http_timeout=${http_timeout}
     ${type_class} =    Resolve_Shard_Type_Class    shard_type=${shard_type}
-    ${uri} =    BuiltIn.Set_Variable    ${JOLOKIA_READ_URI}:Category=Shards,name=member-${member_index}-shard-${shard_name}-${shard_type},type=${type_class}
+    ${cluster_index} =    Evaluate    ${member_index}+${NODE_ROLE_INDEX_START}-1
+    ${uri} =    BuiltIn.Set_Variable    ${JOLOKIA_READ_URI}:Category=Shards,name=member-${cluster_index}-shard-${shard_name}-${shard_type},type=${type_class}
     ${data_text} =    TemplatedRequests.Get_As_Json_From_Uri    uri=${uri}    session=${session}    http_timeout=${http_timeout}
     ${data_object} =    RequestsLibrary.To_Json    ${data_text}
     ${value} =    Collections.Get_From_Dictionary    ${data_object}    value
@@ -251,11 +259,11 @@ Get_Owner_And_Candidates_For_Device_Singleton_Netconf
     ...    Parsing method is set as netconf (using netconf device id prefix and suffix)
     # Get election entity type results
     ${type} =    BuiltIn.Set_Variable    ${SINGLETON_ELECTION_ENTITY_TYPE}
-    ${id} =    BuiltIn.Set_Variable    ${SINGLETON_NETCONF_DEVICE_ID_PREFIX}${device_name}${SINGLETON_NETCONF_DEVICE_ID_SUFFIX}
+    ${id} =    CompareStream.Set_Variable_If_At_Least_Fluorine    ${SINGLETON_NETCONF_DEVICE_ID_PREFIX}${device_name}${SINGLETON_NETCONF_DEVICE_ID_SUFFIX}    ${SINGLETON_NETCONF_DEVICE_ID_PREFIX_OLD}${device_name}${SINGLETON_NETCONF_DEVICE_ID_SUFFIX_OLD}
     ${owner_1}    ${candidate_list_1} =    Get_Owner_And_Candidates_For_Type_And_Id    ${type}    ${id}    ${member_index}    http_timeout=${http_timeout}
     # Get change ownership entity type results
     ${type} =    BuiltIn.Set_Variable    ${SINGLETON_CHANGE_OWNERSHIP_ENTITY_TYPE}
-    ${id} =    BuiltIn.Set_Variable    ${SINGLETON_NETCONF_DEVICE_ID_PREFIX}${device_name}${SINGLETON_NETCONF_DEVICE_ID_SUFFIX}
+    ${id} =    CompareStream.Set_Variable_If_At_Least_Fluorine    ${SINGLETON_NETCONF_DEVICE_ID_PREFIX}${device_name}${SINGLETON_NETCONF_DEVICE_ID_SUFFIX}    ${SINGLETON_NETCONF_DEVICE_ID_PREFIX_OLD}${device_name}${SINGLETON_NETCONF_DEVICE_ID_SUFFIX_OLD}
     ${owner_2}    ${candidate_list_2} =    Get_Owner_And_Candidates_For_Type_And_Id    ${type}    ${id}    ${member_index}    http_timeout=${http_timeout}
     # Owners must be same, if not, there is still some election or change ownership in progress
     BuiltIn.Should_Be_Equal_As_Integers    ${owner_1}    ${owner_2}    Owners for device ${device_name} are not same
@@ -276,6 +284,21 @@ Get_Owner_And_Candidates_For_Device_Singleton_Bgpcep
     BuiltIn.Should_Be_Equal_As_Integers    ${owner_1}    ${owner_2}    Owners for device ${device_name} are not same
     [Return]    ${owner_1}    ${candidate_list_1}
 
+Get_Owner_And_Candidates_For_Device_Singleton_Sxp
+    [Arguments]    ${device_name}    ${member_index}    ${http_timeout}=${EMPTY}
+    [Documentation]    Asks member ${member_index} for the owner and the candidate list of the SXP device ${device_name}.
+    ...    The election and change-ownership entity types are both queried with the same entity id,
+    ...    and the keyword fails when the two reported owners differ.
+    # One entity id serves both lookups: SXP prefix + device name + SXP suffix.
+    ${entity_id} =    BuiltIn.Set_Variable    ${SINGLETON_SXP_DEVICE_ID_PREFIX}${device_name}${SINGLETON_SXP_DEVICE_ID_SUFFIX}
+    # Lookup under the election entity type.
+    ${election_owner}    ${election_candidates} =    Get_Owner_And_Candidates_For_Type_And_Id    ${SINGLETON_ELECTION_ENTITY_TYPE}    ${entity_id}    ${member_index}    http_timeout=${http_timeout}
+    # Lookup under the change-ownership entity type.
+    ${change_owner}    ${change_candidates} =    Get_Owner_And_Candidates_For_Type_And_Id    ${SINGLETON_CHANGE_OWNERSHIP_ENTITY_TYPE}    ${entity_id}    ${member_index}    http_timeout=${http_timeout}
+    # A mismatch means an election or ownership change is still in progress.
+    BuiltIn.Should_Be_Equal_As_Integers    ${election_owner}    ${change_owner}    Owners for device ${device_name} are not same
+    [Return]    ${election_owner}    ${election_candidates}
+
 Get_Owner_And_Candidates_For_Device
     [Arguments]    ${device_name}    ${device_type}    ${member_index}    ${http_timeout}=${EMPTY}
     [Documentation]    Returns the owner and a list of candidates for the SB device ${device_name} of type ${device_type}. Request is sent to member ${member_index}.
@@ -398,15 +421,18 @@ Kill_Members_From_List_Or_All
     [Return]    ${updated_index_list}
 
 Stop_Single_Member
-    [Arguments]    ${member}    ${original_index_list}=${EMPTY}    ${confirm}=True
+    [Arguments]    ${member}    ${original_index_list}=${EMPTY}    ${confirm}=True    ${msg}=${EMPTY}
     [Documentation]    Convenience keyword that stops the specified member of the cluster.
     ...    The KW will return a list of available members: \${updated index_list}=\${original_index_list}-\${member}
     ${index_list} =    ClusterManagement__Build_List    ${member}
+    ${member_ip} =    Return_Member_IP    ${member}
+    ${msg} =    Builtin.Set Variable If    "${msg}" == "${EMPTY}"    Stopping ODL${member} ${member_ip}    Stopping ODL${member} ${member_ip}, ${msg}
+    KarafKeywords.Log_Message_To_Controller_Karaf    ${msg}
     ${updated_index_list} =    Stop_Members_From_List_Or_All    ${index_list}    ${original_index_list}    ${confirm}
     [Return]    ${updated_index_list}
 
 Stop_Members_From_List_Or_All
-    [Arguments]    ${member_index_list}=${EMPTY}    ${original_index_list}=${EMPTY}    ${confirm}=True    ${timeout}=120s
+    [Arguments]    ${member_index_list}=${EMPTY}    ${original_index_list}=${EMPTY}    ${confirm}=True    ${timeout}=240s
     [Documentation]    If the list is empty, stops all ODL instances. Otherwise stop members based on \${stop_index_list}
     ...    If \${confirm} is True, verify stopped instances are not there anymore.
     ...    The KW will return a list of available members: \${updated index_list}=\${original_index_list}-\${member_index_list}
@@ -422,13 +448,18 @@ Stop_Members_From_List_Or_All
     [Return]    ${updated_index_list}
 
 Start_Single_Member
-    [Arguments]    ${member}    ${wait_for_sync}=True    ${timeout}=300s
+    [Arguments]    ${member}    ${wait_for_sync}=True    ${timeout}=300s    ${msg}=${EMPTY}    ${check_system_status}=True    ${verify_restconf}=True
+    ...    ${service_list}=${EMPTY_LIST}
     [Documentation]    Convenience keyword that starts the specified member of the cluster.
     ${index_list} =    ClusterManagement__Build_List    ${member}
-    Start_Members_From_List_Or_All    ${index_list}    ${wait_for_sync}    ${timeout}
+    ${member_ip} =    Return_Member_IP    ${member}
+    ${msg} =    Builtin.Set Variable If    "${msg}" == "${EMPTY}"    Starting ODL${member} ${member_ip}    Starting ODL${member} ${member_ip}, ${msg}
+    KarafKeywords.Log_Message_To_Controller_Karaf    ${msg}
+    Start_Members_From_List_Or_All    ${index_list}    ${wait_for_sync}    ${timeout}    check_system_status=${check_system_status}    verify_restconf=${verify_restconf}    service_list=${service_list}
 
 Start_Members_From_List_Or_All
     [Arguments]    ${member_index_list}=${EMPTY}    ${wait_for_sync}=True    ${timeout}=300s    ${karaf_home}=${EMPTY}    ${export_java_home}=${EMPTY}    ${gc_log_dir}=${EMPTY}
+    ...    ${check_system_status}=True    ${verify_restconf}=True    ${service_list}=${EMPTY_LIST}
     [Documentation]    If the list is empty, start all cluster members. Otherwise, start members based on present indices.
     ...    If ${wait_for_sync}, wait for cluster sync on listed members.
     ...    Optionally karaf_home can be overriden. Optionally specific JAVA_HOME is used for starting.
@@ -439,11 +470,29 @@ Start_Members_From_List_Or_All
     ${gc_filepath} =    BuiltIn.Set_Variable_If    """${karaf_home}""" != ""    ${karaf_home}/data/log/gc_${epoch}.log    ${GC_LOG_PATH}/gc_${epoch}.log
     ${gc_options} =    BuiltIn.Set_Variable_If    "docker" not in """${node_start_command}"""    -XX:+PrintGCDetails -XX:+PrintGCDateStamps -Xloggc:${gc_filepath}    ${EMPTY}
     Run_Bash_Command_On_List_Or_All    command=${command} ${gc_options}    member_index_list=${member_index_list}
-    BuiltIn.Return_From_Keyword_If    not ${wait_for_sync}
-    BuiltIn.Wait_Until_Keyword_Succeeds    ${timeout}    10s    Check_Cluster_Is_In_Sync    member_index_list=${member_index_list}
-    # TODO: Do we also want to check Shard Leaders here?
+    BuiltIn.Wait_Until_Keyword_Succeeds    ${timeout}    10s    Verify_Members_Are_Ready    ${member_index_list}    ${wait_for_sync}    ${verify_restconf}
+    ...    ${check_system_status}    ${service_list}
     [Teardown]    Run_Bash_Command_On_List_Or_All    command=netstat -pnatu | grep 2550
 
+Verify_Members_Are_Ready
+    [Arguments]    ${member_index_list}    ${verify_cluster_sync}    ${verify_restconf}    ${verify_system_status}    ${service_list}
+    [Documentation]    Runs the requested readiness checks after startup; each flag enables one check.
+    ...    If ${verify_cluster_sync}, runs Check_Cluster_Is_In_Sync on ${member_index_list}.
+    ...    If ${verify_restconf}, runs Verify_Restconf_Is_Available on ${member_index_list}.
+    ...    If ${verify_system_status}, runs Check Status Of Services Is OPERATIONAL with ${service_list}; that keyword loops over all ODL nodes, not only the listed members.
+    BuiltIn.Run_Keyword_If    ${verify_cluster_sync}    Check_Cluster_Is_In_Sync    ${member_index_list}
+    BuiltIn.Run_Keyword_If    ${verify_restconf}    Verify_Restconf_Is_Available    ${member_index_list}
+    # Backward compatibility: some consumers do not pass a service list, so the service check is skipped when the
+    # stringified ${service_list} equals "[[]]". NOTE(review): confirm this is how the default ${EMPTY_LIST} renders here.
+    BuiltIn.Run_Keyword_If    ${verify_system_status} and ("${service_list}" != "[[]]")    ClusterManagement.Check Status Of Services Is OPERATIONAL    @{service_list}
+
+Verify_Restconf_Is_Available
+    [Arguments]    ${member_index_list}    # members to probe; expanded through List_Indices_Or_All below
+    ${members_to_check} =    List_Indices_Or_All    given_list=${member_index_list}
+    : FOR    ${member_index}    IN    @{members_to_check}
+    \    ${http_session} =    Resolve_Http_Session_For_Member    member_index=${member_index}
+    \    TemplatedRequests.Get_As_Json_Templated    session=${http_session}    folder=${RESTCONF_MODULES_DIR}    verify=False    # RESTCONF modules request on that member's session
+
 Freeze_Single_Member
     [Arguments]    ${member}
     [Documentation]    Convenience keyword that stops the specified member of the cluster by freezing the jvm.
@@ -829,3 +878,18 @@ Return_Member_IP
     ${member_int} =    BuiltIn.Convert_To_Integer    ${member_index}
     ${member_ip} =    Collections.Get_From_Dictionary    dictionary=${ClusterManagement__index_to_ip_mapping}    key=${member_int}
     [Return]    ${member_ip}
+
+Check Service Status
+    [Arguments]    ${odl_ip}    ${system_ready_state}    ${service_state}    @{service_list}
+    [Documentation]    Runs the karaf shell command showSvcStatus on ${odl_ip} (adding "-n ${odl_ip}" when NUM_ODL_SYSTEM is greater than 1), then checks that the output contains ${system_ready_state} and shows ${service_state} for each given service.
+    ${status_command} =    BuiltIn.Set_Variable_If    ${NUM_ODL_SYSTEM} > 1    showSvcStatus -n ${odl_ip}    showSvcStatus
+    ${status_output} =    KarafKeywords.Issue_Command_On_Karaf_Console    ${status_command}    ${odl_ip}    ${KARAF_SHELL_PORT}
+    BuiltIn.Should_Contain    ${status_output}    ${system_ready_state}
+    : FOR    ${service}    IN    @{service_list}
+    \    BuiltIn.Should_Match_Regexp    ${status_output}    ${service} +: ${service_state}
+
+Check Status Of Services Is OPERATIONAL
+    [Arguments]    @{service_list}
+    [Documentation]    For each of the NUM_ODL_SYSTEM nodes, runs Check Service Status expecting system state ACTIVE and service state OPERATIONAL for the given services.
+    : FOR    ${node_number}    IN RANGE    1    ${NUM_ODL_SYSTEM}+1
+    \    ClusterManagement.Check Service Status    ${ODL_SYSTEM_${node_number}_IP}    ACTIVE    OPERATIONAL    @{service_list}