Refactor netconf stress suite
[integration/test.git] / csit / suites / netconf / clusteringscale / topology_owner_ha.robot
index 63a26a03b8ff58f1fe6e255260896e74a69d2ff4..ddde3a9dbcc744d2dce9a5c269fbdfeed3c085e7 100644 (file)
@@ -13,19 +13,20 @@ Documentation     Suite for High Availability testing netconf topology owner und
 ...
 ...               This suite uses a Python utility to continuously configure/deconfigure
 ...               device connections against devices simulated by testtool.
-...               The utility sends requests to the member which is Leader for topology config shard.
+...               The utility sends requests to the member which is leader for topology config shard.
 ...
 ...               To avoid excessive resource consumption, the utility deconfigures old devices.
 ...               In a stationary state, number of config items oscillates between
 ...               ${CONFIGURED_DEVICES_LIMIT} and 1 + ${CONFIGURED_DEVICES_LIMIT}.
 ...
 ...               The only tested HA event so far is reboot of the member
-...               which is Owner of netconf topology-manager entity.
-...               This suite assumes the Owner and the Leader are not co-located.
+...               which is the leader of entity-ownership operational shard.
+...               This suite assumes the entity-ownership operational shard leader and
+...               topology config shard leader are not co-located.
 ...
 ...               Number of devices is configurable, wait times are computed from that,
 ...               as it takes some time to initialize connections.
-...               Ideally, the utility should go through half of devices during Owner downtime.
+...               Ideally, the utility should go through half of devices during entity-ownership leader downtime.
 ...
 ...               If there is a period when netconf manager ignores deletions in config datastore,
 ...               the devices created previously could "leak", meaning the number of
@@ -46,10 +47,11 @@ Default Tags      @{TAGS_CRITICAL}
 Library           OperatingSystem
 Library           SSHLibrary    timeout=10s
 Library           String    # for Get_Regexp_Matches
+Resource          ${CURDIR}/../../../libraries/ClusterAdmin.robot
 Resource          ${CURDIR}/../../../libraries/ClusterManagement.robot
 Resource          ${CURDIR}/../../../libraries/KarafKeywords.robot
 Resource          ${CURDIR}/../../../libraries/NetconfKeywords.robot
-Resource          ${CURDIR}/../../../libraries/RemeoteBash.robot
+Resource          ${CURDIR}/../../../libraries/RemoteBash.robot
 Resource          ${CURDIR}/../../../libraries/SetupUtils.robot
 Resource          ${CURDIR}/../../../libraries/SSHKeywords.robot
 Resource          ${CURDIR}/../../../libraries/TemplatedRequests.robot
@@ -66,8 +68,8 @@ ${DEVICE_SET_SIZE}    30
 @{TAGS_NONCRITICAL}    clustering    netconf
 
 *** Test Cases ***
-Locate_Managers
-    [Documentation]    Detect location of Leader and Owner and store related data into suite variables.
+Setup_Leaders_Location
+    [Documentation]    Detect location of topology(config) and entity-ownership(operational) leaders and store related data into suite variables.
     ...    This cannot be part of Suite Setup, as Utils.Get_Index_From_List_Of_Dictionaries calls BuiltIn.Set_Test_Variable.
     ...    WUKS are used, as location failures are probably due to booting process, not bugs.
     ${topology_config_leader_index}    ${candidates} =    BuiltIn.Wait_Until_Keyword_Succeeds    3x    2s    ClusterManagement.Get_Leader_And_Followers_For_Shard    shard_name=topology
@@ -77,13 +79,12 @@ Locate_Managers
     BuiltIn.Set_Suite_Variable    \${topology_config_leader_ip}
     ${topology_config_leader_http_session} =    Resolve_Http_Session_For_Member    ${topology_config_leader_index}
     BuiltIn.Set_Suite_Variable    \${topology_config_leader_http_session}
-    ${netconf_manager_owner_index}    ${candidates} =    BuiltIn.Wait_Until_Keyword_Succeeds    3x    2s    ClusterManagement.Get_Owner_And_Candidates_For_Type_And_Id    type=topology-netconf
-    ...    id=/general-entity:entity[general-entity:name='topology-manager']    member_index=1
-    BuiltIn.Set_Suite_Variable    \${netconf_manager_owner_index}
-    ${netconf_manager_owner_ip} =    ClusterManagement.Resolve_Ip_Address_For_Member    ${netconf_manager_owner_index}
-    BuiltIn.Set_Suite_Variable    \${netconf_manager_owner_ip}
-    ${netconf_manager_owner_http_session} =    Resolve_Http_Session_For_Member    ${netconf_manager_owner_index}
-    BuiltIn.Set_Suite_Variable    \${netconf_manager_owner_http_session}
+    ${entity_ownership_leader_index}    Change_Entity_Ownership_Leader_If_Needed    ${topology_config_leader_index}
+    BuiltIn.Set_Suite_Variable    \${entity_ownership_leader_index}
+    ${entity_ownership_leader_ip} =    ClusterManagement.Resolve_Ip_Address_For_Member    ${entity_ownership_leader_index}
+    BuiltIn.Set_Suite_Variable    \${entity_ownership_leader_ip}
+    ${entity_ownership_leader_http_session} =    Resolve_Http_Session_For_Member    ${entity_ownership_leader_index}
+    BuiltIn.Set_Suite_Variable    \${entity_ownership_leader_http_session}
 
 Start_Testtool
     [Documentation]    Deploy and start test tool on its separate SSH session.
@@ -110,20 +111,20 @@ Wait_For_Config_Items
     ${timeout} =    Get_Typical_Time
     BuiltIn.Wait_Until_Keyword_Succeeds    ${timeout}    1s    Check_Config_Items_Lower_Bound
 
-Reboot_Manager_Owner
-    [Documentation]    Kill and restart member where netconf topology manager was, including removal of persisted data.
-    ...    After cluster sync, sleep additional time to ensure manager processes requests with the rebooted member fully rejoined.
+Reboot_Entity_Ownership_Leader
+    [Documentation]    Kill and restart member where entity-ownership shard leader was, including removal of persisted data.
+    ...    After cluster sync, sleep additional time to ensure entity-ownership shard processes requests with the rebooted member fully rejoined.
     [Tags]    @{TAGS_NONCRITICAL}    # To avoid long WUKS list expanded in log.html
-    ClusterManagement.Kill_Single_Member    ${netconf_manager_owner_index}
-    ${owner_list} =    BuiltIn.Create_List    ${netconf_manager_owner_index}
-    ClusterManagement.Start_Single_Member    ${netconf_manager_owner_index}
+    ClusterManagement.Kill_Single_Member    ${entity_ownership_leader_index}
+    ${owner_list} =    BuiltIn.Create_List    ${entity_ownership_leader_index}
+    ClusterManagement.Start_Single_Member    ${entity_ownership_leader_index}
     BuiltIn.Comment    FIXME: Replace sleep with WUKS when it becomes clear what to wait for.
     ${sleep_time} =    Get_Typical_Time    coefficient=3.0
     BuiltIn.Sleep    ${sleep_time}
 
 Stop_Configurer
     [Documentation]    Write ctrl+c, download the log, read its contents and match expected patterns.
-    RemeoteBash.Write_Bare_Ctrl_C
+    RemoteBash.Write_Bare_Ctrl_C
     ${output} =    SSHLibrary.Read_Until_Prompt
     BuiltIn.Log    ${output}
     SSHLibrary.Get_File    ${log_filename}
@@ -192,3 +193,16 @@ Get_Typical_Time
     [Arguments]    ${coefficient}=1.0
     [Documentation]    Return number of seconds typical for given scale variables.
     BuiltIn.Run_Keyword_And_Return    BuiltIn.Evaluate    ${coefficient} * ${CONNECTION_SLEEP} * ${CONFIGURED_DEVICES_LIMIT}
+
+Change_Entity_Ownership_Leader_If_Needed
+    [Arguments]    ${topology_config_leader_idx}
+    [Documentation]    Move entity-ownership (operational) shard leader if it is on the same node as topology (config) shard leader.
+    ...    TODO: move keyword to a common resource, e.g. ShardStability
+    ${entity_ownership_leader_index_old}    ${candidates} =    BuiltIn.Wait_Until_Keyword_Succeeds    3x    2s    ClusterManagement.Get_Leader_And_Followers_For_Shard    shard_name=entity-ownership
+    ...    shard_type=operational
+    BuiltIn.Return_From_Keyword_If    ${topology_config_leader_idx} != ${entity_ownership_leader_index_old}    ${entity_ownership_leader_index_old}
+    ${idx}=    Collections.Get_From_List    ${candidates}    0
+    ClusterAdmin.Make_Leader_Local    ${idx}    entity-ownership    operational
+    ${entity_ownership_leader_index}    ${candidates} =    BuiltIn.Wait_Until_Keyword_Succeeds    60s    3s    ClusterManagement.Verify_Shard_Leader_Elected    entity-ownership
+    ...    operational    ${True}    ${entity_ownership_leader_index_old}    verify_restconf=False
+    BuiltIn.Return_From_Keyword    ${entity_ownership_leader_index}