Netconf configuration stress HA suites
[integration/test.git] / csit / suites / netconf / clusteringscale / topology_leader_ha.robot
1 *** Settings ***
2 Documentation     Suite for High Availability testing config topology shard Leader under stress.
3 ...
4 ...               Copyright (c) 2016 Cisco Systems, Inc. and others. All rights reserved.
5 ...
6 ...               This program and the accompanying materials are made available under the
7 ...               terms of the Eclipse Public License v1.0 which accompanies this distribution,
8 ...               and is available at http://www.eclipse.org/legal/epl-v10.html
9 ...
10 ...
11 ...               This is a close analogue of topology_owner_ha.robot, see Documentation there.
12 ...               The difference is that here the requests are sent towards Owner,
13 ...               and the Leader node is rebooted.
14 ...
15 ...               No real clustering Bugs are expected to be discovered by this suite,
16 ...               except maybe some Restconf ones.
17 ...               But as this suite was easy to create, it may as well be run.
18 Suite Setup       Setup_Everything
19 Suite Teardown    Teardown_Everything
20 Test Setup        SetupUtils.Setup_Test_With_Logging_And_Without_Fast_Failing
21 Test Teardown     ${DEFAULT_TEARDOWN_KEYWORD}
22 Default Tags      @{TAGS_CRITICAL}
23 Library           OperatingSystem
24 Library           SSHLibrary    timeout=10s
25 Library           String    # for Get_Regexp_Matches
26 Resource          ${CURDIR}/../../../libraries/ClusterManagement.robot
27 Resource          ${CURDIR}/../../../libraries/KarafKeywords.robot
28 Resource          ${CURDIR}/../../../libraries/NetconfKeywords.robot
29 Resource          ${CURDIR}/../../../libraries/SetupUtils.robot
30 Resource          ${CURDIR}/../../../libraries/SSHKeywords.robot
31 Resource          ${CURDIR}/../../../libraries/TemplatedRequests.robot
32 Resource          ${CURDIR}/../../../libraries/Utils.robot
33 Variables         ${CURDIR}/../../../variables/Variables.py
34
35 *** Variables ***
36 ${CONFIGURED_DEVICES_LIMIT}    20    # Passed to configurer.py as --disconndelay; also the bound used by the config and operational item checks.
37 ${CONNECTION_SLEEP}    1.2    # Passed to configurer.py as --connsleep; also a factor in Get_Typical_Time.
38 ${DEFAULT_TEARDOWN_KEYWORD}    SetupUtils.Teardown_Test_Show_Bugs_If_Test_Failed    # Used as Test Teardown and inside the Start_Testtool teardown.
39 ${DEVICE_BASE_NAME}    netconf-test-device    # Passed to configurer.py as --basename; its occurrences are counted in topology data.
40 ${DEVICE_SET_SIZE}    30    # Testtool device-count and the configurer.py --devices value.
41 @{TAGS_CRITICAL}    critical    @{TAGS_NONCRITICAL}    # Default Tags of this suite.
42 @{TAGS_NONCRITICAL}    clustering    netconf    # Tags applied to Reboot_Topology_Leader instead of the defaults.
43
44 *** Test Cases ***
45 Locate_Managers
46     [Documentation]    Find the member holding the config topology shard Leader and the member owning the topology-manager entity,
47     ...    then publish index, IP address and HTTP session of each as suite variables. Not done in Suite Setup, as Utils.Get_Index_From_List_Of_Dictionaries calls BuiltIn.Set_Test_Variable.
48     ...    Both lookups are retried (3x, 2s apart), as early failures are probably due to booting process, not bugs.
49     ${leader_index}    ${followers} =    BuiltIn.Wait_Until_Keyword_Succeeds    3x    2s    ClusterManagement.Get_Leader_And_Followers_For_Shard    shard_name=topology
50     ...    shard_type=config
51     BuiltIn.Set_Suite_Variable    \${topology_config_leader_index}    ${leader_index}
52     ${leader_ip} =    ClusterManagement.Resolve_Ip_Address_For_Member    ${leader_index}
53     BuiltIn.Set_Suite_Variable    \${topology_config_leader_ip}    ${leader_ip}
54     ${leader_session} =    Resolve_Http_Session_For_Member    ${leader_index}
55     BuiltIn.Set_Suite_Variable    \${topology_config_leader_http_session}    ${leader_session}
56     ${owner_index}    ${owner_candidates} =    BuiltIn.Wait_Until_Keyword_Succeeds    3x    2s    ClusterManagement.Get_Owner_And_Candidates_For_Type_And_Id    type=topology-netconf
57     ...    id=/general-entity:entity[general-entity:name='topology-manager']    member_index=1
58     BuiltIn.Set_Suite_Variable    \${netconf_manager_owner_index}    ${owner_index}
59     ${owner_ip} =    ClusterManagement.Resolve_Ip_Address_For_Member    ${owner_index}
60     BuiltIn.Set_Suite_Variable    \${netconf_manager_owner_ip}    ${owner_ip}
61     ${owner_session} =    Resolve_Http_Session_For_Member    ${owner_index}
62     BuiltIn.Set_Suite_Variable    \${netconf_manager_owner_http_session}    ${owner_session}
63
64 Start_Testtool
65     [Documentation]    Switch to the testtool SSH connection, then deploy and start test tool with ${DEVICE_SET_SIZE} devices. The teardown switches to the configurer connection before running the default teardown.
66     SSHLibrary.Switch_Connection    ${testtool_connection_index}
67     NetconfKeywords.Install_And_Start_Testtool    device-count=${DEVICE_SET_SIZE}    schemas=${CURDIR}/../../../variables/netconf/CRUD/schemas
68     # TODO: Introduce NetconfKeywords.Safe_Install_And_Start_Testtool to avoid teardown manipulation.
69     [Teardown]    BuiltIn.Run_Keywords    SSHLibrary.Switch_Connection    ${configurer_connection_index}
70     ...    AND    ${DEFAULT_TEARDOWN_KEYWORD}
71
72 Start_Configurer
73     [Documentation]    Start configurer.py over SSH with its output redirected to a log file, and fail if the shell prompt comes back (meaning the utility has stopped by itself).
74     ${configurer_log} =    Utils.Get_Log_File_Name    configurer
75     BuiltIn.Set_Suite_Variable    \${log_filename}    ${configurer_log}
76     # TODO: Should things like restconf port/user/password be set from Variables?
77     ${configurer_cmd} =    BuiltIn.Set_Variable    python configurer.py --odladdress ${netconf_manager_owner_ip} --deviceaddress ${TOOLS_SYSTEM_IP} --devices ${DEVICE_SET_SIZE} --disconndelay ${CONFIGURED_DEVICES_LIMIT} --basename ${DEVICE_BASE_NAME} --connsleep ${CONNECTION_SLEEP} &> "${log_filename}"
78     SSHLibrary.Write    ${configurer_cmd}
79     ${read_status}    ${read_text} =    BuiltIn.Run_Keyword_And_Ignore_Error    SSHLibrary.Read_Until_Prompt
80     BuiltIn.Log    ${read_text}
81     BuiltIn.Run_Keyword_If    "${read_status}" != "FAIL"    BuiltIn.Fail    Prompt happened, see Log.
82     # The utility keeps running in this SSH session; Stop_Configurer interrupts it later.
83
84 Wait_For_Config_Items
85     [Documentation]    Poll every second, for up to the typical time, until the config item count reaches the lower bound; fail on timeout.
86     ${typical_time} =    Get_Typical_Time
87     BuiltIn.Wait_Until_Keyword_Succeeds    ${typical_time}    1s    Check_Config_Items_Lower_Bound
88
89 Reboot_Topology_Leader
90     [Documentation]    Kill the member where topology shard Leader was, remove its journals and snapshots, and start it again.
91     ...    Then sleep three times the typical time, presumably so the manager processes requests with the rebooted member rejoined.
92     [Tags]    @{TAGS_NONCRITICAL}    # To avoid long WUKS list expanded in log.html
93     ClusterManagement.Kill_Single_Member    ${topology_config_leader_index}
94     # TODO: Introduce ClusterManagement.Clean_Journals_And_Snapshots_On_Single_Member
95     ${leader_index_list} =    BuiltIn.Create_List    ${topology_config_leader_index}
96     ClusterManagement.Clean_Journals_And_Snapshots_On_List_Or_All    ${leader_index_list}
97     ClusterManagement.Start_Single_Member    ${topology_config_leader_index}
98     BuiltIn.Comment    FIXME: Replace sleep with WUKS when it becomes clear what to wait for.
99     ${settle_time} =    Get_Typical_Time    coefficient=3.0
100     BuiltIn.Sleep    ${settle_time}
101
102 Stop_Configurer
103     [Documentation]    Interrupt the configurer with ctrl+c, fetch its log file and require exactly two delete/put mentions, both with the expected status (delete:200, put:201).
104     Utils.Write_Bare_Ctrl_C
105     ${prompt_text} =    SSHLibrary.Read_Until_Prompt
106     BuiltIn.Log    ${prompt_text}
107     SSHLibrary.Get_File    ${log_filename}
108     ${log_text} =    OperatingSystem.Get_File    ${log_filename}
109     ${any_hits} =    String.Get_Regexp_Matches    ${log_text}    delete|put
110     ${any_hit_count} =    BuiltIn.Get_Length    ${any_hits}
111     BuiltIn.Should_Be_Equal    ${2}    ${any_hit_count}    Unexpected status seen: ${log_text}
112     ${strict_hits} =    String.Get_Regexp_Matches    ${log_text}    delete:200|put:201
113     ${strict_hit_count} =    BuiltIn.Get_Length    ${strict_hits}
114     BuiltIn.Should_Be_Equal    ${2}    ${strict_hit_count}    Expected status not seen: ${log_text}
115
116 Check_For_Connector_Leak
117     [Documentation]    Check that the number of items in operational netconf topology is not higher than expected (at most 1 + ${CONFIGURED_DEVICES_LIMIT}).
118     # FIXME: Are separate keywords necessary?
119     Check_Operational_Items_Upper_Bound
120
121 *** Keywords ***
122 Setup_Everything
123     [Documentation]    Initialize libraries, open two SSH connections to the tools system (one for testtool, one for configurer),
...    store their indices as suite variables, and upload configurer.py and AuthStandalone.py.
124     ClusterManagement.ClusterManagement_Setup
125     SetupUtils.Setup_Utils_For_Setup_And_Teardown
126     NetconfKeywords.Setup_Netconf_Keywords    create_session_for_templated_requests=False
127     ${testtool_connection_index} =    SSHKeywords.Open_Connection_To_Tools_System
128     BuiltIn.Set_Suite_Variable    \${testtool_connection_index}
129     ${configurer_connection_index} =    SSHKeywords.Open_Connection_To_Tools_System
130     BuiltIn.Set_Suite_Variable    \${configurer_connection_index}
131     SSHKeywords.Require_Python
132     SSHKeywords.Assure_Library_Counter
133     SSHLibrary.Put_File    ${CURDIR}/../../../../tools/netconf_tools/configurer.py
134     SSHLibrary.Put_File    ${CURDIR}/../../../libraries/AuthStandalone.py
135
136 Teardown_Everything
137     [Documentation]    Switch to the testtool SSH connection, stop testtool, and delete all RequestsLibrary sessions.
138     SSHLibrary.Switch_Connection    ${testtool_connection_index}
139     NetconfKeywords.Stop_Testtool
140     RequestsLibrary.Delete_All_Sessions
141
142 Count_Substring_Occurence
143     [Arguments]    ${substring}    ${main_string}
144     [Documentation]    Return how many times ${substring} occurs within ${main_string}, computed as the number of pieces after splitting minus one.
145     ...    Both arguments are pasted into triple-double-quoted Python literals, so the result is reliable only if neither contains triple-double quotes.
146     BuiltIn.Comment    TODO: Migrate this keyword into an appropriate Resource.
147     BuiltIn.Run_Keyword_And_Return    BuiltIn.Evaluate    len("""${main_string}""".split("""${substring}""")) - 1
148
149 Get_Config_Device_Count
150     [Documentation]    Fetch config netconf topology through the Owner session and return the number of ${DEVICE_BASE_NAME} occurrences in it.
151     ${config_topology} =    TemplatedRequests.Get_As_Json_From_Uri    ${CONFIG_API}/network-topology:network-topology/topology/topology-netconf    session=${netconf_manager_owner_http_session}
152     BuiltIn.Run_Keyword_And_Return    Count_Substring_Occurence    main_string=${config_topology}    substring=${DEVICE_BASE_NAME}
153
154 Get_Operational_Device_Count
155     [Documentation]    Fetch operational netconf topology through the Owner session and return the number of ${DEVICE_BASE_NAME} occurrences in it.
156     ${operational_topology} =    TemplatedRequests.Get_As_Json_From_Uri    ${OPERATIONAL_API}/network-topology:network-topology/topology/topology-netconf    session=${netconf_manager_owner_http_session}
157     BuiltIn.Run_Keyword_And_Return    Count_Substring_Occurence    main_string=${operational_topology}    substring=${DEVICE_BASE_NAME}
158
159 Check_Config_Items_Lower_Bound
160     [Documentation]    Pass only when the config topology holds at least ${CONFIGURED_DEVICES_LIMIT} occurrences of ${DEVICE_BASE_NAME}.
161     ${config_count} =    Get_Config_Device_Count
162     BuiltIn.Should_Be_True    not (${config_count} < ${CONFIGURED_DEVICES_LIMIT})    Found ${config_count} config items, should be at least ${CONFIGURED_DEVICES_LIMIT}
163
164 Check_Operational_Items_Upper_Bound
165     [Documentation]    Count operational topology items matching ${DEVICE_BASE_NAME}, fail if more than 1 + ${CONFIGURED_DEVICES_LIMIT}. The failure message names operational items, as that is the datastore counted here.
166     ${device_count} =    Get_Operational_Device_Count
167     BuiltIn.Run_Keyword_If    ${device_count} > 1 + ${CONFIGURED_DEVICES_LIMIT}    BuiltIn.Fail    Found ${device_count} operational items, should be at most 1 + ${CONFIGURED_DEVICES_LIMIT}
168
169 Get_Typical_Time
170     [Arguments]    ${coefficient}=1.0
171     [Documentation]    Return ${coefficient} times ${CONNECTION_SLEEP} times ${CONFIGURED_DEVICES_LIMIT}, used by callers as a number of seconds.
172     ${typical_seconds} =    BuiltIn.Evaluate    ${coefficient} * ${CONNECTION_SLEEP} * ${CONFIGURED_DEVICES_LIMIT}
173     [Return]    ${typical_seconds}