ae87311ca462c739e65179c27076126da065072e
[integration/test.git] / csit / suites / netconf / clustering / outages.robot
1 *** Settings ***
2 Documentation     netconf cluster node outage test suite (CRUD operations).
3 ...
4 ...               Copyright (c) 2016 Cisco Systems, Inc. and others. All rights reserved.
5 ...
6 ...               This program and the accompanying materials are made available under the
7 ...               terms of the Eclipse Public License v1.0 which accompanies this distribution,
8 ...               and is available at http://www.eclipse.org/legal/epl-v10.html
9 ...
10 ...
11 ...               Perform one of the basic operations (Create, Read, Update and Delete or CRUD)
12 ...               on device data mounted onto a netconf connector while one of the nodes is
13 ...               down and see if they work. Then bring the dead node up and check that it sees
14 ...               the operations that were made while it was down are visible on it as well.
15 ...
16 ...               The node is brought down before each of the "Create", "Update" and "Delete"
17 ...               operations and brought back up after these operations. Before the dead
18 ...               node is brought up, a test case makes sure the operation is properly
19 ...               propagated within the cluster.
20 ...
21 ...               Currently each of the 3 operations is done once. "Create" is done while
22 ...               node 1 is down, "Update" while node 2 is down and "Delete" while node 3
23 ...               is down.
24 Suite Setup       Setup_Everything
25 Suite Teardown    Teardown_Everything
26 Test Setup        SetupUtils.Setup_Test_With_Logging_And_Without_Fast_Failing
27 Library           Collections
28 Library           RequestsLibrary
29 Library           OperatingSystem
30 Library           String
31 Library           SSHLibrary    timeout=10s
32 Resource          ${CURDIR}/../../../libraries/ClusterManagement.robot
33 Resource          ${CURDIR}/../../../libraries/FailFast.robot
34 Resource          ${CURDIR}/../../../libraries/KarafKeywords.robot
35 Resource          ${CURDIR}/../../../libraries/NetconfKeywords.robot
36 Resource          ${CURDIR}/../../../libraries/SetupUtils.robot
37 Resource          ${CURDIR}/../../../libraries/TemplatedRequests.robot
38 Resource          ${CURDIR}/../../../libraries/Utils.robot
39 Variables         ${CURDIR}/../../../variables/Variables.py
40
41 *** Variables ***
# How long to keep retrying a device data check against a node that is already up.
42 ${DEVICE_CHECK_TIMEOUT}    60s
# How long to keep retrying a check against a node that is (re)booting; also used
# by Get_Topology as the overall wait for a restarted node's RESTCONF to respond.
43 ${DEVICE_BOOT_TIMEOUT}    100s
# Name of the single testtool-simulated device mounted on the Netconf connector.
44 ${DEVICE_NAME}    netconf-test-device
# How long to wait for a new device master to connect after a cluster member is killed.
45 ${MASTER_CONNECT_TIMEOUT}    15s
46
47 *** Test Cases ***
48 Start_Testtool
49     [Documentation]    Deploy and start test tool, then wait for all its devices to become online.
50     NetconfKeywords.Install_And_Start_Testtool    device-count=1    schemas=${CURDIR}/../../../variables/netconf/CRUD/schemas
51 
52 Check_Device_Is_Not_Mounted_At_Beginning
53     [Documentation]    Sanity check making sure our device is not there. Fail if found.
    # Each cluster member is queried separately to catch stale state left on any node.
54     NetconfKeywords.Check_Device_Has_No_Netconf_Connector    ${DEVICE_NAME}    session=node1
55     NetconfKeywords.Check_Device_Has_No_Netconf_Connector    ${DEVICE_NAME}    session=node2
56     NetconfKeywords.Check_Device_Has_No_Netconf_Connector    ${DEVICE_NAME}    session=node3
57 
58 Configure_Device_On_Netconf
59     [Documentation]    Use node 1 to configure a testtool device on Netconf connector
60     NetconfKeywords.Configure_Device_In_Netconf    ${DEVICE_NAME}    device_type=configure-via-topology    session=node1
    # On failure, attribute the result to the known upstream bug instead of a plain FAIL.
61     [Teardown]    Utils.Report_Failure_Due_To_Bug    5089
62 
63 Wait_For_Device_To_Become_Visible_For_All_Nodes
64     [Documentation]    Check that the cluster communication about a new Netconf device configuration works
65     NetconfKeywords.Wait_Device_Connected    ${DEVICE_NAME}    session=node1
66     NetconfKeywords.Wait_Device_Connected    ${DEVICE_NAME}    session=node2
67     NetconfKeywords.Wait_Device_Connected    ${DEVICE_NAME}    session=node3
68 
69 Check_Device_Data_Is_Seen_As_Empty_On_All_Nodes
70     [Documentation]    Sanity check against possible data left-overs from previous suites. Also causes the suite to wait until the entire cluster sees the device and its data mount.
    # Get_Topology retries internally and logs each node's topology view;
    # the returned values are intentionally unused here.
71     ${config_topology}    ${operational_topology}=    Get_Topology    session=node1
72     ${config_topology}    ${operational_topology}=    Get_Topology    session=node2
73     ${config_topology}    ${operational_topology}=    Get_Topology    session=node3
74     BuiltIn.Wait_Until_Keyword_Succeeds    ${DEVICE_CHECK_TIMEOUT}    1s    Check_Config_Data    node1    ${empty_data}
75     BuiltIn.Wait_Until_Keyword_Succeeds    ${DEVICE_CHECK_TIMEOUT}    1s    Check_Config_Data    node2    ${empty_data}
76     BuiltIn.Wait_Until_Keyword_Succeeds    ${DEVICE_CHECK_TIMEOUT}    1s    Check_Config_Data    node3    ${empty_data}
77
78 Kill_node1_Before_Create
79     [Documentation]    Simulate node 1 crashes just before device data is created, fail if node 1 survives.
80     ClusterManagement.Kill_Single_Member    1
81 
82 Create_Device_Data_With_node1_Down
83     [Documentation]    Check that the create requests work when node 1 is down.
84     ...    As ODL may be in the process of connecting possible new master to the device,
85     ...    the action is retried few times.
86     ...    TODO: Check exact status before retry. Carbon reports 404 instead of the correct 503.
87     [Tags]    critical
    # POST goes to node 2 because node 1 is down; retried while a new master connects.
88     BuiltIn.Wait_Until_Keyword_Succeeds    ${MASTER_CONNECT_TIMEOUT}    1s    TemplatedRequests.Post_As_Xml_Templated    ${directory_with_template_folders}${/}dataorig    {'DEVICE_NAME': '${DEVICE_NAME}'}    session=node2
89 
90 Check_New_Device_Data_Is_Visible_On_Nodes_Without_node1
91     [Documentation]    Check that the new device data is propagated in the cluster even when node 1 is down.
92     [Tags]    critical
93     BuiltIn.Wait_Until_Keyword_Succeeds    ${DEVICE_CHECK_TIMEOUT}    1s    Check_Config_Data    node2    ${original_data}
94     BuiltIn.Wait_Until_Keyword_Succeeds    ${DEVICE_CHECK_TIMEOUT}    1s    Check_Config_Data    node3    ${original_data}
95 
96 Restart_node1_After_Create_And_Dump_Its_Topology_Data
97     [Documentation]    Simulate node 1 restarted by admin just after device data is created and the change propagated in the cluster, fail if node 1 fails to boot.
98     ClusterManagement.Start_Single_Member    1
    # Get_Topology doubles as a wait for node 1's RESTCONF to come back, and logs its view.
99     ${config_topology}    ${operational_topology}=    Get_Topology    session=node1
100 
101 Check_New_Device_Data_Is_Visible_On_node1
102     [Documentation]    Check that the created device data is propagated to node 1 as well.
103     [Tags]    critical
104     BuiltIn.Wait_Until_Keyword_Succeeds    ${DEVICE_BOOT_TIMEOUT}    1s    Check_Config_Data    node1    ${original_data}
    # On failure, attribute the result to the known upstream bug instead of a plain FAIL.
105     [Teardown]    Utils.Report_Failure_Due_To_Bug    5761
106
107 Kill_node2_Before_Modify
108     [Documentation]    Simulate node 2 crashes just before device data is modified, fail if node 2 survives.
109     ClusterManagement.Kill_Single_Member    2
110 
111 Modify_Device_Data_With_node2_Down
112     [Documentation]    Check that the modification requests work when node 2 is down.
113     ...    As ODL may be in the process of connecting possible new master to the device,
114     ...    the action is retried few times.
115     ...    TODO: Check exact status before retry. Carbon reports 404 instead of the correct 503.
116     [Tags]    critical
    # PUT replaces the data created in the previous phase; sent to node 3 because node 2 is down.
117     BuiltIn.Wait_Until_Keyword_Succeeds    ${MASTER_CONNECT_TIMEOUT}    1s    TemplatedRequests.Put_As_Xml_Templated    ${directory_with_template_folders}${/}datamod1    {'DEVICE_NAME': '${DEVICE_NAME}'}    session=node3
118     [Teardown]    Utils.Report_Failure_Due_To_Bug    5762
119 
120 Check_Modified_Device_Data_Is_Visible_On_Nodes_Without_node2
121     [Documentation]    Check that the device data modification is propagated in the cluster even when node 2 is down.
122     [Tags]    critical
123     BuiltIn.Wait_Until_Keyword_Succeeds    ${DEVICE_CHECK_TIMEOUT}    1s    Check_Config_Data    node1    ${modified_data}
124     BuiltIn.Wait_Until_Keyword_Succeeds    ${DEVICE_CHECK_TIMEOUT}    1s    Check_Config_Data    node3    ${modified_data}
125     [Teardown]    Utils.Report_Failure_Due_To_Bug    5762
126 
127 Restart_node2_After_Modify_And_Dump_Its_Topology_Data
128     [Documentation]    Simulate node 2 restarted by admin just after device data is modified and the change propagated in the cluster, fail if node 2 fails to boot.
129     ClusterManagement.Start_Single_Member    2
    # Get_Topology doubles as a wait for node 2's RESTCONF to come back, and logs its view.
130     ${config_topology}    ${operational_topology}=    Get_Topology    session=node2
131 
132 Check_Modified_Device_Data_Is_Visible_On_node2
133     [Documentation]    Check that the device data modification is propagated to node 2 as well.
134     [Tags]    critical
135     BuiltIn.Wait_Until_Keyword_Succeeds    ${DEVICE_BOOT_TIMEOUT}    1s    Check_Config_Data    node2    ${modified_data}
136     [Teardown]    Utils.Report_Failure_Due_To_Bug    5761
137
138 Kill_node3_Before_Delete
139     [Documentation]    Simulate node 3 crashes just before device data is deleted, fail if node 3 survives.
140     ClusterManagement.Kill_Single_Member    3
141 
142 Delete_Device_Data_With_node3_Down
143     [Documentation]    Check that the data removal requests work when node 3 is down.
144     ...    As ODL may be in the process of connecting possible new master to the device,
145     ...    the action is retried few times.
146     ...    TODO: Check exact status before retry. Carbon reports 404 instead of the correct 503.
147     [Tags]    critical
    # DELETE reuses the datamod1 template (for its target URL); sent to node 1 because node 3 is down.
148     BuiltIn.Wait_Until_Keyword_Succeeds    ${MASTER_CONNECT_TIMEOUT}    1s    TemplatedRequests.Delete_Templated    ${directory_with_template_folders}${/}datamod1    {'DEVICE_NAME': '${DEVICE_NAME}'}    session=node1
149     [Teardown]    Utils.Report_Failure_Due_To_Bug    5762
150 
151 Check_Device_Data_Removal_Is_Visible_On_Nodes_Without_node3
152     [Documentation]    Check that the device data removal is propagated in the cluster even when node 3 is down.
153     [Tags]    critical
154     BuiltIn.Wait_Until_Keyword_Succeeds    ${DEVICE_CHECK_TIMEOUT}    1s    Check_Config_Data    node1    ${empty_data}
155     BuiltIn.Wait_Until_Keyword_Succeeds    ${DEVICE_CHECK_TIMEOUT}    1s    Check_Config_Data    node2    ${empty_data}
156     [Teardown]    Utils.Report_Failure_Due_To_Bug    5762
157 
158 Restart_node3_After_Delete_And_Dump_Its_Topology_Data
159     [Documentation]    Simulate node 3 restarted by admin just after device data is deleted and the change propagated in the cluster, fail if node 3 fails to boot.
160     ClusterManagement.Start_Single_Member    3
    # Get_Topology doubles as a wait for node 3's RESTCONF to come back, and logs its view.
161     ${config_topology}    ${operational_topology}=    Get_Topology    session=node3
162 
163 Check_Device_Data_Removal_Is_Visible_On_node3
164     [Documentation]    Check that the device data removal is propagated to node 3 as well.
165     [Tags]    critical
166     BuiltIn.Wait_Until_Keyword_Succeeds    ${DEVICE_BOOT_TIMEOUT}    1s    Check_Config_Data    node3    ${empty_data}
167     [Teardown]    Utils.Report_Failure_Due_To_Bug    5761
168
169 Deconfigure_Device_In_Netconf
170     [Documentation]    Make request to deconfigure the device on Netconf connector to clean things up and also check that it still works after all the node outages.
    # NOTE(review): this [Setup] names the same keyword as the suite-level Test Setup,
    # so it looks redundant — confirm before removing.
171     [Setup]    SetupUtils.Setup_Test_With_Logging_And_Without_Fast_Failing
172     NetconfKeywords.Remove_Device_From_Netconf    ${DEVICE_NAME}    session=node1
173 
174 Check_Device_Deconfigured
175     [Documentation]    Check that the device deconfiguration is propagated throughout the cluster correctly.
176     NetconfKeywords.Wait_Device_Fully_Removed    ${DEVICE_NAME}    session=node1
177     NetconfKeywords.Wait_Device_Fully_Removed    ${DEVICE_NAME}    session=node2
178     NetconfKeywords.Wait_Device_Fully_Removed    ${DEVICE_NAME}    session=node3
179
180 *** Keywords ***
181 Setup_Everything
182     [Documentation]    Setup everything needed for the test cases.
183     # Setup resources used by the suite.
184     SetupUtils.Setup_Utils_For_Setup_And_Teardown
185     ClusterManagement.ClusterManagement_Setup
186     NetconfKeywords.Setup_Netconf_Keywords    create_session_for_templated_requests=False
187     # TODO: Refactor the suite to use ClusterManagement.Resolve_Http_Session_For_Member instead of these 3 "hardcoded" sessions.
    # One RESTCONF session per cluster member so tests can target a specific node.
188     RequestsLibrary.Create_Session    node1    http://${ODL_SYSTEM_1_IP}:${RESTCONFPORT}    headers=${HEADERS_XML}    auth=${AUTH}
189     RequestsLibrary.Create_Session    node2    http://${ODL_SYSTEM_2_IP}:${RESTCONFPORT}    headers=${HEADERS_XML}    auth=${AUTH}
190     RequestsLibrary.Create_Session    node3    http://${ODL_SYSTEM_3_IP}:${RESTCONFPORT}    headers=${HEADERS_XML}    auth=${AUTH}
    # Expected device-data snapshots (empty / created / modified) compared by Check_Config_Data.
191     BuiltIn.Set_Suite_Variable    ${directory_with_template_folders}    ${CURDIR}/../../../variables/netconf/CRUD
192     BuiltIn.Set_Suite_Variable    ${empty_data}    <data xmlns="${ODL_NETCONF_NAMESPACE}"></data>
193     BuiltIn.Set_Suite_Variable    ${original_data}    <data xmlns="${ODL_NETCONF_NAMESPACE}"><cont xmlns="urn:opendaylight:test:netconf:crud"><l>Content</l></cont></data>
194     BuiltIn.Set_Suite_Variable    ${modified_data}    <data xmlns="${ODL_NETCONF_NAMESPACE}"><cont xmlns="urn:opendaylight:test:netconf:crud"><l>Modified Content</l></cont></data>
    # Config and operational topology URLs consumed by Get_Topology_Core.
195     ${url}=    Builtin.Set_Variable    /network-topology:network-topology/topology/topology-netconf
196     BuiltIn.Set_Suite_Variable    ${config_topology_url}    ${CONFIG_API}${url}
197     BuiltIn.Set_Suite_Variable    ${operational_topology_url}    ${OPERATIONAL_API}${url}
198
199 Get_Topology_Core
200     [Arguments]    ${session}
201     [Documentation]    Get both versions of topology (config and operational), log them and return them for further processing.
    # Fails (so the caller's Wait_Until_Keyword_Succeeds retries) if either GET fails.
202     ${config_topology}=    TemplatedRequests.Get_As_Json_From_Uri    ${config_topology_url}    session=${session}
203     BuiltIn.Log    ${config_topology}
204     ${operational_topology}=    TemplatedRequests.Get_As_Json_From_Uri    ${operational_topology_url}    session=${session}
205     BuiltIn.Log    ${operational_topology}
206     [Return]    ${config_topology}    ${operational_topology}
207
208 Get_Topology
209     [Arguments]    ${session}
210     [Documentation]    Repeatedly try to get the topologies using Get_Topology_Core until either the request succeeds or boot timeout period expires.
    # Uses ${DEVICE_BOOT_TIMEOUT} so this also serves as a "node is up" wait after restarts.
211     ${result}=    BuiltIn.Wait_Until_Keyword_Succeeds    ${DEVICE_BOOT_TIMEOUT}    1s    Get_Topology_Core    ${session}
212     [Return]    ${result}
213
214 Teardown_Everything
215     [Documentation]    Teardown the test infrastructure, perform cleanup and release all resources.
    # Drop all RESTCONF sessions first, then shut down the simulated device tool.
216     RequestsLibrary.Delete_All_Sessions
217     NetconfKeywords.Stop_Testtool
218
219 Check_Device_Instance_Count
220     [Arguments]    ${expected}    ${session}
221     [Documentation]    Check that the specified session sees the specified count of instances of the test tool device.
    # NOTE(review): not referenced by any test case visible in this suite — possibly
    # kept for symmetry with sibling suites; confirm before removing.
222     ${count}    NetconfKeywords.Count_Netconf_Connectors_For_Device    ${DEVICE_NAME}    session=${session}
223     Builtin.Should_Be_Equal_As_Strings    ${count}    ${expected}
224
225 Check_Config_Data
226     [Arguments]    ${node}    ${expected}    ${contains}=False
227     [Documentation]    Check that the specified session sees the specified data in the test tool device.
228     ${url}=    Builtin.Set_Variable    ${CONFIG_API}/network-topology:network-topology/topology/topology-netconf/node/${DEVICE_NAME}/yang-ext:mount
229     ${data}=    TemplatedRequests.Get_As_Xml_From_Uri    ${url}    session=${node}
    # ${contains}=False -> exact string comparison; True -> substring check.
    # Exactly one of the two assertions below runs, selected by ${contains}.
230     BuiltIn.Run_Keyword_Unless    ${contains}    BuiltIn.Should_Be_Equal_As_Strings    ${data}    ${expected}
231     BuiltIn.Run_Keyword_If    ${contains}    BuiltIn.Should_Contain    ${data}    ${expected}