9c6a000d425f6923eb4a518b4dfa27c1aa278252
[integration/test.git] / csit / suites / netconf / clustering / outages.robot
1 *** Settings ***
2 Documentation     netconf cluster node outage test suite (CRUD operations).
3 ...
4 ...               Copyright (c) 2016 Cisco Systems, Inc. and others. All rights reserved.
5 ...
6 ...               This program and the accompanying materials are made available under the
7 ...               terms of the Eclipse Public License v1.0 which accompanies this distribution,
8 ...               and is available at http://www.eclipse.org/legal/epl-v10.html
9 ...
10 ...
11 ...               Perform one of the basic operations (Create, Read, Update and Delete or CRUD)
12 ...               on device data mounted onto a netconf connector while one of the nodes is
13 ...               down and see if they work. Then bring the dead node up and check that the
14 ...               operations that were made while it was down are visible on it as well.
15 ...
16 ...               The node is brought down before each of the "Create", "Update" and "Delete"
17 ...               operations and brought back up after these operations. Before the dead
18 ...               node is brought up, a test case makes sure the operation is properly
19 ...               propagated within the cluster.
20 ...
21 ...               Currently each of the 3 operations is done once. "Create" is done while
22 ...               node 1 is down, "Update" while node 2 is down and "Delete" while node 3
23 ...               is down.
24 Suite Setup       Setup_Everything
25 Suite Teardown    Teardown_Everything
26 Test Setup        SetupUtils.Setup_Test_With_Logging_And_Without_Fast_Failing
27 Library           Collections
28 Library           RequestsLibrary
29 Library           OperatingSystem
30 Library           String
31 Library           SSHLibrary    timeout=10s
32 Resource          ${CURDIR}/../../../libraries/ClusterManagement.robot
33 Resource          ${CURDIR}/../../../libraries/FailFast.robot
34 Resource          ${CURDIR}/../../../libraries/KarafKeywords.robot
35 Resource          ${CURDIR}/../../../libraries/NetconfKeywords.robot
36 Resource          ${CURDIR}/../../../libraries/SetupUtils.robot
37 Resource          ${CURDIR}/../../../libraries/TemplatedRequests.robot
38 Resource          ${CURDIR}/../../../libraries/Utils.robot
39 Variables         ${CURDIR}/../../../variables/Variables.py
40
41 *** Variables ***
42 ${DEVICE_CHECK_TIMEOUT}    60s    # Retry budget for Check_Config_Data on nodes that stayed up.
43 ${DEVICE_BOOT_TIMEOUT}    100s    # Retry budget for Get_Topology and for Check_Config_Data on a freshly restarted node.
44 ${DEVICE_NAME}    netconf-test-device    # Name of the testtool device; also used as the node id in topology-netconf URLs.
45 ${MASTER_CONNECT_TIMEOUT}    15s    # Retry budget for the create/modify/delete requests sent while one node is down.
46
47 *** Test Cases ***
48 Start_Testtool
49     [Documentation]    Deploy and start test tool, then wait for all its devices to become online.
50     NetconfKeywords.Install_And_Start_Testtool    device-count=1    schemas=${CURDIR}/../../../variables/netconf/CRUD/schemas
51
52 Check_Device_Is_Not_Mounted_At_Beginning
53     [Documentation]    Sanity check making sure our device is not there. Fail if found.
54     NetconfKeywords.Check_Device_Has_No_Netconf_Connector    ${DEVICE_NAME}    session=node1
55     NetconfKeywords.Check_Device_Has_No_Netconf_Connector    ${DEVICE_NAME}    session=node2
56     NetconfKeywords.Check_Device_Has_No_Netconf_Connector    ${DEVICE_NAME}    session=node3
57
58 Configure_Device_On_Netconf
59     [Documentation]    Use node 1 to configure a testtool device on Netconf connector
60     NetconfKeywords.Configure_Device_In_Netconf    ${DEVICE_NAME}    device_type=configure-via-topology    session=node1
61     [Teardown]    Utils.Report_Failure_Due_To_Bug    5089
62
63 Wait_For_Device_To_Become_Visible_For_All_Nodes
64     [Documentation]    Check that the cluster communication about a new Netconf device configuration works
65     NetconfKeywords.Wait_Device_Connected    ${DEVICE_NAME}    session=node1
66     NetconfKeywords.Wait_Device_Connected    ${DEVICE_NAME}    session=node2
67     NetconfKeywords.Wait_Device_Connected    ${DEVICE_NAME}    session=node3
68
69 Check_Device_Data_Is_Seen_As_Empty_On_All_Nodes
70     [Documentation]    Sanity check against possible data left-overs from previous suites. Also causes the suite to wait until the entire cluster sees the device and its data mount.
71     ${config_topology}    ${operational_topology}=    Get_Topology    session=node1
72     ${config_topology}    ${operational_topology}=    Get_Topology    session=node2
73     ${config_topology}    ${operational_topology}=    Get_Topology    session=node3
74     BuiltIn.Wait_Until_Keyword_Succeeds    ${DEVICE_CHECK_TIMEOUT}    1s    Check_Config_Data    node1    ${empty_data}
75     BuiltIn.Wait_Until_Keyword_Succeeds    ${DEVICE_CHECK_TIMEOUT}    1s    Check_Config_Data    node2    ${empty_data}
76     BuiltIn.Wait_Until_Keyword_Succeeds    ${DEVICE_CHECK_TIMEOUT}    1s    Check_Config_Data    node3    ${empty_data}
77
78 Kill_node1_Before_Create
79     [Documentation]    Simulate a crash of node 1 just before device data is created. Fail if node 1 survives.
80     ClusterManagement.Kill_Single_Member    1
81
82 Create_Device_Data_With_node1_Down
83     [Documentation]    Send the create request through node 2 while node 1 is down and expect it to succeed.
84     ...    ODL may still be connecting a possible new master to the device,
85     ...    so the request is retried every second until MASTER_CONNECT_TIMEOUT expires.
86     ...    TODO: Check exact status before retry. Carbon reports 404 instead of the correct 503.
87     [Tags]    critical
88     ${mapping}=    BuiltIn.Create_Dictionary    DEVICE_NAME=${DEVICE_NAME}
89     BuiltIn.Wait_Until_Keyword_Succeeds    ${MASTER_CONNECT_TIMEOUT}    1s    TemplatedRequests.Post_As_Xml_Templated    ${directory_with_template_folders}${/}dataorig    ${mapping}    session=node2
90
91 Check_New_Device_Data_Is_Visible_On_Nodes_Without_node1
92     [Documentation]    Check that the new device data is propagated in the cluster even when node 1 is down.
93     [Tags]    critical
94     BuiltIn.Wait_Until_Keyword_Succeeds    ${DEVICE_CHECK_TIMEOUT}    1s    Check_Config_Data    node2    ${original_data}
95     BuiltIn.Wait_Until_Keyword_Succeeds    ${DEVICE_CHECK_TIMEOUT}    1s    Check_Config_Data    node3    ${original_data}
96
97 Restart_node1_After_Create_And_Dump_Its_Topology_Data
98     [Documentation]    Simulate node 1 restarted by admin just after device data is created and the change propagated in the cluster, fail if node 1 fails to boot.
99     ClusterManagement.Start_Single_Member    1
100     ${config_topology}    ${operational_topology}=    Get_Topology    session=node1
101
102 Check_New_Device_Data_Is_Visible_On_node1
103     [Documentation]    Check that the created device data is propagated to node 1 as well.
104     [Tags]    critical
105     BuiltIn.Wait_Until_Keyword_Succeeds    ${DEVICE_BOOT_TIMEOUT}    1s    Check_Config_Data    node1    ${original_data}
106     [Teardown]    Utils.Report_Failure_Due_To_Bug    5761
107
108 Kill_node2_Before_Modify
109     [Documentation]    Simulate a crash of node 2 just before device data is modified. Fail if node 2 survives.
110     ClusterManagement.Kill_Single_Member    2
111
112 Modify_Device_Data_With_node2_Down
113     [Documentation]    Check that the modification requests work when node 2 is down. The request is sent through node 3.
114     ...    As ODL may be in the process of connecting a possible new master to the device,
115     ...    the action is retried a few times.
116     ...    TODO: Check exact status before retry. Carbon reports 404 instead of the correct 503.
117     [Tags]    critical
118     BuiltIn.Wait_Until_Keyword_Succeeds    ${MASTER_CONNECT_TIMEOUT}    1s    TemplatedRequests.Put_As_Xml_Templated    ${directory_with_template_folders}${/}datamod1    {'DEVICE_NAME': '${DEVICE_NAME}'}    session=node3
119     [Teardown]    Utils.Report_Failure_Due_To_Bug    5762
120
121 Check_Modified_Device_Data_Is_Visible_On_Nodes_Without_node2
122     [Documentation]    Check that the device data modification is propagated in the cluster even when node 2 is down.
123     [Tags]    critical
124     BuiltIn.Wait_Until_Keyword_Succeeds    ${DEVICE_CHECK_TIMEOUT}    1s    Check_Config_Data    node1    ${modified_data}
125     BuiltIn.Wait_Until_Keyword_Succeeds    ${DEVICE_CHECK_TIMEOUT}    1s    Check_Config_Data    node3    ${modified_data}
126     [Teardown]    Utils.Report_Failure_Due_To_Bug    5762
127
128 Restart_node2_After_Modify_And_Dump_Its_Topology_Data
129     [Documentation]    Simulate node 2 restarted by admin just after device data is modified and the change propagated in the cluster, fail if node 2 fails to boot.
130     ClusterManagement.Start_Single_Member    2
131     ${config_topology}    ${operational_topology}=    Get_Topology    session=node2
132
133 Check_Modified_Device_Data_Is_Visible_On_node2
134     [Documentation]    Check that the device data modification is propagated to node 2 as well.
135     [Tags]    critical
136     BuiltIn.Wait_Until_Keyword_Succeeds    ${DEVICE_BOOT_TIMEOUT}    1s    Check_Config_Data    node2    ${modified_data}
137     [Teardown]    Utils.Report_Failure_Due_To_Bug    5761
138
139 Kill_node3_Before_Delete
140     [Documentation]    Simulate a crash of node 3 just before device data is deleted. Fail if node 3 survives.
141     ClusterManagement.Kill_Single_Member    3
142
143 Delete_Device_Data_With_node3_Down
144     [Documentation]    Check that the data removal requests work when node 3 is down. The request is sent through node 1.
145     ...    As ODL may be in the process of connecting a possible new master to the device,
146     ...    the action is retried a few times.
147     ...    TODO: Check exact status before retry. Carbon reports 404 instead of the correct 503.
148     [Tags]    critical
149     BuiltIn.Wait_Until_Keyword_Succeeds    ${MASTER_CONNECT_TIMEOUT}    1s    TemplatedRequests.Delete_Templated    ${directory_with_template_folders}${/}datamod1    {'DEVICE_NAME': '${DEVICE_NAME}'}    session=node1
150     [Teardown]    Utils.Report_Failure_Due_To_Bug    5762
151
152 Check_Device_Data_Removal_Is_Visible_On_Nodes_Without_node3
153     [Documentation]    Check that the device data removal is propagated in the cluster even when node 3 is down.
154     [Tags]    critical
155     BuiltIn.Wait_Until_Keyword_Succeeds    ${DEVICE_CHECK_TIMEOUT}    1s    Check_Config_Data    node1    ${empty_data}
156     BuiltIn.Wait_Until_Keyword_Succeeds    ${DEVICE_CHECK_TIMEOUT}    1s    Check_Config_Data    node2    ${empty_data}
157     [Teardown]    Utils.Report_Failure_Due_To_Bug    5762
158
159 Restart_node3_After_Delete_And_Dump_Its_Topology_Data
160     [Documentation]    Simulate node 3 restarted by admin just after device data is deleted and the change propagated in the cluster, fail if node 3 fails to boot.
161     ClusterManagement.Start_Single_Member    3
162     ${config_topology}    ${operational_topology}=    Get_Topology    session=node3
163
164 Check_Device_Data_Removal_Is_Visible_On_node3
165     [Documentation]    Check that the device data removal is propagated to node 3 as well.
166     [Tags]    critical
167     BuiltIn.Wait_Until_Keyword_Succeeds    ${DEVICE_BOOT_TIMEOUT}    1s    Check_Config_Data    node3    ${empty_data}
168     [Teardown]    Utils.Report_Failure_Due_To_Bug    5761
169
170 Deconfigure_Device_In_Netconf
171     [Documentation]    Make request to deconfigure the device on Netconf connector to clean things up and also check that it still works after all the node outages.
172     [Setup]    SetupUtils.Setup_Test_With_Logging_And_Without_Fast_Failing
173     NetconfKeywords.Remove_Device_From_Netconf    ${DEVICE_NAME}    session=node1
174
175 Check_Device_Deconfigured
176     [Documentation]    Check that the device deconfiguration is propagated throughout the cluster correctly.
177     NetconfKeywords.Wait_Device_Fully_Removed    ${DEVICE_NAME}    session=node1
178     NetconfKeywords.Wait_Device_Fully_Removed    ${DEVICE_NAME}    session=node2
179     NetconfKeywords.Wait_Device_Fully_Removed    ${DEVICE_NAME}    session=node3
180
181 *** Keywords ***
182 Setup_Everything
183     [Documentation]    Suite setup: initialize the helper resources, open one RESTCONF session per cluster member (node1, node2, node3) and publish the suite variables used by the test cases (template folder, expected device data, config and operational topology URLs).
184     # Setup resources used by the suite.
185     SetupUtils.Setup_Utils_For_Setup_And_Teardown
186     ClusterManagement.ClusterManagement_Setup
187     NetconfKeywords.Setup_Netconf_Keywords    create_session_for_templated_requests=False
188     # TODO: Refactor the suite to use ClusterManagement.Resolve_Http_Session_For_Member instead of these 3 "hardcoded" sessions.
189     RequestsLibrary.Create_Session    node1    http://${ODL_SYSTEM_1_IP}:${RESTCONFPORT}    headers=${HEADERS_XML}    auth=${AUTH}
190     RequestsLibrary.Create_Session    node2    http://${ODL_SYSTEM_2_IP}:${RESTCONFPORT}    headers=${HEADERS_XML}    auth=${AUTH}
191     RequestsLibrary.Create_Session    node3    http://${ODL_SYSTEM_3_IP}:${RESTCONFPORT}    headers=${HEADERS_XML}    auth=${AUTH}
192     BuiltIn.Set_Suite_Variable    ${directory_with_template_folders}    ${CURDIR}/../../../variables/netconf/CRUD
193     BuiltIn.Set_Suite_Variable    ${empty_data}    <data xmlns="${ODL_NETCONF_NAMESPACE}"></data>
194     BuiltIn.Set_Suite_Variable    ${original_data}    <data xmlns="${ODL_NETCONF_NAMESPACE}"><cont xmlns="urn:opendaylight:test:netconf:crud"><l>Content</l></cont></data>
195     BuiltIn.Set_Suite_Variable    ${modified_data}    <data xmlns="${ODL_NETCONF_NAMESPACE}"><cont xmlns="urn:opendaylight:test:netconf:crud"><l>Modified Content</l></cont></data>
196     ${url}=    BuiltIn.Set_Variable    /network-topology:network-topology/topology=topology-netconf
197     BuiltIn.Set_Suite_Variable    ${config_topology_url}    ${REST_API}${url}?content=config    # Same "content" selector that Check_Config_Data uses.
198     BuiltIn.Set_Suite_Variable    ${operational_topology_url}    ${REST_API}${url}?content=nonconfig    # Without a selector both URLs were identical.
199
200 Get_Topology_Core
201     [Arguments]    ${session}
202     [Documentation]    Issue one GET (as JSON) for the config topology URL and one for the operational topology URL over the given session, log each response and return the two responses in that order. No retry is done here; see Get_Topology for the retrying wrapper.
203     ${config_topology}=    TemplatedRequests.Get_As_Json_From_Uri    ${config_topology_url}    session=${session}
204     BuiltIn.Log    ${config_topology}
205     ${operational_topology}=    TemplatedRequests.Get_As_Json_From_Uri    ${operational_topology_url}    session=${session}
206     BuiltIn.Log    ${operational_topology}
207     [Return]    ${config_topology}    ${operational_topology}
208
209 Get_Topology
210     [Arguments]    ${session}
211     [Documentation]    Poll Get_Topology_Core once per second for at most DEVICE_BOOT_TIMEOUT and hand back whatever the first successful attempt returned (config topology first, operational topology second).
212     ${topologies}=    BuiltIn.Wait_Until_Keyword_Succeeds    ${DEVICE_BOOT_TIMEOUT}    1s    Get_Topology_Core    ${session}
213     [Return]    ${topologies}
214
215 Teardown_Everything
216     [Documentation]    Suite teardown: delete all HTTP sessions held by RequestsLibrary, then stop the testtool through NetconfKeywords.
217     RequestsLibrary.Delete_All_Sessions
218     NetconfKeywords.Stop_Testtool
219
220 Check_Device_Instance_Count
221     [Arguments]    ${expected}    ${session}
222     [Documentation]    Count the netconf connectors for the test tool device as seen through the given session and fail unless that count, compared as a string, equals the expected value.
223     ${actual}=    NetconfKeywords.Count_Netconf_Connectors_For_Device    ${DEVICE_NAME}    session=${session}
224     BuiltIn.Should_Be_Equal_As_Strings    ${actual}    ${expected}
225
226 Check_Config_Data
227     [Arguments]    ${node}    ${expected}    ${contains}=False
228     [Documentation]    GET the config data behind the mount point of the test tool device as XML through the session named by "node" and compare it with "expected": exact string equality by default, substring match when "contains" evaluates to true.
229     ${url}=    BuiltIn.Set_Variable    ${REST_API}/network-topology:network-topology/topology=topology-netconf/node=${DEVICE_NAME}/yang-ext:mount?content=config
230     ${data}=    TemplatedRequests.Get_As_Xml_From_Uri    ${url}    session=${node}
231     BuiltIn.Run_Keyword_If    ${contains}    BuiltIn.Should_Contain    ${data}    ${expected}    ELSE    BuiltIn.Should_Be_Equal_As_Strings    ${data}    ${expected}