c9fb3ee5d7dd59198f4ff27637d5fd0c826c76e9
[integration/test.git] / tools / clustering / cluster-monitor / monitor.py
1 #!/usr/bin/python
2 """
3 Cluster Monitor Tool
4 Author: Phillip Shea
5 Updated: 2016-Mar-07
6
7 This tool provides real-time visualization of the cluster member roles for all
8 shards in the config datastore.
9
10 A file named 'cluster.json' containing a list of the IP addresses and port numbers
11 of the controllers is required. This resides in the same directory as monitor.py.
12 "user" and "pass" are not required for monitor.py, but they may be
13 needed for other apps in this folder. The file should look like this (without the "# ..." remark, which is not valid JSON):
14
15     {
16         "cluster": {
17             "controllers": [
18                 {"ip": "172.17.10.93", "port": "8181"},
19                 {"ip": "172.17.10.93", "port": "8181"},
20                 {"ip": "172.17.10.93", "port": "8181"}
21             ],
22             "user": "username",
23             "pass": "password",
24             "shards_to_exclude": []  # list of shard names to omit from output
25         }
26     }
27
28 Usage: python monitor.py
29 """
30 from io import BytesIO
31 import time
32 import pprint
33 import curses
34 import sys
35 import json
36 import pycurl
37 import string
38
39
def rest_get(restURL):
    """Fetch ``restURL`` with an HTTP GET and return the decoded JSON body.

    The request uses a 1 second connect timeout and a 2 second overall
    timeout. FAILONERROR is disabled, so an HTTP error status does not make
    pycurl raise; whatever body the server sent is still parsed as JSON.

    :param restURL: URL to request (converted with ``str()`` before use).
    :return: the object produced by ``json.loads`` on the response body.
    :raises pycurl.error: on transport failures (timeout, no connection, ...).
    :raises ValueError: if the response body is not valid JSON.
    """
    rest_buffer = BytesIO()
    c = pycurl.Curl()
    try:
        c.setopt(c.TIMEOUT, 2)
        c.setopt(c.CONNECTTIMEOUT, 1)
        c.setopt(c.FAILONERROR, False)
        c.setopt(c.URL, str(restURL))
        # 1 explicitly selects GET; the previous value 0 left the option
        # untouched, which only worked because GET is the default method.
        c.setopt(c.HTTPGET, 1)
        c.setopt(c.WRITEFUNCTION, rest_buffer.write)
        c.perform()
    finally:
        # Release the curl handle even when perform() raises, otherwise every
        # timed-out poll leaks one handle.
        c.close()
    return json.loads(rest_buffer.getvalue())
52
53
def getClusterRolesWithCurl(shardName, *args):
    """Ask every controller for its Raft role in the config shard ``shardName``.

    For each controller the Jolokia read URL of the shard MBean
    ``<member name>-shard-<shardName>-config`` is queried via ``rest_get``.

    :param shardName: shard name without the member prefix and without the
        ``-config`` suffix.
    :param args: ``args[0]`` is the list of controller dicts, each with string
        entries ``"ip"`` and ``"port"``; ``args[1]`` is the list of member
        names, index-aligned with ``args[0]``.
    :return: dict keyed by controller IP. The value is the reported
        ``RaftState``, ``'HTTP <status>'`` when the reply status is not 200
        and carries no value, one of ``'timeout'``, ``'JSON error'``,
        ``'no connection'`` or ``'down'`` when the query failed, or ``None``
        when the reply had status 200 but no ``value``.
        Because the key is the IP, controllers sharing an IP overwrite each
        other's entry.
    """
    controllers = args[0]
    names = args[1]
    controller_state = {}
    for i, controller in enumerate(controllers):
        ip = controller["ip"]
        controller_state[ip] = None
        url = "http://" + ip + ":" + controller["port"] + "/jolokia/read/org.opendaylight.controller:"
        url += 'Category=Shards,name=' + names[i]
        url += '-shard-' + shardName + '-config,type=DistributedConfigDatastore'
        try:
            resp = rest_get(url)
            if resp['status'] != 200:
                controller_state[ip] = 'HTTP ' + str(resp['status'])
            if 'value' in resp:
                controller_state[ip] = resp['value']['RaftState']
        except Exception as err:
            # Only Exception is caught: KeyboardInterrupt and SystemExit must
            # propagate so Ctrl-C stops the monitor instead of being shown as
            # a controller that is 'down'.
            message = str(err)
            # The type name is included so that a JSON decoding error is
            # recognised even when its message does not contain 'JSON'.
            detail = type(err).__name__ + ': ' + message
            if 'timed out' in message:
                controller_state[ip] = 'timeout'
            elif 'JSON' in detail:
                controller_state[ip] = 'JSON error'
            elif 'connect to host' in detail:
                controller_state[ip] = 'no connection'
            else:
                controller_state[ip] = 'down'
    return controller_state
80
81
def size_and_color(cluster_roles, field_length, ip_addr):
    """Build the display text and curses attribute for one controller's role.

    :param cluster_roles: dict mapping controller IP to its role/status value.
    :param field_length: width the text is centred in.
    :param ip_addr: key of the controller to render; must be in
        ``cluster_roles`` (a missing key raises ``KeyError``).
    :return: dict with ``'txt'`` (``str(role)`` centred to ``field_length``)
        and ``'color'`` (``curses.color_pair`` 2 for "Leader", 3 for
        "Follower", 5 for "Candidate", 0 for anything else).
    """
    role = cluster_roles[ip_addr]
    if role == "Leader":
        pair_number = 2
    elif role == "Follower":
        pair_number = 3
    elif role == "Candidate":
        pair_number = 5
    else:
        # Error labels, None and unknown states use the default pair.
        pair_number = 0
    return {
        # str.center replaces string.center, which no longer exists in
        # Python 3; the method form behaves the same on Python 2.
        'txt': str(role).center(field_length),
        'color': curses.color_pair(pair_number),
    }
94
95
# Load the cluster description. 'cluster.json' is opened by relative path, so
# it is looked up in the current working directory.
try:
    with open('cluster.json') as cluster_file:
        data = json.load(cluster_file)
except (IOError, OSError, ValueError):
    # IOError/OSError: file missing or unreadable; ValueError: invalid JSON.
    # A bare except would also trap KeyboardInterrupt and SystemExit.
    print(str(sys.exc_info()))
    print('Unable to open the file cluster.json')
    sys.exit(1)
try:
    # Both entries are mandatory; "user" and "pass" are not read here.
    controllers = data["cluster"]["controllers"]
    shards_to_exclude = data["cluster"]["shards_to_exclude"]
except (KeyError, TypeError):
    # KeyError: entry missing; TypeError: "cluster" is not a JSON object.
    print(str(sys.exc_info()))
    print('Error reading the file cluster.json')
    sys.exit(1)
110
controller_names = []
Shards = set()
# Retrieve controller names and shard names.
for controller in controllers:
    url = "http://" + controller["ip"] + ":" + controller["port"] + "/jolokia/read/org.opendaylight.controller:"
    url += "Category=ShardManager,name=shard-manager-config,type=DistributedConfigDatastore"
    try:
        data = rest_get(url)
    except Exception:
        # 'controller' is a dict; concatenating it to a str raised TypeError
        # and hid the real failure, so report the IP instead.
        print('Unable to retrieve shard names from ' + controller["ip"])
        print('Are all controllers up?')
        print(str(sys.exc_info()[1]))
        sys.exit(1)
    print('shards from the first controller')
    pprint.pprint(data)
    # grab the controller name from the first shard: everything before
    # '-shard-' in an entry such as '<member>-shard-<shard>-config'
    name = data['value']['LocalShards'][0]
    print(name)
    pos = name.find('-shard-')
    print(pos)
    print(name[:8])
    controller_names.append(name[:pos])

    # collect shards found in any controller; does not require all controllers to have the same shards
    for localShard in data['value']['LocalShards']:
        # 7 == len('-shard-'): keep the text between '-shard-' and '-config'
        shardName = localShard[(localShard.find("-shard-") + 7):localShard.find("-config")]
        if shardName not in shards_to_exclude:
            Shards.add(shardName)
print(controller_names)
print(Shards)
if not Shards:
    # max() of an empty sequence raises ValueError; stop with a clear message.
    print('No shards left to display; check shards_to_exclude in cluster.json')
    sys.exit(1)
# Column width: widest label plus one space of padding on each side.
# Controller names are included because they are centred in a column of the
# same width and would otherwise spill into the first shard column.
field_len = max(len(label) for label in list(Shards) + controller_names) + 2
142
# Curses front end: one row per controller, one column per shard.
stdscr = curses.initscr()
try:
    curses.noecho()
    curses.cbreak()
    curses.curs_set(0)
    stdscr.keypad(1)
    # Non-blocking getch() so polling continues while no key is pressed.
    stdscr.nodelay(1)

    curses.start_color()
    # 1: headers, 2: Leader, 3: Follower, 5: Candidate and heartbeat marker.
    curses.init_pair(1, curses.COLOR_WHITE, curses.COLOR_BLACK)
    curses.init_pair(2, curses.COLOR_WHITE, curses.COLOR_GREEN)
    curses.init_pair(3, curses.COLOR_WHITE, curses.COLOR_BLUE)
    curses.init_pair(4, curses.COLOR_WHITE, curses.COLOR_YELLOW)
    curses.init_pair(5, curses.COLOR_BLACK, curses.COLOR_YELLOW)

    # Fix the column order once so headers and data always line up and the
    # layout is the same from run to run.
    shard_columns = sorted(Shards)

    # display controller and shard headers
    for row, controller in enumerate(controller_names):
        stdscr.addstr(row + 1, 0, controller.center(field_len), curses.color_pair(1))
    for data_column, shard in enumerate(shard_columns):
        stdscr.addstr(0, (field_len + 1) * (data_column + 1), shard.center(field_len), curses.color_pair(1))
    # Rows are controllers, so the hint goes below the last controller row
    # (it was positioned by the number of shards and could overwrite a row).
    stdscr.addstr(len(controllers) + 2, 0, 'Press q to quit.', curses.color_pair(1))
    stdscr.refresh()

    # Heartbeat marker column; floor division keeps it an int on Python 3
    # and the clamp keeps it on screen for very narrow columns.
    heartbeat_col = max(field_len // 2 - 2, 0)

    # display shard status
    odd_or_even = 0
    key = -1  # getch() also returns -1 while no key is waiting
    while key != ord('q') and key != ord('Q'):
        odd_or_even += 1
        key = stdscr.getch()

        for data_column, shard_name in enumerate(shard_columns):
            if shard_name not in shards_to_exclude:
                cluster_stat = getClusterRolesWithCurl(shard_name, controllers, controller_names)
                for row, controller in enumerate(controllers):
                    status = size_and_color(cluster_stat, field_len, controller["ip"])
                    stdscr.addstr(row + 1, (field_len + 1) * (data_column + 1), status['txt'], status['color'])
            # Pause and repaint after every shard column.
            time.sleep(0.5)
            # The marker alternates between pair 5 and pair 0 on successive
            # passes of the outer loop to show the monitor is alive.
            if odd_or_even % 2 == 0:
                stdscr.addstr(0, heartbeat_col, " <3 ", curses.color_pair(5))
            else:
                stdscr.addstr(0, heartbeat_col, " <3 ", curses.color_pair(0))
            stdscr.refresh()
finally:
    # clean up: always restore the terminal, including after Ctrl-C or an
    # unexpected error, so the shell is not left in cbreak/no-echo mode.
    curses.nocbreak()
    stdscr.keypad(0)
    curses.echo()
    curses.endwin()