Audit
Centralize configuration¶
Git¶
Each user has a copy of the configuration in their operator environment. They can make changes locally, test them, and then synchronize the platform configuration.
To manage conflicts, all the punch configuration is tracked by git.
description:
Git is a version-control system for tracking changes in computer files and coordinating work on those files among multiple people. It is primarily used for source-code management in software development, but it can be used to keep track of changes in any set of files. As a distributed revision-control system, it is aimed at speed, data integrity, and support for distributed, non-linear workflows.
Zookeeper¶
To monitor the health of the platform, it's crucial to have the last running configuration. For instance: how many channels are running? How many nodes are deployed in the elasticsearch cluster?
That's why the punch admin commands such as punchplatform-channel.sh and punchplatform-services.sh use zookeeper to store the last running configuration.
technical:
To read the last running configuration:
1 2 3 4 | punchplatform-zookeeper-console.sh # then go the punchplatform root path, on standalone it's punchplatform-primary [zk: server4:2181(CONNECTED) 0] ls /punchplatform-livedemo/conf [tenants, resources, punchplatform.properties] |
Centralize monitoring¶
Platform monitoring¶
If deployed, shiva components monitor the full punch platform stack. An internal admin task runs periodically and stores the result of the monitoring check in an elasticsearch cluster (often dedicated for metrics).
example of platform:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 | { "@timestamp": "2018-11-02T14:11:29.195Z", "storm": { "health_code": 0, "health_name": "green", "clusters": { "main": { "nimbus": { "hosts": { "server4": { "health_code": 0, "health_name": "green", "details": { "port": 6627, "host": "server4", "nimbusLogLink": "http://server4:8000/daemonlog?file=nimbus.log", "nimbusUpTime": "5d 17h 27m 35s", "nimbusUpTimeSeconds": 494855, "version": "1.1.1", "status": "Leader" } } } }, "cluster": { "health_code": 0, "health_name": "green", "details": { "stormVersion": "1.1.1", "executorsTotal": 22, "totalMem": 12288, "availCpu": 200, "slotsTotal": 20, "bugtracker-url": "null", "slotsUsed": 6, "topologies": 6, "totalCpu": 200, "cpuAssignedPercentUtil": "0.0", "availMem": 10648, "name": "main", "slotsFree": 14, "memAssignedPercentUtil": "13.3", "central-log-url": "null", "user": "null", "tasksTotal": 22, "schedulerDisplayResource": false, "supervisors": 2 } }, "health_code": 0, "health_name": 
"green", "supervisor": { "hosts": { "server6": { "health_code": 0, "health_name": "green", "details": { "logLink": "http://server6:8000/daemonlog?file=supervisor.log", "totalMem": 6144, "availCpu": 100, "usedMem": 678, "slotsTotal": 10, "slotsUsed": 2, "version": "1.1.1", "uptimeSeconds": 495099, "uptime": "5d 17h 31m 39s", "totalCpu": 100, "availMem": 5466, "host": "server6", "slotsFree": 8, "id": "c026c4ff-23ce-4094-a229-284e036d0ec2", "usedCpu": 0 } }, "server4": { "health_code": 0, "health_name": "green", "details": { "logLink": "http://server4:8000/daemonlog?file=supervisor.log", "totalMem": 6144, "availCpu": 100, "usedMem": 962, "slotsTotal": 10, "slotsUsed": 4, "version": "1.1.1", "uptimeSeconds": 495074, "uptime": "5d 17h 31m 14s", "totalCpu": 100, "availMem": 5182, "host": "server4", "slotsFree": 6, "id": "276af295-14b7-4de1-84f6-0611a39a482e", "usedCpu": 0 } } } } } } }, "elasticsearch": { "health_code": 0, "health_name": "green", "clusters": { "es_search": { "health_code": 0, "health_name": "green", "details": { "number_of_pending_tasks": 0, "cluster_name": "es_search", "active_shards": 538, "active_primary_shards": 269, "unassigned_shards": 0, "delayed_unassigned_shards": 0, "timed_out": false, "relocating_shards": 0, "initializing_shards": 0, "task_max_waiting_in_queue_millis": 0, "number_of_data_nodes": 3, "number_of_in_flight_fetch": 0, "active_shards_percent_as_number": 100, "status": "green", "number_of_nodes": 3 } } } }, "zookeeper": { "health_code": 0, "health_name": "green", "clusters": { "common": { "health_code": 0, "hosts": { "server6": { "health_code": 0, "health_name": "green", "details": { "zk_packets_sent": 9074725, "zk_max_latency": 16859, "zk_server_state": "follower", "zk_outstanding_requests": 0, "zk_ephemerals_count": 33, "zk_min_latency": 0, "zk_max_file_descriptor_count": 100000, "zk_approximate_data_size": 3459895, "zk_version": "3.4.10-39d3a4f269333c922ed3db283be479f9deacaa0f,", "zk_avg_latency": 9, "zk_znode_count": 1142, 
"zk_watch_count": 5290, "zk_open_file_descriptor_count": 49, "zk_packets_received": 7964841, "zk_num_alive_connections": 9 } }, "server5": { "health_code": 0, "health_name": "green", "details": { "zk_followers": 2, "zk_packets_sent": 283129945, "zk_pending_syncs": 0, "zk_synced_followers": 2, "zk_max_latency": 669680, "zk_server_state": "leader", "zk_outstanding_requests": 0, "zk_ephemerals_count": 33, "zk_min_latency": 0, "zk_max_file_descriptor_count": 100000, "zk_approximate_data_size": 3459898, "zk_version": "3.4.10-39d3a4f269333c922ed3db283be479f9deacaa0f,", "zk_avg_latency": 14, "zk_znode_count": 1142, "zk_watch_count": 159332, "zk_open_file_descriptor_count": 50, "zk_packets_received": 278374614, "zk_num_alive_connections": 8 } }, "server4": { "health_code": 0, "health_name": "green", "details": { "zk_packets_sent": 27723571, "zk_max_latency": 344801, "zk_server_state": "follower", "zk_outstanding_requests": 0, "zk_ephemerals_count": 33, "zk_min_latency": 0, "zk_max_file_descriptor_count": 100000, "zk_approximate_data_size": 3459898, "zk_version": "3.4.10-39d3a4f269333c922ed3db283be479f9deacaa0f,", "zk_avg_latency": 20, "zk_znode_count": 1142, "zk_watch_count": 85213, "zk_open_file_descriptor_count": 50, "zk_packets_received": 26959577, "zk_num_alive_connections": 10 } } }, "health_name": "green" } } }, "kafka": { "health_code": 2, "health_name": "red", "clusters": { "local": { "brokers": { "1": { "health_code": 0, "health_name": "green", "details": { "listener_security_protocol_map": { "PLAINTEXT": "PLAINTEXT" }, "endpoints": [ "PLAINTEXT://server4:9092" ], "jmx_port": -1, "port": 9092, "host": "server4", "id": "1", "version": 4, "timestamp": "1539681285784" } }, "2": { "health_code": 0, "health_name": "green", "details": { "listener_security_protocol_map": { "PLAINTEXT": "PLAINTEXT" }, "endpoints": [ "PLAINTEXT://server5:9092" ], "jmx_port": -1, "port": 9092, "host": "server5", "id": "2", "version": 4, "timestamp": "1534301326278" } }, "3": { 
"health_code": 2, "health_name": "red", "details": { "id": "3" } } }, "health_code": 2, "health_name": "red" } } }, "shiva": { "health_code": 1, "health_name": "yellow", "clusters": { "shivamain": { "health_code": 1, "health_name": "yellow", "details": { "cluster_name": "shivamain", "cluster": { "election_timestamp": "2018-10-06T18:54:46.869Z", "assignements": { "25838@server4": [ "/shiva/shivamain/tasks/punchplatform/mytenant/services/admin/elasticsearch_housekeeping_service", "/shiva/shivamain/tasks/punchplatform/mytenant/services/admin/kafka_service", "/shiva/shivamain/tasks/punchplatform/platform/services/admin/elasticsearch_housekeeping_service" ] }, "leader_id": "25838@server4", "unassigned_tasks": {} } } } } }, "platform": { "health_code": 2, "health_name": "red" } } |
Runtime monitoring¶
Each running component publishes metrics or logs in an elasticsearch. This allows a user to track stop/start activities or configuration changes. For instance, each component publishes an uptime metric.
-
platform-<date>
: Contains the platform stack metrics status -
shiva-logs-<date>
: Contains the logs (STDOUT & STDERR) of the tasks running in shiva -
punchplatform
: Full status of the platform -
metricbeat-<version>-<date>
: Contains system metrics such as memory, CPU, storage -
<tenant>-metrics-<date>
: Contains the runtime metrics of storm topologies
Traceability¶
Git history¶
With git, all the past changes are stored.
At any time, the history is accessible:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 | git log commit f4155ebb3a2524b4e474da6654689f9c8005ee74 Author: Loic JARDIN <loic.jardin@thalesgroup.com> Date: Mon Oct 15 18:07:30 2018 +0200 update ais commit 78b9a271873c62fcfb17a468c2acf876133f13a8 Author: Loic jardin <loic.jardin@thalesgroup.com> Date: Fri Oct 5 15:18:45 2018 +0200 rollback change commit b65114642c877be96a20d89dba9669d5794d36d1 Author: Loic JARDIN <loic.jardin@thalesgroup.com> Date: Thu Oct 4 14:52:10 2018 +0200 add operator cedric on livedemo |
And to have the full changes:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 | git log -p commit b65114642c877be96a20d89dba9669d5794d36d1 Author: Loic JARDIN <loic.jardin@thalesgroup.com> Date: Thu Oct 4 14:52:10 2018 +0200 add operator cedric on livedemo diff --git a/platform/livedemo/generated_inventory/group_vars/git_servers b/platform/livedemo/generated_inventory/group_vars/git_servers index b488ef7..38c3b04 100644 --- a/platform/livedemo/generated_inventory/group_vars/git_servers +++ b/platform/livedemo/generated_inventory/group_vars/git_servers @@ -22,4 +22,6 @@ key_to_copy: - /tmp/rsa_pub/server4/home/sgrah/.ssh/id_rsa.pub + - /tmp/rsa_pub/server4/home/cedric/.ssh/id_rsa.pub + diff --git a/platform/livedemo/generated_inventory/group_vars/punchplatform_operator_servers b/platform/livedemo/generated_inventory/group_vars/punchplatform_operator_servers index 9726d0f..94bebc1 100644 --- a/platform/livedemo/generated_inventory/group_vars/punchplatform_operator_servers +++ b/platform/livedemo/generated_inventory/group_vars/punchplatform_operator_servers @@ -38,6 +38,7 @@ operators_username: - punchplatform - rpenco - sgrah + - cedric ####### ADMIN USER & GROUP ########## diff --git a/platform/livedemo/livedemo-punchplatform-deployment.settings b/platform/livedemo/livedemo-punchplatform-deployment.settings index 54b850c..9d29a8e 100644 --- a/platform/livedemo/livedemo-punchplatform-deployment.settings +++ b/platform/livedemo/livedemo-punchplatform-deployment.settings @@ -26,7 +26,7 @@ "punchplatform_operator" : { "configuration_name_dir_from_home" : "pp-conf-livedemo", - "operators_username" : ["loic","qmarinie","leo","dimi","obarbier","punchplatform","rpenco","sgrah"], + "operators_username" : ["loic","qmarinie","leo","dimi","obarbier","punchplatform","rpenco","sgrah","cedric"], "servers" : { "server4" : {} } |
Apache Storm¶
Storm logs contain the submitting user and the name of the submitted topology.
example:
Here the mytenant_ufw_processing topology has been submitted by the user loic.
1 | 2018-10-18 10:16:50.093 o.a.s.d.nimbus [INFO] Setting new assignment for topology id mytenant_ufw_processing-10-1535456322: #org.apache.storm.daemon.common.Assignment{:master-code-dir "/data/storm", :node->host {"c026c4ff-23ce-4094-a229-284e036d0ec2" "server6"}, :executor->node+port {[4 4] ["c026c4ff-23ce-4094-a229-284e036d0ec2" 6707], [3 3] ["c026c4ff-23ce-4094-a229-284e036d0ec2" 6707], [2 2] ["c026c4ff-23ce-4094-a229-284e036d0ec2" 6707], [1 1] ["c026c4ff-23ce-4094-a229-284e036d0ec2" 6707]}, :executor->start-time-secs {[1 1] 1539850610, [2 2] 1539850610, [3 3] 1539850610, [4 4] 1539850610}, :worker->resources {["c026c4ff-23ce-4094-a229-284e036d0ec2" 6707] [0.0 0.0 0.0]}, :owner "loic"} |