source: nscp/modules/CheckSystem/PDHCollector.cpp @ d6c3131

0.4.00.4.10.4.2
Last change on this file since d6c3131 was d6c3131, checked in by Michael Medin <michael@…>, 13 months ago
  • Added a test to see if I can resolve the negative denominator issue.
  • Property mode set to 100644
File size: 9.9 KB
Line 
1//////////////////////////////////////////////////////////////////////////
2// PDH Collector
3//
4// Functions from this file collects data from the PDH subsystem and stores
5// it for later use
6// *NOTICE* that this is done in a separate thread so threading issues has
7// to be handled. I handle threading issues in the CounterListener's get/
8// set accessors.
9//
10// Copyright (c) 2004 MySolutions NORDIC (http://www.medin.nu)
11//
12// Date: 2004-03-13
13// Author: Michael Medin - <michael@medin.name>
14//
15// This software is provided "AS IS", without a warranty of any kind.
16// You are free to use/modify this code but leave this header intact.
17//
18//////////////////////////////////////////////////////////////////////////
19
20#include "stdafx.h"
21#include "PDHCollector.h"
22#include <sysinfo.h>
23#include "settings.hpp"
24
25PDHCollector::PDHCollector() : hStopEvent_(NULL)/*, data_(NULL)*/ {
26        std::wstring subsystem = SETTINGS_GET_STRING(check_system::PDH_SUBSYSTEM);
27        if (subsystem == setting_keys::check_system::PDH_SUBSYSTEM_FAST) {
28        } else if (subsystem == setting_keys::check_system::PDH_SUBSYSTEM_THREAD_SAFE) {
29                PDH::PDHFactory::set_threadSafe();
30        } else {
31                NSC_LOG_ERROR_STD(_T("Unknown PDH subsystem (") + subsystem + _T(") valid values are: fast and thread-safe"));
32        }
33}
34
35PDHCollector::~PDHCollector()
36{
37        if (hStopEvent_)
38                CloseHandle(hStopEvent_);
39//      delete data_;
40}
41
42boost::shared_ptr<PDHCollectors::PDHCollector> PDHCollector::system_counter_data::counter::create(int check_intervall) {
43        if (data_type == type_uint64 && data_format == format_large && collection_strategy == value) {
44                return boost::shared_ptr<PDHCollectors::PDHCollector>(new PDHCollectors::StaticPDHCounterListener<unsigned __int64, PDHCollectors::format_large, PDHCollectors::PDHCounterNormalMutex>);
45        } else if (data_type == type_int64 && data_format == format_large && collection_strategy == value) {
46                return boost::shared_ptr<PDHCollectors::PDHCollector>(new PDHCollectors::StaticPDHCounterListener<__int64, PDHCollectors::format_large, PDHCollectors::PDHCounterNormalMutex>);
47        } else if (data_type == type_int64 && data_format == format_large && collection_strategy == rrd) {
48                unsigned int buffer_size = get_buffer_length(check_intervall);
49                return boost::shared_ptr<PDHCollectors::PDHCollector>(new PDHCollectors::RoundINTPDHBufferListener<__int64, PDHCollectors::format_large, PDHCollectors::PDHCounterNormalMutex>(buffer_size));
50        }
51        return boost::shared_ptr<PDHCollectors::PDHCollector>();
52}
53
54
55/**
56* Thread that collects the data every "CHECK_INTERVAL" seconds.
57*
58* @param lpParameter Not used
59* @return thread exit status
60*
61* @author mickem
62*
63* @date 03-13-2004               
64*
65* @bug If we have "custom named" counters ?
66* @bug This whole concept needs work I think.
67*
68*/
69DWORD PDHCollector::threadProc(LPVOID lpParameter) {
70        hStopEvent_ = CreateEvent(NULL, TRUE, FALSE, NULL);
71        if (!hStopEvent_) {
72                NSC_LOG_ERROR_STD(_T("Create StopEvent failed: ") + error::lookup::last_error());
73                return 0;
74        }
75
76        system_counter_data *data = reinterpret_cast<system_counter_data*>(lpParameter);
77
78        check_intervall_ = data->check_intervall;
79        std::wstring default_buffer_length = data->buffer_length;
80        PDH::PDHQuery pdh;
81        bool bInit = true;
82
83        {
84                SetThreadLocale(MAKELCID(MAKELANGID(LANG_ENGLISH,SUBLANG_ENGLISH_US),SORT_DEFAULT));
85                WriteLock lock(&mutex_, true, 5000);
86                if (!lock.IsLocked()) {
87                        NSC_LOG_ERROR_STD(_T("Failed to get mutex when trying to start thread... thread will now die..."));
88                        bInit = false;
89                } else {
90                        pdh.removeAllCounters();
91                        NSC_DEBUG_MSG_STD(_T("Loading counters..."));
92                        BOOST_FOREACH(system_counter_data::counter c, data->counters) {
93                                try {
94                                        NSC_DEBUG_MSG_STD(_T("Loading counter: ") + c.alias + _T(" = ") + c.path);
95
96                                        c.set_default_buffer_size(default_buffer_length);
97                                        collector_ptr collector = c.create(check_intervall_);
98                                        if (collector) {
99                                                counters_[c.alias] = collector;
100                                                PDH::PDHQuery::counter_ptr counter = pdh.addCounter(c.path, collector);
101                                                PDH::PDHError status = counter->validate();
102                                                if (status.is_error()) {
103                                                        NSC_DEBUG_MSG_STD(_T("Counter status: ") + status.to_wstring());
104                                                }
105                                        } else {
106                                                NSC_LOG_ERROR_STD(_T("Failed to load counter: ") + c.alias + _T(" = ") + c.path);
107                                        }
108                                } catch (...) {
109                                        NSC_LOG_ERROR_STD(_T("EXCEPTION: Failed to load counter: ") + c.alias + _T(" = ") + c.path);
110                                }
111                        }
112                        try {
113                                pdh.open();
114                        } catch (const PDH::PDHException &e) {
115                                NSC_LOG_ERROR_STD(_T("Failed to open performance counters: ") + e.getError());
116                                bInit = false;
117                        }
118                }
119        }
120        data = NULL;
121        delete data;
122
123        DWORD waitStatus = 0;
124        if (bInit) {
125                bool first = true;
126                do {
127                        std::list<std::wstring> errors;
128                        {
129                                ReadLock lock(&mutex_, true, 5000);
130                                if (!lock.IsLocked()) {
131                                        NSC_LOG_ERROR(_T("Failed to get Mutex!"));
132                                } else {
133                                        try {
134                                                pdh.gatherData();
135                                        } catch (const PDH::PDHException &e) {
136                                                if (first) {    // If this is the first run an error will be thrown since the data is not yet available
137                                                        // This is "ok" but perhaps another solution would be better, but this works :)
138                                                        first = false;
139                                                } else {
140                                                        errors.push_back(_T("Failed to query performance counters: ") + e.getError());
141                                                }
142                                        } catch (...) {
143                                                errors.push_back(_T("Failed to query performance counters: "));
144                                        }
145                                }
146                        }
147                        for (std::list<std::wstring>::const_iterator cit = errors.begin(); cit != errors.end(); ++cit) {
148                                NSC_LOG_ERROR_STD(*cit);
149                        }
150                } while (((waitStatus = WaitForSingleObject(hStopEvent_, check_intervall_*100)) == WAIT_TIMEOUT));
151        } else {
152                NSC_LOG_ERROR_STD(_T("No performance counters were found we will not wait for the end instead..."));
153                waitStatus = WaitForSingleObject(hStopEvent_, INFINITE);
154        }
155        if (waitStatus != WAIT_OBJECT_0) {
156                NSC_LOG_ERROR(_T("Something odd happened when terminating PDH collection thread!"));
157        }
158
159        {
160                WriteLock lock(&mutex_, true, 5000);
161                if (!lock.IsLocked()) {
162                        NSC_LOG_ERROR(_T("Failed to get Mute when closing thread!"));
163                }
164
165                if (!CloseHandle(hStopEvent_)) {
166                        NSC_LOG_ERROR_STD(_T("Failed to close stopEvent handle: ") + error::lookup::last_error());
167                } else
168                        hStopEvent_ = NULL;
169                try {
170                        pdh.close();
171                } catch (const PDH::PDHException &e) {
172                        NSC_LOG_ERROR_STD(_T("Failed to close performance counters: ") + e.getError());
173                }
174        }
175        return 0;
176}
177
178__int64 PDHCollector::get_int_value(std::wstring counter) {
179        ReadLock lock(&mutex_, true, 5000);
180        if (!lock.IsLocked())  {
181                NSC_LOG_ERROR(_T("Failed to get Mutex for: ") + counter);
182                return 0;
183        }
184
185        counter_map::iterator it = counters_.find(counter);
186        if (it == counters_.end())
187                return 0;
188        collector_ptr ptr = (*it).second;
189        return ptr->get_int64();
190}
191
192double PDHCollector::get_avg_value(std::wstring counter, unsigned int delta) {
193        ReadLock lock(&mutex_, true, 5000);
194        if (!lock.IsLocked())  {
195                NSC_LOG_ERROR(_T("Failed to get Mutex for: ") + counter);
196                return 0;
197        }
198
199        counter_map::iterator it = counters_.find(counter);
200        if (it == counters_.end())
201                return 0;
202        collector_ptr ptr = (*it).second;
203        return ptr->get_average(delta);
204}
205
206
207/**
208* Request termination of the thread (waiting for thread termination is not handled)
209*/
210void PDHCollector::exitThread(void) {
211        if (hStopEvent_ == NULL) {
212                NSC_LOG_ERROR(_T("Stop event is not created!"));
213        } else if (!SetEvent(hStopEvent_)) {
214                        NSC_LOG_ERROR_STD(_T("SetStopEvent failed"));
215        }
216}
217/**
218* Get the average CPU usage for "time"
219* @param time Time to check
220* @return average CPU usage
221*/
222int PDHCollector::getCPUAvrage(std::wstring time) {
223        int frequency;
224        {
225                ReadLock lock(&mutex_, true, 5000);
226                if (!lock.IsLocked()) {
227                        NSC_LOG_ERROR(_T("Failed to get Mutex!"));
228                        return -1;
229                }
230                frequency = check_intervall_*100;
231
232        }
233        try {
234                unsigned int mseconds = strEx::stoui_as_time(time);
235                return static_cast<int>(get_avg_value(PDH_SYSTEM_KEY_CPU, mseconds/frequency));
236        } catch (PDHCollectors::PDHException &e) {
237                NSC_LOG_ERROR(_T("Failed to get CPU value: ") + e.getError());
238                return -1;
239        } catch (...) {
240                NSC_LOG_ERROR(_T("Failed to get CPU value"));
241                return -1;
242        }
243}
244/**
245* Get uptime from counter
246* @bug Do we need to collect this all the time ? (perhaps we can collect this in real time ?)
247* @return uptime for the system
248* @bug Are we overflow protected here ? (seem to recall some issues with overflow before ?)
249*/
250long long PDHCollector::getUptime() {
251        try {
252                return get_int_value(PDH_SYSTEM_KEY_UPT);
253        } catch (PDHCollectors::PDHException &e) {
254                NSC_LOG_ERROR(_T("Failed to get UPTIME value: ") + e.getError());
255                return -1;
256        } catch (...) {
257                NSC_LOG_ERROR(_T("Failed to get UPTIME value"));
258                return -1;
259        }
260}
261/**
262* Memory commit limit (your guess is as good as mine to what this is :)
263* @return Some form of memory check
264*/
265unsigned long long PDHCollector::getMemCommitLimit() {
266        try {
267                return get_int_value(PDH_SYSTEM_KEY_MCL);
268        } catch (PDHCollectors::PDHException &e) {
269                NSC_LOG_ERROR(_T("Failed to get MEM_CMT_LIMIT value: ") + e.getError());
270                return -1;
271        } catch (...) {
272                NSC_LOG_ERROR(_T("Failed to get MEM_CMT_LIMIT value"));
273                return -1;
274        }
275}
276/**
277*
278* Memory committed bytes (your guess is as good as mine to what this is :)
279* @return Some form of memory check
280*/
281unsigned long long PDHCollector::getMemCommit() {
282        try {
283                return get_int_value(PDH_SYSTEM_KEY_MCB);
284        } catch (PDHCollectors::PDHException &e) {
285                NSC_LOG_ERROR(_T("Failed to get MEM_CMT value: ") + e.getError());
286                return -1;
287        } catch (...) {
288                NSC_LOG_ERROR(_T("Failed to get MEM_CMT value"));
289                return -1;
290        }
291}
292
293double PDHCollector::get_double(std::wstring counter) {
294        ReadLock lock(&mutex_, true, 5000);
295        if (!lock.IsLocked()) {
296                NSC_LOG_ERROR(_T("Failed to get Mutex!"));
297                return -1;
298        }
299        try {
300                counter_map::iterator it = counters_.find(counter);
301                if (it == counters_.end()) {
302                        NSC_LOG_ERROR(_T("COunter not found: ") + counter);
303                        return -1;
304                }
305                return (*it).second->get_double();
306        } catch (PDHCollectors::PDHException &e) {
307                NSC_LOG_ERROR(_T("Failed to get double value: ") + e.getError());
308                return -1;
309        } catch (...) {
310                NSC_LOG_ERROR(_T("Failed to get double value"));
311                return -1;
312        }
313}
Note: See TracBrowser for help on using the repository browser.