source: nscp/modules/CheckSystem/PDHCollector.cpp @ ee230f7

0.4.10.4.2
Last change on this file since ee230f7 was ee230f7, checked in by Michael Medin <michael@…>, 11 months ago
  • Hopefully fixed the "can't load counter" issue by reworking how counters are handled.
  • Property mode set to 100644
File size: 9.8 KB
Line 
1//////////////////////////////////////////////////////////////////////////
2// PDH Collector
3//
4// Functions from this file collects data from the PDH subsystem and stores
5// it for later use
6// *NOTICE* that this is done in a separate thread so threading issues has
7// to be handled. I handle threading issues in the CounterListener's get/
8// set accessors.
9//
10// Copyright (c) 2004 MySolutions NORDIC (http://www.medin.nu)
11//
12// Date: 2004-03-13
13// Author: Michael Medin - <michael@medin.name>
14//
15// This software is provided "AS IS", without a warranty of any kind.
16// You are free to use/modify this code but leave this header intact.
17//
18//////////////////////////////////////////////////////////////////////////
19
20#include "stdafx.h"
21#include "PDHCollector.h"
22#include <sysinfo.h>
23#include "settings.hpp"
24
25PDHCollector::PDHCollector() : stop_event_(NULL) {
26}
27
28PDHCollector::~PDHCollector()
29{
30        if (stop_event_ != NULL)
31                CloseHandle(stop_event_);
32}
33
34boost::shared_ptr<PDHCollectors::PDHCollector> PDHCollector::system_counter_data::counter::create(int check_intervall) {
35        if (data_type == type_uint64 && data_format == format_large && collection_strategy == value) {
36                return boost::shared_ptr<PDHCollectors::PDHCollector>(new PDHCollectors::StaticPDHCounterListener<unsigned __int64, PDHCollectors::format_large, PDHCollectors::PDHCounterNormalMutex>);
37        } else if (data_type == type_int64 && data_format == format_large && collection_strategy == value) {
38                return boost::shared_ptr<PDHCollectors::PDHCollector>(new PDHCollectors::StaticPDHCounterListener<__int64, PDHCollectors::format_large, PDHCollectors::PDHCounterNormalMutex>);
39        } else if (data_type == type_int64 && data_format == format_large && collection_strategy == rrd) {
40                unsigned int buffer_size = get_buffer_length(check_intervall);
41                return boost::shared_ptr<PDHCollectors::PDHCollector>(new PDHCollectors::RoundINTPDHBufferListener<__int64, PDHCollectors::format_large, PDHCollectors::PDHCounterNormalMutex>(buffer_size));
42        }
43        return boost::shared_ptr<PDHCollectors::PDHCollector>();
44}
45
46
47/**
48* Thread that collects the data every "CHECK_INTERVAL" seconds.
49*
50* @param lpParameter Not used
51* @return thread exit status
52*
53* @author mickem
54*
55* @date 03-13-2004               
56*
57* @bug If we have "custom named" counters ?
58* @bug This whole concept needs work I think.
59*
60*/
61void PDHCollector::thread_proc() {
62
63        if (!thread_data_) {
64                NSC_LOG_ERROR_STD(_T("No configuration for PDH thread: Exiting"));
65                return;
66
67        }
68        if (thread_data_->subsystem == _T("fast") || thread_data_->subsystem == _T("auto")) {
69        } else if (thread_data_->subsystem == _T("thread-safe")) {
70                PDH::PDHFactory::set_threadSafe();
71        } else {
72                NSC_LOG_ERROR_STD(_T("Unknown PDH subsystem (") + thread_data_->subsystem + _T(") valid values are: fast (auto) and thread-safe"));
73        }
74
75        check_intervall_ = thread_data_->check_intervall;
76        std::wstring default_buffer_length = thread_data_->buffer_length;
77        PDH::PDHQuery pdh;
78
79        if (thread_data_->counters.empty()) {
80                NSC_LOG_ERROR_STD(_T("No counters configure in PDH thread."));
81                return;
82        }
83
84        {
85                SetThreadLocale(MAKELCID(MAKELANGID(LANG_ENGLISH,SUBLANG_ENGLISH_US),SORT_DEFAULT));
86                boost::unique_lock<boost::shared_mutex> writeLock(mutex_, boost::get_system_time() + boost::posix_time::seconds(10));
87                if (!writeLock.owns_lock()) {
88                        NSC_LOG_ERROR_STD(_T("Failed to get mutex when trying to start thread."));
89                        return;
90                }
91                pdh.removeAllCounters();
92                BOOST_FOREACH(system_counter_data::counter c, thread_data_->counters) {
93                        try {
94                                NSC_DEBUG_MSG_STD(_T("Loading counter: ") + c.alias + _T(" = ") + c.path);
95                                c.set_default_buffer_size(default_buffer_length);
96                                collector_ptr collector = c.create(check_intervall_);
97                                if (collector) {
98                                        counters_[c.alias] = collector;
99                                        pdh.addCounter(c.path, collector);
100                                } else {
101                                        NSC_LOG_ERROR_STD(_T("Failed to load counter: ") + c.alias + _T(" = ") + c.path);
102                                }
103                        } catch (...) {
104                                NSC_LOG_ERROR_STD(_T("EXCEPTION: Failed to load counter: ") + c.alias + _T(" = ") + c.path);
105                        }
106                }
107                try {
108                        pdh.open();
109                } catch (const PDH::PDHException &e) {
110                        NSC_LOG_ERROR_STD(_T("Failed to open performance counters: ") + e.getError());
111                        return;
112                }
113        }
114
115        DWORD waitStatus = 0;
116        bool first = true;
117        do {
118                std::list<std::wstring> errors;
119                {
120                        boost::unique_lock<boost::shared_mutex> writeLock(mutex_, boost::get_system_time() + boost::posix_time::seconds(5));
121                        if (!writeLock.owns_lock()) {
122                                NSC_LOG_ERROR(_T("Failed to get Mutex!"));
123                        } else {
124                                try {
125                                        pdh.gatherData();
126                                } catch (const PDH::PDHException &e) {
127                                        if (first) {    // If this is the first run an error will be thrown since the data is not yet available
128                                                // This is "ok" but perhaps another solution would be better, but this works :)
129                                                first = false;
130                                        } else {
131                                                errors.push_back(_T("Failed to query performance counters: ") + e.getError());
132                                        }
133                                } catch (...) {
134                                        errors.push_back(_T("Failed to query performance counters: "));
135                                }
136                        }
137                }
138                for (std::list<std::wstring>::const_iterator cit = errors.begin(); cit != errors.end(); ++cit) {
139                        NSC_LOG_ERROR_STD(*cit);
140                }
141        } while (((waitStatus = WaitForSingleObject(stop_event_, check_intervall_*100)) == WAIT_TIMEOUT));
142        if (waitStatus != WAIT_OBJECT_0) {
143                NSC_LOG_ERROR(_T("Something odd happened when terminating PDH collection thread!"));
144                return;
145        }
146
147        {
148                boost::unique_lock<boost::shared_mutex> writeLock(mutex_, boost::get_system_time() + boost::posix_time::seconds(5));
149                if (!writeLock.owns_lock()) {
150                        NSC_LOG_ERROR(_T("Failed to get Mute when closing thread!"));
151                }
152                try {
153                        pdh.close();
154                } catch (const PDH::PDHException &e) {
155                        NSC_LOG_ERROR_STD(_T("Failed to close performance counters: ") + e.getError());
156                }
157        }
158}
159
160__int64 PDHCollector::get_int_value(std::wstring counter) {
161        boost::shared_lock<boost::shared_mutex> readLock(mutex_, boost::get_system_time() + boost::posix_time::seconds(5));
162        if (!readLock.owns_lock()) {
163                NSC_LOG_ERROR(_T("Failed to get Mutex for: ") + counter);
164                return 0;
165        }
166
167        counter_map::iterator it = counters_.find(counter);
168        if (it == counters_.end())
169                return 0;
170        collector_ptr ptr = (*it).second;
171        return ptr->get_int64();
172}
173
174double PDHCollector::get_avg_value(std::wstring counter, unsigned int delta) {
175        boost::shared_lock<boost::shared_mutex> readLock(mutex_, boost::get_system_time() + boost::posix_time::seconds(5));
176        if (!readLock.owns_lock()) {
177                NSC_LOG_ERROR(_T("Failed to get Mutex for: ") + counter);
178                return 0;
179        }
180
181        counter_map::iterator it = counters_.find(counter);
182        if (it == counters_.end())
183                return 0;
184        collector_ptr ptr = (*it).second;
185        return ptr->get_average(delta);
186}
187
188
189/**
190* Get the average CPU usage for "time"
191* @param time Time to check
192* @return average CPU usage
193*/
194int PDHCollector::getCPUAvrage(std::wstring time) {
195        int frequency;
196        {
197                boost::shared_lock<boost::shared_mutex> readLock(mutex_, boost::get_system_time() + boost::posix_time::seconds(5));
198                if (!readLock.owns_lock()) {
199                        NSC_LOG_ERROR(_T("Failed to get Mutex!"));
200                        return -1;
201                }
202                frequency = check_intervall_*100;
203
204        }
205        try {
206                unsigned int mseconds = strEx::stoui_as_time(time);
207                return static_cast<int>(get_avg_value(PDH_SYSTEM_KEY_CPU, mseconds/frequency));
208        } catch (PDHCollectors::PDHException &e) {
209                NSC_LOG_ERROR(_T("Failed to get CPU value: ") + e.getError());
210                return -1;
211        } catch (...) {
212                NSC_LOG_ERROR(_T("Failed to get CPU value"));
213                return -1;
214        }
215}
216/**
217* Get uptime from counter
218* @bug Do we need to collect this all the time ? (perhaps we can collect this in real time ?)
219* @return uptime for the system
220* @bug Are we overflow protected here ? (seem to recall some issues with overflow before ?)
221*/
222long long PDHCollector::getUptime() {
223        try {
224                return get_int_value(PDH_SYSTEM_KEY_UPT);
225        } catch (PDHCollectors::PDHException &e) {
226                NSC_LOG_ERROR(_T("Failed to get UPTIME value: ") + e.getError());
227                return -1;
228        } catch (...) {
229                NSC_LOG_ERROR(_T("Failed to get UPTIME value"));
230                return -1;
231        }
232}
233/**
234* Memory commit limit (your guess is as good as mine to what this is :)
235* @return Some form of memory check
236*/
237unsigned long long PDHCollector::getMemCommitLimit() {
238        try {
239                return get_int_value(PDH_SYSTEM_KEY_MCL);
240        } catch (PDHCollectors::PDHException &e) {
241                NSC_LOG_ERROR(_T("Failed to get MEM_CMT_LIMIT value: ") + e.getError());
242                return -1;
243        } catch (...) {
244                NSC_LOG_ERROR(_T("Failed to get MEM_CMT_LIMIT value"));
245                return -1;
246        }
247}
248/**
249*
250* Memory committed bytes (your guess is as good as mine to what this is :)
251* @return Some form of memory check
252*/
253unsigned long long PDHCollector::getMemCommit() {
254        try {
255                return get_int_value(PDH_SYSTEM_KEY_MCB);
256        } catch (PDHCollectors::PDHException &e) {
257                NSC_LOG_ERROR(_T("Failed to get MEM_CMT value: ") + e.getError());
258                return -1;
259        } catch (...) {
260                NSC_LOG_ERROR(_T("Failed to get MEM_CMT value"));
261                return -1;
262        }
263}
264
265double PDHCollector::get_double(std::wstring counter) {
266        boost::shared_lock<boost::shared_mutex> readLock(mutex_, boost::get_system_time() + boost::posix_time::seconds(5));
267        if (!readLock.owns_lock()) {
268                NSC_LOG_ERROR(_T("Failed to get Mutex!"));
269                return -1;
270        }
271        try {
272                counter_map::iterator it = counters_.find(counter);
273                if (it == counters_.end()) {
274                        NSC_LOG_ERROR(_T("COunter not found: ") + counter);
275                        return -1;
276                }
277                return (*it).second->get_double();
278        } catch (PDHCollectors::PDHException &e) {
279                NSC_LOG_ERROR(_T("Failed to get double value: ") + e.getError());
280                return -1;
281        } catch (...) {
282                NSC_LOG_ERROR(_T("Failed to get double value"));
283                return -1;
284        }
285}
286
287void PDHCollector::start(boost::shared_ptr<system_counter_data> data)
288{
289        if (thread_)
290                return;
291        thread_data_ = data;
292        stop_event_ = CreateEvent(NULL, TRUE, FALSE, _T("PDHCollectorShutdown"));
293        thread_ = boost::shared_ptr<boost::thread>(new boost::thread(boost::bind(&PDHCollector::thread_proc, this)));
294}
295
296bool PDHCollector::stop()
297{
298        SetEvent(stop_event_);
299        if (thread_)
300                return thread_->timed_join(boost::posix_time::seconds(5));
301        return true;
302}
Note: See TracBrowser for help on using the repository browser.