-
Notifications
You must be signed in to change notification settings - Fork 33
[RFC] trappy: add support for event callbacks #241
Changes from all commits
2d4439c
bffdec0
b1974af
dd299a8
90c79d8
0dfed97
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -105,6 +105,7 @@ def __init__(self, parse_raw=False): | |
| self.comm_array = [] | ||
| self.pid_array = [] | ||
| self.cpu_array = [] | ||
| self.callback = None | ||
| self.parse_raw = parse_raw | ||
|
|
||
| def finalize_object(self): | ||
|
|
@@ -171,42 +172,60 @@ def append_data(self, time, comm, pid, cpu, data): | |
| self.cpu_array.append(cpu) | ||
| self.data_array.append(data) | ||
|
|
||
| if not self.callback: | ||
| return | ||
| data_dict = self.generate_data_dict(comm, pid, cpu, data) | ||
| self.callback(time, data_dict) | ||
|
|
||
| def generate_data_dict(self, comm, pid, cpu, data_str): | ||
| data_dict = {"__comm": comm, "__pid": pid, "__cpu": cpu} | ||
| prev_key = None | ||
| for field in data_str.split(): | ||
| if "=" not in field: | ||
| # Concatenation is supported only for "string" values | ||
| if type(data_dict[prev_key]) is not str: | ||
| continue | ||
| data_dict[prev_key] += ' ' + field | ||
| continue | ||
| (key, value) = field.split('=', 1) | ||
| try: | ||
| value = int(value) | ||
| except ValueError: | ||
| pass | ||
| data_dict[key] = value | ||
| prev_key = key | ||
| return data_dict | ||
|
|
||
| def generate_parsed_data(self): | ||
|
|
||
| # Get a rough idea of how much memory we have to play with | ||
| CHECK_MEM_COUNT = 10000 | ||
| kb_free = _get_free_memory_kb() | ||
| starting_maxrss = getrusage(RUSAGE_SELF).ru_maxrss | ||
| check_memory_usage = True | ||
| check_memory_count = 1 | ||
|
|
||
| for (comm, pid, cpu, data_str) in zip(self.comm_array, self.pid_array, | ||
| self.cpu_array, self.data_array): | ||
| data_dict = {"__comm": comm, "__pid": pid, "__cpu": cpu} | ||
| prev_key = None | ||
| for field in data_str.split(): | ||
| if "=" not in field: | ||
| # Concatenation is supported only for "string" values | ||
| if type(data_dict[prev_key]) is not str: | ||
| continue | ||
| data_dict[prev_key] += ' ' + field | ||
| continue | ||
| (key, value) = field.split('=', 1) | ||
| try: | ||
| value = int(value) | ||
| except ValueError: | ||
| pass | ||
| data_dict[key] = value | ||
| prev_key = key | ||
| data_dict = self.generate_data_dict(comm, pid, cpu, data_str) | ||
|
|
||
| # When running out of memory, Pandas has been observed to segfault | ||
| # rather than throwing a proper Python error. | ||
| # Look at how much memory our process is using and warn if we seem | ||
| # to be getting close to the system's limit. | ||
| # to be getting close to the system's limit, check it only once | ||
| # in the beginning and then every CHECK_MEM_COUNT events | ||
| check_memory_count -= 1 | ||
| if check_memory_count != 0: | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I find this confusing - IMO this would be much clearer (no Not sure if this is just me..
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. PS this
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Brendan, the memory limit has never been a problem for me even for large data frames, I don't mind ripping it out altogether unless anyone else has an objection? Not sure how much RAM you had, but why not rip it out yourself and see if you still hit it with new pandas? I agree its pretty ugly.
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Sure, I'll give it a try (I was using a VM with 1Gb memory).
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. OK, I tested it and pandas still segfaults when you run out of memory, so we'll need to keep this hack. |
||
| yield data_dict | ||
| continue | ||
|
|
||
| kb_used = (getrusage(RUSAGE_SELF).ru_maxrss - starting_maxrss) | ||
| if check_memory_usage and kb_free and kb_used > kb_free * 0.9: | ||
| warnings.warn("TRAPpy: Appear to be low on memory. " | ||
| "If errors arise, try providing more RAM") | ||
| check_memory_usage = False | ||
|
|
||
| check_memory_count = CHECK_MEM_COUNT | ||
| yield data_dict | ||
|
|
||
| def create_dataframe(self): | ||
|
|
@@ -237,15 +256,3 @@ def write_csv(self, fname): | |
| :type fname: str | ||
| """ | ||
| self.data_frame.to_csv(fname) | ||
|
|
||
| def normalize_time(self, basetime): | ||
| """Substract basetime from the Time of the data frame | ||
|
|
||
| :param basetime: The offset which needs to be subtracted from | ||
| the time index | ||
| :type basetime: float | ||
| """ | ||
| if basetime and not self.data_frame.empty: | ||
| self.data_frame.reset_index(inplace=True) | ||
| self.data_frame["Time"] = self.data_frame["Time"] - basetime | ||
| self.data_frame.set_index("Time", inplace=True) | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Why not make the callback even more generic by passing back the exact signature used by
generate_data_dict? I'm asking that for two main reasons:
generate_data_dict can still be used from the callback if required. As you can see, the
data portion of these events cannot be parsed by generate_data_dict. There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I didn't follow how you wanted this done, can you give an example?
I think if this is to be made generic, it should be made so internal to trappy. The user's callback implementation shouldn't have any business with parsing strings in my opinion; trappy should parse them in a generic way and pass some generic data type to the callback. So in your trace for systrace, it should pass to the callback something like { '__comm': 'surfaceflinger', '__pid': 543, 'mark': 'begin', 'name': 'query' }