5 changes: 4 additions & 1 deletion trappy/bare_trace.py
@@ -27,12 +27,13 @@ class BareTrace(object):

"""

def __init__(self, name=""):
def __init__(self, name="", build_df=True):
self.name = name
self.normalized_time = False
self.class_definitions = {}
self.trace_classes = []
self.basetime = 0
self.build_df = build_df

def get_duration(self):
"""Returns the largest time value of all classes,
@@ -133,6 +134,8 @@ def add_parsed_event(self, name, dfr, pivot=None):
setattr(self, name, event)

def finalize_objects(self):
if not self.build_df:
return
for trace_class in self.trace_classes:
trace_class.create_dataframe()
trace_class.finalize_object()
65 changes: 36 additions & 29 deletions trappy/base.py
@@ -105,6 +105,7 @@ def __init__(self, parse_raw=False):
self.comm_array = []
self.pid_array = []
self.cpu_array = []
self.callback = None
self.parse_raw = parse_raw

def finalize_object(self):
@@ -171,42 +172,60 @@ def append_data(self, time, comm, pid, cpu, data):
self.cpu_array.append(cpu)
self.data_array.append(data)

if not self.callback:
return
data_dict = self.generate_data_dict(comm, pid, cpu, data)
self.callback(time, data_dict)
Contributor:

Why not make the callback even more generic by passing back the exact signature used by generate_data_dict?

I'm asking that for two main reasons:

  1. the generate_data_dict can still be used from the callback if required
  2. I have a WIP series which aims at adding parsing for non-key-value-formatted events, for example the events injected from user space by the Android run-time. Here is an example of these events:
surfaceflinger-543   (  543) [002] ...1  3210.843141: tracing_mark_write: B|543|query
surfaceflinger-543   (  543) [002] ...1  3210.843145: tracing_mark_write: E
surfaceflinger-543   (  543) [002] ...1  3210.843149: tracing_mark_write: B|543|updateTexImage
surfaceflinger-543   (  543) [002] ...1  3210.843168: tracing_mark_write: B|543|acquireBuffer
surfaceflinger-543   (  543) [002] ...1  3210.843178: tracing_mark_write: B|543|com.prefabulated.touchlatency/com.prefabulated.touchlatency.TouchLatencyActivity#0: 1
surfaceflinger-543   (  543) [002] ...1  3210.843180: tracing_mark_write: E
surfaceflinger-543   (  543) [002] ...1  3210.843187: tracing_mark_write: C|543|com.prefabulated.touchlatency/com.prefabulated.touchlatency.TouchLatencyActivity#0|0
surfaceflinger-543   (  543) [002] ...1  3210.843208: tracing_mark_write: E

As you can see, the data portion of these events cannot be parsed by generate_data_dict.

Author:

I didn't follow how you wanted this done; can you give an example?

I think if this is to be made generic, it should be made so internally to trappy. The user's callback implementation shouldn't have any business parsing strings, in my opinion; trappy should parse them in a generic way and pass some generic data type to the callback. So in your systrace example, it should pass the callback something like { '__comm': 'surfaceflinger', '__pid': 543, 'mark': 'begin', 'name': 'query' }.
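For reference, a minimal sketch (not part of the diff) of what a per-event callback looks like under the interface added here: it receives the event timestamp and the dict built by generate_data_dict, so the "__comm"/"__pid"/"__cpu" keys are always present and the remaining keys come from the event's key=value pairs. The event fields used below are illustrative:

    def sched_switch_cb(time, data_dict):
        # Fires once per parsed event; data_dict is the generate_data_dict output.
        if data_dict["__cpu"] == 0:
            print(time, data_dict.get("next_comm"))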


def generate_data_dict(self, comm, pid, cpu, data_str):
data_dict = {"__comm": comm, "__pid": pid, "__cpu": cpu}
prev_key = None
for field in data_str.split():
if "=" not in field:
# Concatenation is supported only for "string" values
if type(data_dict[prev_key]) is not str:
continue
data_dict[prev_key] += ' ' + field
continue
(key, value) = field.split('=', 1)
try:
value = int(value)
except ValueError:
pass
data_dict[key] = value
prev_key = key
return data_dict

def generate_parsed_data(self):

# Get a rough idea of how much memory we have to play with
CHECK_MEM_COUNT = 10000
kb_free = _get_free_memory_kb()
starting_maxrss = getrusage(RUSAGE_SELF).ru_maxrss
check_memory_usage = True
check_memory_count = 1

for (comm, pid, cpu, data_str) in zip(self.comm_array, self.pid_array,
self.cpu_array, self.data_array):
data_dict = {"__comm": comm, "__pid": pid, "__cpu": cpu}
prev_key = None
for field in data_str.split():
if "=" not in field:
# Concatenation is supported only for "string" values
if type(data_dict[prev_key]) is not str:
continue
data_dict[prev_key] += ' ' + field
continue
(key, value) = field.split('=', 1)
try:
value = int(value)
except ValueError:
pass
data_dict[key] = value
prev_key = key
data_dict = self.generate_data_dict(comm, pid, cpu, data_str)

# When running out of memory, Pandas has been observed to segfault
# rather than throwing a proper Python error.
# Look at how much memory our process is using and warn if we seem
# to be getting close to the system's limit.
# to be getting close to the system's limit, check it only once
# in the beginning and then every CHECK_MEM_COUNT events
check_memory_count -= 1
if check_memory_count != 0:
Contributor (@bjackman, Apr 24, 2017):

I find this confusing - IMO this would be much clearer (no continue):

        if check_memory_usage and check_memory_count == 0:
            kb_used = (getrusage(RUSAGE_SELF).ru_maxrss - starting_maxrss)
            if kb_free and kb_used > kb_free * 0.9:
                warnings.warn("TRAPpy: Appear to be low on memory. "
                              "If errors arise, try providing more RAM")
                check_memory_usage = False
            check_memory_count = CHECK_MEM_COUNT

        yield data_dict

Not sure if this is just me..

Contributor:

PS: this getrusage stuff was originally added (by me) to get around the fact that Pandas was segfaulting, instead of raising a proper Python error, when it ran out of memory. It might be worth checking whether the latest Pandas still has that problem; if it doesn't, maybe we can drop this stuff entirely, as it's pretty ugly.

Author:

Brendan, the memory limit has never been a problem for me, even for large data frames. I don't mind ripping it out altogether unless anyone else has an objection.

Not sure how much RAM you had, but why not rip it out yourself and see if you still hit it with the new pandas? I agree it's pretty ugly.

Contributor:

Sure, I'll give it a try (I was using a VM with 1 GB of memory).

Contributor:

OK, I tested it and pandas still segfaults when you run out of memory, so we'll need to keep this hack.

yield data_dict
continue

kb_used = (getrusage(RUSAGE_SELF).ru_maxrss - starting_maxrss)
if check_memory_usage and kb_free and kb_used > kb_free * 0.9:
warnings.warn("TRAPpy: Appear to be low on memory. "
"If errors arise, try providing more RAM")
check_memory_usage = False

check_memory_count = CHECK_MEM_COUNT
yield data_dict

def create_dataframe(self):
@@ -237,15 +256,3 @@ def write_csv(self, fname):
:type fname: str
"""
self.data_frame.to_csv(fname)

def normalize_time(self, basetime):
"""Substract basetime from the Time of the data frame

:param basetime: The offset which needs to be subtracted from
the time index
:type basetime: float
"""
if basetime and not self.data_frame.empty:
self.data_frame.reset_index(inplace=True)
self.data_frame["Time"] = self.data_frame["Time"] - basetime
self.data_frame.set_index("Time", inplace=True)
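To illustrate the helper that the parsing loop above was refactored into (the comm/pid/cpu values and event fields here are made up, not from the diff), generate_data_dict turns the data portion of an event line into a dict, converting values to int where possible and folding fields without an "=" into the previous string value:

    parser = Base()
    parser.generate_data_dict("trace-cmd", 1234, 0,
                              "prev_comm=kworker/0:1 prev_pid=4 next_comm=swapper/0 next_pid=0")
    # -> {"__comm": "trace-cmd", "__pid": 1234, "__cpu": 0,
    #     "prev_comm": "kworker/0:1", "prev_pid": 4,
    #     "next_comm": "swapper/0", "next_pid": 0}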
19 changes: 12 additions & 7 deletions trappy/ftrace.py
@@ -54,8 +54,9 @@ class GenericFTrace(BareTrace):
dynamic_classes = {}

def __init__(self, name="", normalize_time=True, scope="all",
events=[], window=(0, None), abs_window=(0, None)):
super(GenericFTrace, self).__init__(name)
events=[], event_callbacks={}, window=(0, None),
abs_window=(0, None), build_df=True):
super(GenericFTrace, self).__init__(name, build_df)

if not hasattr(self, "needs_raw_parsing"):
self.needs_raw_parsing = False
@@ -73,6 +74,8 @@ def __init__(self, name="", normalize_time=True, scope="all",

for attr, class_def in self.class_definitions.iteritems():
trace_class = class_def()
if event_callbacks.has_key(attr):
trace_class.callback = event_callbacks[attr]
setattr(self, attr, trace_class)
self.trace_classes.append(trace_class)

@@ -82,9 +85,6 @@
raw=True)
self.finalize_objects()

if normalize_time:
self.normalize_time()

@classmethod
def register_parser(cls, cobject, scope):
"""Register the class as an Event. This function
@@ -207,6 +207,9 @@ def contains_unique_word(line, unique_words=cls_for_unique_word.keys()):
except AttributeError:
continue

if self.normalize_time:
timestamp = timestamp - self.basetime

data_str = line[data_start_idx:]

# Remove empty arrays from the trace
@@ -480,14 +483,16 @@ class FTrace(GenericFTrace):
"""

def __init__(self, path=".", name="", normalize_time=True, scope="all",
events=[], window=(0, None), abs_window=(0, None)):
events=[], event_callbacks={}, window=(0, None),
abs_window=(0, None), build_df=True):
self.trace_path, self.trace_path_raw = self.__process_path(path)
self.needs_raw_parsing = True

self.__populate_metadata()

super(FTrace, self).__init__(name, normalize_time, scope, events,
window, abs_window)
event_callbacks, window, abs_window,
build_df)

def __process_path(self, basepath):
"""Process the path and return the path to the trace text file"""
9 changes: 4 additions & 5 deletions trappy/sched.py
@@ -108,11 +108,10 @@ class SchedSwitch(Base):
def __init__(self):
super(SchedSwitch, self).__init__(parse_raw=True)

def create_dataframe(self):
self.data_array = [line.replace(" ==> ", " ", 1)
for line in self.data_array]

super(SchedSwitch, self).create_dataframe()
def append_data(self, time, comm, pid, cpu, data):
data_rep = data.replace(" ==> ", " ")
super(SchedSwitch, self).append_data(time, comm, pid, cpu,
data_rep)

register_ftrace_parser(SchedSwitch, "sched")

9 changes: 6 additions & 3 deletions trappy/systrace.py
@@ -50,13 +50,16 @@ class SysTrace(GenericFTrace):
"""

def __init__(self, path=".", name="", normalize_time=True, scope="all",
events=[], window=(0, None), abs_window=(0, None)):
events=[], event_callbacks={}, window=(0, None),
abs_window=(0, None), build_df=True):

self.trace_path = path

super(SysTrace, self).__init__(name, normalize_time, scope, events,
window, abs_window)

event_callbacks, window, abs_window,
build_df)
if not build_df:
return
try:
self._cpus = 1 + self.sched_switch.data_frame["__cpu"].max()
except AttributeError:
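Taken together, a rough usage sketch of the new constructor parameters (the trace path, scope and event attribute name are assumptions, not taken from this diff): event_callbacks is keyed by event attribute name and streams events to the callback as the trace is parsed, while build_df=False skips building the Pandas data frames entirely:

    import trappy

    def on_sched_switch(time, data_dict):
        # Called for every sched_switch event during parsing.
        print(time, data_dict["__cpu"], data_dict.get("next_comm"))

    # Stream events through the callback only; no data frames are built.
    trace = trappy.FTrace("trace.dat", scope="sched",
                          event_callbacks={"sched_switch": on_sched_switch},
                          build_df=False)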