5 changes: 4 additions & 1 deletion trappy/bare_trace.py
@@ -27,12 +27,13 @@ class BareTrace(object):

"""

def __init__(self, name=""):
def __init__(self, name="", build_df=True):
self.name = name
self.normalized_time = False
self.class_definitions = {}
self.trace_classes = []
self.basetime = 0
self.build_df = build_df

def get_duration(self):
"""Returns the largest time value of all classes,
@@ -133,6 +134,8 @@ def add_parsed_event(self, name, dfr, pivot=None):
setattr(self, name, event)

def finalize_objects(self):
if not self.build_df:
return
for trace_class in self.trace_classes:
trace_class.create_dataframe()
trace_class.finalize_object()
65 changes: 36 additions & 29 deletions trappy/base.py
@@ -105,6 +105,7 @@ def __init__(self, parse_raw=False):
self.comm_array = []
self.pid_array = []
self.cpu_array = []
self.callback = None
self.parse_raw = parse_raw

def finalize_object(self):
@@ -171,42 +172,60 @@ def append_data(self, time, comm, pid, cpu, data):
self.cpu_array.append(cpu)
self.data_array.append(data)

if not self.callback:
return
data_dict = self.generate_data_dict(comm, pid, cpu, data)
self.callback(time, data_dict)
Contributor:

Why not make the callback even more generic by passing back the exact signature used by generate_data_dict?

I'm asking that for two main reasons:

  1. the generate_data_dict can still be used from the callback if required
  2. I have a WIP series which aims at adding parsing for non-key-value-formatted events, for example the events injected from user space by the Android run-time. Here is an example of these events:
surfaceflinger-543   (  543) [002] ...1  3210.843141: tracing_mark_write: B|543|query
surfaceflinger-543   (  543) [002] ...1  3210.843145: tracing_mark_write: E
surfaceflinger-543   (  543) [002] ...1  3210.843149: tracing_mark_write: B|543|updateTexImage
surfaceflinger-543   (  543) [002] ...1  3210.843168: tracing_mark_write: B|543|acquireBuffer
surfaceflinger-543   (  543) [002] ...1  3210.843178: tracing_mark_write: B|543|com.prefabulated.touchlatency/com.prefabulated.touchlatency.TouchLatencyActivity#0: 1
surfaceflinger-543   (  543) [002] ...1  3210.843180: tracing_mark_write: E
surfaceflinger-543   (  543) [002] ...1  3210.843187: tracing_mark_write: C|543|com.prefabulated.touchlatency/com.prefabulated.touchlatency.TouchLatencyActivity#0|0
surfaceflinger-543   (  543) [002] ...1  3210.843208: tracing_mark_write: E

As you can see, the data portion of these events cannot be parsed by generate_data_dict.

Author:

I didn't follow how you wanted this done; can you give an example?

I think if this is to be made generic, it should be made so internally to trappy. The user's callback implementation shouldn't have any business parsing strings, in my opinion; trappy should parse them in a generic way and pass some generic data type to the callback. So in your systrace example, it should pass the callback something like { '__comm': 'surfaceflinger', '__pid': 543, 'mark': 'begin', 'name': 'query' }.
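For reference, a minimal sketch (not part of the diff) of what a per-event callback looks like under the interface added here: it receives the event timestamp and the dict built by generate_data_dict, so the "__comm"/"__pid"/"__cpu" keys are always present and the remaining keys come from the event's key=value pairs. The event fields used below are illustrative:

    def sched_switch_cb(time, data_dict):
        # Fires once per parsed event; data_dict is the generate_data_dict output.
        if data_dict["__cpu"] == 0:
            print(time, data_dict.get("next_comm"))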


def generate_data_dict(self, comm, pid, cpu, data_str):
data_dict = {"__comm": comm, "__pid": pid, "__cpu": cpu}
prev_key = None
for field in data_str.split():
if "=" not in field:
# Concatenation is supported only for "string" values
if type(data_dict[prev_key]) is not str:
continue
data_dict[prev_key] += ' ' + field
continue
(key, value) = field.split('=', 1)
try:
value = int(value)
except ValueError:
pass
data_dict[key] = value
prev_key = key
return data_dict

def generate_parsed_data(self):

# Get a rough idea of how much memory we have to play with
CHECK_MEM_COUNT = 10000
kb_free = _get_free_memory_kb()
starting_maxrss = getrusage(RUSAGE_SELF).ru_maxrss
check_memory_usage = True
check_memory_count = 1

for (comm, pid, cpu, data_str) in zip(self.comm_array, self.pid_array,
self.cpu_array, self.data_array):
data_dict = {"__comm": comm, "__pid": pid, "__cpu": cpu}
prev_key = None
for field in data_str.split():
if "=" not in field:
# Concatenation is supported only for "string" values
if type(data_dict[prev_key]) is not str:
continue
data_dict[prev_key] += ' ' + field
continue
(key, value) = field.split('=', 1)
try:
value = int(value)
except ValueError:
pass
data_dict[key] = value
prev_key = key
data_dict = self.generate_data_dict(comm, pid, cpu, data_str)

# When running out of memory, Pandas has been observed to segfault
# rather than throwing a proper Python error.
# Look at how much memory our process is using and warn if we seem
# to be getting close to the system's limit.
# to be getting close to the system's limit, check it only once
# in the beginning and then every CHECK_MEM_COUNT events
check_memory_count -= 1
if check_memory_count != 0:
Contributor (@bjackman, Apr 24, 2017):

I find this confusing - IMO this would be much clearer (no continue):

        if check_memory_usage and check_memory_count == 0:
            kb_used = (getrusage(RUSAGE_SELF).ru_maxrss - starting_maxrss)
            if kb_free and kb_used > kb_free * 0.9:
                warnings.warn("TRAPpy: Appear to be low on memory. "
                              "If errors arise, try providing more RAM")
                check_memory_usage = False
            check_memory_count = CHECK_MEM_COUNT

        yield data_dict

Not sure if this is just me..

Contributor:

PS: this getrusage stuff was originally added (by me) to get around the fact that Pandas was segfaulting, instead of raising a proper Python error, when it ran out of memory. It might be worth checking whether the latest Pandas still has that problem; if it doesn't, maybe we can drop this stuff entirely, as it's pretty ugly.

Author:

Brendan, the memory limit has never been a problem for me, even for large data frames. I don't mind ripping it out altogether unless anyone else has an objection.

Not sure how much RAM you had, but why not rip it out yourself and see if you still hit it with the new pandas? I agree it's pretty ugly.

Contributor:

Sure, I'll give it a try (I was using a VM with 1 GB of memory).

Contributor:

OK, I tested it and pandas still segfaults when you run out of memory, so we'll need to keep this hack.

yield data_dict
continue

kb_used = (getrusage(RUSAGE_SELF).ru_maxrss - starting_maxrss)
if check_memory_usage and kb_free and kb_used > kb_free * 0.9:
warnings.warn("TRAPpy: Appear to be low on memory. "
"If errors arise, try providing more RAM")
check_memory_usage = False

check_memory_count = CHECK_MEM_COUNT
yield data_dict

def create_dataframe(self):
@@ -237,15 +256,3 @@ def write_csv(self, fname):
:type fname: str
"""
self.data_frame.to_csv(fname)

def normalize_time(self, basetime):
"""Substract basetime from the Time of the data frame

:param basetime: The offset which needs to be subtracted from
the time index
:type basetime: float
"""
if basetime and not self.data_frame.empty:
self.data_frame.reset_index(inplace=True)
self.data_frame["Time"] = self.data_frame["Time"] - basetime
self.data_frame.set_index("Time", inplace=True)
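To illustrate the helper that the parsing loop above was refactored into (the comm/pid/cpu values and event fields here are made up, not from the diff), generate_data_dict turns the data portion of an event line into a dict, converting values to int where possible and folding fields without an "=" into the previous string value:

    parser = Base()
    parser.generate_data_dict("trace-cmd", 1234, 0,
                              "prev_comm=kworker/0:1 prev_pid=4 next_comm=swapper/0 next_pid=0")
    # -> {"__comm": "trace-cmd", "__pid": 1234, "__cpu": 0,
    #     "prev_comm": "kworker/0:1", "prev_pid": 4,
    #     "next_comm": "swapper/0", "next_pid": 0}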
19 changes: 12 additions & 7 deletions trappy/ftrace.py
@@ -54,8 +54,9 @@ class GenericFTrace(BareTrace):
dynamic_classes = {}

def __init__(self, name="", normalize_time=True, scope="all",
events=[], window=(0, None), abs_window=(0, None)):
super(GenericFTrace, self).__init__(name)
events=[], event_callbacks={}, window=(0, None),
abs_window=(0, None), build_df=True):
super(GenericFTrace, self).__init__(name, build_df)

if not hasattr(self, "needs_raw_parsing"):
self.needs_raw_parsing = False
@@ -73,6 +74,8 @@ def __init__(self, name="", normalize_time=True, scope="all",

for attr, class_def in self.class_definitions.iteritems():
trace_class = class_def()
if event_callbacks.has_key(attr):
trace_class.callback = event_callbacks[attr]
setattr(self, attr, trace_class)
self.trace_classes.append(trace_class)

@@ -82,9 +85,6 @@
raw=True)
self.finalize_objects()

if normalize_time:
self.normalize_time()

@classmethod
def register_parser(cls, cobject, scope):
"""Register the class as an Event. This function
@@ -207,6 +207,9 @@ def contains_unique_word(line, unique_words=cls_for_unique_word.keys()):
except AttributeError:
continue

if self.normalize_time:
timestamp = timestamp - self.basetime

data_str = line[data_start_idx:]

# Remove empty arrays from the trace
@@ -480,14 +483,16 @@ class FTrace(GenericFTrace):
"""

def __init__(self, path=".", name="", normalize_time=True, scope="all",
events=[], window=(0, None), abs_window=(0, None)):
events=[], event_callbacks={}, window=(0, None),
abs_window=(0, None), build_df=True):
self.trace_path, self.trace_path_raw = self.__process_path(path)
self.needs_raw_parsing = True

self.__populate_metadata()

super(FTrace, self).__init__(name, normalize_time, scope, events,
window, abs_window)
event_callbacks, window, abs_window,
build_df)

def __process_path(self, basepath):
"""Process the path and return the path to the trace text file"""
9 changes: 4 additions & 5 deletions trappy/sched.py
@@ -108,11 +108,10 @@ class SchedSwitch(Base):
def __init__(self):
super(SchedSwitch, self).__init__(parse_raw=True)

def create_dataframe(self):
self.data_array = [line.replace(" ==> ", " ", 1)
for line in self.data_array]

super(SchedSwitch, self).create_dataframe()
def append_data(self, time, comm, pid, cpu, data):
data_rep = data.replace(" ==> ", " ")
super(SchedSwitch, self).append_data(time, comm, pid, cpu,
data_rep)

register_ftrace_parser(SchedSwitch, "sched")

9 changes: 6 additions & 3 deletions trappy/systrace.py
@@ -50,13 +50,16 @@ class SysTrace(GenericFTrace):
"""

def __init__(self, path=".", name="", normalize_time=True, scope="all",
events=[], window=(0, None), abs_window=(0, None)):
events=[], event_callbacks={}, window=(0, None),
abs_window=(0, None), build_df=True):

self.trace_path = path

super(SysTrace, self).__init__(name, normalize_time, scope, events,
window, abs_window)

event_callbacks, window, abs_window,
build_df)
if not build_df:
return
try:
self._cpus = 1 + self.sched_switch.data_frame["__cpu"].max()
except AttributeError:
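Taken together, a rough usage sketch of the new constructor parameters (the trace path, scope and event attribute name are assumptions, not taken from this diff): event_callbacks is keyed by event attribute name and streams events to the callback as the trace is parsed, while build_df=False skips building the Pandas data frames entirely:

    import trappy

    def on_sched_switch(time, data_dict):
        # Called for every sched_switch event during parsing.
        print(time, data_dict["__cpu"], data_dict.get("next_comm"))

    # Stream events through the callback only; no data frames are built.
    trace = trappy.FTrace("trace.dat", scope="sched",
                          event_callbacks={"sched_switch": on_sched_switch},
                          build_df=False)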