Merge pull request #1336 from MetadataForensics/patch-7

stark4n6 · web-flow · commit 115dc8f8c0f4 · 2025-12-05T11:09:53.000-05:00
Update SMSmissingROWIDs.py
diff --git a/scripts/artifacts/SMSmissingROWIDs.py b/scripts/artifacts/SMSmissingROWIDs.py
@@ -1,82 +1,108 @@
-# Module Description: Parses missing ROWID values from the SMS.db, presents the number of missing rows, and provides timestamps for data rows before and after the missing data
-# Author: @SQL_McGee
-# Date: 2023-03-20
-# Artifact version: 0.0.1
-# Requirements: none
-
-# This query was the product of research completed by James McGee, Metadata Forensics, LLC, for "Lagging for the Win", published by Belkasoft
-# https://belkasoft.com/lagging-for-win
-
-import sqlite3
-import textwrap
+__artifacts_v2__ = {
+    "SMS_Missing_ROWIDs": {
+        "name": "SMS - Missing ROWIDs",
+        "description": "Parses missing ROWID values from the SMS.db, presents the number of missing rows, and provides timestamps for data rows before and after the missing data",
+        "author": "@SQLMcGee for Metadata Forensics, LLC",
+        "creation_date": "2023-03-20",
+        "last_update_date": "2025-11-13",
+        "requirements": "none",
+        "category": "SMS & iMessage",
+        "notes": "This query was the product of research completed by James McGee, Metadata Forensics, LLC, for 'Lagging for the Win', published by Belkasoft https://belkasoft.com/lagging-for-win, updated upon further research",
+        "paths": ("*SMS/sms*",),  # trailing comma: without it the parentheses yield a plain str, not a tuple
+        "output_types": "standard",
+        "artifact_icon": "message-circle"
+    }
+}
 
-from packaging import version
-from scripts.artifact_report import ArtifactHtmlReport
-from scripts.ilapfuncs import logfunc, logdevinfo, tsv, timeline, is_platform_windows, open_sqlite_db_readonly
+from scripts.ilapfuncs import artifact_processor, get_sqlite_db_records, convert_cocoa_core_data_ts_to_utc
 
-def get_SMS(files_found, report_folder, seeker, wrap_text, timezone_offset):
-    
-    sms = ''
+@artifact_processor
+def SMS_Missing_ROWIDs(context):
+    """ See artifact description """
+    data_source = context.get_source_file_path('sms.db')
     
-    for file_found in files_found:
-        file_name = str(file_found)
-        if file_name.endswith('sms.db'):
-            sms = str(file_found)
-            source_file_sms = file_found.replace(seeker.data_folder, '')
-   
-    db = open_sqlite_db_readonly(sms)
+    data_list = []
     
-    cursor = db.cursor()
-    
-    cursor.execute('''
+    query = '''
+	WITH LastROWID AS (
+        SELECT seq AS last_rowid
+        FROM sqlite_sequence
+        WHERE sqlite_sequence.name = 'message'
+    )
     SELECT * FROM (
-        SELECT 
-        CASE -- This column is the same as the very first column but obtaining timestamp instead of the ROWID value. A CASE statement is used to capture data whether using seconds since Jan 1, 1970 or microseconds since Jan 1, 1970
-            WHEN length(DATE) = 18 
-            THEN LAG(DATETIME(DATE/1000000000 + 978307200, 'UNIXEPOCH'),1) OVER (ORDER BY ROWID) 
-            WHEN length(DATE) = 9
-            THEN LAG(DATETIME(DATE + 978307200, 'UNIXEPOCH'),1) OVER (ORDER BY ROWID)
-            END AS "Beginning Timestamp",
-        CASE -- Finally, this last column obtains the timestamp for the row following the missing row
-            WHEN length(DATE) = 18 
-            THEN DATETIME(DATE/1000000000 + 978307200, 'UNIXEPOCH') 
-            WHEN length(DATE) = 9
-            THEN DATETIME(DATE + 978307200, 'UNIXEPOCH')
-            END  AS "Ending Timestamp",
-		LAG (ROWID,1) OVER (ORDER BY ROWID) AS "Previous ROWID", -- This column uses the LAG function to obtain the ROWID value prior to a missing row
-        ROWID AS "ROWID", -- This column obtains the ROWID value following the missing row
-        (ROWID - (LAG (ROWID,1) OVER (ORDER BY ROWID)) - 1) AS "Number of Missing Rows" -- This column is a subtraction of the first two columns, minus one additional value, to obtain the number of missing rows
-        FROM message) list
-        WHERE ROWID - "Previous ROWID" > 1;
-    ''')
+	    SELECT * FROM (
+	        SELECT 
+	        LAG(message.date,1) OVER (ORDER BY ROWID) AS "Beginning Timestamp",
+            message.date AS "Ending Timestamp",
+			LAG (guid,1) OVER (ORDER BY ROWID) AS "Previous guid", 
+            guid AS "guid", 
+			LAG (ROWID,1) OVER (ORDER BY ROWID) AS "Previous ROWID", 
+	        ROWID AS "ROWID", 
+	        (ROWID - (LAG (ROWID,1) OVER (ORDER BY ROWID)) - 1) AS "Number of Missing Rows" 
+	        FROM message) list
+	        WHERE ROWID - "Previous ROWID" > 1
+
+			UNION ALL
+
+            SELECT -- Trailing gap: highest surviving ROWID versus the sqlite_sequence value held in LastROWID
+            CASE
+                WHEN message.ROWID != (SELECT last_rowid FROM LastROWID)
+                THEN message.date -- bare column: MAX(ROWID) is the only aggregate, so this is the date of the highest-ROWID row
+                END AS "Beginning Timestamp",
+            CASE
+                WHEN message.ROWID != (SELECT last_rowid FROM LastROWID)
+                THEN 'Time of Extraction' -- single quotes: string literal, not an identifier lookup
+                END AS "Ending Timestamp",
+            CASE
+                WHEN message.ROWID != (SELECT last_rowid FROM LastROWID)
+                THEN guid
+                END AS "Previous guid",
+            CASE
+                WHEN message.ROWID != (SELECT last_rowid FROM LastROWID)
+                THEN 'Unknown' -- single quotes: string literal, not an identifier lookup
+                END AS "guid",
+            CASE
+                WHEN message.ROWID != (SELECT last_rowid FROM LastROWID)
+                THEN MAX(ROWID) -- sole aggregate: pins every bare column in this SELECT to the highest-ROWID row
+                END AS "Previous ROWID",
+            CASE
+                WHEN message.ROWID != (SELECT last_rowid FROM LastROWID)
+                THEN (SELECT last_rowid FROM LastROWID)
+                END AS "ROWID",
+            CASE
+                WHEN message.ROWID != (SELECT last_rowid FROM LastROWID)
+                THEN ((SELECT last_rowid FROM LastROWID) - message.ROWID)
+                END AS "Number of Missing Rows"
+            FROM message)
+        WHERE "ROWID" IS NOT NULL;'''
     
-    all_rows = cursor.fetchall()
-    usageentries = len(all_rows)
-    if usageentries > 0:
-        data_list = []
-        for row in all_rows:
-            data_list.append(
-            (row[0], row[1], row[2], row[3], row[4]))
+    data_headers = (('Beginning Timestamp', 'datetime'), ('Ending Timestamp', 'datetime'), 'Previous guid', 'guid', 'Previous ROWID', 'ROWID', 'Number of Missing Rows')
+
+    db_records = get_sqlite_db_records(data_source, query)
+    
+    def fix_ts(val):
+        if not isinstance(val, (int, float)):
+            return val
             
-        report = ArtifactHtmlReport('SMS - Missing ROWIDs')
-        report.start_artifact_report(report_folder, 'SMS - Missing ROWIDs')
-        report.add_script()
-        data_headers = (
-            'Beginning Timestamp', 'Ending Timestamp','Previous ROWID', 'ROWID', 'Number of Missing Rows')
-        report.write_artifact_data_table(data_headers, data_list, sms)
-        report.end_artifact_report()
+        digits = len(str(abs(int(val))))
 
-        tsvname = 'SMS - Missing ROWIDs'
-        tsv(report_folder, data_headers, data_list, tsvname)
+        if digits > 17:
+            val = val / 1e9
 
-        tlactivity = 'SMS - Missing ROWIDs'
-        timeline(report_folder, tlactivity, data_list, data_headers)
-    else:
-        logfunc('No data available in SMS - Missing ROWIDs')
-        
-__artifacts__ = {
-    "SMS Missing ROWIDs": (
-        "SMS & iMessage",
-        ('*/mobile/Library/SMS/sms*'),
-        get_SMS)
-}
+        elif digits > 14: 
+            val = val / 1e6
+
+        return convert_cocoa_core_data_ts_to_utc(val)
+    
+    for record in db_records:
+        start_raw = record[0]
+        end_raw   = record[1]
+
+        start_timestamp = fix_ts(start_raw)
+        end_timestamp   = fix_ts(end_raw)
+
+        data_list.append(
+            (start_timestamp, end_timestamp, record[2], record[3], record[4], record[5], record[6])
+        )
+    
+    return data_headers, data_list, data_source