Skip to content

Commit 8da35a4

Browse files
committed
fix: address remaining issues
- Fix path traversal vulnerability with strict regex validation
- Implement unified inclusive date boundaries across components
- Remove problematic session-level skip check in history manager
- Add date range validation in CLI
- Update test expectations for new boundary semantics
- Remove complex date conversion logic from main.py
1 parent 4fb6aa5 commit 8da35a4

6 files changed

Lines changed: 135 additions & 59 deletions

File tree

src/claude_monitor/cli/main.py

Lines changed: 12 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -386,7 +386,7 @@ def _run_table_view(
386386

387387
try:
388388
# Parse date filters early so they can be used for both current and historical data
389-
from datetime import datetime, timedelta
389+
from datetime import datetime
390390

391391
from claude_monitor.utils.time_utils import TimezoneHandler
392392

@@ -401,17 +401,21 @@ def _parse_date(date_str: Optional[str]):
401401
except ValueError:
402402
continue
403403
print_themed(
404-
f"Invalid date format: {date_str}. Use YYYY-MM-DD.", style="warning"
404+
f"Invalid date format: {date_str}. Use one of: YYYY-MM-DD, YYYY.MM.DD, YYYY/MM/DD.",
405+
style="warning",
405406
)
406407
return None
407408

408409
start_dt = _parse_date(getattr(args, "start_date", None))
409410
end_dt = _parse_date(getattr(args, "end_date", None))
410411

411-
# Note: end_dt is already inclusive in history_manager
412-
end_dt_inclusive = (
413-
end_dt + timedelta(days=1) if end_dt else None
414-
) # For aggregator (needs exclusive end)
412+
# Validate date range
413+
if start_dt and end_dt and start_dt > end_dt:
414+
print_themed(
415+
f"Error: start_date ({getattr(args, 'start_date', None)}) must be on or before end_date ({getattr(args, 'end_date', None)})",
416+
style="error",
417+
)
418+
return
415419

416420
# Create aggregator with appropriate mode
417421
aggregator = UsageAggregator(
@@ -425,7 +429,7 @@ def _parse_date(date_str: Optional[str]):
425429

426430
# Get aggregated data with date filters
427431
logger.info(f"Loading {view_mode} usage data...")
428-
aggregated_data = aggregator.aggregate(start_dt, end_dt_inclusive)
432+
aggregated_data = aggregator.aggregate(start_dt, end_dt)
429433

430434
# Initialize history manager for daily and monthly views
431435
history_mode = getattr(args, "history", "auto")
@@ -475,9 +479,7 @@ def _parse_date(date_str: Optional[str]):
475479
aggregation_mode="daily",
476480
timezone=args.timezone,
477481
)
478-
current_daily = daily_aggregator.aggregate(
479-
start_dt, end_dt_inclusive
480-
)
482+
current_daily = daily_aggregator.aggregate(start_dt, end_dt)
481483

482484
# Load historical daily data
483485
daily_historical = history_manager.load_historical_daily_data(

src/claude_monitor/core/settings.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -139,11 +139,13 @@ def _get_system_time_format() -> str:
139139
)
140140

141141
start_date: Optional[str] = Field(
142-
default=None, description="Start date for filtering data (YYYY-MM-DD format)"
142+
default=None,
143+
description="Start date for filtering data (formats: YYYY-MM-DD, YYYY.MM.DD, YYYY/MM/DD)",
143144
)
144145

145146
end_date: Optional[str] = Field(
146-
default=None, description="End date for filtering data (YYYY-MM-DD format)"
147+
default=None,
148+
description="End date for filtering data (formats: YYYY-MM-DD, YYYY.MM.DD, YYYY/MM/DD)",
147149
)
148150

149151
custom_limit_tokens: Optional[int] = Field(

src/claude_monitor/data/aggregator.py

Lines changed: 38 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
import logging
88
from collections import defaultdict
99
from dataclasses import dataclass, field
10-
from datetime import datetime
10+
from datetime import datetime, timedelta
1111
from typing import Any, Callable, Dict, List, Optional
1212

1313
from claude_monitor.core.models import SessionBlock, UsageEntry, normalize_model_name
@@ -105,7 +105,9 @@ def __init__(
105105
self.data_path = data_path
106106
self.aggregation_mode = aggregation_mode
107107
self.timezone = timezone
108-
self.timezone_handler = TimezoneHandler()
108+
# Initialize handler with the user-selected timezone so subsequent
109+
# conversions and localizations use it consistently.
110+
self.timezone_handler = TimezoneHandler(timezone)
109111

110112
def _aggregate_by_period(
111113
self,
@@ -121,23 +123,48 @@ def _aggregate_by_period(
121123
entries: List of usage entries
122124
period_key_func: Function to extract period key from timestamp
123125
period_type: Type of period ('date' or 'month')
124-
start_date: Optional start date filter
125-
end_date: Optional end date filter
126+
start_date: Optional start date filter (inclusive)
127+
end_date: Optional end date filter (inclusive - includes the whole day)
126128
127129
Returns:
128130
List of aggregated data dictionaries
131+
132+
Note:
133+
Both start_date and end_date are inclusive. If end_date is provided,
134+
all entries from that entire day are included (up to 23:59:59.999999).
129135
"""
130136
period_data: Dict[str, AggregatedPeriod] = {}
131137

138+
# Normalize filter boundaries into the configured timezone for
139+
# consistent, intuitive "whole-day inclusive" semantics.
140+
norm_start = (
141+
self.timezone_handler.to_timezone(start_date, self.timezone)
142+
if start_date
143+
else None
144+
)
145+
norm_end = (
146+
self.timezone_handler.to_timezone(end_date, self.timezone)
147+
if end_date
148+
else None
149+
)
150+
132151
for entry in entries:
133-
# Apply date filters
134-
if start_date and entry.timestamp < start_date:
135-
continue
136-
if end_date and entry.timestamp > end_date:
137-
continue
152+
# Convert entry timestamp to the configured timezone for filtering
153+
# and period-key extraction.
154+
ts_local = self.timezone_handler.to_timezone(entry.timestamp, self.timezone)
138155

139-
# Get period key
140-
period_key = period_key_func(entry.timestamp)
156+
# Apply date filters (inclusive boundaries in local timezone)
157+
if norm_start and ts_local < norm_start:
158+
continue
159+
# For end_date, include all entries up to the end of that day.
160+
# Exclude entries >= next day's midnight in local timezone.
161+
if norm_end:
162+
next_day = norm_end + timedelta(days=1)
163+
if ts_local >= next_day:
164+
continue
165+
166+
# Get period key using local time
167+
period_key = period_key_func(ts_local)
141168

142169
# Get or create period aggregate
143170
if period_key not in period_data:

src/claude_monitor/data/history_manager.py

Lines changed: 10 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
import logging
1111
from datetime import datetime, timedelta
1212
from pathlib import Path
13-
from typing import Any, Dict, List, Optional, Set
13+
from typing import Any, Dict, List, Optional
1414

1515
logger = logging.getLogger(__name__)
1616

@@ -29,8 +29,7 @@ def __init__(self, data_dir: Optional[Path] = None):
2929
self.daily_dir = self.data_dir / "daily"
3030
self.daily_dir.mkdir(parents=True, exist_ok=True)
3131

32-
# Keep track of which dates have been saved this session to avoid duplicates
33-
self._saved_dates: Set[str] = set()
32+
# Session-level saved-date tracking removed to avoid short-circuiting logic
3433

3534
def _get_daily_file_path(self, date_str: str) -> Path:
3635
"""Get the file path for a specific date's data.
@@ -84,10 +83,6 @@ def save_daily_data(
8483
logger.warning("Daily data missing 'date' field, skipping")
8584
continue
8685

87-
# Skip if already saved in this session (unless overwrite)
88-
if not overwrite and date_str in self._saved_dates:
89-
continue
90-
9186
file_path = self._get_daily_file_path(date_str)
9287

9388
# Check if file exists and whether to overwrite
@@ -97,9 +92,8 @@ def save_daily_data(
9792
with open(file_path, "r", encoding="utf-8") as f:
9893
existing_data = json.load(f)
9994

100-
# If the data is identical, skip
95+
# If the data is identical, skip writing
10196
if existing_data == day_data:
102-
self._saved_dates.add(date_str)
10397
continue
10498

10599
# Compare total information to decide which data to keep
@@ -130,7 +124,7 @@ def save_daily_data(
130124
and existing_entries >= new_entries
131125
and existing_cost >= new_cost
132126
):
133-
self._saved_dates.add(date_str)
127+
# Keep existing file; no write needed
134128
continue
135129

136130
# Otherwise, save the new data (it has more information)
@@ -145,7 +139,6 @@ def save_daily_data(
145139
json.dump(day_data, f, indent=2, default=str, ensure_ascii=False)
146140
temp_file.replace(file_path)
147141

148-
self._saved_dates.add(date_str)
149142
saved_count += 1
150143
logger.debug(f"Saved historical data for {date_str}")
151144

@@ -165,15 +158,17 @@ def load_historical_daily_data(
165158
) -> List[Dict[str, Any]]:
166159
"""Load historical daily data within the specified range.
167160
168-
Both start_date and end_date are inclusive when specified.
169-
170161
Args:
171-
start_date: Start date for data retrieval
172-
end_date: End date for data retrieval
162+
start_date: Start date for data retrieval (inclusive)
163+
end_date: End date for data retrieval (inclusive)
173164
days_back: Alternative to date range - get last N days of data
174165
175166
Returns:
176167
List of historical daily data dictionaries
168+
169+
Note:
170+
Both start_date and end_date are inclusive. For example, specifying
171+
end_date as 2024-12-15 will include the file for 2024-12-15.
177172
"""
178173
historical_data = []
179174

src/tests/test_aggregator.py

Lines changed: 52 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -349,9 +349,8 @@ def test_aggregate_daily_with_date_filter(
349349
) -> None:
350350
"""Test daily aggregation with date filters."""
351351
start_date = datetime(2024, 1, 15, tzinfo=timezone.utc)
352-
end_date = datetime(
353-
2024, 1, 31, 23, 59, 59, tzinfo=timezone.utc
354-
) # Include the whole day
352+
# end_date is inclusive - pass Jan 31 to include all of Jan 31
353+
end_date = datetime(2024, 1, 31, tzinfo=timezone.utc)
355354

356355
result = aggregator.aggregate_daily(sample_entries, start_date, end_date)
357356

@@ -644,7 +643,8 @@ def test_aggregate_daily_with_date_filters(
644643

645644
# Filter for days 3-7 (Jan 3 to Jan 7)
646645
start_date = datetime(2024, 1, 3, tzinfo=timezone.utc)
647-
end_date = datetime(2024, 1, 8, tzinfo=timezone.utc) # End is exclusive
646+
# end_date is inclusive - to get Jan 3-7, pass Jan 7
647+
end_date = datetime(2024, 1, 7, tzinfo=timezone.utc)
648648

649649
result = aggregator.aggregate_daily(entries, start_date, end_date)
650650

@@ -729,7 +729,8 @@ def test_aggregate_with_date_filters(
729729

730730
# Test with date filters
731731
start_date = datetime(2024, 1, 2, tzinfo=timezone.utc)
732-
end_date = datetime(2024, 1, 4, tzinfo=timezone.utc)
732+
# end_date is inclusive - to get Jan 2-3, pass Jan 3
733+
end_date = datetime(2024, 1, 3, tzinfo=timezone.utc)
733734

734735
result = aggregator.aggregate(start_date=start_date, end_date=end_date)
735736

@@ -741,3 +742,49 @@ def test_aggregate_with_date_filters(
741742
# Test without filters - should return all
742743
result_all = aggregator.aggregate()
743744
assert len(result_all) == 5
745+
746+
def test_timezone_grouping_and_filters(self, tmp_path) -> None:
747+
"""Entries should be grouped and filtered using the selected timezone."""
748+
from claude_monitor.core.models import UsageEntry
749+
750+
# Two entries around the UTC day boundary
751+
e1 = UsageEntry(
752+
timestamp=datetime(2023, 12, 31, 23, 30, tzinfo=timezone.utc),
753+
input_tokens=100,
754+
output_tokens=50,
755+
cache_creation_tokens=0,
756+
cache_read_tokens=0,
757+
cost_usd=0.001,
758+
model="m",
759+
message_id="a",
760+
request_id="a",
761+
)
762+
e2 = UsageEntry(
763+
timestamp=datetime(2024, 1, 1, 0, 30, tzinfo=timezone.utc),
764+
input_tokens=200,
765+
output_tokens=100,
766+
cache_creation_tokens=0,
767+
cache_read_tokens=0,
768+
cost_usd=0.002,
769+
model="m",
770+
message_id="b",
771+
request_id="b",
772+
)
773+
774+
entries = [e1, e2]
775+
776+
# Under UTC they should fall into different dates (2023-12-31 and 2024-01-01)
777+
agg_utc = UsageAggregator(data_path=str(tmp_path), timezone="UTC")
778+
res_utc = agg_utc.aggregate_daily(entries)
779+
assert len(res_utc) == 2
780+
assert res_utc[0]["date"] == "2023-12-31"
781+
assert res_utc[1]["date"] == "2024-01-01"
782+
783+
# Under America/New_York (UTC-5) both timestamps belong to 2023-12-31
784+
agg_est = UsageAggregator(data_path=str(tmp_path), timezone="America/New_York")
785+
res_est = agg_est.aggregate_daily(entries)
786+
assert len(res_est) == 1
787+
assert res_est[0]["date"] == "2023-12-31"
788+
# Validate totals
789+
assert res_est[0]["input_tokens"] == 300
790+
assert res_est[0]["output_tokens"] == 150

src/tests/test_history_manager.py

Lines changed: 19 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,6 @@ def test_initialization(
3131
assert history_manager.data_dir == temp_dir
3232
assert history_manager.daily_dir == temp_dir / "daily"
3333
assert history_manager.daily_dir.exists()
34-
assert history_manager._saved_dates == set()
3534

3635
def test_get_daily_file_path(self, history_manager: HistoryManager) -> None:
3736
"""Test file path generation for daily data."""
@@ -82,30 +81,40 @@ def test_save_daily_data(self, history_manager: HistoryManager) -> None:
8281
def test_save_daily_data_no_overwrite(
8382
self, history_manager: HistoryManager
8483
) -> None:
85-
"""Test that save_daily_data doesn't overwrite by default."""
84+
"""Test that save_daily_data doesn't overwrite when existing data has more information."""
8685
daily_data = [
8786
{
8887
"date": "2024-12-15",
89-
"input_tokens": 1000,
90-
"output_tokens": 500,
91-
"total_cost": 0.015,
88+
"input_tokens": 2000,
89+
"output_tokens": 1000,
90+
"total_cost": 0.030,
91+
"entries_count": 10,
9292
}
9393
]
9494

95-
# Save first time
95+
# Save first time with more data
9696
saved_count = history_manager.save_daily_data(daily_data)
9797
assert saved_count == 1
9898

99-
# Modify data and try to save again
100-
daily_data[0]["input_tokens"] = 2000
101-
saved_count = history_manager.save_daily_data(daily_data, overwrite=False)
99+
# Try to save again with less data
100+
less_data = [
101+
{
102+
"date": "2024-12-15",
103+
"input_tokens": 1000,
104+
"output_tokens": 500,
105+
"total_cost": 0.015,
106+
"entries_count": 5,
107+
}
108+
]
109+
saved_count = history_manager.save_daily_data(less_data, overwrite=False)
102110
assert saved_count == 0 # Should not save again
103111

104112
# Verify original data is preserved
105113
file_path = history_manager._get_daily_file_path("2024-12-15")
106114
with open(file_path, "r") as f:
107115
saved_data = json.load(f)
108-
assert saved_data["input_tokens"] == 1000
116+
assert saved_data["input_tokens"] == 2000
117+
assert saved_data["output_tokens"] == 1000
109118

110119
def test_save_daily_data_with_overwrite(
111120
self, history_manager: HistoryManager
@@ -368,9 +377,6 @@ def test_save_daily_data_with_existing_better_data(
368377
]
369378
history_manager.save_daily_data(initial_data)
370379

371-
# Clear saved dates to allow checking existing file
372-
history_manager._saved_dates.clear()
373-
374380
# Try to save data with fewer total tokens
375381
new_data = [
376382
{
@@ -411,9 +417,6 @@ def test_save_daily_data_updates_with_more_info(
411417
]
412418
history_manager.save_daily_data(initial_data)
413419

414-
# Clear saved dates
415-
history_manager._saved_dates.clear()
416-
417420
# Save new data with more total tokens
418421
new_data = [
419422
{

0 commit comments

Comments (0)