diff --git a/buffer.go b/buffer.go index d9c86b2..bd20cbc 100644 --- a/buffer.go +++ b/buffer.go @@ -28,6 +28,23 @@ const ( defaultRowCapacity = 10000 ) +// Common date formats for parsing (most common first for performance) +// Shared by both isDateValue and parseDateValueFast to ensure consistency +var commonDateFormats = []string{ + "2006-01-02", // ISO date: 2024-10-17 + "2006-01-02 15:04:05", // ISO datetime: 2024-10-17 15:30:00 + "01/02/2006", // US date: 10/17/2024 + "02/01/2006", // EU date: 17/10/2024 + "2006/01/02", // Alt ISO: 2024/10/17 + time.RFC3339, // RFC3339: 2024-10-17T15:30:00Z + "2006-01-02T15:04:05", // ISO8601 without timezone + "Jan 02, 2006", // Mon DD, YYYY + "January 02, 2006", // Month DD, YYYY + "02-Jan-2006", // DD-Mon-YYYY + "02 Jan 2006", // DD Mon YYYY + "2006.01.02", // Dotted date +} + // createNewBuffer initializes and returns a new empty Buffer func createNewBuffer() *Buffer { return &Buffer{ @@ -250,23 +267,8 @@ func parseDateValueFast(s string) int64 { return 0 } - // Try common date formats (most common first for performance) - formats := []string{ - "2006-01-02", // ISO date: 2024-10-17 - "2006-01-02 15:04:05", // ISO datetime: 2024-10-17 15:30:00 - "01/02/2006", // US date: 10/17/2024 - "02/01/2006", // EU date: 17/10/2024 - "2006/01/02", // Alt ISO: 2024/10/17 - time.RFC3339, // RFC3339: 2024-10-17T15:30:00Z - "2006-01-02T15:04:05", // ISO8601 without timezone - "Jan 02, 2006", // Mon DD, YYYY - "January 02, 2006", // Month DD, YYYY - "02-Jan-2006", // DD-Mon-YYYY - "02 Jan 2006", // DD Mon YYYY - "2006.01.02", // Dotted date - } - - for _, format := range formats { + // Try common date formats using shared constant + for _, format := range commonDateFormats { if t, err := time.Parse(format, s); err == nil { return t.Unix() } @@ -397,23 +399,8 @@ func isDateValue(s string) bool { return false } - // Common date formats (most common first for performance) - formats := []string{ - "2006-01-02", // ISO date: 2024-10-17 - "2006-01-02 15:04:05", // ISO datetime: 2024-10-17 15:30:00 - "01/02/2006", // US date: 10/17/2024 - "02/01/2006", // EU date: 17/10/2024 - "2006/01/02", // Alt ISO: 2024/10/17 - time.RFC3339, // RFC3339: 2024-10-17T15:30:00Z - "2006-01-02T15:04:05", // ISO8601 without timezone - "Jan 02, 2006", // Mon DD, YYYY - "January 02, 2006", // Month DD, YYYY - "02-Jan-2006", // DD-Mon-YYYY - "02 Jan 2006", // DD Mon YYYY - "2006.01.02", // Dotted date - } - - for _, format := range formats { + // Try common date formats using shared constant + for _, format := range commonDateFormats { if _, err := time.Parse(format, s); err == nil { return true } @@ -478,10 +465,45 @@ func isNumericValue(s string) bool { } // detectAllColumnTypes automatically detects types for all columns +// Uses parallel processing for better performance on multi-column datasets func (b *Buffer) detectAllColumnTypes() { + // For small number of columns, sequential processing is faster + if b.colLen <= 4 { + for i := 0; i < b.colLen; i++ { + detectedType := b.autoDetectColumnType(i) + b.setColType(i, detectedType) + } + return + } + + // For larger datasets, use parallel processing + type result struct { + index int + ctype int + } + + results := make(chan result, b.colLen) + var wg sync.WaitGroup + + // Process columns in parallel for i := 0; i < b.colLen; i++ { - detectedType := b.autoDetectColumnType(i) - b.setColType(i, detectedType) + wg.Add(1) + go func(colIndex int) { + defer wg.Done() + detectedType := b.autoDetectColumnType(colIndex) + results <- result{index: colIndex, ctype: detectedType} + }(i) + } + + // Close results channel when all goroutines finish + go func() { + wg.Wait() + close(results) + }() + + // Collect results and set column types + for res := range results { + b.setColType(res.index, res.ctype) } } @@ -534,6 +556,39 @@ func (b *Buffer) filterByColumn(colIndex int, options FilterOptions) *Buffer { colType = b.colType[colIndex] } + // Pre-compile regex if using regex operator (performance optimization) + var compiledRegex *regexp.Regexp + if options.Operator == "regex" { + var err error + compiledRegex, err = regexp.Compile(options.Query) + if err != nil { + // Invalid regex - return buffer with just header + return filtered + } + } + + // Pre-parse numeric threshold if using numeric operators (performance optimization) + var thresholdVal float64 + isNumericOp := false + if colType == colTypeFloat || colType == colTypeDate { + switch options.Operator { + case ">", "<", ">=", "<=": + isNumericOp = true + var err error + thresholdVal, err = strconv.ParseFloat(strings.TrimSpace(options.Query), 64) + if err != nil { + // Invalid numeric threshold - return buffer with just header + return filtered + } + } + } + + // Pre-convert query to lowercase if case-insensitive (performance optimization) + lowerQuery := options.Query + if !options.CaseSensitive && options.Operator != "regex" { + lowerQuery = strings.ToLower(options.Query) + } + // Filter data rows startRow := b.rowFreeze for i := startRow; i < b.rowLen; i++ { @@ -543,8 +598,8 @@ func (b *Buffer) filterByColumn(colIndex int, options FilterOptions) *Buffer { cellValue := b.cont[i][colIndex] - // Evaluate filter condition - if evaluateFilter(cellValue, options, colType) { + // Evaluate filter condition with pre-compiled/parsed values + if evaluateFilterOptimized(cellValue, options, colType, compiledRegex, isNumericOp, thresholdVal, lowerQuery) { filtered.cont = append(filtered.cont, b.cont[i]) filtered.rowLen++ } @@ -553,45 +608,36 @@ func (b *Buffer) filterByColumn(colIndex int, options FilterOptions) *Buffer { return filtered } -// evaluateFilter checks if a cell value matches the filter query based on the operator. -func evaluateFilter(cellValue string, options FilterOptions, colType int) bool { - query := options.Query +// evaluateFilterOptimized checks if a cell value matches the filter query based on the operator. +// This version accepts pre-compiled regex and pre-parsed values for better performance. +func evaluateFilterOptimized(cellValue string, options FilterOptions, colType int, compiledRegex *regexp.Regexp, isNumericOp bool, thresholdVal float64, lowerQuery string) bool { operator := options.Operator // Handle numeric comparisons first - if colType == colTypeFloat || colType == colTypeDate { - isNumericOperator := false + if isNumericOp { + cellVal := parseNumericValueFast(cellValue) switch operator { - case ">", "<", ">=", "<=": - isNumericOperator = true + case ">": + return cellVal > thresholdVal + case "<": + return cellVal < thresholdVal + case ">=": + return cellVal >= thresholdVal + case "<=": + return cellVal <= thresholdVal } + } - if isNumericOperator { - cellVal := parseNumericValueFast(cellValue) - thresholdVal, err := strconv.ParseFloat(strings.TrimSpace(query), 64) - if err != nil { - return false // Cannot compare if query is not a number - } - - switch operator { - case ">": - return cellVal > thresholdVal - case "<": - return cellVal < thresholdVal - case ">=": - return cellVal >= thresholdVal - case "<=": - return cellVal <= thresholdVal - } - } + // Handle regex operator with pre-compiled regex + if operator == "regex" && compiledRegex != nil { + return compiledRegex.MatchString(cellValue) } // Prepare strings for comparison cell := cellValue - q := query + q := lowerQuery if !options.CaseSensitive { cell = strings.ToLower(cell) - q = strings.ToLower(q) } // Handle string-based operators @@ -604,15 +650,45 @@ func evaluateFilter(cellValue string, options FilterOptions, colType int) bool { return strings.HasPrefix(cell, q) case "ends with": return strings.HasSuffix(cell, q) - case "regex": - // When using regex, the user has full control over case sensitivity in the pattern. - re, err := regexp.Compile(options.Query) - if err != nil { - return false // Invalid regex - } - return re.MatchString(cellValue) default: // Default to contains for backward compatibility if operator is empty return strings.Contains(cell, q) } } + +// evaluateFilter checks if a cell value matches the filter query based on the operator. +// Kept for backward compatibility - calls evaluateFilterOptimized with default parameters. +func evaluateFilter(cellValue string, options FilterOptions, colType int) bool { + // For backward compatibility, compile regex on-the-fly if needed + var compiledRegex *regexp.Regexp + if options.Operator == "regex" { + var err error + compiledRegex, err = regexp.Compile(options.Query) + if err != nil { + return false + } + } + + // Parse numeric threshold if needed + var thresholdVal float64 + isNumericOp := false + if colType == colTypeFloat || colType == colTypeDate { + switch options.Operator { + case ">", "<", ">=", "<=": + isNumericOp = true + var err error + thresholdVal, err = strconv.ParseFloat(strings.TrimSpace(options.Query), 64) + if err != nil { + return false + } + } + } + + // Convert query to lowercase if needed + lowerQuery := options.Query + if !options.CaseSensitive && options.Operator != "regex" { + lowerQuery = strings.ToLower(options.Query) + } + + return evaluateFilterOptimized(cellValue, options, colType, compiledRegex, isNumericOp, thresholdVal, lowerQuery) +} diff --git a/utils.go b/utils.go index c861bcf..e3f9c7c 100644 --- a/utils.go +++ b/utils.go @@ -5,6 +5,7 @@ import ( "os" "regexp" "strconv" + "strings" "github.com/fatih/color" ) @@ -330,37 +331,12 @@ func performSearch(b *Buffer, query string, useRegex bool, caseSensitive bool) [ return results } -// toLower converts a string to lowercase +// toLower converts a string to lowercase using optimized standard library func toLower(s string) string { - runes := []rune(s) - for i, r := range runes { - if r >= 'A' && r <= 'Z' { - runes[i] = r + 32 - } - } - return string(runes) + return strings.ToLower(s) } -// stringContains checks if s contains substr +// stringContains checks if s contains substr using optimized standard library func stringContains(s, substr string) bool { - if len(substr) == 0 { - return true - } - if len(substr) > len(s) { - return false - } - - for i := 0; i <= len(s)-len(substr); i++ { - match := true - for j := 0; j < len(substr); j++ { - if s[i+j] != substr[j] { - match = false - break - } - } - if match { - return true - } - } - return false + return strings.Contains(s, substr) }