From 8c7311c36dfa870ec8133b5992a23109e72e3bca Mon Sep 17 00:00:00 2001 From: Sylver Date: Sat, 3 Jun 2023 16:28:14 +0200 Subject: [PATCH 1/7] First working version Pushing this first working version, further tests will be done later --- lib/rouge/demos/spl | 14 ++ lib/rouge/lexers/spl.rb | 468 ++++++++++++++++++++++++++++++++++++++++ spec/lexers/spl_spec.rb | 14 ++ spec/visual/samples/spl | 14 ++ 4 files changed, 510 insertions(+) create mode 100644 lib/rouge/demos/spl create mode 100644 lib/rouge/lexers/spl.rb create mode 100644 spec/lexers/spl_spec.rb create mode 100644 spec/visual/samples/spl diff --git a/lib/rouge/demos/spl b/lib/rouge/demos/spl new file mode 100644 index 0000000000..55de49644a --- /dev/null +++ b/lib/rouge/demos/spl @@ -0,0 +1,14 @@ +index=_internal AND sourcetype=splunkd component="Metrics" NOT code=1 avg > 2.5 debug='on' flag=0x2F00 + [index=authentications user IN ("admin","root") | stats count by user | fields user ] +| eval user = coalesce(user,src_user) test=1 +| rex field=_raw "Reason:(?[^\]]+)\]" +| stats count AS metric_count dc(user) as dc_user dc(eval(if(status=404, clientip, NULL()))) BY host +| join host type=left + [search index=_audit sourcetype=audittrail + | stats count AS audit_count BY host] +``` Some comments +on multiple lines``` +| table time host metric_count audit_count +| lookup assets-list host OUTPUT asset_type +| `ctime(time)` +`comment("END OF QUERY")` \ No newline at end of file diff --git a/lib/rouge/lexers/spl.rb b/lib/rouge/lexers/spl.rb new file mode 100644 index 0000000000..5d64d9cb1b --- /dev/null +++ b/lib/rouge/lexers/spl.rb @@ -0,0 +1,468 @@ +# -*- coding: utf-8 -*- # +# frozen_string_literal: true + +module Rouge + module Lexers + class SPL < RegexLexer + title "SPL" + desc "Splunk Query Language" + tag 'spl' + aliases 'splunk' + filenames '*.spl' + + # Greatly inspired by https://github.com/ChrisYounger/highlighter/blob/master/appserver/static/spl_language.js + + # Serves both as the list of all available commands and their list of allowed arguments, if any + def self.command_arguments + @command_arguments = { + "abstract" => ['maxterms','maxlines'], + "accum" => [''], + "addcoltotals" => ['labelfield','label'], + "addinfo" => [''], + "addtotals" => ['row','col','labelfield','label','fieldname'], + "analyzefields" => ['classfield'], + "anomalies" => ['threshold','labelonly','normalize','maxvalues','field','blacklist','blacklistthreshold'], + "anomalousvalue" => ['minsupcount','maxanofreq','minsupfreq','minnormfreq','pthresh','action'], + "anomalydetection" => ['pthresh','cutoff','method','action','action'], + "append" => ['extendtimerange','maxtime','maxout','timeout'], + "appendcols" => ['override','extendtimerange','maxtime','maxout','timeout'], + "appendpipe" => ['run_in_preview'], + "archivebuckets" => ['forcerun','retries'], + "arules" => ['sup','conf'], + "associate" => ['supcnt','supfreq','improv'], + "audit" => [''], + "autoregress" => ['p'], + "bin" => ['bins','minspan'], + "bucketdir" => ['maxcount','countfield','sep','pathfield','sizefield'], + "chart" => ['sep','format','cont','limit','minspan','minspan','useother','useother','aligntime','span','start','end','nullstr','otherstr','bins'], + "cluster" => ['t','delims','showcount','countfield','labelfield','field','labelonly','match'], + "cofilter" => [''], + "collect" => ['addtime','index','index','file','spool','marker','testmode','run_in_preview','host','source','sourcetype'], + "concurrency" => ['start','output','duration'], + "contingency" => ['usetotal','totalstr','maxrows','maxcols','mincolcover','minrowcover'], + "convert" => ['timeformat'], + "correlate" => [''], + "datamodel" => [''], + "dbinspect" => ['index','corruptonly','span'], + "dedup" => ['keepevents','keepempty','consecutive'], + "delete" => [''], + "delta" => ['p'], + "diff" => ['position1','position2','attribute','diffheader','context','maxlen'], + "erex" => ['fromfield','maxtrainers','examples','counterexamples'], + "eval" => ['field'], + "eventcount" => ['index','summarize','report_size','list_vix'], + "eventstats" => ['allnum'], + "extract" => ['segment','reload','kvdelim','pairdelim','limit','maxchars','mv_add','clean_keys'], + "fieldformat" => [''], + "fields" => [''], + "fieldsummary" => ['maxvals'], + "file" => [''], + "filldown" => [''], + "fillnull" => ['value'], + "findtypes" => ['max'], + "foreach" => ['fieldstr','matchstr','matchseg1','matchseg2','matchseg3'], + "format" => ['maxresults','mvsep'], + "from" => [''], + "gauge" => [''], + "gentimes" => ['increment','start','end'], + "geom" => ['gen'], + "geomfilter" => [''], + "geostats" => ['translatetoxy','latfield','longfield','outputlatfield','outputlongfield','globallimit','locallimit','binspanlat','maxzoomlevel','binspanlong'], + "head" => ['limit','null','keeplast'], + "highlight" => [''], + "history" => ['events'], + "iconify" => [''], + "input" => ['sourcetype','index','add','remove'], + "inputcsv" => ['dispatch','append','start','max','events'], + "inputlookup" => ['append','start','max'], + "iplocation" => ['prefix','allfields','lang'], + "join" => ['left','right','usetime','earlier','overwrite','max','type','field'], + "kmeans" => ['reps','maxiters','t','k','cfield','showcentroid','dt'], + "kvform" => ['form','field'], + "loadjob" => ['events','job_delegate','artifact_offset','ignore_running','savedsearch'], + "localize" => ['maxpause','timeafter','timebefore'], + "localop" => [''], + "lookup" => ['local','update','event_time_field'], + "makecontinuous" => ['bins','minspan','span','start','end','aligntime'], + "makejson" => ['output'], + "makemv" => ['delim','allowempty','setsv','tokenizer'], + "makeresults" => ['count','annotate','splunk_server','splunk_server_group'], + "map" => ['maxsearches','search'], + "mcollect" => ['index','file','split','spool','prefix_field','host','source','sourcetype'], + "metadata" => ['index','splunk_server','splunk_server_group','datatype','type'], + "metasearch" => ['savedsearch','savedsplunk','field','eventtypetag','hosttag'], + "meventcollect" => ['index','split','spool','prefix_field','host','source','sourcetype'], + "mstats" => ['prestats','append','backfill','update_period','span','savedsearch','savedsplunk','field'], + "multikv" => ['conf','copyattrs','forceheader','multitable','noheader','rmorig','fields','filter'], + "multisearch" => [''], + "mvcombine" => ['delim'], + "mvexpand" => ['limit'], + "nomv" => [''], + "outlier" => ['param','uselower','mark','action'], + "outputcsv" => ['append','create_empty','override_if_empty','dispatch','usexml','singlefile'], + "outputlookup" => ['append','create_empty','override_if_empty','max','key_field','createinapp','output_format'], + "outputtelemetry" => ['input','type','component','support','anonymous','license','optinrequired'], + "outputtext" => ['usexml'], + "overlap" => [''], + "pivot" => [''], + "predict" => ['correlate','future_timespan','holdback','period','suppress','algorithm','upper','lower'], + "rangemap" => ['default','field'], + "rare" => ['showcount','showperc','limit','countfield','percentfield','useother','otherstr'], + "redistribute" => ['num_of_reducers'], + "regex" => [''], + "relevancy" => [''], + "reltime" => [''], + "rename" => [''], + "replace" => [''], + "rest" => ['count','splunk_server','splunk_server_group','timeout'], + "return" => [''], + "reverse" => [''], + "rex" => ['field','max_match','offset_field','mode'], + "rtorder" => ['discard','buffer_span','max_buffer_size'], + "savedsearch" => ['nosubstitution'], + "script" => ['maxinputs'], + "scrub" => ['dictionary','timeconfig','namespace','public-terms','private-terms','name-terms'], + "search" => ['index','sourcetype','source','eventtype','tag','host','earliest','latest','_index_earliest','_index_latest','savedsearch','savedsplunk','field'], + "searchtxn" => ['max_terms','use_disjunct','eventsonly'], + "selfjoin" => ['overwrite','max','keepsingle'], + "sendemail" => ['to','from','cc','bcc','paperorientation','priority','papersize','content_type','format','subject','message','footer','sendresults','inline','sendcsv','sendpdf','pdfview','server','graceful','width_sort_columns','use_ssl','use_tls','maxinputs','maxtime'], + "set" => [''], + "shape" => ['maxvalues','maxresolution'], + "sichart" => ['sep','format','cont','limit','minspan','start','end','span','bins','usenull','useother','otherstr','nullstr'], + "sirare" => ['showcount','showperc','limit','countfield','percentfield','useother','otherstr'], + "sistats" => ['partitions','allnum','delim'], + "sitimechart" => ['sep','format','fixedrange','partial','cont','limit','minspan','bins','usenull','useother','nullstr','otherstr'], + "sitop" => ['showcount','showperc','limit','countfield','percentfield','useother','otherstr'], + "sort" => [''], + "spath" => ['output','path','input'], + "stats" => ['partitions','allnum','delim'], + "strcat" => ['allrequired'], + "streamstats" => ['reset_on_change','current','window','time_window','global','allnum','reset_before'], + "table" => [''], + "tags" => ['outputfield','inclname','inclvalue'], + "tail" => [''], + "timechart" => ['sep','format','fixedrange','partial','cont','limit','minspan'], + "timewrap" => ['time_format','align','series'], + "top" => ['showcount','showperc','limit','countfield','percentfield','useother','otherstr'], + "transaction" => ['name','maxspan','maxopentxn','delim','maxpause','maxevents','connected','unifyends','keeporphans','maxopenevents','keepevicted','mvlist','nullstr','mvraw','startswith','endswith'], + "transpose" => ['column_name','header_field','include_empty'], + "trendline" => ['sma','ema','wma'], + "tstats" => ['prestats','local','append','summariesonly','allow_old_summaries','span','sid','datamodel','chunk_size','savedsearch','savedsplunk','field'], + "typeahead" => ['max_time','index','collapse','prefix','count'], + "typer" => [''], + "union" => ['extendtimerange','maxtime','maxout','timeout'], + "uniq" => [''], + "untable" => [''], + "where" => [''], + "x11" => ['mult','add'], + "xmlkv" => ['maxinputs'], + "xmlunescape" => ['maxinputs'], + "xpath" => ['field','outfield','default'], + "xyseries" => ['grouped','sep','format'] + }; + end + + # Some commands use specific operators, some even require them to be in upper case, but we will not make sure of that here + def self.command_operators + @command_operators = { + "bin" => ['as'], + "chart" => ['where','over','not','and','or','xor','like','by','as'], + "convert" => ['as'], + "dedup" => ['sortby'], + "delta" => ['as'], + "eval" => ['and','or','xor','not','like'], + "eventstats" => ['by','as'], + "fieldformat" => ['and','or','xor','not','like'], + "geostats" => ['as'], + "head" => ['and','or','xor','not','like'], + "inputcsv" => ['where'], + "inputlookup" => ['where'], + "join" => ['where'], + "lookup" => ['outputnew','output','as'], + "metasearch" => ['in'], + "mstats" => ['as'], + "predict" => ['as'], + "rare" => ['by'], + "redistribute" => ['by'], + "replace" => ['with','in'], + "rename" => ['as'], + "search" => ['by','where','over','and','or','xor','not','term','in','case'], + "set" => ['union','diff','intersect'], + "sichart" => ['by','where','over','and','or','xor','not','as'], + "sirare" => ['by'], + "sistats" => ['by','as'], + "sitimechart" => ['like','not','and','or','xor','where','like','by','as'], + "sitop" => ['by'], + "sort" => ['auto','str','ip','num','desc','d'], + "stats" => ['by','as'], + "stremstats" => ['like','not','and','or','xor','where','like','by','as'], + "timechart" => ['like','not','and','or','xor','where','by','as'], + "top" => ['by'], + "trendline" => ['as'], + "tstats" => ['like','not','and','or','xor','where','by','in','groupby','as'], + "where" => ['like','not','and','or','xor','like'], + "x11" => ['as'] + }; + end + + # Available evaluation functions + def self.eval_functions + @eval_functions = ['abs','case','ceiling','cidrmatch','coalesce','commands','exact','exp','false','floor','if','ifnull','isbool','isint','isnotnull','isnull','isnum','isstr','len','like','ln','log','lower','match','max','md5','min','mvappend','mvcount','mvdedup','mvindex','mvfilter','mvfind','mvjoin','mvrange','mvsort','mvzip','now','null','nullif','pi','pow','random','relative_time','replace','round','searchmatch','sha1','sha256','sha512','sigfig','spath','split','sqrt','strftime','strptime','substr','time','tostring','trim','ltrim','rtrim','true','typeof','upper','urldecode','validate','tonumber','acos','acosh','asin','asinh','atan','atan2','atanh','cos','cosh','hypot','sin','sinh','tan','tanh'] + end + + # Commands which support evaluation functions (and only those) + def self.eval_commands + @eval_commands = ['eval','head','where'] + end + + # Available aggregation functions (+ eval which is just a link between eval and aggregation) + def self.aggr_functions + @aggr_functions = ['eval','sparkline','c','count','dc','distinct_count','mean','avg','stdev','stdevp','var','varp','sum','sumsq','min','max','mode','median','earliest','first','last','latest','perc','p','exactperc','upperperc','list','values','range','estdc','estdc_error','earliest_time','latest_time','perc70','perc80','perc90','perc91','perc92','perc93','perc94','perc95','perc96','perc97','perc98','perc99'] + end + + # Commands which support aggregation functions (and eval functions consequently through the "eval()" function) + def self.aggr_commands + @aggr_commands = ['chart','eventstats','geostats','mstats','sichart','sistats','sitimechart','stats','streamstats','timechart','tstats'] + end + + # Stack of commands being ran (usually only 1 but it can be more if can of subsearches) + command_stack = Array.new + + state :root do + rule %r/(?=.)/, Text, :query + end + + state :query do + rule %r/\|/, Text, :command + # By default, we assume it is an implict search command + rule %r/(?=.)/ do |m| + command_stack.push "search" + token Text + push :search_command + end + end + + state :subquery do + rule %r/\]/ do |m| + # At the end of a subsearch, we need to clear the last command context + if command_stack.length > 0 + command_stack.pop + end + token Punctuation + pop! + end + rule %r/\|/, Text, :command + rule %r/\w+(?=[ \t]*)(?=\=)/ do |m| + # We can find filters or arguments already + # By default we assume we were in an implicit search command + command_stack.push "search" + if self.class.command_arguments["search"].include? m[0].downcase + token Keyword::Reserved + else + token Text + end + push :search_command + end + # Sub-queries do not need a leading | when running a command + # Trying to avoid to match an argument + rule %r/\w+(?=[ \t]*)(?!\=)/ do |m| + if m[0].downcase == "search" + token Name::Builtin + command_stack.push(m[0].downcase) + push :search_command + elsif self.class.command_arguments.key? m[0].downcase + token Name::Builtin + command_stack.push(m[0].downcase) + push :command_args + else + token Text + end + end + # By default, we assume it is an implict search command + rule %r/(?=.)/ do |m| + command_stack.push "search" + token Text + push :search_command + end + end + + # Search commands have a specific status, being implicit in some situations + state :search_command do + rule %r/```/, Comment::Multiline, :multiline_comments + rule %r/`\s*comment\s*\(\s*"/, Comment::Preproc, :comment_macro + rule %r/(`)(\s*\w+)([^`]*)(`)/, Comment::Preproc + rule %r/0[xX][0-9a-fA-F]*/, Num::Hex + rule %r/[$][+-]*\d*(\.\d*)?/, Num + rule %r/((\d+(\.\d*)?)|(\.\d+))([eE][\-+]?\d+)?/, Num + rule %r/[!<>=,]+/, Punctuation + rule %r/[()]/, Punctuation + rule %r/\|/, Text, :command + rule %r/["]/, Str::Escape, :double_string + rule %r/[']/, Str::Escape, :single_string + rule %r/\s+/m, Text + rule %r/\[/, Punctuation, :subquery + rule %r/\w+(?=[ \t]*)(?=\=)/ do |m| + if self.class.command_arguments.key? command_stack.last + if self.class.command_arguments[command_stack.last].include? m[0].downcase + token Keyword::Reserved + else + token Text + end + else + token Text + end + end + # Some commands have specific operators available + rule %r/[^ \t"'\d!<>=,()\[\]]+/m do |m| + if self.class.command_operators.key? command_stack.last + if self.class.command_operators[command_stack.last].include? m[0].downcase + token Operator::Word + else + token Text + end + else + token Text + end + end + # If finding a closing bracket, popping twice to leave the current state AND the subquery state + rule %r/\]/ do |m| + token Punctuation + pop! + pop! + end + end + + # Other commands not being implicit, we were only handle the initial part "| command_name" and then just into arguments if any + state :command do + rule %r/\s+/m, Text + # Highlighting only known Splunk commands + rule %r/\w+/m do |m| + if self.class.command_arguments.key? m[0].downcase + token Name::Builtin + command_stack.push(m[0].downcase) + else + command_stack.push "unknown" + token Text + end + push :command_args + end + # When jumping to the next command, clearing last command + rule %r/\|/ do |m| + if command_stack.length > 0 + command_stack.pop + end + token Punctuation + end + rule %r/\[/, Punctuation, :subquery + # If finding a closing bracket, popping twice to leave the current state AND the subquery state + rule %r/\]/ do |m| + token Punctuation + pop! + pop! + end + end + + # Handling arguments after having initialized the command context + state :command_args do + rule %r/```/, Comment::Multiline, :multiline_comments + rule %r/`\s*comment\s*\(\s*"/, Comment::Preproc, :comment_macro + rule %r/(`)(\s*\w+)([^`]*)(`)/, Comment::Preproc + rule %r/\s+/m, Text + rule %r/0[xX][0-9a-fA-F]*/, Num::Hex + rule %r/[$][+-]*\d*(\.\d*)?/, Num + rule %r/((\d+(\.\d*)?)|(\.\d+))([eE][\-+]?\d+)?/, Num + rule %r/[!<>=,%\+\.\*\-\/]+/, Punctuation + rule %r/[()]/, Punctuation + # Command arguments, checking it is a known argument for the current command + rule %r/\w+(?=[ \t]*)(?=\=)/ do |m| + if self.class.command_arguments.key? command_stack.last + if self.class.command_arguments[command_stack.last].include? m[0].downcase + token Keyword::Reserved + else + token Text + end + else + token Text + end + end + rule %r/\w+(?=[ \t]*)(?=\()/ do |m| + if ( self.class.eval_commands.include?(command_stack.last) && self.class.eval_functions.include?(m[0].downcase) ) + token Name::Function + # Aggregation functions can use eval functions through the "eval()" function + elsif ( self.class.aggr_commands.include?(command_stack.last) && ( self.class.aggr_functions.include?(m[0].downcase) || self.class.eval_functions.include?(m[0].downcase)) ) + token Name::Function + else + token Text + end + end + rule %r/["]/, Str::Escape, :double_string + rule %r/[']/, Str::Escape, :single_string + # When jumping to the next command, clearing last command + rule %r/\|/ do |m| + if command_stack.length > 0 + command_stack.pop + end + token Punctuation + pop! + end + # A subquery can occur anywhere + rule %r/\[/, Text, :subquery + # If finding a closing bracket, popping twice to leave the current state AND the subquery state + rule %r/\]/ do |m| + token Punctuation + pop! + pop! + end + # Some commands have specific operators available + rule %r/[^ \t"'\d!<>=,()\[\]]+/m do |m| + if self.class.command_operators.key? command_stack.last + if self.class.command_operators[command_stack.last].include? m[0].downcase + token Operator::Word + else + token Text + end + else + token Text + end + end + end + + state :multiline_comments do + rule %r(```), Comment::Multiline, :pop! + rule %r/./, Comment::Multiline + end + + state :comment_macro do + rule %r/"\s*\)\s*`/, Comment::Preproc, :pop! + rule %r/\\./, Comment::Single + rule %r/[^\\"]+/, Comment::Single + end + + # When found in a rex/regex command, a double string will be a regex + state :double_string do + rule %r/\\./ do |m| + if ( (command_stack.last == "rex") || (command_stack.last == "regex") ) + token Str::Regex + else + token Str::Double + end + end + rule %r/["]/, Str::Escape, :pop! + rule %r/[^\\"]+/ do |m| + if ( (command_stack.last == "rex") || (command_stack.last == "regex") ) + token Str::Regex + else + token Str::Double + end + end + end + + state :single_string do + rule %r/\\./, Str::Single + rule %r/[']/, Str::Escape, :pop! + rule %r/[^\\']+/, Str::Single + end + + end + end +end \ No newline at end of file diff --git a/spec/lexers/spl_spec.rb b/spec/lexers/spl_spec.rb new file mode 100644 index 0000000000..2f68dc4b99 --- /dev/null +++ b/spec/lexers/spl_spec.rb @@ -0,0 +1,14 @@ +# -*- coding: utf-8 -*- # +# frozen_string_literal: true + +describe Rouge::Lexers::SPL do + let(:subject) { Rouge::Lexers::SPL.new } + + describe 'guessing' do + include Support::Guessing + + it 'guesses by filename' do + assert_guess :filename => 'foo.spl' + end + end +end \ No newline at end of file diff --git a/spec/visual/samples/spl b/spec/visual/samples/spl new file mode 100644 index 0000000000..55de49644a --- /dev/null +++ b/spec/visual/samples/spl @@ -0,0 +1,14 @@ +index=_internal AND sourcetype=splunkd component="Metrics" NOT code=1 avg > 2.5 debug='on' flag=0x2F00 + [index=authentications user IN ("admin","root") | stats count by user | fields user ] +| eval user = coalesce(user,src_user) test=1 +| rex field=_raw "Reason:(?[^\]]+)\]" +| stats count AS metric_count dc(user) as dc_user dc(eval(if(status=404, clientip, NULL()))) BY host +| join host type=left + [search index=_audit sourcetype=audittrail + | stats count AS audit_count BY host] +``` Some comments +on multiple lines``` +| table time host metric_count audit_count +| lookup assets-list host OUTPUT asset_type +| `ctime(time)` +`comment("END OF QUERY")` \ No newline at end of file From c1a454e06a8bbe7f6709afb9c239ecfcf22772eb Mon Sep 17 00:00:00 2001 From: Sylver Date: Sat, 3 Jun 2023 16:49:03 +0200 Subject: [PATCH 2/7] Replaces tabs by spaces Getting rid of tabs --- lib/rouge/lexers/spl.rb | 861 ++++++++++++++++++++-------------------- 1 file changed, 430 insertions(+), 431 deletions(-) diff --git a/lib/rouge/lexers/spl.rb b/lib/rouge/lexers/spl.rb index 5d64d9cb1b..b214c90243 100644 --- a/lib/rouge/lexers/spl.rb +++ b/lib/rouge/lexers/spl.rb @@ -7,462 +7,461 @@ class SPL < RegexLexer title "SPL" desc "Splunk Query Language" tag 'spl' - aliases 'splunk' + aliases 'splunk' filenames '*.spl' - # Greatly inspired by https://github.com/ChrisYounger/highlighter/blob/master/appserver/static/spl_language.js - - # Serves both as the list of all available commands and their list of allowed arguments, if any - def self.command_arguments + # Greatly inspired by https://github.com/ChrisYounger/highlighter/blob/master/appserver/static/spl_language.js + + # Serves both as the list of all available commands and their list of allowed arguments, if any + def self.command_arguments @command_arguments = { - "abstract" => ['maxterms','maxlines'], - "accum" => [''], - "addcoltotals" => ['labelfield','label'], - "addinfo" => [''], - "addtotals" => ['row','col','labelfield','label','fieldname'], - "analyzefields" => ['classfield'], - "anomalies" => ['threshold','labelonly','normalize','maxvalues','field','blacklist','blacklistthreshold'], - "anomalousvalue" => ['minsupcount','maxanofreq','minsupfreq','minnormfreq','pthresh','action'], - "anomalydetection" => ['pthresh','cutoff','method','action','action'], - "append" => ['extendtimerange','maxtime','maxout','timeout'], - "appendcols" => ['override','extendtimerange','maxtime','maxout','timeout'], - "appendpipe" => ['run_in_preview'], - "archivebuckets" => ['forcerun','retries'], - "arules" => ['sup','conf'], - "associate" => ['supcnt','supfreq','improv'], - "audit" => [''], - "autoregress" => ['p'], - "bin" => ['bins','minspan'], - "bucketdir" => ['maxcount','countfield','sep','pathfield','sizefield'], - "chart" => ['sep','format','cont','limit','minspan','minspan','useother','useother','aligntime','span','start','end','nullstr','otherstr','bins'], - "cluster" => ['t','delims','showcount','countfield','labelfield','field','labelonly','match'], - "cofilter" => [''], - "collect" => ['addtime','index','index','file','spool','marker','testmode','run_in_preview','host','source','sourcetype'], - "concurrency" => ['start','output','duration'], - "contingency" => ['usetotal','totalstr','maxrows','maxcols','mincolcover','minrowcover'], - "convert" => ['timeformat'], - "correlate" => [''], - "datamodel" => [''], - "dbinspect" => ['index','corruptonly','span'], - "dedup" => ['keepevents','keepempty','consecutive'], - "delete" => [''], - "delta" => ['p'], - "diff" => ['position1','position2','attribute','diffheader','context','maxlen'], - "erex" => ['fromfield','maxtrainers','examples','counterexamples'], - "eval" => ['field'], - "eventcount" => ['index','summarize','report_size','list_vix'], - "eventstats" => ['allnum'], - "extract" => ['segment','reload','kvdelim','pairdelim','limit','maxchars','mv_add','clean_keys'], - "fieldformat" => [''], - "fields" => [''], - "fieldsummary" => ['maxvals'], - "file" => [''], - "filldown" => [''], - "fillnull" => ['value'], - "findtypes" => ['max'], - "foreach" => ['fieldstr','matchstr','matchseg1','matchseg2','matchseg3'], - "format" => ['maxresults','mvsep'], - "from" => [''], - "gauge" => [''], - "gentimes" => ['increment','start','end'], - "geom" => ['gen'], - "geomfilter" => [''], - "geostats" => ['translatetoxy','latfield','longfield','outputlatfield','outputlongfield','globallimit','locallimit','binspanlat','maxzoomlevel','binspanlong'], - "head" => ['limit','null','keeplast'], - "highlight" => [''], - "history" => ['events'], - "iconify" => [''], - "input" => ['sourcetype','index','add','remove'], - "inputcsv" => ['dispatch','append','start','max','events'], - "inputlookup" => ['append','start','max'], - "iplocation" => ['prefix','allfields','lang'], - "join" => ['left','right','usetime','earlier','overwrite','max','type','field'], - "kmeans" => ['reps','maxiters','t','k','cfield','showcentroid','dt'], - "kvform" => ['form','field'], - "loadjob" => ['events','job_delegate','artifact_offset','ignore_running','savedsearch'], - "localize" => ['maxpause','timeafter','timebefore'], - "localop" => [''], - "lookup" => ['local','update','event_time_field'], - "makecontinuous" => ['bins','minspan','span','start','end','aligntime'], - "makejson" => ['output'], - "makemv" => ['delim','allowempty','setsv','tokenizer'], - "makeresults" => ['count','annotate','splunk_server','splunk_server_group'], - "map" => ['maxsearches','search'], - "mcollect" => ['index','file','split','spool','prefix_field','host','source','sourcetype'], - "metadata" => ['index','splunk_server','splunk_server_group','datatype','type'], - "metasearch" => ['savedsearch','savedsplunk','field','eventtypetag','hosttag'], - "meventcollect" => ['index','split','spool','prefix_field','host','source','sourcetype'], - "mstats" => ['prestats','append','backfill','update_period','span','savedsearch','savedsplunk','field'], - "multikv" => ['conf','copyattrs','forceheader','multitable','noheader','rmorig','fields','filter'], - "multisearch" => [''], - "mvcombine" => ['delim'], - "mvexpand" => ['limit'], - "nomv" => [''], - "outlier" => ['param','uselower','mark','action'], - "outputcsv" => ['append','create_empty','override_if_empty','dispatch','usexml','singlefile'], - "outputlookup" => ['append','create_empty','override_if_empty','max','key_field','createinapp','output_format'], - "outputtelemetry" => ['input','type','component','support','anonymous','license','optinrequired'], - "outputtext" => ['usexml'], - "overlap" => [''], - "pivot" => [''], - "predict" => ['correlate','future_timespan','holdback','period','suppress','algorithm','upper','lower'], - "rangemap" => ['default','field'], - "rare" => ['showcount','showperc','limit','countfield','percentfield','useother','otherstr'], - "redistribute" => ['num_of_reducers'], - "regex" => [''], - "relevancy" => [''], - "reltime" => [''], - "rename" => [''], - "replace" => [''], - "rest" => ['count','splunk_server','splunk_server_group','timeout'], - "return" => [''], - "reverse" => [''], - "rex" => ['field','max_match','offset_field','mode'], - "rtorder" => ['discard','buffer_span','max_buffer_size'], - "savedsearch" => ['nosubstitution'], - "script" => ['maxinputs'], - "scrub" => ['dictionary','timeconfig','namespace','public-terms','private-terms','name-terms'], - "search" => ['index','sourcetype','source','eventtype','tag','host','earliest','latest','_index_earliest','_index_latest','savedsearch','savedsplunk','field'], - "searchtxn" => ['max_terms','use_disjunct','eventsonly'], - "selfjoin" => ['overwrite','max','keepsingle'], - "sendemail" => ['to','from','cc','bcc','paperorientation','priority','papersize','content_type','format','subject','message','footer','sendresults','inline','sendcsv','sendpdf','pdfview','server','graceful','width_sort_columns','use_ssl','use_tls','maxinputs','maxtime'], - "set" => [''], - "shape" => ['maxvalues','maxresolution'], - "sichart" => ['sep','format','cont','limit','minspan','start','end','span','bins','usenull','useother','otherstr','nullstr'], - "sirare" => ['showcount','showperc','limit','countfield','percentfield','useother','otherstr'], - "sistats" => ['partitions','allnum','delim'], - "sitimechart" => ['sep','format','fixedrange','partial','cont','limit','minspan','bins','usenull','useother','nullstr','otherstr'], - "sitop" => ['showcount','showperc','limit','countfield','percentfield','useother','otherstr'], - "sort" => [''], - "spath" => ['output','path','input'], - "stats" => ['partitions','allnum','delim'], - "strcat" => ['allrequired'], - "streamstats" => ['reset_on_change','current','window','time_window','global','allnum','reset_before'], - "table" => [''], - "tags" => ['outputfield','inclname','inclvalue'], - "tail" => [''], - "timechart" => ['sep','format','fixedrange','partial','cont','limit','minspan'], - "timewrap" => ['time_format','align','series'], - "top" => ['showcount','showperc','limit','countfield','percentfield','useother','otherstr'], - "transaction" => ['name','maxspan','maxopentxn','delim','maxpause','maxevents','connected','unifyends','keeporphans','maxopenevents','keepevicted','mvlist','nullstr','mvraw','startswith','endswith'], - "transpose" => ['column_name','header_field','include_empty'], - "trendline" => ['sma','ema','wma'], - "tstats" => ['prestats','local','append','summariesonly','allow_old_summaries','span','sid','datamodel','chunk_size','savedsearch','savedsplunk','field'], - "typeahead" => ['max_time','index','collapse','prefix','count'], - "typer" => [''], - "union" => ['extendtimerange','maxtime','maxout','timeout'], - "uniq" => [''], - "untable" => [''], - "where" => [''], - "x11" => ['mult','add'], - "xmlkv" => ['maxinputs'], - "xmlunescape" => ['maxinputs'], - "xpath" => ['field','outfield','default'], - "xyseries" => ['grouped','sep','format'] - }; + "abstract" => ['maxterms','maxlines'], + "accum" => [''], + "addcoltotals" => ['labelfield','label'], + "addinfo" => [''], + "addtotals" => ['row','col','labelfield','label','fieldname'], + "analyzefields" => ['classfield'], + "anomalies" => ['threshold','labelonly','normalize','maxvalues','field','blacklist','blacklistthreshold'], + "anomalousvalue" => ['minsupcount','maxanofreq','minsupfreq','minnormfreq','pthresh','action'], + "anomalydetection" => ['pthresh','cutoff','method','action','action'], + "append" => ['extendtimerange','maxtime','maxout','timeout'], + "appendcols" => ['override','extendtimerange','maxtime','maxout','timeout'], + "appendpipe" => ['run_in_preview'], + "archivebuckets" => ['forcerun','retries'], + "arules" => ['sup','conf'], + "associate" => ['supcnt','supfreq','improv'], + "audit" => [''], + "autoregress" => ['p'], + "bin" => ['bins','minspan'], + "bucketdir" => ['maxcount','countfield','sep','pathfield','sizefield'], + "chart" => ['sep','format','cont','limit','minspan','minspan','useother','useother','aligntime','span','start','end','nullstr','otherstr','bins'], + "cluster" => ['t','delims','showcount','countfield','labelfield','field','labelonly','match'], + "cofilter" => [''], + "collect" => ['addtime','index','index','file','spool','marker','testmode','run_in_preview','host','source','sourcetype'], + "concurrency" => ['start','output','duration'], + "contingency" => ['usetotal','totalstr','maxrows','maxcols','mincolcover','minrowcover'], + "convert" => ['timeformat'], + "correlate" => [''], + "datamodel" => [''], + "dbinspect" => ['index','corruptonly','span'], + "dedup" => ['keepevents','keepempty','consecutive'], + "delete" => [''], + "delta" => ['p'], + "diff" => ['position1','position2','attribute','diffheader','context','maxlen'], + "erex" => ['fromfield','maxtrainers','examples','counterexamples'], + "eval" => ['field'], + "eventcount" => ['index','summarize','report_size','list_vix'], + "eventstats" => ['allnum'], + "extract" => ['segment','reload','kvdelim','pairdelim','limit','maxchars','mv_add','clean_keys'], + "fieldformat" => [''], + "fields" => [''], + "fieldsummary" => ['maxvals'], + "file" => [''], + "filldown" => [''], + "fillnull" => ['value'], + "findtypes" => ['max'], + "foreach" => ['fieldstr','matchstr','matchseg1','matchseg2','matchseg3'], + "format" => ['maxresults','mvsep'], + "from" => [''], + "gauge" => [''], + "gentimes" => ['increment','start','end'], + "geom" => ['gen'], + "geomfilter" => [''], + "geostats" => ['translatetoxy','latfield','longfield','outputlatfield','outputlongfield','globallimit','locallimit','binspanlat','maxzoomlevel','binspanlong'], + "head" => ['limit','null','keeplast'], + "highlight" => [''], + "history" => ['events'], + "iconify" => [''], + "input" => ['sourcetype','index','add','remove'], + "inputcsv" => ['dispatch','append','start','max','events'], + "inputlookup" => ['append','start','max'], + "iplocation" => ['prefix','allfields','lang'], + "join" => ['left','right','usetime','earlier','overwrite','max','type','field'], + "kmeans" => ['reps','maxiters','t','k','cfield','showcentroid','dt'], + "kvform" => ['form','field'], + "loadjob" => ['events','job_delegate','artifact_offset','ignore_running','savedsearch'], + "localize" => ['maxpause','timeafter','timebefore'], + "localop" => [''], + "lookup" => ['local','update','event_time_field'], + "makecontinuous" => ['bins','minspan','span','start','end','aligntime'], + "makejson" => ['output'], + "makemv" => ['delim','allowempty','setsv','tokenizer'], + "makeresults" => ['count','annotate','splunk_server','splunk_server_group'], + "map" => ['maxsearches','search'], + "mcollect" => ['index','file','split','spool','prefix_field','host','source','sourcetype'], + "metadata" => ['index','splunk_server','splunk_server_group','datatype','type'], + "metasearch" => ['savedsearch','savedsplunk','field','eventtypetag','hosttag'], + "meventcollect" => ['index','split','spool','prefix_field','host','source','sourcetype'], + "mstats" => ['prestats','append','backfill','update_period','span','savedsearch','savedsplunk','field'], + "multikv" => ['conf','copyattrs','forceheader','multitable','noheader','rmorig','fields','filter'], + "multisearch" => [''], + "mvcombine" => ['delim'], + "mvexpand" => ['limit'], + "nomv" => [''], + "outlier" => ['param','uselower','mark','action'], + "outputcsv" => ['append','create_empty','override_if_empty','dispatch','usexml','singlefile'], + "outputlookup" => ['append','create_empty','override_if_empty','max','key_field','createinapp','output_format'], + "outputtelemetry" => ['input','type','component','support','anonymous','license','optinrequired'], + "outputtext" => ['usexml'], + "overlap" => [''], + "pivot" => [''], + "predict" => ['correlate','future_timespan','holdback','period','suppress','algorithm','upper','lower'], + "rangemap" => ['default','field'], + "rare" => ['showcount','showperc','limit','countfield','percentfield','useother','otherstr'], + "redistribute" => ['num_of_reducers'], + "regex" => [''], + "relevancy" => [''], + "reltime" => [''], + "rename" => [''], + "replace" => [''], + "rest" => ['count','splunk_server','splunk_server_group','timeout'], + "return" => [''], + "reverse" => [''], + "rex" => ['field','max_match','offset_field','mode'], + "rtorder" => ['discard','buffer_span','max_buffer_size'], + "savedsearch" => ['nosubstitution'], + "script" => ['maxinputs'], + "scrub" => ['dictionary','timeconfig','namespace','public-terms','private-terms','name-terms'], + "search" => ['index','sourcetype','source','eventtype','tag','host','earliest','latest','_index_earliest','_index_latest','savedsearch','savedsplunk','field'], + "searchtxn" => ['max_terms','use_disjunct','eventsonly'], + "selfjoin" => ['overwrite','max','keepsingle'], + "sendemail" => ['to','from','cc','bcc','paperorientation','priority','papersize','content_type','format','subject','message','footer','sendresults','inline','sendcsv','sendpdf','pdfview','server','graceful','width_sort_columns','use_ssl','use_tls','maxinputs','maxtime'], + "set" => [''], + "shape" => ['maxvalues','maxresolution'], + "sichart" => ['sep','format','cont','limit','minspan','start','end','span','bins','usenull','useother','otherstr','nullstr'], + "sirare" => ['showcount','showperc','limit','countfield','percentfield','useother','otherstr'], + "sistats" => ['partitions','allnum','delim'], + "sitimechart" => ['sep','format','fixedrange','partial','cont','limit','minspan','bins','usenull','useother','nullstr','otherstr'], + "sitop" => ['showcount','showperc','limit','countfield','percentfield','useother','otherstr'], + "sort" => [''], + "spath" => ['output','path','input'], + "stats" => ['partitions','allnum','delim'], + "strcat" => ['allrequired'], + "streamstats" => ['reset_on_change','current','window','time_window','global','allnum','reset_before'], + "table" => [''], + "tags" => ['outputfield','inclname','inclvalue'], + "tail" => [''], + "timechart" => ['sep','format','fixedrange','partial','cont','limit','minspan'], + "timewrap" => ['time_format','align','series'], + "top" => ['showcount','showperc','limit','countfield','percentfield','useother','otherstr'], + "transaction" => ['name','maxspan','maxopentxn','delim','maxpause','maxevents','connected','unifyends','keeporphans','maxopenevents','keepevicted','mvlist','nullstr','mvraw','startswith','endswith'], + "transpose" => ['column_name','header_field','include_empty'], + "trendline" => ['sma','ema','wma'], + "tstats" => ['prestats','local','append','summariesonly','allow_old_summaries','span','sid','datamodel','chunk_size','savedsearch','savedsplunk','field'], + "typeahead" => ['max_time','index','collapse','prefix','count'], + "typer" => [''], + "union" => ['extendtimerange','maxtime','maxout','timeout'], + "uniq" => [''], + "untable" => [''], + "where" => [''], + "x11" => ['mult','add'], + "xmlkv" => ['maxinputs'], + "xmlunescape" => ['maxinputs'], + "xpath" => ['field','outfield','default'], + "xyseries" => ['grouped','sep','format'] + }; end - - # Some commands use specific operators, some even require them to be in upper case, but we will not make sure of that here - def self.command_operators + + # Some commands use specific operators, some even require them to be in upper case, but we will not make sure of that here + def self.command_operators @command_operators = { - "bin" => ['as'], - "chart" => ['where','over','not','and','or','xor','like','by','as'], - "convert" => ['as'], - "dedup" => ['sortby'], - "delta" => ['as'], - "eval" => ['and','or','xor','not','like'], - "eventstats" => ['by','as'], - "fieldformat" => ['and','or','xor','not','like'], - "geostats" => ['as'], - "head" => ['and','or','xor','not','like'], - "inputcsv" => ['where'], - "inputlookup" => ['where'], - "join" => ['where'], - "lookup" => ['outputnew','output','as'], - "metasearch" => ['in'], - "mstats" => ['as'], - "predict" => ['as'], - "rare" => ['by'], - "redistribute" => ['by'], - "replace" => ['with','in'], - "rename" => ['as'], - "search" => ['by','where','over','and','or','xor','not','term','in','case'], - "set" => ['union','diff','intersect'], - "sichart" => ['by','where','over','and','or','xor','not','as'], - "sirare" => ['by'], - "sistats" => ['by','as'], - "sitimechart" => ['like','not','and','or','xor','where','like','by','as'], - "sitop" => ['by'], - "sort" => ['auto','str','ip','num','desc','d'], - "stats" => ['by','as'], - "stremstats" => ['like','not','and','or','xor','where','like','by','as'], - "timechart" => ['like','not','and','or','xor','where','by','as'], - "top" => ['by'], - "trendline" => ['as'], - "tstats" => ['like','not','and','or','xor','where','by','in','groupby','as'], - "where" => ['like','not','and','or','xor','like'], - "x11" => ['as'] - }; + "bin" => ['as'], + "chart" => ['where','over','not','and','or','xor','like','by','as'], + "convert" => ['as'], + "dedup" => ['sortby'], + "delta" => ['as'], + "eval" => ['and','or','xor','not','like'], + "eventstats" => ['by','as'], + "fieldformat" => ['and','or','xor','not','like'], + "geostats" => ['as'], + "head" => ['and','or','xor','not','like'], + "inputcsv" => ['where'], + "inputlookup" => ['where'], + "join" => ['where'], + "lookup" => ['outputnew','output','as'], + "metasearch" => ['in'], + "mstats" => ['as'], + "predict" => ['as'], + "rare" => ['by'], + "redistribute" => ['by'], + "replace" => ['with','in'], + "rename" => ['as'], + "search" => ['by','where','over','and','or','xor','not','term','in','case'], + "set" => ['union','diff','intersect'], + "sichart" => ['by','where','over','and','or','xor','not','as'], + "sirare" => ['by'], + "sistats" => ['by','as'], + "sitimechart" => ['like','not','and','or','xor','where','like','by','as'], + "sitop" => ['by'], + "sort" => ['auto','str','ip','num','desc','d'], + "stats" => ['by','as'], + "stremstats" => ['like','not','and','or','xor','where','like','by','as'], + "timechart" => ['like','not','and','or','xor','where','by','as'], + "top" => ['by'], + "trendline" => ['as'], + "tstats" => ['like','not','and','or','xor','where','by','in','groupby','as'], + "where" => ['like','not','and','or','xor','like'], + "x11" => ['as'] + }; end - - # Available evaluation functions - def self.eval_functions - @eval_functions = ['abs','case','ceiling','cidrmatch','coalesce','commands','exact','exp','false','floor','if','ifnull','isbool','isint','isnotnull','isnull','isnum','isstr','len','like','ln','log','lower','match','max','md5','min','mvappend','mvcount','mvdedup','mvindex','mvfilter','mvfind','mvjoin','mvrange','mvsort','mvzip','now','null','nullif','pi','pow','random','relative_time','replace','round','searchmatch','sha1','sha256','sha512','sigfig','spath','split','sqrt','strftime','strptime','substr','time','tostring','trim','ltrim','rtrim','true','typeof','upper','urldecode','validate','tonumber','acos','acosh','asin','asinh','atan','atan2','atanh','cos','cosh','hypot','sin','sinh','tan','tanh'] - end - - # Commands which support evaluation functions (and only those) - def self.eval_commands - @eval_commands = ['eval','head','where'] - end - - # Available aggregation functions (+ eval which is just a link between eval and aggregation) - def self.aggr_functions - @aggr_functions = ['eval','sparkline','c','count','dc','distinct_count','mean','avg','stdev','stdevp','var','varp','sum','sumsq','min','max','mode','median','earliest','first','last','latest','perc','p','exactperc','upperperc','list','values','range','estdc','estdc_error','earliest_time','latest_time','perc70','perc80','perc90','perc91','perc92','perc93','perc94','perc95','perc96','perc97','perc98','perc99'] - end - - # Commands which support aggregation functions (and eval functions consequently through the "eval()" function) - def self.aggr_commands - @aggr_commands = ['chart','eventstats','geostats','mstats','sichart','sistats','sitimechart','stats','streamstats','timechart','tstats'] - end - - # Stack of commands being ran (usually only 1 but it can be more if can of subsearches) - command_stack = Array.new - - state :root do - rule %r/(?=.)/, Text, :query + + # Available evaluation functions + def self.eval_functions + @eval_functions = ['abs','case','ceiling','cidrmatch','coalesce','commands','exact','exp','false','floor','if','ifnull','isbool','isint','isnotnull','isnull','isnum','isstr','len','like','ln','log','lower','match','max','md5','min','mvappend','mvcount','mvdedup','mvindex','mvfilter','mvfind','mvjoin','mvrange','mvsort','mvzip','now','null','nullif','pi','pow','random','relative_time','replace','round','searchmatch','sha1','sha256','sha512','sigfig','spath','split','sqrt','strftime','strptime','substr','time','tostring','trim','ltrim','rtrim','true','typeof','upper','urldecode','validate','tonumber','acos','acosh','asin','asinh','atan','atan2','atanh','cos','cosh','hypot','sin','sinh','tan','tanh'] end - - state :query do - rule %r/\|/, Text, :command - # By default, we assume it is an implict search command - rule %r/(?=.)/ do |m| - command_stack.push "search" - token Text - push :search_command - end - end - - state :subquery do - rule %r/\]/ do |m| - # At the end of a subsearch, we need to clear the last command context - if command_stack.length > 0 - command_stack.pop - end - token Punctuation - pop! - end - rule %r/\|/, Text, :command - rule %r/\w+(?=[ \t]*)(?=\=)/ do |m| - # We can find filters or arguments already - # By default we assume we were in an implicit search command - command_stack.push "search" - if self.class.command_arguments["search"].include? m[0].downcase - token Keyword::Reserved - else - token Text - end - push :search_command - end - # Sub-queries do not need a leading | when running a command - # Trying to avoid to match an argument - rule %r/\w+(?=[ \t]*)(?!\=)/ do |m| - if m[0].downcase == "search" - token Name::Builtin - command_stack.push(m[0].downcase) - push :search_command - elsif self.class.command_arguments.key? m[0].downcase - token Name::Builtin - command_stack.push(m[0].downcase) - push :command_args + + # Commands which support evaluation functions (and only those) + def self.eval_commands + @eval_commands = ['eval','head','where'] + end + + # Available aggregation functions (+ eval which is just a link between eval and aggregation) + def self.aggr_functions + @aggr_functions = ['eval','sparkline','c','count','dc','distinct_count','mean','avg','stdev','stdevp','var','varp','sum','sumsq','min','max','mode','median','earliest','first','last','latest','perc','p','exactperc','upperperc','list','values','range','estdc','estdc_error','earliest_time','latest_time','perc70','perc80','perc90','perc91','perc92','perc93','perc94','perc95','perc96','perc97','perc98','perc99'] + end + + # Commands which support aggregation functions (and eval functions consequently through the "eval()" function) + def self.aggr_commands + @aggr_commands = ['chart','eventstats','geostats','mstats','sichart','sistats','sitimechart','stats','streamstats','timechart','tstats'] + end + + # Stack of commands being ran (usually only 1 but it can be more if can of subsearches) + command_stack = Array.new + + state :root do + rule %r/(?=.)/, Text, :query + end + + state :query do + rule %r/\|/, Text, :command + # By default, we assume it is an implict search command + rule %r/(?=.)/ do |m| + command_stack.push "search" + token Text + push :search_command + end + end + + state :subquery do + rule %r/\]/ do |m| + # At the end of a subsearch, we need to clear the last command context + if command_stack.length > 0 + command_stack.pop + end + token Punctuation + pop! + end + rule %r/\|/, Text, :command + rule %r/\w+(?=[ \t]*)(?=\=)/ do |m| + # We can find filters or arguments already + # By default we assume we were in an implicit search command + command_stack.push "search" + if self.class.command_arguments["search"].include? m[0].downcase + token Keyword::Reserved else - token Text + token Text end - end - # By default, we assume it is an implict search command - rule %r/(?=.)/ do |m| - command_stack.push "search" - token Text - push :search_command - end - end - - # Search commands have a specific status, being implicit in some situations - state :search_command do - rule %r/```/, Comment::Multiline, :multiline_comments - rule %r/`\s*comment\s*\(\s*"/, Comment::Preproc, :comment_macro - rule %r/(`)(\s*\w+)([^`]*)(`)/, Comment::Preproc - rule %r/0[xX][0-9a-fA-F]*/, Num::Hex - rule %r/[$][+-]*\d*(\.\d*)?/, Num - rule %r/((\d+(\.\d*)?)|(\.\d+))([eE][\-+]?\d+)?/, Num - rule %r/[!<>=,]+/, Punctuation - rule %r/[()]/, Punctuation - rule %r/\|/, Text, :command - rule %r/["]/, Str::Escape, :double_string - rule %r/[']/, Str::Escape, :single_string - rule %r/\s+/m, Text - rule %r/\[/, Punctuation, :subquery - rule %r/\w+(?=[ \t]*)(?=\=)/ do |m| - if self.class.command_arguments.key? command_stack.last - if self.class.command_arguments[command_stack.last].include? m[0].downcase - token Keyword::Reserved - else - token Text - end - else - token Text - end - end - # Some commands have specific operators available - rule %r/[^ \t"'\d!<>=,()\[\]]+/m do |m| - if self.class.command_operators.key? command_stack.last - if self.class.command_operators[command_stack.last].include? m[0].downcase - token Operator::Word - else - token Text - end - else - token Text - end - end - # If finding a closing bracket, popping twice to leave the current state AND the subquery state - rule %r/\]/ do |m| - token Punctuation - pop! - pop! - end - end - - # Other commands not being implicit, we were only handle the initial part "| command_name" and then just into arguments if any - state :command do - rule %r/\s+/m, Text - # Highlighting only known Splunk commands - rule %r/\w+/m do |m| - if self.class.command_arguments.key? m[0].downcase + push :search_command + end + # Sub-queries do not need a leading | when running a command + # Trying to avoid to match an argument + rule %r/\w+(?=[ \t]*)(?!\=)/ do |m| + if m[0].downcase == "search" token Name::Builtin - command_stack.push(m[0].downcase) + command_stack.push(m[0].downcase) + push :search_command + elsif self.class.command_arguments.key? m[0].downcase + token Name::Builtin + command_stack.push(m[0].downcase) + push :command_args else - command_stack.push "unknown" token Text end - push :command_args - end - # When jumping to the next command, clearing last command - rule %r/\|/ do |m| - if command_stack.length > 0 - command_stack.pop - end - token Punctuation - end - rule %r/\[/, Punctuation, :subquery - # If finding a closing bracket, popping twice to leave the current state AND the subquery state - rule %r/\]/ do |m| - token Punctuation - pop! - pop! - end - end - - # Handling arguments after having initialized the command context - state :command_args do - rule %r/```/, Comment::Multiline, :multiline_comments - rule %r/`\s*comment\s*\(\s*"/, Comment::Preproc, :comment_macro - rule %r/(`)(\s*\w+)([^`]*)(`)/, Comment::Preproc - rule %r/\s+/m, Text - rule %r/0[xX][0-9a-fA-F]*/, Num::Hex - rule %r/[$][+-]*\d*(\.\d*)?/, Num - rule %r/((\d+(\.\d*)?)|(\.\d+))([eE][\-+]?\d+)?/, Num - rule %r/[!<>=,%\+\.\*\-\/]+/, Punctuation - rule %r/[()]/, Punctuation - # Command arguments, checking it is a known argument for the current command - rule %r/\w+(?=[ \t]*)(?=\=)/ do |m| - if self.class.command_arguments.key? command_stack.last - if self.class.command_arguments[command_stack.last].include? m[0].downcase - token Keyword::Reserved - else - token Text - end - else - token Text - end - end - rule %r/\w+(?=[ \t]*)(?=\()/ do |m| - if ( self.class.eval_commands.include?(command_stack.last) && self.class.eval_functions.include?(m[0].downcase) ) - token Name::Function - # Aggregation functions can use eval functions through the "eval()" function - elsif ( self.class.aggr_commands.include?(command_stack.last) && ( self.class.aggr_functions.include?(m[0].downcase) || self.class.eval_functions.include?(m[0].downcase)) ) - token Name::Function - else - token Text - end - end - rule %r/["]/, Str::Escape, :double_string - rule %r/[']/, Str::Escape, :single_string - # When jumping to the next command, clearing last command - rule %r/\|/ do |m| - if command_stack.length > 0 - command_stack.pop - end - token Punctuation - pop! - end - # A subquery can occur anywhere - rule %r/\[/, Text, :subquery - # If finding a closing bracket, popping twice to leave the current state AND the subquery state - rule %r/\]/ do |m| - token Punctuation - pop! - pop! - end - # Some commands have specific operators available - rule %r/[^ \t"'\d!<>=,()\[\]]+/m do |m| - if self.class.command_operators.key? command_stack.last - if self.class.command_operators[command_stack.last].include? m[0].downcase - token Operator::Word - else - token Text - end - else - token Text - end - end - end - - state :multiline_comments do + end + # By default, we assume it is an implict search command + rule %r/(?=.)/ do |m| + command_stack.push "search" + token Text + push :search_command + end + end + + # Search commands have a specific status, being implicit in some situations + state :search_command do + rule %r/```/, Comment::Multiline, :multiline_comments + rule %r/`\s*comment\s*\(\s*"/, Comment::Preproc, :comment_macro + rule %r/(`)(\s*\w+)([^`]*)(`)/, Comment::Preproc + rule %r/0[xX][0-9a-fA-F]*/, Num::Hex + rule %r/[$][+-]*\d*(\.\d*)?/, Num + rule %r/((\d+(\.\d*)?)|(\.\d+))([eE][\-+]?\d+)?/, Num + rule %r/[!<>=,]+/, Punctuation + rule %r/[()]/, Punctuation + rule %r/\|/, Text, :command + rule %r/["]/, Str::Escape, :double_string + rule %r/[']/, Str::Escape, :single_string + rule %r/\s+/m, Text + rule %r/\[/, Punctuation, :subquery + rule %r/\w+(?=[ \t]*)(?=\=)/ do |m| + if self.class.command_arguments.key? command_stack.last + if self.class.command_arguments[command_stack.last].include? m[0].downcase + token Keyword::Reserved + else + token Text + end + else + token Text + end + end + # Some commands have specific operators available + rule %r/[^ \t"'\d!<>=,()\[\]]+/m do |m| + if self.class.command_operators.key? command_stack.last + if self.class.command_operators[command_stack.last].include? m[0].downcase + token Operator::Word + else + token Text + end + else + token Text + end + end + # If finding a closing bracket, popping twice to leave the current state AND the subquery state + rule %r/\]/ do |m| + token Punctuation + pop! + pop! + end + end + + # Other commands not being implicit, we were only handle the initial part "| command_name" and then just into arguments if any + state :command do + rule %r/\s+/m, Text + # Highlighting only known Splunk commands + rule %r/\w+/m do |m| + if self.class.command_arguments.key? m[0].downcase + token Name::Builtin + command_stack.push(m[0].downcase) + else + command_stack.push "unknown" + token Text + end + push :command_args + end + # When jumping to the next command, clearing last command + rule %r/\|/ do |m| + if command_stack.length > 0 + command_stack.pop + end + token Punctuation + end + rule %r/\[/, Punctuation, :subquery + # If finding a closing bracket, popping twice to leave the current state AND the subquery state + rule %r/\]/ do |m| + token Punctuation + pop! + pop! + end + end + + # Handling arguments after having initialized the command context + state :command_args do + rule %r/```/, Comment::Multiline, :multiline_comments + rule %r/`\s*comment\s*\(\s*"/, Comment::Preproc, :comment_macro + rule %r/(`)(\s*\w+)([^`]*)(`)/, Comment::Preproc + rule %r/\s+/m, Text + rule %r/0[xX][0-9a-fA-F]*/, Num::Hex + rule %r/[$][+-]*\d*(\.\d*)?/, Num + rule %r/((\d+(\.\d*)?)|(\.\d+))([eE][\-+]?\d+)?/, Num + rule %r/[!<>=,%\+\.\*\-\/]+/, Punctuation + rule %r/[()]/, Punctuation + # Command arguments, checking it is a known argument for the current command + rule %r/\w+(?=[ \t]*)(?=\=)/ do |m| + if self.class.command_arguments.key? command_stack.last + if self.class.command_arguments[command_stack.last].include? m[0].downcase + token Keyword::Reserved + else + token Text + end + else + token Text + end + end + rule %r/\w+(?=[ \t]*)(?=\()/ do |m| + if ( self.class.eval_commands.include?(command_stack.last) && self.class.eval_functions.include?(m[0].downcase) ) + token Name::Function + # Aggregation functions can use eval functions through the "eval()" function + elsif ( self.class.aggr_commands.include?(command_stack.last) && ( self.class.aggr_functions.include?(m[0].downcase) || self.class.eval_functions.include?(m[0].downcase)) ) + token Name::Function + else + token Text + end + end + rule %r/["]/, Str::Escape, :double_string + rule %r/[']/, Str::Escape, :single_string + # When jumping to the next command, clearing last command + rule %r/\|/ do |m| + if command_stack.length > 0 + command_stack.pop + end + token Punctuation + pop! + end + # A subquery can occur anywhere + rule %r/\[/, Text, :subquery + # If finding a closing bracket, popping twice to leave the current state AND the subquery state + rule %r/\]/ do |m| + token Punctuation + pop! + pop! + end + # Some commands have specific operators available + rule %r/[^ \t"'\d!<>=,()\[\]]+/m do |m| + if self.class.command_operators.key? command_stack.last + if self.class.command_operators[command_stack.last].include? m[0].downcase + token Operator::Word + else + token Text + end + else + token Text + end + end + end + + state :multiline_comments do rule %r(```), Comment::Multiline, :pop! rule %r/./, Comment::Multiline end - - state :comment_macro do - rule %r/"\s*\)\s*`/, Comment::Preproc, :pop! + + state :comment_macro do + rule %r/"\s*\)\s*`/, Comment::Preproc, :pop! rule %r/\\./, Comment::Single rule %r/[^\\"]+/, Comment::Single - end - - # When found in a rex/regex command, a double string will be a regex - state :double_string do + end + + # When found in a rex/regex command, a double string will be a regex + state :double_string do rule %r/\\./ do |m| - if ( (command_stack.last == "rex") || (command_stack.last == "regex") ) - token Str::Regex - else - token Str::Double - end - end + if ( (command_stack.last == "rex") || (command_stack.last == "regex") ) + token Str::Regex + else + token Str::Double + end + end rule %r/["]/, Str::Escape, :pop! rule %r/[^\\"]+/ do |m| - if ( (command_stack.last == "rex") || (command_stack.last == "regex") ) - token Str::Regex - else - token Str::Double - end - end + if ( (command_stack.last == "rex") || (command_stack.last == "regex") ) + token Str::Regex + else + token Str::Double + end + end end - - state :single_string do + + state :single_string do rule %r/\\./, Str::Single rule %r/[']/, Str::Escape, :pop! rule %r/[^\\']+/, Str::Single end - - end + end end end \ No newline at end of file From e5bdd4c094846897067f4f6233905012543617a1 Mon Sep 17 00:00:00 2001 From: Sylver Date: Sat, 3 Jun 2023 17:11:39 +0200 Subject: [PATCH 3/7] Simplified states Removed the :search_command state to reduce redundancy and playing instead with the states stack --- ...7c1f1be2-ee1d-49b8-9344-7af822d4e280.vsidx | Bin 0 -> 43516 bytes lib/rouge/lexers/spl.rb | 107 +++++++----------- 2 files changed, 38 insertions(+), 69 deletions(-) create mode 100644 .vs/rouge/FileContentIndex/7c1f1be2-ee1d-49b8-9344-7af822d4e280.vsidx diff --git a/.vs/rouge/FileContentIndex/7c1f1be2-ee1d-49b8-9344-7af822d4e280.vsidx b/.vs/rouge/FileContentIndex/7c1f1be2-ee1d-49b8-9344-7af822d4e280.vsidx new file mode 100644 index 0000000000000000000000000000000000000000..ffab81345d9f640e8f73ff9f1c2e26f2a29da0a7 GIT binary patch literal 43516 zcmb{41(+Mx`7eHlnZ051B+ij`wUX_`NxZSMJDNF?#yjKztz=1-!_ZJe4Qx_oPE$@9 z(xlAHZOU!RO`9?^GyTu|_4nrN-N)_o``>#{v-3Ilo--q9G&Ay3&u{O(V#SJk?=l;c zBrjS_biBmzUB_=azIOb;(XOL2M|U0Fb6SDZN}N{av|XpIosRi0a01T> zVkbzQ&51J~IrCj7be%A9BG-vLC(=%oJ5lLGT_;YQq;!(5lLk(jIhpHZ+Q|whcb(jG za_!_DCl8!Faq`s33#V|MB5;btDKe+XoucoQj#I|YLdWT?b$XH0OPyZd8T!sJcScjr zsP2p!C#^o^8WO*nxSfzs@YebQl6_kPkCB-E#-BT7bq`OUZT8A zd8P7|@?GV7%Gb&dlwT;nRN7TKQaV;TSGucoPqkdt3REjptwgml)hbn6t9Gc`nd)fO zNmQq=f|d%}Du`4Ns~}ZDt~MuXUa5Jl=DR9%Rj5@Ms4!MxqQXK&u8MpWX%z)3N>r4q zs8mr`#jc7I6_+Y$sU%fNrjkM>rr1@fQmLy_U!_{5fl3pVW-2XITB^)dnO0e#vPfl# z$_ka0DtA?`Ro+o~pz=uNiON%zZ&7)n@=_H_6|O3@DmtnNRFSA6Q$?uYzt_-$ShSkcjUK#q8 zVP4r@t!y`cv(?JL)S5!ITB%kW)kdw_2&#>!I$N#IHmbA3>R?-Sm{*5`>Znm&tV}wM z$;zyWJ6Rb{Rx6X$+GMpcS)HA%=aY@vWMku`mrM?}O%5xQ!|LR4dUEJb4i_dz)ydJ+ zg?2HV`_Rd)u>H1qN%~QsbPI;G+SFe zG;h$FYSxrjQ+`e9nrhWlyQVrd71Y$`nwqbvu%@D#ifbyVskElDn#yacsHw81x;3>> zQ@xrR*DBRoeN-DwO;1iwPmQLh>(lkg=?(6*8&9{~>DFjE=}vDcr}Hh-g`O^g>7qY9 zY)p@;)1%3HwNam})+ZbFsYbm~tDn)RyY0Fc*1e?em32R<>u$YO*4y=Zr>F;UeRE!) z*Y(h?hi049qkcWs^)#ucWj#;pWm+Fj)`xz5)M&`*$!v!W)o4^Ijmm7J;x;P7MrC`W zT5Fi2Ha%_T-Z62b;WrxX#+pH6eZ8^q?1q~*nqkB1G<>(=>xQ2+blhkMjZV-AYK_SJ z=Qg6C5tofLY-GBTr;WU5^rjnwRgHnu7@XV~D0w#xCL4pZ8iQeDu&ptiYz+OzFmDVO z8pA8f^y`LChRSfkEVwKLbK4QjN(Y}cv{>(qu*)rQm5 zhV^R0>1x9nYQvdo!vrqP(0Y0k{}Gc$T-rZqFunVAV@W}=yybY>=-naO8n zx-&BiGfj7?iBJU8>* zOuJdz%>p-@ceBvVQa8)o%ycWy^j-Mnz~MYnL|OQ~?(!gCAl7A?2v zxJBR=p<5(wk-J6V7NuMC-O_itD z1#T~Ndx_gi-CpMQa<^Bwy{_Ba>h{KNzwY*Dn$ugFb+6fony%M$!={@zJ+J8{P2Fnh zpsB;A&YHSt2JL1LH-nFPf#-EL+VoXqJWf zQf>CD&3@4wn{UmUH(hv*%xmP{8Ed>V*LWMue`~!B>%0x8dK*sjHmvtHobGKn!`pDC zw_$^M70rubj1jv4pLNUA9Zv_Ijy;`uI`?$pwF0@)G2h#rn!LF> zp4ZV{$5cjMC-XYF*D1VC>2=0l;Cg}Y1uZX#ydd|2!kbs#eBGPZ-h9iO54`!%n=ic3 z^+N51u@@#@SbAaCi(M}+t))@0xwIvtn~7( zS2$jwyrSlrohYd;QNJKr07{+d%pI4-STzl z>&VxcuXA6U)~>I6z8?E6<+l>Q)s;_M;Q2wz4`M&a{2=#(!VgM+bK-~E4+B4T{lxQ= z$WKZ?S@2WePXj-5{Y?8=hrhb|Eqe*`><&Rx|9Q)(MA7}nJ_ZJhbYFdqSwW_CGJ)P+3E#?bM*IT;Y*7ZQw zH=FaOo~@Ykh&fYex25I0U_SZgvmKicq4^-0&xUSl-7L+UKzo6CX|Xz26K(`{@N_4Bxtx~tw>h?O_K2^6}-PXDt=ys&rneJ#E zggS_Ikm=324jmmT9lCO`L#@M>4g(z~Iy6m%j$9pS9VI$Sb(HIM2`F?ZdT^#$e zgt)EaK*za`3mum_9_yr~lSn7IP70lrI#sgFG}LLL(@dwi-lFuDQm*AQrL&68>N;!a z%$1KtrgawREYw+~vqWd9&N7|lIxBQ$hIDndptHWt#yWT80yeLjFBzTB%I7K9I`8N_ z(Rr%#EjrJ2Ug*5kg`*3li<&O#x^U&KRA^mvbP?zx(M6_f-d^H7|TU**IY`t zx~p|J(A`uoD7{e83w6DanVS!D$*g---D~Kct9zdAY26ETFVekK_X^z`>;A0n=eob3 z`(r&C=&@(swtB4fxTVK|9;bSo>2a>dg&r@oRIQ~(t?4scb+=XbT6Nv3hpl=lS3|DX za>JHen$K-ZXRVggYN=MsYqfN%)oHczR>!>T<}6~i?N$)Bg18lAt)SQ1oV3EE6}hdr zl>4M4Y$Zu6$y!O-%5*DBTDf_fD0!z8L8}Z~W!x%@RySz%omOABMw6|vZjH0Hssc;V)Ft7t+Cu`$#FCb4}x|a%fOY(CQF^BJ}_hwVfz^a9U(LCLu`XoW%BtT7BC^Gbw4 zZVpNqYz^nTVd#XRl1swS3qwB)br`nfWe?*pOu{e=!#oW8a?c&RVO+|UV`46I%$L9U zw8dePgh?7Ed6<}{g)lQGbtlYKn0s<|&x0^8%$<5z)WX6wCwud53X3ExvarbI+Ozb- zQio*}mT_2`nj-9)3sM#KYhm9H`&+}Y7mi!uIE+*+QlqHhMGfEF5131Ex%+oLc|W@I zksC&C9Jy)aW|5mmZV|b?sHvl75H&+{EfV?b<=KpRRG_CLU61sPTUvSvH{WM?G8*$vo@k<5*|0&STw;^+K$Bu^z`Q6}Pl};#*1FR&jeeZqLN+rhI9) zTX8##+gaQ$;&vIg$8krQ6OcI9&+4z|YgDxQzy`LWzy$Q#tWvHE5vIcLO4 zoV4Pk6DN_$#z__@d7Kn+vJj_boas1A;@pXI73X!iDak##Ofct|JdE=+&WkuNjEk{(r^jU!mq~1{NBT`USM{T~ zU&Q@|cr+D{J#*@c$Mf+xj>l;{HV=ks<^e%sE(Yc1sz{nn(rhG6H<3FBb9NagUM=w& ziRUJsmw4v1rxIVw$rd)K(o4A!kVG>2=otH%2BzDZZCyA>`Tub7$NxUwJUAZ=o zb%Ga6&c1TQ&C$)xfk?2=N!(B3agxl-=R1j#BukPaNmnJQBNz3lPSP|eT0a?+5cX)af+<_pjq&RnJWjJbMEbMqbOq=lCj9rI-< zhq>sdWz)Rn(=t!XBJI0dRBcPBwuJR9Sz#`9x0Jz_(!3P4OpUTiC95>Diknr6tTM4}tet1=QXUutO}S;1%Nlbv z)6Rm(TtLYC%N(sB&w?Tgx>>L_n^)#5Ih!|MSLOkOpM^mdIr1D=z7M=i&fw-PnP;(+ zC3U$0l}}obC6Reso1<>trshqSCEYAp$davDs^p#^?PO^o4=T*fK^Dk+IV-bVWqB>j z>vFr0n@f@+%LnG6zC0~1npxpxMIi5YGp@+9Vw{=B0OppW3^Q}CDpR?)=myzBmi3*i zud=?E^@D8eW#g8)IL*d+wrEaf)m+tbHOeQQe6o^HR`W?OpB&_q!+dgkK2^!58u`>9 zue*8O%j-I?2j&rEKGQIdkaBqv)XkgbmRp&p19HD2x8G*5<_8D!B)gjHnmlOHp4^P; zwtPj)ZI$_4$}N-ZlibpndzGHKbI4ofiq<^*uH+pTpIR>u{5)voLEBvU$tl)+j&WI* z2Spxi&E>MmTw5q}O=G@;&1pK1%}3WWXRADk%*n-^3(O}uPhGirG8b-XXg(71G<{2$ zZ^`o9$#a$Gb$JMsdwJf<^DxhoJTLORG{2C@L-(R8&xZ<6ZlleOSJ9SBzrtK)n6FlI zQZX+`kr#u!80Ez{FWtOs=4F_dQC_CGdG)$Z-c@$3-JGk5-GBnPSGQ#r$s5Ebyq#JdcwL zPji{$M)D;ikGD*R!aVUYPgust2el}Y zo;mRqXxXGc?C?LzWjb=eg?^FMP4s*ugDYg zsuX#*$QO!yAdl{3_so4yS`=AP^yNujnHA9Scbn_k)6TsFf} zp33FsVOi7=9z+gOwFBqt5vpj+3u7y)7?LtTbp85U*a zl#wcu2nWklGj)W_ZzA{~AVVT9HIq&3-`5mduYh~`0xi0gL9ICu5 z^KMxxH7oO%e$Xsa0T{rEz zS=TMP=JA|)_@$M3Hq_OASBK`RN6u9`@9JJxkGrj)+e*4^)eSs(ToJ_Gpx51;bVIq9 z>Lx`uRo%>-dUQ8Wx?6+p*gScyEvS+CHF9C9vM^O$m})Fc4HoL11-HN8X?%zK3tBJe z$UJRX=;(z`un>3)fxi%h3qfq&0t=D5V1ECMy@e!OFb_!Nizisf)xu!7F!bd+->LRg zt*1u4>Acs-dTZDAT(9RwJvZ)oot~HUwBOTNPj`Dk+%xYfxlIasQP_)}UaWet*Gtk~ zlJ%0Zmnu9GHs3&rIrsJQv{yL2LiLJTPd@FQJaj37UXjVu;@+s&SG|7T+nV%7gWkAl zp2zgYt=>57jicV!ymM=PHR@Lz=GUP9Y^86WQ#ZrDm-Kzz*KXfjQOf0H(of2Ms{3iu z&y_sO$+z?i^F*i9Ur72}&2ydBR_$!nYOD6PYJaOX#dxdEwg&0eBpIyQD5oxGG%!B^ zD>YE=Ks5)-8z_IE^gy)+sy$G_K+O+SI8f0*#RHWLR60=EK;;8f3{*K#-GMorYBZ=+ z2a}`0)Z}1ldeE30G-`u}x$~GFtQif~js|DV47_BZ^&r=SqCe>M2E%FdA`FJ3!EkXf zs+nJt2HS>%ZQF-VW9W>AYGi)D99D+I>d^drIGh{~r-sAnsp0f+SZ@sL=BK97a8q^Y zuach){i&fp8pb2@^)}3>hWT)qkIZRzSd4}XQ!>fIP=1~o42MH!I8-LYaI|VTs>-ib zqv3Eg8s0n{E>_JiSHne<84Yi#jaF?ORcoVaV>DSAO;$&fCNmsORn1RZqq;w;2cvp8 zs+Z=c&QYT_nl-yIHIiL5@opZCrbeU2w&~Hf#^knPzHL}+8xFS(7q@L6$>Ufz8#nd1 zS;|8iFOf%XelXVVSVv>?#IO~NTgkXp%9pu3WHGl0=C<4&hru{<$FVz>>-*5$&yAB* z9?_+lJf+J@c~F-J<~CxSr{g@AQ(9iiqq`y)7omAWjl02kA&{SQtF`Ub#`fx9d!x3! z5pDO9?ZbTgXma~#%KXy1eY^S9&ZI^2znh1*ELO~cso>bu7H79DnqO9${l#c;F-{iE z75T8TILsGEQ;XZHi;J6Xsnu^?HP_nlZ!1JthW-WD53j!Rx{EFtUUtd3|0{RF4d{B+W%aAiz2(lEZ@c66`*yj%w0xvw zouG#sC5bZc5%frU6g`?ALyx7$(c|d}^hCOfI`kxZGF?qiF-j6;9g0?Hl}^$rTBFmn zP8)O$T}#)|Q|W1RJw2VCLC>Tc=nOrJZls&&*>sl9QJ2a)Qufa?f09Hg`&3hT2TOgM zc4%OfBue{cI!{9y86}C*FP4HyOO^V>{E4aglO#%cixf;+s?=xZPt0jyl(bZ7FU_CW zr3+L(N>bmaTj_uf>4(evpA^g?GU@G40=0#CVdutHhm6#E`1(-KD~p!fWDBvh`yMKsJ(l^mJ)3?yK(znsK(|6E!(s$8!)A!K#()ZE#(+|)O z(ht!O(~r=P(tGH~=*Q_N=qKr?=%?vt=x6EY=)Lsw^b7Qh^h@;1^egnM^lS7!`gQsZ zdcRT9Qg<=sZ=!aGQvR0t6TeNrL%(a3v@@mu_t3gSDSzMmi9et}G)fYs{zpk#RH^@$`4j(5Q80N?>UWV1mF%vp-;M50 z_n>$l}^$rqok!uRgLY_Y+tHW z)mbk}wn5jhUX<)wmPN^~V|l4kbt>yc$(}~nvwmkv)9Gv%C3^-vlWveolSIkR(6d;- zGo@)G`)#6U(^)!al(bZ-a@pRb9`&WsBvG=OwphP2rK!z!QL-HxuzsmhwVCz*6Q%1s z`-_qdX~g=aN>$AEgr@BG-;^#}*gs=`QL;HLSidu+sbs${U0{DvvOU_TTj_ufjgoez zG>zE4RH@o#HsY9Wr{~af>3Q^gqa;zX7tjmoMf75N3B8m)h+bxtBue&jmPN^4L9e7& z(FfD3=`}`4qGYe7*U{_gL!{9pQL;DC8|h8-X1Yjkp|{e9(udK9(?`%p(nrxp)5jPk ziIROReH?u}eFA+VeUed zlM#ClP(FJ^gXO4Cc&zEr7tso97xqc4|&Nup$5!LlgX zSJFFKzcZ!jRcsd}dl$W%^`c~7&GJ&E>NRYCE!%gdG`)`fUQge^{-R{x$nsL9>P>8a zGkpvDiIROQeH(o{eFuFfeV0*^DA{+@_t5v!_tE#$56}-{U-eu{Wkp$ z{Vx3;{XYEx{UQAk{W1Ls{VDw!{kc(+DA`}oU(#REU(?^v-_qaF-_t+PKN=;8lKm6? zGyRKHnj}j0uk>%M7bW|5`Vaa~sWeHH?7!&0sXR!O`d#R*bT_&?-GlB)_o92#edxY) zKe|6XfF4K>GD;F<{)6cu^iX;jT}dbC;q(Z4Bt42AO^>0+(&Omy^aOe$T}2&w5*%TUG`gOiPS2oc(hYQmo<%p(O-4zg9QU(X7NtB( z=cr4Y)H6yFrN7UzC}mAsv`srSpquGD4QWJUn$VPPp&89-K}(~grOLXxY+s-~_7kPO zPq)$m9nulqM#pqJJ%^r4&!gwl3+RRPB6=~sgkDM?L@%S4(<|te^eXybdNsX8t5$=xdFVMCtcB`g-~X`bPRD z`eynT`c|W)rAoZFvHk6A7p49k^qurw^xgD5^u6?b^!@Y$^n>(6^uzQc^rQ41`Z4-( z`U(0;`YHNp`WgCJ`Z;mF`A& zr+XMBiPC>hx)?nC#b`_cXB0rWt65IvY4LJy^f(Uo+99!`&-N7AF{(exO4tWlCE z>pzZVQOd{D6X=O_6?N!I^kll)C`pt!r%**Jv`Q!G6s^%|TBi-VhOVXS=&AHHx}Kg+ z&!A`04RnT{MK{t-MoFUV$Jun2&QX^(sYiXPX^XaLhX!;rou?s^dyn?%Ryv?VI-=X?m~N-%&~xc|^n7{&y^vl+FQ%8!OX-7*l0;eWW%P1- z1-+79Wt1dJ{|D2n={59PdL6yqC`puY51}{E8|h8-X1Yjkp|{e9(udK9(?`%p8YPJ` z-=pZG>0{_)>En!&mMY^O&-N$KC$gU??N6dlX1yr&PoYnxPoqz#x6x1)_ulzy+J zucNPLyD05%pl_saqHm^ep>L&cqi?70pzoybqVJ~fF-j6;zW37i(f892&=1lN(GSy) z(2vr4=*Q^C=_lwX>8I$Y>1XI?>F4OZ^z-x!^ovGGqOAW*^vm=s^sDr1MoFUdzmI;M zeuLgmze&GEzfHeGze~SIzfXT)l(bZt?}u#v5&bd!3H_;2k|_OuMt@F!L4QeqMSo3y zLw{?Ov{Z@rJGTFx{(=6H{)zsX{)PUP{*C^f{)7IL{)_&b%0Cd3d3P~N5~aK=-Hq-} z_n>>yz3AR_AG$BykM2(opa;@}=)v?5dMG`NuA~$6aC!tik{(5mrpM4@>2dUUqa;!G z_XK((T}2&wl2MW<{ZFQ=SuaZcDOAx4t*?wA z40e7K9N3&KAAp+K9xR=KAql1pFwY@&!o?y&!*3z z&!x|!&!>0L7tj~d7tt5fm(Z8em(iEgSI}3|JL#+FT}DZw9EZEFen0 z=^N-9jgmwe_a^#g){9dA7W!8DHu`q@4*E{|F8Xfz9{OJTKKg$80s2AuA)}>^(bT7I$-G}Z=_oMsM z1L%SDAbK!8gdR!{qbunIJ)9n4lqAafk7QYt@=^3?dJH|59!HO-C(sk=D(cXa=*e`o zQIaV0okA6@&?=oYN)o026s^%|TBi-VhOVXS=&AHHx}Kg+&!A`04RppRX{oZFv)I0o zZlY(?Svp5u+N2)!sirNXBvIyR(+&;jW;#zp8qt_0G&M>RCGHlQ(VP~vq+Pl|d$dot z(g7XP5#45#B+9&Fx}Baw&oxT=Z_4=d*#CTb0lko3L@zc<5@p;a^itM~QvV>9MJZoK zFQ-?~E9q7A!Srf+jZxB4CEm4czm8r{A3|@SH`1Hv&2*98Vw5CG+*|2G>BH#5jgmy^ z_Xzq(`Y8Hn`WX6H`Z)S{`ULt!`Xu^f`V^xiQRaIp%c7K@MxRb^W4kEr&!D%{XVPcU zXVd4<=hElV=hHjr3+M~!i|C8#OXy4K%jnDLE9fifo%B`oE_ye8HGK_zEqxt*J$(ax zBYhKnGkptvt5K3D$LDPmF% zhklHHoPL6Sl75PQntq0U)+lMIvi{GpUX=2^EQ?b9JpBUwBK;EmGW`nus!@_CSk8T~o^1^p%c z75z2+4gD?s9sRvgk|^u_1N|fY6a6#&3;iqo8~r=|hf$I!@%~BwMgL7#?75?T7rHCm zjqXnOpnKB2=-zZ6x-Z?2?oSV(2hxM+!SoP%C_Rj>q!aXTdIUX^9z~C)$IxTxaYjj^ z9FODa39J{T{zSTpI`kxZGF?qip^8>$l}^$rTBFlONutbKrwzJ>uBGefsq{3u-Y7|w zIH%Jy=$UkbQIaVAX6RXTBi%&Lrn7X8y0l3>>QhZyv`srSpquGD4QWJUn$VPPp&89- zK}*^-N)ly%7if?6=~g_4X4={fXVdLBKWUO+FT7txF9CG=AI zAbJ_SoL)h%q*u`g)2rz<^jdlyy`DaV-av1pH_@BvBE5y)N*_ueMjuWeK_5vUMITKc zLmx{YM;}k0K%Z!oB+7Ar5`8j#3VkYl8htvwjXs0kPM>L%v{Z@zEVe(JK8O88X@4$# z9_#-n%DCsV|IU>5JJ>Et`2{SCQhp(Q5q&Xz34JMj8GSi@1$`yGlfH`HMenAsrmvx| zrLUu}r*EKdG)fX>oo}LVrf;EdrEjBer|&RI5@q~5>AUE=*)B@^d+2-V``9i@`}^q! z=m+VC=!fY?=tt>2^kekn^b_=x^i%ZH^fUCcMoFTq<8v&FQofgdo_>LTk$#DOnSO$A*QQ8lshtZXEf*x*^Buf7y z=#lg&dbCl}QlTc=nOrJZls&&*>sl9QI|HUM}4Yki?)rDMA^>{ z%c7J6x|zDBZadaY5CDE+RZ*VBj4 z8|aPnCVDenq_@yp=|hc@M2Y_}`f&OP`beXs|36B+M{(TFl=eq6{$uE4+5f*O;~vL; zqLd#`pFp2TpG2QbpF*EXpGKcfZ==tkx6^0RXVGWV=g{ZU=h5fWJLn7O3+apKi;a>* z*^igdm(rKfm(y3!SJFG_tLR zzMZ~zL&m_zMp=8evp2MewcoQe$*&Ql>NAeevE#ceu93Ieu{pYeujRQ zevaNtKTp3vzevACzigBw$~wM6ze>MG@1tL*-=O!?Z_;nkZ`1G4@6zwl@6#U`C5bZs z59yESkLge7PwCI-&*?AdFX^x7ujy~-Z|U#o@97`tAL*aypXpzWl9np#{VUu5#&%Ka z|4#ow|4IKv|4moyy`%pwbXU5YQPNT+?(S^ggYHT9qI=VQjFLndw=dm~?oSV(2hxM+ z!SoP%C_T(5NtC!N=>$ET9zld=$u$wo<{#9K{I zp^8?Fl0@lOrIU1u*61{?(*|8b*BT{>5^o(nmGz?3pGMcy)9D%XOuB*2(6i`9x{01m zXXzYuX_I=?r<%5ml0;cwn|5eGH`93<(ul@1p())$Gn&(amb6P3Xpi>kRyr_B5@o$Z zI-=X?m~N-%&~xc|^n7{&y^vl+FQ%8!OX-8?W%P1-1-+79MITJBrq|GG>2>sa`Ve{p zy^-ETZ>Ed%7J4gvs8NzA$LV48;q(z~U#j$bB-zeF=RjeHnc@ zeFc3by_3F*-bL@Gucoh|uQf^%Wj(K>ucvRIZ=`RcZ>DddZ>4XeZ>R5|?=(tUs?7H; zw!fRchrXA-kG`LNfPRpEh<=!UgnpFXLqA48PCr3EX_O?&dOpRnDCJMHyi}?G4C_TH zf0llZ-b+7Ezd*l8zeK-ml(bZd_Z7B(m41!hN54+LVU#4wxclih>9^>&>38UN>G$aO z=?~}+>5u4-=}+iS>CfoT=`ZLn>96Ro>2K(7>F?<8=^yAH>7VGI>0jty>EGzz=|7B; zmMX{PpR5<9{4bV8DgT?||47r6mn!YM?6afau0}~qmG<4(zB}7Rso#U;ohj{mvfp0p zCrbU^bRW7e-H+~Xlq5?31L%SDAbK!8gdR!{qbunIJ)9n4l(bZt=Sa36#rCC2`_XJa zh91j)OO<}dvHf^@0zHwgGD=#ijB{8oO8F#~mn!uqv;O}?8NZtGPoawOL}{{p}Hv`!mFNuu;$!}9+`>A#l!MJcbNr_$5tdU`rNgPut@&>4Cb-AFglv*|3I zqb_YykNQ;87H!iG4d`Y%PeU5fm?ku(TWCgeTF{bq=>qN1KHW+ObVx^Z8y(Z_MoFR^ z|8rOtrFs?Ao;RA zST^P#b}r4$Jy6t=D`hhwn^ZRF>`HFs14UWpiaFV!vn^s68ZZ%BZ4Lo!XS5z7QLsb| zLmhI@sW@%!?gFo!ub3?S}xE4I313h!Mcfi{+xucE$?q92kr6Sf8B|BWw{t zF$V%5C|c2g&Io{b%MK3u*G?7Zk_*McY*VOM2a(RofLTDGeHI63;^AY$Jz)c*jJ3mT4gt^wE5+*38O5CqLu52y zgCgSnzu90x*lB!b5Hww35_S-+C}O2}QLs`pV1(V7Wy5eZu)$ayCIrA(I}Ana)`ZME zCmSpS6CsBH=z`8DVuOVvht3GFtO1L}($Ed?jRs%^0S5E=cjNOVCf z=0FtW&<{B*&4$8Q93eXq>d?h@Mx9-$oz3<`cucNWn3T!}t!O~bl{$p7k>?($v;EK+ z4cMRoL9s|gMij(D9Y&xIOS7TuSad-YOgdT_hr-AL@`ps0mq`4V6mKPCoZQ5dly{&Tg=8SOz9X z(bi#vje;)NU=HMLWV>7(aSTHjj_b_HlaXAHxUL>-DYo-6S% z8-k(>Ht6C@>zr)tSnLr-pw;$60Mwz?MnTRtV7Z8b$q@h>^s~dTW*iM{unfB&7=bz) zz~=DYKoMho*#&G69>cJLWm95l7-l1*i#_5iB^0{Y4LW0KsKW@mW*iM$L}z=jF~Tkq zMRdU~pdTXRe1Hw&p$l?$851i^LW6C^SnMk1z|t@bp-{9N8w#D#1pzR^jztj-I2veJ z))^~6!-QGXjsxaP4ts{`$nYsOg2fgCo=#-bHL5gx6+EYgm}fk3P6 zZ2O@BvmuAE*r15bvU%-VCRUh)P>75@M;*FgEOyYY06FX`x?luWj};&aa%jN8woy=r zvFL&ZM8R^g4CHKpm1cQ64g}`4!!Q=}A}EU3*djuq&h8QFFeS!fO6=gW$cTc@=zN#glddocIjq3$*0L@b=1XVHh6Q1R<=Q<*4xR08*%8ZDiU8=0 zdF>+c*+QrZnG#)4w}%uF1$7vKBK8C8LxbJe0O*G)QHLS|U{}$AI&2UM4d`MQWY>)K zq1DcYA_6QsF7|NRpmtsau#s&J4R#K@X4{aiFbQ2S3`Ypd#R|~R?h%T%!7dlMLnIV- z3sKP7ZYHD^MNEl}|3Dq)aHWXJ5pSgwF&j2$K)?UEnUfJ1hAucl%Q|B(QHRc0Gjf>H zwqj}Mhk4NjyJf>;1o~kZx}Y=WwL#GjbrUO0LJ_SPX2+rq8w5oTE3o}gL?}eTaY0Z! zFIo`*U9d@4n1lxOLo4bK4_z=D;-McJP=^hQ*r1C&B76W#|X+ExrhKWu!n zs1=9E208S@uG+B(g$B%v4O&wfhVUrPJy1ks6jw?ea;QUR6fq@2O-L8y5YKkTN>PWg zuJpqO4G3k|Y^StyATr`%eRdeSpcUb5KkPZ`4wl&v6iY)C8wz#kf}nPC+XYcDFJ?mn zmTO1svBD%wi7tmo5nZrBogIPB*nQ;Cip}maY)&?4u&r9^(29O@QnVXH#^k88JA+|% zRhSY*bU_YdZDfqV9O%3(z_Q5^1ykZ6+FihLS!vd|<3nN_FapI1X~hQpFwvaUVO4ub z4hIoMjKBt~nv+&+Y&^7LKd?bOTl|mxuy9}Ep|hO`Ma*G0c7K=ErDpt&mm3XsHk(7_ zWy4U1E_QOezso|Q0lRO5+Py?5d(^N;DB8$&-w*{2saf}q*T{B8c-w$^ZDhP!s6znc zuu|mgULuG00mh;WHi+z+X?L6eP={9<@h}28EnU!xB60|d^&tSJ#Kw-WIgCI&-$dGR zaF4!mtk0~)5x}X)a;gBp_nq8G0 zhR!Hr1d7;TrTz+&P{ddp9wV?wEnUzVUC(hXxEoE5;%ob`V877De>4BM=^S z2#R?zVq%3!h=L-5Vi-0!F6g}Mkf1ZVARclnB@`O4K`5*qIc(5iLt)smT|gIv!U*gP zmVsTs2yAS4e}zdCvO(vawITpUV7YVB8S#+A4x$xBtl93ljfYmuVb^EJ+MFGMrC|iZ zqlg^p5O3KAtq6rE_W0SLXhj3Yq5->wBZ;EjUmR>31^sLh0qmaJ);(63WamH^wAwCc z#gy2Yodrc_EXW>%35o2>X6}KF9gB(3#qJjBP{gWi1185??!Sqt4EOx@8-?Z!0B=E#g&}lR6s;IlO{sMP$@r7?x%?XvGS!K|dS=?73~Q zxm0FbR-AaC&gSN%&K7+s&dFxkSgZ=m#lG1tDB9Jdh=pU3*x3ERFl;cT?Sk2CXS)ph zN@D~jLOhH>&IYv;p&uF$6j87Xb_7;xSAYX%BV#P)zy|TK`xt>%)Y_Xvtv=jftZtcc0q*{?b7hB zn~=IW*`NVS^QDMkwm2ts2!LTIq91Y?i}hjkC}Kfq#V()~MRY+98{2@jVByPFig-3M zTJ10!#V!M@M;F@158jP;?xzL5@jAj-0!7=d`mVYwKFqA$ac z!wT$d7-5ePro>B&&URiq7CFSTt6#aoBs+&IMNEWvXvLH`es%>IVb_8pTIVDHHfY5m zv5Q2P3F(3c6tTfX$obN0r?fe=V&TZ40UH$U90-8bBZm>Tvn%28QlJ4{&}xUF6^qf=jzDf%7j#A)rnEsZ7U6N6(a$atMKqueV^PEiw4!cV06Pph zEXc0Ujzt{;U}+O8O!8%u$_BfN9F~iIXhi@VcB}ChN4{+ za@e5Nc0oVHvr}RPu8F?mor5lR1ajz{N*CnN#rDHQ2s$BMkh1|W0z0#;4hIKg?ZH83 z6nzPW5q7!gj5!c**+g?P0(I!;N^wp$*uiD-5E*sYpflwnxWG+-F|VQF@bISGml8W7pG9wMz6i=Dxg$l2L279+4T=wj=z zN9b&ap>^58`H!oz3tzT+j6f*luu@E9`=J$eSQYwVkqElNBw58lk}>I+9jv%v)sD

l>a~YB%dY# literal 0 HcmV?d00001 diff --git a/lib/rouge/lexers/spl.rb b/lib/rouge/lexers/spl.rb index b214c90243..a6d05cf26a 100644 --- a/lib/rouge/lexers/spl.rb +++ b/lib/rouge/lexers/spl.rb @@ -233,12 +233,13 @@ def self.aggr_commands end state :query do - rule %r/\|/, Text, :command + rule %r/\|/, Text, :command_start # By default, we assume it is an implict search command rule %r/(?=.)/ do |m| command_stack.push "search" token Text - push :search_command + push :command_start + push :command_args end end @@ -257,11 +258,13 @@ def self.aggr_commands # By default we assume we were in an implicit search command command_stack.push "search" if self.class.command_arguments["search"].include? m[0].downcase - token Keyword::Reserved + token Keyword::Reserved else - token Text + token Text end - push :search_command + # Jumping straight into the command_args context, skipping command_start + push :command_start + push :command_args end # Sub-queries do not need a leading | when running a command # Trying to avoid to match an argument @@ -269,7 +272,9 @@ def self.aggr_commands if m[0].downcase == "search" token Name::Builtin command_stack.push(m[0].downcase) - push :search_command + # Jumping straight into the command_args context, skipping command_start + push :command_start + push :command_args elsif self.class.command_arguments.key? m[0].downcase token Name::Builtin command_stack.push(m[0].downcase) @@ -282,68 +287,27 @@ def self.aggr_commands rule %r/(?=.)/ do |m| command_stack.push "search" token Text - push :search_command + # Jumping straight into the command_args context, skipping command_start + push :command_start + push :command_args end end # Search commands have a specific status, being implicit in some situations - state :search_command do - rule %r/```/, Comment::Multiline, :multiline_comments - rule %r/`\s*comment\s*\(\s*"/, Comment::Preproc, :comment_macro - rule %r/(`)(\s*\w+)([^`]*)(`)/, Comment::Preproc - rule %r/0[xX][0-9a-fA-F]*/, Num::Hex - rule %r/[$][+-]*\d*(\.\d*)?/, Num - rule %r/((\d+(\.\d*)?)|(\.\d+))([eE][\-+]?\d+)?/, Num - rule %r/[!<>=,]+/, Punctuation - rule %r/[()]/, Punctuation - rule %r/\|/, Text, :command - rule %r/["]/, Str::Escape, :double_string - rule %r/[']/, Str::Escape, :single_string - rule %r/\s+/m, Text - rule %r/\[/, Punctuation, :subquery - rule %r/\w+(?=[ \t]*)(?=\=)/ do |m| - if self.class.command_arguments.key? command_stack.last - if self.class.command_arguments[command_stack.last].include? m[0].downcase - token Keyword::Reserved - else - token Text - end - else - token Text - end - end - # Some commands have specific operators available - rule %r/[^ \t"'\d!<>=,()\[\]]+/m do |m| - if self.class.command_operators.key? command_stack.last - if self.class.command_operators[command_stack.last].include? m[0].downcase - token Operator::Word - else - token Text - end - else - token Text - end - end - # If finding a closing bracket, popping twice to leave the current state AND the subquery state - rule %r/\]/ do |m| - token Punctuation - pop! - pop! - end - end + # Consequently, once we can infer we have a search command, we can jump straight to args - # Other commands not being implicit, we were only handle the initial part "| command_name" and then just into arguments if any - state :command do + # Other commands not being implicit, we will here only handle the initial part "| command_name" and then jump into arguments if any + state :command_start do rule %r/\s+/m, Text # Highlighting only known Splunk commands rule %r/\w+/m do |m| - if self.class.command_arguments.key? m[0].downcase - token Name::Builtin - command_stack.push(m[0].downcase) - else - command_stack.push "unknown" - token Text - end + if self.class.command_arguments.key? m[0].downcase + token Name::Builtin + command_stack.push(m[0].downcase) + else + command_stack.push "unknown" + token Text + end push :command_args end # When jumping to the next command, clearing last command @@ -377,10 +341,10 @@ def self.aggr_commands rule %r/\w+(?=[ \t]*)(?=\=)/ do |m| if self.class.command_arguments.key? command_stack.last if self.class.command_arguments[command_stack.last].include? m[0].downcase - token Keyword::Reserved - else - token Text - end + token Keyword::Reserved + else + token Text + end else token Text end @@ -404,23 +368,26 @@ def self.aggr_commands end token Punctuation pop! + pop! + push :command_start end # A subquery can occur anywhere rule %r/\[/, Text, :subquery - # If finding a closing bracket, popping twice to leave the current state AND the subquery state + # If finding a closing bracket, popping 3 times to leave the following states: :command_args :command :subquery rule %r/\]/ do |m| token Punctuation pop! pop! + pop! end # Some commands have specific operators available rule %r/[^ \t"'\d!<>=,()\[\]]+/m do |m| if self.class.command_operators.key? command_stack.last if self.class.command_operators[command_stack.last].include? m[0].downcase - token Operator::Word - else - token Text - end + token Operator::Word + else + token Text + end else token Text end @@ -432,6 +399,8 @@ def self.aggr_commands rule %r/./, Comment::Multiline end + # The comment macro is used the following way: + # `comment("Some comments")` state :comment_macro do rule %r/"\s*\)\s*`/, Comment::Preproc, :pop! rule %r/\\./, Comment::Single From da1c45d21ada0de9b27e822e251dd80f73b1ca42 Mon Sep 17 00:00:00 2001 From: Sylver Date: Wed, 7 Jun 2023 13:14:05 +0200 Subject: [PATCH 4/7] Bugfixes and improvements Fixed some issues, now covering some commands which were missing and now better highlighting some special operators, args and functions --- lib/rouge/lexers/spl.rb | 126 +++++++++++++++++++++++++++++++++++----- spec/visual/samples/spl | 2 +- 2 files changed, 111 insertions(+), 17 deletions(-) diff --git a/lib/rouge/lexers/spl.rb b/lib/rouge/lexers/spl.rb index a6d05cf26a..6f0ce16680 100644 --- a/lib/rouge/lexers/spl.rb +++ b/lib/rouge/lexers/spl.rb @@ -92,6 +92,8 @@ def self.command_arguments "metadata" => ['index','splunk_server','splunk_server_group','datatype','type'], "metasearch" => ['savedsearch','savedsplunk','field','eventtypetag','hosttag'], "meventcollect" => ['index','split','spool','prefix_field','host','source','sourcetype'], + "mpreview" => ['filter','splunk_server','splunk_server_group','earliest','latest','chunk_size','target_per_timeseries'], + "msearch" => ['filter','splunk_server','splunk_server_group','earliest','latest','chunk_size','target_per_timeseries'], "mstats" => ['prestats','append','backfill','update_period','span','savedsearch','savedsplunk','field'], "multikv" => ['conf','copyattrs','forceheader','multitable','noheader','rmorig','fields','filter'], "multisearch" => [''], @@ -104,7 +106,7 @@ def self.command_arguments "outputtelemetry" => ['input','type','component','support','anonymous','license','optinrequired'], "outputtext" => ['usexml'], "overlap" => [''], - "pivot" => [''], + "pivot" => ['start','end','max','size'], "predict" => ['correlate','future_timespan','holdback','period','suppress','algorithm','upper','lower'], "rangemap" => ['default','field'], "rare" => ['showcount','showperc','limit','countfield','percentfield','useother','otherstr'], @@ -114,6 +116,7 @@ def self.command_arguments "reltime" => [''], "rename" => [''], "replace" => [''], + "require" => [''], "rest" => ['count','splunk_server','splunk_server_group','timeout'], "return" => [''], "reverse" => [''], @@ -153,6 +156,7 @@ def self.command_arguments "union" => ['extendtimerange','maxtime','maxout','timeout'], "uniq" => [''], "untable" => [''], + "walklex" => ['type','prefix','pattern','splunk_server','splunk_server_group','index'], "where" => [''], "x11" => ['mult','add'], "xmlkv" => ['maxinputs'], @@ -173,20 +177,22 @@ def self.command_operators "eval" => ['and','or','xor','not','like'], "eventstats" => ['by','as'], "fieldformat" => ['and','or','xor','not','like'], - "geostats" => ['as'], + "from" => ['datamodel','lookup','savedsearch'], + "geostats" => ['as','by'], "head" => ['and','or','xor','not','like'], "inputcsv" => ['where'], "inputlookup" => ['where'], "join" => ['where'], "lookup" => ['outputnew','output','as'], "metasearch" => ['in'], - "mstats" => ['as'], + "mstats" => ['as','where','by','groupby'], + "pivot" => ['splitrow','splitcol','filter','limit','rowsummary','colsummary','showother','numcols','sort','range','period','truelabel','falselabel','as','by','top'], "predict" => ['as'], "rare" => ['by'], "redistribute" => ['by'], "replace" => ['with','in'], "rename" => ['as'], - "search" => ['by','where','over','and','or','xor','not','term','in','case'], + "search" => ['by','where','over','and','or','xor','not','in','term','case'], "set" => ['union','diff','intersect'], "sichart" => ['by','where','over','and','or','xor','not','as'], "sirare" => ['by'], @@ -194,12 +200,12 @@ def self.command_operators "sitimechart" => ['like','not','and','or','xor','where','like','by','as'], "sitop" => ['by'], "sort" => ['auto','str','ip','num','desc','d'], - "stats" => ['by','as'], - "stremstats" => ['like','not','and','or','xor','where','like','by','as'], + "stats" => ['by','as','and','or','xor','not','in','like'], + "streamstats" => ['like','not','and','or','xor','where','like','by','as'], "timechart" => ['like','not','and','or','xor','where','by','as'], "top" => ['by'], "trendline" => ['as'], - "tstats" => ['like','not','and','or','xor','where','by','in','groupby','as'], + "tstats" => ['like','not','and','or','xor','where','by','in','groupby','as','from','prefix'], "where" => ['like','not','and','or','xor','like'], "x11" => ['as'] }; @@ -222,7 +228,42 @@ def self.aggr_functions # Commands which support aggregation functions (and eval functions consequently through the "eval()" function) def self.aggr_commands - @aggr_commands = ['chart','eventstats','geostats','mstats','sichart','sistats','sitimechart','stats','streamstats','timechart','tstats'] + @aggr_commands = ['chart','eventstats','geostats','mstats','sichart','sistats','sitimechart','stats','streamstats','timechart','tstats','pivot'] + end + + # Available convertion functions (for the convert command) + def self.convert_functions + @convert_functions = ['auto', 'dur2sec', 'mstime', 'memk', 'none', 'num', 'rmunit', 'rmcomma', 'ctime', 'mktime'] + end + + # Commands which support aggregation functions (and eval functions consequently through the "eval()" function) + def self.convert_commands + @convert_commands = ['convert'] + end + + # Available advanced filter functions + def self.filter_functions + @filter_functions = { + "pivot" => ['is', 'contains', 'in', 'isNot', 'doesNotContain', 'startsWith', 'endsWith', 'isNull', 'isNotNull'], + "search" => ['term','case'], + "tstats" => ['prefix'] + }; + end + + # Some commands expect functions which name can vary in the shape "prefixXX", prefix being in a set of fixed values and XX a integer in a range of numbers + def self.dyn_functions + @dyn_functions = { + "predict" => {"upper" => [0,100], "lower" => [0,100]}, + "trendline" => {"sma" => [2,10000], "ema" => [2,10000], "wma" => [2,10000]}, + "x11" => {"add" => [5,1000], "mult" => [5,1000]} + }; + end + + # Some commands expect arguments which name can vary in the shape "prefixXX", prefix being in a set of fixed values and XX a integer in a range of numbers + def self.dyn_arguments + @dyn_arguments = { + "predict" => {"upper" => [0,100], "lower" => [0,100]} + }; end # Stack of commands being ran (usually only 1 but it can be more if can of subsearches) @@ -234,6 +275,9 @@ def self.aggr_commands state :query do rule %r/\|/, Text, :command_start + rule %r/```/, Comment::Multiline, :multiline_comments + rule %r/`\s*comment\s*\(\s*"/, Comment::Preproc, :comment_macro + rule %r/(`)(\s*\w+)([^`]*)(`)/, Comment::Preproc # By default, we assume it is an implict search command rule %r/(?=.)/ do |m| command_stack.push "search" @@ -252,6 +296,7 @@ def self.aggr_commands token Punctuation pop! end + rule %r/\s+/m, Text rule %r/\|/, Text, :command rule %r/\w+(?=[ \t]*)(?=\=)/ do |m| # We can find filters or arguments already @@ -299,6 +344,9 @@ def self.aggr_commands # Other commands not being implicit, we will here only handle the initial part "| command_name" and then jump into arguments if any state :command_start do rule %r/\s+/m, Text + rule %r/```/, Comment::Multiline, :multiline_comments + rule %r/`\s*comment\s*\(\s*"/, Comment::Preproc, :comment_macro + rule %r/(`)(\s*\w+)([^`]*)(`)/, Comment::Preproc # Highlighting only known Splunk commands rule %r/\w+/m do |m| if self.class.command_arguments.key? m[0].downcase @@ -331,31 +379,73 @@ def self.aggr_commands rule %r/```/, Comment::Multiline, :multiline_comments rule %r/`\s*comment\s*\(\s*"/, Comment::Preproc, :comment_macro rule %r/(`)(\s*\w+)([^`]*)(`)/, Comment::Preproc + rule %r/\<\<(FIELD|MATCHSTR|MATCHSEG1|MATCHSEG2|MATCHSEG3|ITEM)\>\>/, Keyword::Pseudo rule %r/\s+/m, Text rule %r/0[xX][0-9a-fA-F]*/, Num::Hex rule %r/[$][+-]*\d*(\.\d*)?/, Num + # Time modifiers are neither numbers of usual words + rule %r/[+-]?\d+(secs|seconds|second|sec|s|minutes|minute|mins|min|m|hours|hour|hrs|hr|h|days|day|d|weeks|week|w|months|month|mon|quarters|quarter|qtrs|qtr|q|years|year|yrs|yr|y)(@(secs|seconds|second|sec|s|minutes|minute|mins|min|m|hours|hour|hrs|hr|h|days|day|d|weeks|week|w|months|month|mon|quarters|quarter|qtrs|qtr|q|years|year|yrs|yr|y))?/, Num::Other rule %r/((\d+(\.\d*)?)|(\.\d+))([eE][\-+]?\d+)?/, Num - rule %r/[!<>=,%\+\.\*\-\/]+/, Punctuation + rule %r/(true|false)/, Num + rule %r/[!<>=,%\+\.\*\-\/:]+/, Punctuation rule %r/[()]/, Punctuation # Command arguments, checking it is a known argument for the current command - rule %r/\w+(?=[ \t]*)(?=\=)/ do |m| + rule %r/[\w\-]+(?=[ \t]*)(?=\=)/ do |m| + isDyn=false + # Some arguments are named dynamically like "prefixXX=" where prefix is fixed and XX is an integer in a known range + if m[0].match(/^[^0-9]+[0-9]+$/) + m[0].match(/^([^0-9]+)([0-9]+)$/) do |r| + type = r[1].downcase + value = r[2].to_i + if ( self.class.dyn_arguments.key?(command_stack.last) && self.class.dyn_arguments[command_stack.last].key?(type) ) + if (value >= self.class.dyn_arguments[command_stack.last][type][0] && value <= self.class.dyn_arguments[command_stack.last][type][1]) + token Name::Function + isDyn=true + else + token Text + end + end + end + end + # Standard arguments if self.class.command_arguments.key? command_stack.last if self.class.command_arguments[command_stack.last].include? m[0].downcase token Keyword::Reserved - else + elsif !isDyn token Text end - else + elsif !isDyn token Text end end rule %r/\w+(?=[ \t]*)(?=\()/ do |m| + isDyn=false + # Some functions are named dynamically like "prefixXX()" where prefix is fixed and XX is an integer in a known range + if m[0].match(/^[^0-9]+[0-9]+$/) + m[0].match(/^([^0-9]+)([0-9]+)$/) do |r| + type = r[1].downcase + value = r[2].to_i + if ( self.class.dyn_functions.key?(command_stack.last) && self.class.dyn_functions[command_stack.last].key?(type) ) + if (value >= self.class.dyn_functions[command_stack.last][type][0] && value <= self.class.dyn_functions[command_stack.last][type][1]) + token Name::Function + isDyn=true + else + token Text + end + end + end + end + # Standard functions if ( self.class.eval_commands.include?(command_stack.last) && self.class.eval_functions.include?(m[0].downcase) ) token Name::Function # Aggregation functions can use eval functions through the "eval()" function elsif ( self.class.aggr_commands.include?(command_stack.last) && ( self.class.aggr_functions.include?(m[0].downcase) || self.class.eval_functions.include?(m[0].downcase)) ) token Name::Function - else + elsif ( self.class.convert_commands.include?(command_stack.last) && self.class.convert_functions.include?(m[0].downcase) ) + token Name::Function + elsif ( self.class.filter_functions.key?(command_stack.last) && self.class.filter_functions[command_stack.last].include?(m[0].downcase) ) + token Name::Function + elsif !isDyn token Text end end @@ -381,7 +471,7 @@ def self.aggr_commands pop! end # Some commands have specific operators available - rule %r/[^ \t"'\d!<>=,()\[\]]+/m do |m| + rule %r/[^\s\t"'`!<>=,()\[\]\|\+\/\*\.\-:]+/m do |m| if self.class.command_operators.key? command_stack.last if self.class.command_operators[command_stack.last].include? m[0].downcase token Operator::Word @@ -409,6 +499,8 @@ def self.aggr_commands # When found in a rex/regex command, a double string will be a regex state :double_string do + rule %r/\<\<(FIELD|MATCHSTR|MATCHSEG1|MATCHSEG2|MATCHSEG3|ITEM)\>\>/, Keyword::Pseudo + rule %r/\\>/, Keyword::Pseudo rule %r/\\./, Str::Single + rule %r/\ 2.5 debug='on' flag=0x2F00 +index=_internal AND sourcetype=splunkd component="Metrics" NOT code=1 avg > 2.5 debug='on' flag=0x2F00 TERM(test) [index=authentications user IN ("admin","root") | stats count by user | fields user ] | eval user = coalesce(user,src_user) test=1 | rex field=_raw "Reason:(?[^\]]+)\]" From 9e2312ca89baa3785f35790fff985d33596c498f Mon Sep 17 00:00:00 2001 From: Sylver Date: Tue, 13 Jun 2023 19:29:58 +0200 Subject: [PATCH 5/7] Minor syntax error in example Fixed a minor syntax error in the example, for good measure --- lib/rouge/demos/spl | 2 +- spec/visual/samples/spl | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/rouge/demos/spl b/lib/rouge/demos/spl index 55de49644a..328a367cea 100644 --- a/lib/rouge/demos/spl +++ b/lib/rouge/demos/spl @@ -1,6 +1,6 @@ index=_internal AND sourcetype=splunkd component="Metrics" NOT code=1 avg > 2.5 debug='on' flag=0x2F00 [index=authentications user IN ("admin","root") | stats count by user | fields user ] -| eval user = coalesce(user,src_user) test=1 +| eval user = coalesce(user,src_user), test=1 | rex field=_raw "Reason:(?[^\]]+)\]" | stats count AS metric_count dc(user) as dc_user dc(eval(if(status=404, clientip, NULL()))) BY host | join host type=left diff --git a/spec/visual/samples/spl b/spec/visual/samples/spl index eb99eb037b..8a2b80d42f 100644 --- a/spec/visual/samples/spl +++ b/spec/visual/samples/spl @@ -1,6 +1,6 @@ index=_internal AND sourcetype=splunkd component="Metrics" NOT code=1 avg > 2.5 debug='on' flag=0x2F00 TERM(test) [index=authentications user IN ("admin","root") | stats count by user | fields user ] -| eval user = coalesce(user,src_user) test=1 +| eval user = coalesce(user,src_user), test=1 | rex field=_raw "Reason:(?[^\]]+)\]" | stats count AS metric_count dc(user) as dc_user dc(eval(if(status=404, clientip, NULL()))) BY host | join host type=left From 8975e4c0605a0a3712e35586adbe6d5f53b5a649 Mon Sep 17 00:00:00 2001 From: Sylver Date: Wed, 14 Jun 2023 10:03:59 +0200 Subject: [PATCH 6/7] Small regex fix One of the regex for multiline comments did not have the multiline option and was raising an error on \n --- lib/rouge/lexers/spl.rb | 2 +- spec/lexers/spl_spec.rb | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/rouge/lexers/spl.rb b/lib/rouge/lexers/spl.rb index 6f0ce16680..de390049a7 100644 --- a/lib/rouge/lexers/spl.rb +++ b/lib/rouge/lexers/spl.rb @@ -486,7 +486,7 @@ def self.dyn_arguments state :multiline_comments do rule %r(```), Comment::Multiline, :pop! - rule %r/./, Comment::Multiline + rule %r/./m, Comment::Multiline end # The comment macro is used the following way: diff --git a/spec/lexers/spl_spec.rb b/spec/lexers/spl_spec.rb index 2f68dc4b99..74f8d57db7 100644 --- a/spec/lexers/spl_spec.rb +++ b/spec/lexers/spl_spec.rb @@ -11,4 +11,4 @@ assert_guess :filename => 'foo.spl' end end -end \ No newline at end of file +end From ac0e7fbee76df1db386476b34d3bbbe076f8d10f Mon Sep 17 00:00:00 2001 From: Sylver Date: Wed, 5 Jul 2023 08:45:05 +0200 Subject: [PATCH 7/7] Added missing EOF newline Added missing EOF newline to comply to the linelint rule --- lib/rouge/demos/spl | 2 +- lib/rouge/lexers/spl.rb | 2 +- spec/visual/samples/spl | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/lib/rouge/demos/spl b/lib/rouge/demos/spl index 328a367cea..af4bee07db 100644 --- a/lib/rouge/demos/spl +++ b/lib/rouge/demos/spl @@ -11,4 +11,4 @@ on multiple lines``` | table time host metric_count audit_count | lookup assets-list host OUTPUT asset_type | `ctime(time)` -`comment("END OF QUERY")` \ No newline at end of file +`comment("END OF QUERY")` diff --git a/lib/rouge/lexers/spl.rb b/lib/rouge/lexers/spl.rb index de390049a7..e50881a033 100644 --- a/lib/rouge/lexers/spl.rb +++ b/lib/rouge/lexers/spl.rb @@ -527,4 +527,4 @@ def self.dyn_arguments end end end -end \ No newline at end of file +end diff --git a/spec/visual/samples/spl b/spec/visual/samples/spl index 8a2b80d42f..fd24f8c6bb 100644 --- a/spec/visual/samples/spl +++ b/spec/visual/samples/spl @@ -11,4 +11,4 @@ on multiple lines``` | table time host metric_count audit_count | lookup assets-list host OUTPUT asset_type | `ctime(time)` -`comment("END OF QUERY")` \ No newline at end of file +`comment("END OF QUERY")`