[8.x] [Rule Migration] Improve rule translation prompts and processes (…

…elastic#204021) (elastic#204109) # Backport This will backport the following commits from `main` to `8.x`: - [[Rule Migration] Improve rule translation prompts and processes (elastic#204021)](elastic#204021)  ### Questions ? Please refer to the [Backport tool documentation](https://github.com/sqren/backport)  Co-authored-by: Marius Iversen <[email protected]>
tsullivan · Dec 12, 2024 · b9fce2f · b9fce2f
1 parent ef852fc
commit b9fce2f
Show file tree

Hide file tree

Showing 23 changed files with 560 additions and 89 deletions.
diff --git a/x-pack/plugins/security_solution/common/siem_migrations/model/rule_migration.gen.ts b/x-pack/plugins/security_solution/common/siem_migrations/model/rule_migration.gen.ts
@@ -103,9 +103,9 @@ export const ElasticRule = z.object({
    */
   prebuilt_rule_id: NonEmptyString.optional(),
   /**
-   * The Elastic integration IDs related to the rule.
+   * The Elastic integration ID found to be most relevant to the splunk rule.
    */
-  integration_ids: z.array(z.string()).optional(),
+  integration_id: z.string().optional(),
   /**
    * The Elastic rule id installed as a result.
    */

diff --git a/x-pack/plugins/security_solution/common/siem_migrations/model/rule_migration.schema.yaml b/x-pack/plugins/security_solution/common/siem_migrations/model/rule_migration.schema.yaml
@@ -83,11 +83,9 @@ components:
         prebuilt_rule_id:
           description: The Elastic prebuilt rule id matched.
           $ref: '../../../common/api/model/primitives.schema.yaml#/components/schemas/NonEmptyString'
-        integration_ids:
-          type: array
-          items:
-            type: string
-          description: The Elastic integration IDs related to the rule.
+        integration_id:
+          type: string
+          description: The Elastic integration ID found to be most relevant to the splunk rule.
         id:
           description: The Elastic rule id installed as a result.
           $ref: '../../../common/api/model/primitives.schema.yaml#/components/schemas/NonEmptyString'

diff --git a/x-pack/plugins/security_solution/docs/siem_migration/img/agent_graph.png b/x-pack/plugins/security_solution/docs/siem_migration/img/agent_graph.png
diff --git a/...ins/security_solution/server/lib/siem_migrations/rules/data/rule_migrations_field_maps.ts b/...ins/security_solution/server/lib/siem_migrations/rules/data/rule_migrations_field_maps.ts
@@ -27,7 +27,7 @@ export const ruleMigrationsFieldMap: FieldMap<SchemaFieldMapKeys<Omit<RuleMigrat
   'original_rule.annotations.mitre_attack': { type: 'keyword', array: true, required: false },
   elastic_rule: { type: 'nested', required: false },
   'elastic_rule.title': { type: 'text', required: true, fields: { keyword: { type: 'keyword' } } },
-  'elastic_rule.integration_ids': { type: 'keyword', array: true, required: false },
+  'elastic_rule.integration_id': { type: 'keyword', required: false },
   'elastic_rule.query': { type: 'text', required: true },
   'elastic_rule.query_language': { type: 'keyword', required: true },
   'elastic_rule.description': { type: 'text', required: false },

diff --git a/x-pack/plugins/security_solution/server/lib/siem_migrations/rules/task/agent/graph.ts b/x-pack/plugins/security_solution/server/lib/siem_migrations/rules/task/agent/graph.ts
@@ -6,6 +6,7 @@
  */
 
 import { END, START, StateGraph } from '@langchain/langgraph';
+import { SiemMigrationRuleTranslationResult } from '../../../../../../common/siem_migrations/constants';
 import { getCreateSemanticQueryNode } from './nodes/create_semantic_query';
 import { getMatchPrebuiltRuleNode } from './nodes/match_prebuilt_rule';
 import { getProcessQueryNode } from './nodes/process_query';
@@ -23,9 +24,11 @@ export function getRuleMigrationAgent({
 }: MigrateRuleGraphParams) {
   const matchPrebuiltRuleNode = getMatchPrebuiltRuleNode({
     model,
+    logger,
     ruleMigrationsRetriever,
   });
   const translationSubGraph = getTranslateRuleGraph({
+    model,
     inferenceClient,
     ruleMigrationsRetriever,
     connectorId,
@@ -41,23 +44,26 @@ export function getRuleMigrationAgent({
     .addNode('matchPrebuiltRule', matchPrebuiltRuleNode)
     .addNode('translationSubGraph', translationSubGraph)
     // Edges
-    .addEdge(START, 'processQuery')
-    .addEdge('processQuery', 'createSemanticQuery')
+    .addEdge(START, 'createSemanticQuery')
     .addEdge('createSemanticQuery', 'matchPrebuiltRule')
-    .addConditionalEdges('matchPrebuiltRule', matchedPrebuiltRuleConditional, [
-      'translationSubGraph',
-      END,
-    ])
+    .addConditionalEdges('matchPrebuiltRule', matchedPrebuiltRuleConditional, ['processQuery', END])
+    .addEdge('processQuery', 'translationSubGraph')
     .addEdge('translationSubGraph', END);
 
   const graph = siemMigrationAgentGraph.compile();
   graph.name = 'Rule Migration Graph'; // Customizes the name displayed in LangSmith
   return graph;
 }
 
+/*
+ * If the original splunk rule has no prebuilt rule match, we will start processing the query, unless it is related to input/outputlookups.
+ */
 const matchedPrebuiltRuleConditional = (state: MigrateRuleState) => {
   if (state.elastic_rule?.prebuilt_rule_id) {
     return END;
   }
-  return 'translationSubGraph';
+  if (state.translation_result === SiemMigrationRuleTranslationResult.UNTRANSLATABLE) {
+    return END;
+  }
+  return 'processQuery';
 };
diff --git a/...lution/server/lib/siem_migrations/rules/task/agent/nodes/create_semantic_query/prompts.ts b/...lution/server/lib/siem_migrations/rules/task/agent/nodes/create_semantic_query/prompts.ts
@@ -29,6 +29,7 @@ Go through the relevant title, description and data sources from the above query
 - Include keywords that are relevant to the use case.
 - Add related keywords you detected from the above query, like one or more vendor, product, cloud provider, OS platform etc.
 - Always reply with a JSON object with the key "semantic_query" and the value as the semantic search query inside three backticks as shown in the below example.
+- If the related query focuses on Endpoint datamodel, make sure that "endpoint", "security" keywords are included.
 </guidelines>
 
 <example_response>

diff --git a/...ver/lib/siem_migrations/rules/task/agent/nodes/match_prebuilt_rule/match_prebuilt_rule.ts b/...ver/lib/siem_migrations/rules/task/agent/nodes/match_prebuilt_rule/match_prebuilt_rule.ts
@@ -5,6 +5,7 @@
  * 2.0.
  */
 
+import type { Logger } from '@kbn/core/server';
 import { JsonOutputParser } from '@langchain/core/output_parsers';
 import { SiemMigrationRuleTranslationResult } from '../../../../../../../../common/siem_migrations/constants';
 import type { RuleMigrationsRetriever } from '../../../retrievers';
@@ -14,16 +15,20 @@ import { MATCH_PREBUILT_RULE_PROMPT } from './prompts';
 
 interface GetMatchPrebuiltRuleNodeParams {
   model: ChatModel;
+  logger: Logger;
   ruleMigrationsRetriever: RuleMigrationsRetriever;
 }
 
 interface GetMatchedRuleResponse {
   match: string;
 }
 
-export const getMatchPrebuiltRuleNode =
-  ({ model, ruleMigrationsRetriever }: GetMatchPrebuiltRuleNodeParams): GraphNode =>
-  async (state) => {
+export const getMatchPrebuiltRuleNode = ({
+  model,
+  ruleMigrationsRetriever,
+  logger,
+}: GetMatchPrebuiltRuleNodeParams): GraphNode => {
+  return async (state) => {
     const query = state.semantic_query;
     const techniqueIds = state.original_rule.annotations?.mitre_attack || [];
     const prebuiltRules = await ruleMigrationsRetriever.prebuiltRules.getRules(
@@ -32,7 +37,7 @@ export const getMatchPrebuiltRuleNode =
     );
 
     const outputParser = new JsonOutputParser();
-    const matchPrebuiltRule = MATCH_PREBUILT_RULE_PROMPT.pipe(model).pipe(outputParser);
+    const mostRelevantRule = MATCH_PREBUILT_RULE_PROMPT.pipe(model).pipe(outputParser);
 
     const elasticSecurityRules = prebuiltRules.map((rule) => {
       return {
@@ -41,9 +46,17 @@ export const getMatchPrebuiltRuleNode =
       };
     });
 
-    const response = (await matchPrebuiltRule.invoke({
+    const splunkRule = {
+      title: state.original_rule.title,
+      description: state.original_rule.description,
+    };
+
+    /*
+     * Takes the most relevant rule from the array of rule(s) returned by the semantic query, returns either the most relevant or none.
+     */
+    const response = (await mostRelevantRule.invoke({
       rules: JSON.stringify(elasticSecurityRules, null, 2),
-      ruleTitle: state.original_rule.title,
+      splunk_rule: JSON.stringify(splunkRule, null, 2),
     })) as GetMatchedRuleResponse;
 
     if (response.match) {
@@ -60,5 +73,16 @@ export const getMatchPrebuiltRuleNode =
         };
       }
     }
+    const lookupTypes = ['inputlookup', 'outputlookup'];
+    if (
+      state.original_rule?.query &&
+      lookupTypes.some((type) => state.original_rule.query.includes(type))
+    ) {
+      logger.debug(
+        `Rule: ${state.original_rule?.title} did not match any prebuilt rule, but contains inputlookup, dropping`
+      );
+      return { translation_result: SiemMigrationRuleTranslationResult.UNTRANSLATABLE };
+    }
     return {};
   };
+};
diff --git a/...solution/server/lib/siem_migrations/rules/task/agent/nodes/match_prebuilt_rule/prompts.ts b/...solution/server/lib/siem_migrations/rules/task/agent/nodes/match_prebuilt_rule/prompts.ts
@@ -23,21 +23,22 @@ Here are some context for you to reference for your task, read it carefully as y
   [
     'human',
     `See the below description of the relevant splunk rule and try to match it with any of the elastic detection rules with similar names.     
-<splunk_rule_name>
-{ruleTitle}
-</splunk_rule_name>
+<splunk_rule>
+{splunk_rule}
+</splunk_rule>
 
 <guidelines>
 - Always reply with a JSON object with the key "match" and the value being the most relevant matched elastic detection rule name. Do not reply with anything else.
 - Only reply with exact matches, if you are unsure or do not find a very confident match, always reply with an empty string value in the match key, do not guess or reply with anything else.
-- If there is one Elastic rule in the list that covers the same threat, set the name of the matching rule as a value of the match key. Do not reply with anything else.
-- If there are multiple rules in the list that cover the same threat, answer with the most specific of them, for example: "Linux User Account Creation" is more specific than "User Account Creation".
+- If there is one Elastic rule in the list that covers the same usecase, set the name of the matching rule as a value of the match key. Do not reply with anything else.
+- If there are multiple rules in the list that cover the same usecase, answer with the most specific of them, for example: "Linux User Account Creation" is more specific than "User Account Creation".
 </guidelines>
 
 <example_response>
-U: <splunk_rule_name>
-Linux Auditd Add User Account Type
-</splunk_rule_name>
+U: <splunk_rule>
+Title: Linux Auditd Add User Account Type
+Description: The following analytic detects the suspicious add user account type.
+</splunk_rule>
 A: Please find the match JSON object below:
 \`\`\`json
 {{"match": "Linux User Account Creation"}}

diff --git a/x-pack/plugins/security_solution/server/lib/siem_migrations/rules/task/agent/state.ts b/x-pack/plugins/security_solution/server/lib/siem_migrations/rules/task/agent/state.ts
@@ -13,7 +13,6 @@ import type {
   OriginalRule,
   RuleMigration,
 } from '../../../../../../common/siem_migrations/model/rule_migration.gen';
-import type { Integration } from '../../types';
 
 export const migrateRuleState = Annotation.Root({
   messages: Annotation<BaseMessage[]>({
@@ -32,10 +31,6 @@ export const migrateRuleState = Annotation.Root({
     reducer: (current, value) => value ?? current,
     default: () => '',
   }),
-  integrations: Annotation<Integration[]>({
-    reducer: (current, value) => value ?? current,
-    default: () => [],
-  }),
   translation_result: Annotation<SiemMigrationRuleTranslationResult>(),
   comments: Annotation<RuleMigration['comments']>({
     reducer: (current, value) => (value ? (current ?? []).concat(value) : current),

diff --git a/...y_solution/server/lib/siem_migrations/rules/task/agent/sub_graphs/translate_rule/graph.ts b/...y_solution/server/lib/siem_migrations/rules/task/agent/sub_graphs/translate_rule/graph.ts
@@ -8,6 +8,8 @@
 import { END, START, StateGraph } from '@langchain/langgraph';
 import { isEmpty } from 'lodash/fp';
 import { SiemMigrationRuleTranslationResult } from '../../../../../../../../common/siem_migrations/constants';
+import { getEcsMappingNode } from './nodes/ecs_mapping';
+import { getFilterIndexPatternsNode } from './nodes/filter_index_patterns';
 import { getFixQueryErrorsNode } from './nodes/fix_query_errors';
 import { getRetrieveIntegrationsNode } from './nodes/retrieve_integrations';
 import { getTranslateRuleNode } from './nodes/translate_rule';
@@ -19,6 +21,7 @@ import type { TranslateRuleGraphParams, TranslateRuleState } from './types';
 const MAX_VALIDATION_ITERATIONS = 3;
 
 export function getTranslateRuleGraph({
+  model,
   inferenceClient,
   connectorId,
   ruleMigrationsRetriever,
@@ -31,20 +34,30 @@ export function getTranslateRuleGraph({
   });
   const validationNode = getValidationNode({ logger });
   const fixQueryErrorsNode = getFixQueryErrorsNode({ inferenceClient, connectorId, logger });
-  const retrieveIntegrationsNode = getRetrieveIntegrationsNode({ ruleMigrationsRetriever });
+  const retrieveIntegrationsNode = getRetrieveIntegrationsNode({ model, ruleMigrationsRetriever });
+  const ecsMappingNode = getEcsMappingNode({ inferenceClient, connectorId, logger });
+  const filterIndexPatternsNode = getFilterIndexPatternsNode({ logger });
 
   const translateRuleGraph = new StateGraph(translateRuleState)
     // Nodes
     .addNode('translateRule', translateRuleNode)
     .addNode('validation', validationNode)
     .addNode('fixQueryErrors', fixQueryErrorsNode)
     .addNode('retrieveIntegrations', retrieveIntegrationsNode)
+    .addNode('ecsMapping', ecsMappingNode)
+    .addNode('filterIndexPatterns', filterIndexPatternsNode)
     // Edges
     .addEdge(START, 'retrieveIntegrations')
     .addEdge('retrieveIntegrations', 'translateRule')
     .addEdge('translateRule', 'validation')
     .addEdge('fixQueryErrors', 'validation')
-    .addConditionalEdges('validation', validationRouter, ['fixQueryErrors', END]);
+    .addEdge('ecsMapping', 'validation')
+    .addConditionalEdges('validation', validationRouter, [
+      'fixQueryErrors',
+      'ecsMapping',
+      'filterIndexPatterns',
+    ])
+    .addEdge('filterIndexPatterns', END);
 
   const graph = translateRuleGraph.compile();
   graph.name = 'Translate Rule Graph';
@@ -59,6 +72,9 @@ const validationRouter = (state: TranslateRuleState) => {
     if (!isEmpty(state.validation_errors?.esql_errors)) {
       return 'fixQueryErrors';
     }
+    if (!state.translation_finalized) {
+      return 'ecsMapping';
+    }
   }
-  return END;
+  return 'filterIndexPatterns';
 };