Skip to content

Commit

Permalink
[8.x] [Rule Migration] Improve rule translation prompts and processes (
Browse files Browse the repository at this point in the history
…elastic#204021) (elastic#204109)

# Backport

This will backport the following commits from `main` to `8.x`:
- [[Rule Migration] Improve rule translation prompts and processes
(elastic#204021)](elastic#204021)

<!--- Backport version: 9.4.3 -->

### Questions ?
Please refer to the [Backport tool
documentation](https://github.com/sqren/backport)

<!--BACKPORT [{"author":{"name":"Marius
Iversen","email":"[email protected]"},"sourceCommit":{"committedDate":"2024-12-12T18:32:04Z","message":"[Rule
Migration] Improve rule translation prompts and processes
(elastic#204021)\n\n## Summary\n\nThis PR performs multiple changes that all
focuses on improving the\nquality of the results returned when we
translate rules that do not\nmatch with a prebuilt rule and both
with/without related integrations.\n\nChanges include:\n\n- Add a
filter_index_patterns node, to always ensure `logs-*` is removed\nwith
our `[indexPattern:logs-*]` value, which is similar to how we\ndetect
missing lookups and macros.\n- Split `translate_rule` into another
`ecs_mapping` node, trying to\nensure translation focuses on changing
SPL to ESQL without any focus on\nactual field names, while the other
node focuses only on the ESQL query\nand changing field names.\n- The
summary now added in the comments have 1 for the translation and\none
for the ECS mapping.\n- Add default rule batch size `15` with PR
comment/question.\n- Ensure we only return one integration related
rather than an array for\nnow, to make ESQL more focused on one related
integration.\n- New prompt to filter out one or more integrations from
the returned\nRAG; similar to how its done for rules RAG results
already.","sha":"0a7262d0fc213148fd7e80d3dc65f79c7eeae244","branchLabelMapping":{"^v9.0.0$":"main","^v8.18.0$":"8.x","^v(\\d+).(\\d+).\\d+$":"$1.$2"}},"sourcePullRequest":{"labels":["release_note:skip","v9.0.0","Team:
SecuritySolution","backport:prev-minor","v8.18.0"],"title":"[Rule
Migration] Improve rule translation prompts and
processes","number":204021,"url":"https://github.com/elastic/kibana/pull/204021","mergeCommit":{"message":"[Rule
Migration] Improve rule translation prompts and processes
(elastic#204021)\n\n## Summary\n\nThis PR performs multiple changes that all
focuses on improving the\nquality of the results returned when we
translate rules that do not\nmatch with a prebuilt rule and both
with/without related integrations.\n\nChanges include:\n\n- Add a
filter_index_patterns node, to always ensure `logs-*` is removed\nwith
our `[indexPattern:logs-*]` value, which is similar to how we\ndetect
missing lookups and macros.\n- Split `translate_rule` into another
`ecs_mapping` node, trying to\nensure translation focuses on changing
SPL to ESQL without any focus on\nactual field names, while the other
node focuses only on the ESQL query\nand changing field names.\n- The
summary now added in the comments have 1 for the translation and\none
for the ECS mapping.\n- Add default rule batch size `15` with PR
comment/question.\n- Ensure we only return one integration related
rather than an array for\nnow, to make ESQL more focused on one related
integration.\n- New prompt to filter out one or more integrations from
the returned\nRAG; similar to how its done for rules RAG results
already.","sha":"0a7262d0fc213148fd7e80d3dc65f79c7eeae244"}},"sourceBranch":"main","suggestedTargetBranches":["8.x"],"targetPullRequestStates":[{"branch":"main","label":"v9.0.0","branchLabelMappingKey":"^v9.0.0$","isSourceBranch":true,"state":"MERGED","url":"https://github.com/elastic/kibana/pull/204021","number":204021,"mergeCommit":{"message":"[Rule
Migration] Improve rule translation prompts and processes
(elastic#204021)\n\n## Summary\n\nThis PR performs multiple changes that all
focuses on improving the\nquality of the results returned when we
translate rules that do not\nmatch with a prebuilt rule and both
with/without related integrations.\n\nChanges include:\n\n- Add a
filter_index_patterns node, to always ensure `logs-*` is removed\nwith
our `[indexPattern:logs-*]` value, which is similar to how we\ndetect
missing lookups and macros.\n- Split `translate_rule` into another
`ecs_mapping` node, trying to\nensure translation focuses on changing
SPL to ESQL without any focus on\nactual field names, while the other
node focuses only on the ESQL query\nand changing field names.\n- The
summary now added in the comments have 1 for the translation and\none
for the ECS mapping.\n- Add default rule batch size `15` with PR
comment/question.\n- Ensure we only return one integration related
rather than an array for\nnow, to make ESQL more focused on one related
integration.\n- New prompt to filter out one or more integrations from
the returned\nRAG; similar to how its done for rules RAG results
already.","sha":"0a7262d0fc213148fd7e80d3dc65f79c7eeae244"}},{"branch":"8.x","label":"v8.18.0","branchLabelMappingKey":"^v8.18.0$","isSourceBranch":false,"state":"NOT_CREATED"}]}]
BACKPORT-->

Co-authored-by: Marius Iversen <[email protected]>
  • Loading branch information
kibanamachine and P1llus authored Dec 12, 2024
1 parent ef852fc commit b9fce2f
Show file tree
Hide file tree
Showing 23 changed files with 560 additions and 89 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -103,9 +103,9 @@ export const ElasticRule = z.object({
*/
prebuilt_rule_id: NonEmptyString.optional(),
/**
* The Elastic integration IDs related to the rule.
* The Elastic integration ID found to be most relevant to the splunk rule.
*/
integration_ids: z.array(z.string()).optional(),
integration_id: z.string().optional(),
/**
* The Elastic rule id installed as a result.
*/
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -83,11 +83,9 @@ components:
prebuilt_rule_id:
description: The Elastic prebuilt rule id matched.
$ref: '../../../common/api/model/primitives.schema.yaml#/components/schemas/NonEmptyString'
integration_ids:
type: array
items:
type: string
description: The Elastic integration IDs related to the rule.
integration_id:
type: string
description: The Elastic integration ID found to be most relevant to the splunk rule.
id:
description: The Elastic rule id installed as a result.
$ref: '../../../common/api/model/primitives.schema.yaml#/components/schemas/NonEmptyString'
Expand Down
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ export const ruleMigrationsFieldMap: FieldMap<SchemaFieldMapKeys<Omit<RuleMigrat
'original_rule.annotations.mitre_attack': { type: 'keyword', array: true, required: false },
elastic_rule: { type: 'nested', required: false },
'elastic_rule.title': { type: 'text', required: true, fields: { keyword: { type: 'keyword' } } },
'elastic_rule.integration_ids': { type: 'keyword', array: true, required: false },
'elastic_rule.integration_id': { type: 'keyword', required: false },
'elastic_rule.query': { type: 'text', required: true },
'elastic_rule.query_language': { type: 'keyword', required: true },
'elastic_rule.description': { type: 'text', required: false },
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
*/

import { END, START, StateGraph } from '@langchain/langgraph';
import { SiemMigrationRuleTranslationResult } from '../../../../../../common/siem_migrations/constants';
import { getCreateSemanticQueryNode } from './nodes/create_semantic_query';
import { getMatchPrebuiltRuleNode } from './nodes/match_prebuilt_rule';
import { getProcessQueryNode } from './nodes/process_query';
Expand All @@ -23,9 +24,11 @@ export function getRuleMigrationAgent({
}: MigrateRuleGraphParams) {
const matchPrebuiltRuleNode = getMatchPrebuiltRuleNode({
model,
logger,
ruleMigrationsRetriever,
});
const translationSubGraph = getTranslateRuleGraph({
model,
inferenceClient,
ruleMigrationsRetriever,
connectorId,
Expand All @@ -41,23 +44,26 @@ export function getRuleMigrationAgent({
.addNode('matchPrebuiltRule', matchPrebuiltRuleNode)
.addNode('translationSubGraph', translationSubGraph)
// Edges
.addEdge(START, 'processQuery')
.addEdge('processQuery', 'createSemanticQuery')
.addEdge(START, 'createSemanticQuery')
.addEdge('createSemanticQuery', 'matchPrebuiltRule')
.addConditionalEdges('matchPrebuiltRule', matchedPrebuiltRuleConditional, [
'translationSubGraph',
END,
])
.addConditionalEdges('matchPrebuiltRule', matchedPrebuiltRuleConditional, ['processQuery', END])
.addEdge('processQuery', 'translationSubGraph')
.addEdge('translationSubGraph', END);

const graph = siemMigrationAgentGraph.compile();
graph.name = 'Rule Migration Graph'; // Customizes the name displayed in LangSmith
return graph;
}

/*
* If the original splunk rule has no prebuilt rule match, we will start processing the query, unless it is related to input/outputlookups.
*/
const matchedPrebuiltRuleConditional = (state: MigrateRuleState) => {
if (state.elastic_rule?.prebuilt_rule_id) {
return END;
}
return 'translationSubGraph';
if (state.translation_result === SiemMigrationRuleTranslationResult.UNTRANSLATABLE) {
return END;
}
return 'processQuery';
};
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ Go through the relevant title, description and data sources from the above query
- Include keywords that are relevant to the use case.
- Add related keywords you detected from the above query, like one or more vendor, product, cloud provider, OS platform etc.
- Always reply with a JSON object with the key "semantic_query" and the value as the semantic search query inside three backticks as shown in the below example.
- If the related query focuses on Endpoint datamodel, make sure that "endpoint", "security" keywords are included.
</guidelines>
<example_response>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
* 2.0.
*/

import type { Logger } from '@kbn/core/server';
import { JsonOutputParser } from '@langchain/core/output_parsers';
import { SiemMigrationRuleTranslationResult } from '../../../../../../../../common/siem_migrations/constants';
import type { RuleMigrationsRetriever } from '../../../retrievers';
Expand All @@ -14,16 +15,20 @@ import { MATCH_PREBUILT_RULE_PROMPT } from './prompts';

interface GetMatchPrebuiltRuleNodeParams {
model: ChatModel;
logger: Logger;
ruleMigrationsRetriever: RuleMigrationsRetriever;
}

interface GetMatchedRuleResponse {
match: string;
}

export const getMatchPrebuiltRuleNode =
({ model, ruleMigrationsRetriever }: GetMatchPrebuiltRuleNodeParams): GraphNode =>
async (state) => {
export const getMatchPrebuiltRuleNode = ({
model,
ruleMigrationsRetriever,
logger,
}: GetMatchPrebuiltRuleNodeParams): GraphNode => {
return async (state) => {
const query = state.semantic_query;
const techniqueIds = state.original_rule.annotations?.mitre_attack || [];
const prebuiltRules = await ruleMigrationsRetriever.prebuiltRules.getRules(
Expand All @@ -32,7 +37,7 @@ export const getMatchPrebuiltRuleNode =
);

const outputParser = new JsonOutputParser();
const matchPrebuiltRule = MATCH_PREBUILT_RULE_PROMPT.pipe(model).pipe(outputParser);
const mostRelevantRule = MATCH_PREBUILT_RULE_PROMPT.pipe(model).pipe(outputParser);

const elasticSecurityRules = prebuiltRules.map((rule) => {
return {
Expand All @@ -41,9 +46,17 @@ export const getMatchPrebuiltRuleNode =
};
});

const response = (await matchPrebuiltRule.invoke({
const splunkRule = {
title: state.original_rule.title,
description: state.original_rule.description,
};

/*
* Takes the most relevant rule from the array of rule(s) returned by the semantic query, returns either the most relevant or none.
*/
const response = (await mostRelevantRule.invoke({
rules: JSON.stringify(elasticSecurityRules, null, 2),
ruleTitle: state.original_rule.title,
splunk_rule: JSON.stringify(splunkRule, null, 2),
})) as GetMatchedRuleResponse;

if (response.match) {
Expand All @@ -60,5 +73,16 @@ export const getMatchPrebuiltRuleNode =
};
}
}
const lookupTypes = ['inputlookup', 'outputlookup'];
if (
state.original_rule?.query &&
lookupTypes.some((type) => state.original_rule.query.includes(type))
) {
logger.debug(
`Rule: ${state.original_rule?.title} did not match any prebuilt rule, but contains inputlookup, dropping`
);
return { translation_result: SiemMigrationRuleTranslationResult.UNTRANSLATABLE };
}
return {};
};
};
Original file line number Diff line number Diff line change
Expand Up @@ -23,21 +23,22 @@ Here are some context for you to reference for your task, read it carefully as y
[
'human',
`See the below description of the relevant splunk rule and try to match it with any of the elastic detection rules with similar names.
<splunk_rule_name>
{ruleTitle}
</splunk_rule_name>
<splunk_rule>
{splunk_rule}
</splunk_rule>
<guidelines>
- Always reply with a JSON object with the key "match" and the value being the most relevant matched elastic detection rule name. Do not reply with anything else.
- Only reply with exact matches, if you are unsure or do not find a very confident match, always reply with an empty string value in the match key, do not guess or reply with anything else.
- If there is one Elastic rule in the list that covers the same threat, set the name of the matching rule as a value of the match key. Do not reply with anything else.
- If there are multiple rules in the list that cover the same threat, answer with the most specific of them, for example: "Linux User Account Creation" is more specific than "User Account Creation".
- If there is one Elastic rule in the list that covers the same usecase, set the name of the matching rule as a value of the match key. Do not reply with anything else.
- If there are multiple rules in the list that cover the same usecase, answer with the most specific of them, for example: "Linux User Account Creation" is more specific than "User Account Creation".
</guidelines>
<example_response>
U: <splunk_rule_name>
Linux Auditd Add User Account Type
</splunk_rule_name>
U: <splunk_rule>
Title: Linux Auditd Add User Account Type
Description: The following analytic detects the suspicious add user account type.
</splunk_rule>
A: Please find the match JSON object below:
\`\`\`json
{{"match": "Linux User Account Creation"}}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@ import type {
OriginalRule,
RuleMigration,
} from '../../../../../../common/siem_migrations/model/rule_migration.gen';
import type { Integration } from '../../types';

export const migrateRuleState = Annotation.Root({
messages: Annotation<BaseMessage[]>({
Expand All @@ -32,10 +31,6 @@ export const migrateRuleState = Annotation.Root({
reducer: (current, value) => value ?? current,
default: () => '',
}),
integrations: Annotation<Integration[]>({
reducer: (current, value) => value ?? current,
default: () => [],
}),
translation_result: Annotation<SiemMigrationRuleTranslationResult>(),
comments: Annotation<RuleMigration['comments']>({
reducer: (current, value) => (value ? (current ?? []).concat(value) : current),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@
import { END, START, StateGraph } from '@langchain/langgraph';
import { isEmpty } from 'lodash/fp';
import { SiemMigrationRuleTranslationResult } from '../../../../../../../../common/siem_migrations/constants';
import { getEcsMappingNode } from './nodes/ecs_mapping';
import { getFilterIndexPatternsNode } from './nodes/filter_index_patterns';
import { getFixQueryErrorsNode } from './nodes/fix_query_errors';
import { getRetrieveIntegrationsNode } from './nodes/retrieve_integrations';
import { getTranslateRuleNode } from './nodes/translate_rule';
Expand All @@ -19,6 +21,7 @@ import type { TranslateRuleGraphParams, TranslateRuleState } from './types';
const MAX_VALIDATION_ITERATIONS = 3;

export function getTranslateRuleGraph({
model,
inferenceClient,
connectorId,
ruleMigrationsRetriever,
Expand All @@ -31,20 +34,30 @@ export function getTranslateRuleGraph({
});
const validationNode = getValidationNode({ logger });
const fixQueryErrorsNode = getFixQueryErrorsNode({ inferenceClient, connectorId, logger });
const retrieveIntegrationsNode = getRetrieveIntegrationsNode({ ruleMigrationsRetriever });
const retrieveIntegrationsNode = getRetrieveIntegrationsNode({ model, ruleMigrationsRetriever });
const ecsMappingNode = getEcsMappingNode({ inferenceClient, connectorId, logger });
const filterIndexPatternsNode = getFilterIndexPatternsNode({ logger });

const translateRuleGraph = new StateGraph(translateRuleState)
// Nodes
.addNode('translateRule', translateRuleNode)
.addNode('validation', validationNode)
.addNode('fixQueryErrors', fixQueryErrorsNode)
.addNode('retrieveIntegrations', retrieveIntegrationsNode)
.addNode('ecsMapping', ecsMappingNode)
.addNode('filterIndexPatterns', filterIndexPatternsNode)
// Edges
.addEdge(START, 'retrieveIntegrations')
.addEdge('retrieveIntegrations', 'translateRule')
.addEdge('translateRule', 'validation')
.addEdge('fixQueryErrors', 'validation')
.addConditionalEdges('validation', validationRouter, ['fixQueryErrors', END]);
.addEdge('ecsMapping', 'validation')
.addConditionalEdges('validation', validationRouter, [
'fixQueryErrors',
'ecsMapping',
'filterIndexPatterns',
])
.addEdge('filterIndexPatterns', END);

const graph = translateRuleGraph.compile();
graph.name = 'Translate Rule Graph';
Expand All @@ -59,6 +72,9 @@ const validationRouter = (state: TranslateRuleState) => {
if (!isEmpty(state.validation_errors?.esql_errors)) {
return 'fixQueryErrors';
}
if (!state.translation_finalized) {
return 'ecsMapping';
}
}
return END;
return 'filterIndexPatterns';
};
Loading

0 comments on commit b9fce2f

Please sign in to comment.