Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 0 additions & 12 deletions phpstan-baseline.neon
Original file line number Diff line number Diff line change
Expand Up @@ -18480,18 +18480,6 @@ parameters:
count: 1
path: src/lib/Search/Common/EventSubscriber/UserEventSubscriber.php

-
message: '#^Method Ibexa\\Core\\Search\\Common\\FieldNameGenerator\:\:__construct\(\) has parameter \$fieldNameMapping with no value type specified in iterable type array\.$#'
identifier: missingType.iterableValue
count: 1
path: src/lib/Search/Common/FieldNameGenerator.php

-
message: '#^Property Ibexa\\Core\\Search\\Common\\FieldNameGenerator\:\:\$fieldNameMapping type has no value type specified in iterable type array\.$#'
identifier: missingType.iterableValue
count: 1
path: src/lib/Search/Common/FieldNameGenerator.php

-
message: '#^Method Ibexa\\Core\\Search\\Common\\FieldNameResolver\:\:getAggregationFieldName\(\) should return string\|null but returns false\.$#'
identifier: return.type
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,6 @@ public function addSemanticConfig(NodeBuilder $nodeBuilder): void
->end()
->scalarNode('default_embedding_model')
->info('Default embedding model identifier')
->defaultValue('text-embedding-ada-002')
->end();
}

Expand All @@ -57,6 +56,8 @@ public function addSemanticConfig(NodeBuilder $nodeBuilder): void
public function preMap(array $config, ContextualizerInterface $contextualizer): void
{
// Register `embedding_models` through the contextualizer's array mapping. How values from different
// scopes are combined is defined by ContextualizerInterface::mapConfigArray(), which is not visible here.
$contextualizer->mapConfigArray('embedding_models', $config);
// Keep the default value in default settings only. Defining a scalar default on the semantic node would
// materialize it into every siteaccess/group scope and block inheritance from `system.default` overrides.
// `default_embedding_model` is therefore mapped as a plain setting, without a node-level default.
$contextualizer->mapSetting('default_embedding_model', $config);
}

Expand Down
4 changes: 4 additions & 0 deletions src/lib/Resources/settings/search_engines/common.yml
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,9 @@ parameters:
ibexa_dense_vector_ada002: 'ada002_dv'
ibexa_dense_vector_3small: '3small_dv'
ibexa_dense_vector_3large: '3large_dv'
# Allows generic mapping of embedding field types (`ibexa_dense_vector_<suffix>`) without per-model entries.
ibexa.search.common.field_name_generator.fallback_prefixes:
- 'ibexa_dense_vector_'

services:
# Note: services tagged with 'ibexa.field_type.indexable'
Expand All @@ -37,6 +40,7 @@ services:
class: Ibexa\Core\Search\Common\FieldNameGenerator
arguments:
- '%ibexa.search.common.field_name_generator.map%'
- '%ibexa.search.common.field_name_generator.fallback_prefixes%'

Ibexa\Core\Search\Common\FieldNameResolver:
class: Ibexa\Core\Search\Common\FieldNameResolver
Expand Down
53 changes: 29 additions & 24 deletions src/lib/Search/Common/FieldNameGenerator.php
Original file line number Diff line number Diff line change
Expand Up @@ -15,32 +15,19 @@
class FieldNameGenerator
{
/**
* Simple mapping for our internal field types, consisting of an array
* of SPI Search FieldType identifier as key and search backend field type
* string as value.
* `$fieldNameMapping` maps internal search field type identifiers to backend
* suffixes (e.g. `ibexa_string` => `s`).
*
* We implement this mapping, because those dynamic fields are common to
* search backend configurations.
* `$fallbackPrefixes` defines type prefixes for generic fallback normalization
* when no explicit mapping exists (e.g. `ibexa_dense_vector_<suffix>`).
*
* @see \Ibexa\Contracts\Core\Search\FieldType
*
* Code example:
*
* <code>
* array(
* "ez_integer" => "i",
* "ez_string" => "s",
* ...
* )
* </code>
*
* @var array
* @param array<string, string> $fieldNameMapping
* @param string[] $fallbackPrefixes
*/
protected $fieldNameMapping;

public function __construct(array $fieldNameMapping)
{
$this->fieldNameMapping = $fieldNameMapping;
public function __construct(
protected array $fieldNameMapping,
private readonly array $fallbackPrefixes = []
) {
}

/**
Expand Down Expand Up @@ -76,8 +63,26 @@ public function getTypedName(string $name, FieldType $type): string
return $name;
}

$typeName = $this->fieldNameMapping[$type->getType()] ?? $type->getType();
$typeIdentifier = $type->getType();
$typeName = $this->fieldNameMapping[$typeIdentifier] ?? $this->normalizeUsingFallbackPrefixes($typeIdentifier);

return $name . '_' . $typeName;
}

/**
 * Generic fallback for field type families that encode the backend suffix in the type identifier.
 *
 * Configured prefixes are checked in order; the first one that the identifier starts with AND that
 * leaves a non-empty remainder is stripped, and that remainder is returned. The identifier is returned
 * unchanged when no prefix matches, or when the identifier consists of nothing but a prefix, so the
 * caller never builds a field name that ends in a dangling `_`.
 *
 * Examples (with `ibexa_dense_vector_` configured as a fallback prefix):
 * - `ibexa_dense_vector_gemini_embedding_001_1536_dv` => `gemini_embedding_001_1536_dv`
 * - `ibexa_dense_vector_` => `ibexa_dense_vector_` (nothing would be left after stripping)
 * - `custom_type` => `custom_type` (no matching prefix)
 *
 * @param string $typeIdentifier search field type identifier that has no explicit mapping
 *
 * @return string backend suffix derived from the identifier, or the identifier itself
 */
private function normalizeUsingFallbackPrefixes(string $typeIdentifier): string
{
    foreach ($this->fallbackPrefixes as $prefix) {
        if (!str_starts_with($typeIdentifier, $prefix)) {
            continue;
        }

        $suffix = substr($typeIdentifier, strlen($prefix));
        // An identifier equal to the prefix would normalize to '', producing names such as `title_`.
        if ($suffix !== '') {
            return $suffix;
        }
    }

    return $typeIdentifier;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,34 @@ public function testDefaultEmbeddingsSettings(): void
$this->assertConfigResolverParameterValue('default_embedding_model', 'text-embedding-ada-002', 'ibexa_demo_site');
}

public function testDefaultEmbeddingModelSetInDefaultScopeIsNotMaterializedToOtherScopes(): void
{
    $expectedModel = 'gemini_embedding_001_1536';

    // Configure the model for the `default` scope only.
    $this->load([
        'system' => [
            'default' => ['default_embedding_model' => $expectedModel],
        ],
    ]);

    // Neither the siteaccess group nor the siteaccess itself may receive its own copy of the setting.
    $parametersThatMustNotExist = [
        'ibexa.site_access.config.ibexa_demo_group.default_embedding_model',
        'ibexa.site_access.config.ibexa_demo_site.default_embedding_model',
    ];
    foreach ($parametersThatMustNotExist as $parameterName) {
        self::assertFalse($this->container->hasParameter($parameterName));
    }

    // The value is stored once, in the default scope ...
    $defaultScopeValue = $this->container->getParameter(
        'ibexa.site_access.config.default.default_embedding_model'
    );
    self::assertSame($expectedModel, $defaultScopeValue);

    // ... and still resolves for the siteaccess.
    $this->assertConfigResolverParameterValue('default_embedding_model', $expectedModel, 'ibexa_demo_site');
}

/**
* @param array<mixed> $config
* @param array<mixed> $expected
Expand Down
57 changes: 57 additions & 0 deletions tests/lib/Search/Common/FieldNameGeneratorTest.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
<?php

/**
* @copyright Copyright (C) Ibexa AS. All rights reserved.
* @license For full copyright and license information view LICENSE file distributed with this source code.
*/
declare(strict_types=1);

namespace Ibexa\Tests\Core\Search\Common;

use Ibexa\Contracts\Core\Search\FieldType;
use Ibexa\Core\Search\Common\FieldNameGenerator;
use PHPUnit\Framework\TestCase;

/**
 * Covers how FieldNameGenerator::getTypedName() picks the type suffix: explicit mapping first,
 * then fallback-prefix normalization, then the raw type identifier.
 */
final class FieldNameGeneratorTest extends TestCase
{
    public function testGetTypedNameUsesConfiguredMapping(): void
    {
        $generator = new FieldNameGenerator(['ibexa_string' => 's']);

        $typedName = $generator->getTypedName('title', $this->createFieldType('ibexa_string'));

        self::assertSame('title_s', $typedName);
    }

    public function testGetTypedNameNormalizesEmbeddingFieldTypeWithoutExplicitMapping(): void
    {
        $generator = new FieldNameGenerator([], ['ibexa_dense_vector_']);

        $typedName = $generator->getTypedName(
            'taxonomy_embeddings',
            $this->createFieldType('ibexa_dense_vector_gemini_embedding_001_1536_dv')
        );

        self::assertSame('taxonomy_embeddings_gemini_embedding_001_1536_dv', $typedName);
    }

    public function testGetTypedNameReturnsOriginalTypeWhenNoFallbackPrefixMatches(): void
    {
        $generator = new FieldNameGenerator([], ['ibexa_dense_vector_']);

        $typedName = $generator->getTypedName('foo', $this->createFieldType('custom_type'));

        self::assertSame('foo_custom_type', $typedName);
    }

    /**
     * Builds a FieldType mock whose getType() reports the given identifier.
     */
    private function createFieldType(string $typeIdentifier): FieldType
    {
        $fieldType = $this->createMock(FieldType::class);
        $fieldType
            ->method('getType')
            ->willReturn($typeIdentifier);

        return $fieldType;
    }
}
Loading