Skip to content

Commit 2b49ce7

Browse files
committed
imp: Add optional seed parameter for deterministic results in MVA and Float Vector UDFs
1 parent 9dc1640 commit 2b49ce7

File tree

1 file changed

+217
-11
lines changed

1 file changed

+217
-11
lines changed

src/udfexample.c

Lines changed: 217 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,8 @@
99
// CREATE FUNCTION makemva RETURNS MULTI SONAME 'udfexample.so';
1010
// CREATE FUNCTION makemva64 RETURNS MULTI64 SONAME 'udfexample.so';
1111
// CREATE FUNCTION makefloatvec RETURNS FLOAT_VECTOR SONAME 'udfexample.so';
12+
// Note: makemva, makemva64, and makefloatvec accept an optional integer seed parameter
13+
// for deterministic results: makemva(seed), makemva64(seed), makefloatvec(seed)
1214
//
1315
// Windows
1416
// cl /MTd /LD udfexample.c
@@ -18,6 +20,8 @@
1820
// CREATE FUNCTION makemva RETURNS MULTI SONAME 'udfexample.dll';
1921
// CREATE FUNCTION makemva64 RETURNS MULTI64 SONAME 'udfexample.dll';
2022
// CREATE FUNCTION makefloatvec RETURNS FLOAT_VECTOR SONAME 'udfexample.dll';
23+
// Note: makemva, makemva64, and makefloatvec accept an optional integer seed parameter
24+
// for deterministic results: makemva(seed), makemva64(seed), makefloatvec(seed)
2125
//
2226

2327
#include "sphinxudf.h"
@@ -314,22 +318,86 @@ DLLEXPORT void hideemail_deinit ( void * userdata )
314318

315319
//////////////////////////////////////////////////////////////////////////
316320
// MVA return example - returns a random MVA with 1-20 random values
321+
// Optional seed parameter for deterministic results
322+
323+
// Simple Linear Congruential Generator for deterministic random numbers
324+
// Linear congruential generator step used for reproducible pseudo-random output.
//
// Advances *state in place as state * 1103515245 + 12345, relying on the natural
// modulo-2^32 wraparound of uint32_t, and returns the new state (all 32 bits).
//
// NOTE: with a power-of-two modulus the low-order bits of the result cycle with
// short periods; callers that reduce the value with '%' inherit that weakness.
static uint32_t lcg_next ( uint32_t * state )
{
	const uint32_t uNext = ( *state ) * 1103515245U + 12345U;
	*state = uNext;
	return uNext;
}
329+
330+
// State kept in init->func_data by the makemva / makemva64 / makefloatvec UDFs.
typedef struct
{
	uint32_t seed;		// stored seed; 0 is used by the UDFs as the "not set yet" marker
	uint32_t counter;	// incremented on every call made without a seed argument
} SeedData_t;
317331

318332
// Init hook for MAKEMVA().
//
// Accepts zero arguments or one argument (the optional seed). On success it
// allocates a SeedData_t with seed and counter both zeroed and stores it in
// init->func_data; makemva_deinit() frees it.
//
// Returns 0 on success. Returns 1 after writing a message into error_message
// when more than one argument is passed or when the allocation fails.
DLLEXPORT int makemva_init ( SPH_UDF_INIT * init, SPH_UDF_ARGS * args, char * error_message )
{
	UdfLog ( "Called makemva_init" );
	if ( args->arg_count > 1 )
	{
		snprintf ( error_message, SPH_UDF_ERROR_LEN, "MAKEMVA() takes 0 or 1 argument (optional seed)" );
		return 1;
	}

	// Storage for the seed value and the call counter
	SeedData_t * pData = (SeedData_t *) malloc ( sizeof ( *pData ) );
	if ( !pData )
	{
		snprintf ( error_message, SPH_UDF_ERROR_LEN, "malloc() failed" );
		return 1;
	}

	// seed==0 means "not set yet"; the seed value itself is read later, in makemva()
	pData->seed = 0;
	pData->counter = 0;

	init->func_data = pData;
	return 0;
}
328356

357+
// Deinit hook for MAKEMVA(): frees the SeedData_t that makemva_init() stored in
// init->func_data and clears the pointer so it cannot be freed twice.
DLLEXPORT void makemva_deinit ( SPH_UDF_INIT * init )
{
	// free(NULL) is a no-op, so no guard is needed
	free ( init->func_data );
	init->func_data = NULL;
}
365+
329366
DLLEXPORT ByteBlob_t makemva ( SPH_UDF_INIT * init, SPH_UDF_ARGS * args, char * error_flag )
330367
{
368+
SeedData_t * pData = (SeedData_t *)init->func_data;
369+
uint32_t state;
370+
371+
// Check if seed was explicitly provided (arg_values not available during _init)
372+
if ( args->arg_count == 1 && args->arg_values && args->arg_values[0] )
373+
{
374+
// Seed was provided - use it deterministically for all rows
375+
if ( pData->seed == 0 )
376+
{
377+
// First call with seed - initialize it
378+
if ( args->arg_types[0] == SPH_UDF_TYPE_UINT32 )
379+
pData->seed = *(uint32_t*)args->arg_values[0];
380+
else if ( args->arg_types[0] == SPH_UDF_TYPE_INT64 )
381+
pData->seed = (uint32_t)(*(sphinx_int64_t*)args->arg_values[0]);
382+
else
383+
pData->seed = (uint32_t)time(NULL);
384+
// Ensure seed is non-zero (0 is our "not initialized" marker)
385+
if ( pData->seed == 0 )
386+
pData->seed = 1;
387+
}
388+
// Use the stored seed for deterministic results
389+
state = pData->seed;
390+
}
391+
else
392+
{
393+
// No seed provided - generate new random seed for each row
394+
// Use time + counter + pointer address for variation
395+
pData->counter++;
396+
state = (uint32_t)time(NULL) ^ (uint32_t)((uintptr_t)init->func_data) ^ pData->counter;
397+
}
398+
331399
// Generate random array length from 1 to 20
332-
int count = (rand() % 20) + 1;
400+
int count = (lcg_next(&state) % 20) + 1;
333401

334402
// Allocate memory for random number of uint32 values using the provided malloc function
335403
uint32_t * pValues = (uint32_t *) args->fn_malloc ( count * sizeof(uint32_t) );
@@ -341,29 +409,98 @@ DLLEXPORT ByteBlob_t makemva ( SPH_UDF_INIT * init, SPH_UDF_ARGS * args, char *
341409

342410
// Fill with random values
343411
for ( int i = 0; i < count; i++ )
344-
pValues[i] = (uint32_t)(rand() % 1000) + 1; // Random values from 1 to 1000
412+
pValues[i] = (lcg_next(&state) % 1000) + 1; // Random values from 1 to 1000
413+
414+
// Sort values (required for any() function to work with binary search)
415+
// Simple bubble sort for small arrays (typically 1-20 elements)
416+
for ( int i = 0; i < count - 1; i++ )
417+
{
418+
for ( int j = 0; j < count - i - 1; j++ )
419+
{
420+
if ( pValues[j] > pValues[j+1] )
421+
{
422+
uint32_t temp = pValues[j];
423+
pValues[j] = pValues[j+1];
424+
pValues[j+1] = temp;
425+
}
426+
}
427+
}
345428

346429
// Return as ByteBlob_t
347430
return (ByteBlob_t){(const BYTE*)pValues, count * sizeof(uint32_t)};
348431
}
349432

350433
//////////////////////////////////////////////////////////////////////////
351434
// MVA64 return example - returns a random MVA64 with 1-20 random values
435+
// Optional seed parameter for deterministic results
352436

353437
// Init hook for MAKEMVA64().
//
// Accepts zero arguments or one argument (the optional seed). On success it
// allocates a SeedData_t with seed and counter both zeroed and stores it in
// init->func_data; makemva64_deinit() frees it.
//
// Returns 0 on success. Returns 1 after writing a message into error_message
// when more than one argument is passed or when the allocation fails.
DLLEXPORT int makemva64_init ( SPH_UDF_INIT * init, SPH_UDF_ARGS * args, char * error_message )
{
	if ( args->arg_count > 1 )
	{
		snprintf ( error_message, SPH_UDF_ERROR_LEN, "MAKEMVA64() takes 0 or 1 argument (optional seed)" );
		return 1;
	}

	// Storage for the seed value and the call counter
	SeedData_t * pData = (SeedData_t *) malloc ( sizeof ( *pData ) );
	if ( !pData )
	{
		snprintf ( error_message, SPH_UDF_ERROR_LEN, "malloc() failed" );
		return 1;
	}

	// seed==0 means "not set yet"; the seed value itself is read later, in makemva64()
	pData->seed = 0;
	pData->counter = 0;

	init->func_data = pData;
	return 0;
}
362460

461+
// Deinit hook for MAKEMVA64(): frees the SeedData_t that makemva64_init() stored
// in init->func_data and clears the pointer so it cannot be freed twice.
DLLEXPORT void makemva64_deinit ( SPH_UDF_INIT * init )
{
	// free(NULL) is a no-op, so no guard is needed
	free ( init->func_data );
	init->func_data = NULL;
}
469+
363470
DLLEXPORT ByteBlob_t makemva64 ( SPH_UDF_INIT * init, SPH_UDF_ARGS * args, char * error_flag )
364471
{
472+
SeedData_t * pData = (SeedData_t *)init->func_data;
473+
uint32_t state;
474+
475+
// Check if seed was explicitly provided (arg_values not available during _init)
476+
if ( args->arg_count == 1 && args->arg_values && args->arg_values[0] )
477+
{
478+
// Seed was provided - use it deterministically for all rows
479+
if ( pData->seed == 0 )
480+
{
481+
// First call with seed - initialize it
482+
if ( args->arg_types[0] == SPH_UDF_TYPE_UINT32 )
483+
pData->seed = *(uint32_t*)args->arg_values[0];
484+
else if ( args->arg_types[0] == SPH_UDF_TYPE_INT64 )
485+
pData->seed = (uint32_t)(*(sphinx_int64_t*)args->arg_values[0]);
486+
else
487+
pData->seed = (uint32_t)time(NULL);
488+
// Ensure seed is non-zero (0 is our "not initialized" marker)
489+
if ( pData->seed == 0 )
490+
pData->seed = 1;
491+
}
492+
// Use the stored seed for deterministic results
493+
state = pData->seed;
494+
}
495+
else
496+
{
497+
// No seed provided - generate new random seed for each row
498+
pData->counter++;
499+
state = (uint32_t)time(NULL) ^ (uint32_t)((uintptr_t)init->func_data) ^ pData->counter;
500+
}
501+
365502
// Generate random array length from 1 to 20
366-
int count = (rand() % 20) + 1;
503+
int count = (lcg_next(&state) % 20) + 1;
367504

368505
// Allocate memory for random number of int64 values using the provided malloc function
369506
int64_t * pValues = (int64_t *) args->fn_malloc ( count * sizeof(int64_t) );
@@ -375,27 +512,96 @@ DLLEXPORT ByteBlob_t makemva64 ( SPH_UDF_INIT * init, SPH_UDF_ARGS * args, char
375512

376513
// Fill with random values
377514
for ( int i = 0; i < count; i++ )
378-
pValues[i] = (int64_t)(rand() % 10000) + 1; // Random values from 1 to 10000
515+
pValues[i] = (int64_t)(lcg_next(&state) % 10000) + 1; // Random values from 1 to 10000
516+
517+
// Sort values (required for any() function to work with binary search)
518+
// Simple bubble sort for small arrays (typically 1-20 elements)
519+
for ( int i = 0; i < count - 1; i++ )
520+
{
521+
for ( int j = 0; j < count - i - 1; j++ )
522+
{
523+
if ( pValues[j] > pValues[j+1] )
524+
{
525+
int64_t temp = pValues[j];
526+
pValues[j] = pValues[j+1];
527+
pValues[j+1] = temp;
528+
}
529+
}
530+
}
379531

380532
// Return as ByteBlob_t
381533
return (ByteBlob_t){(const BYTE*)pValues, count * sizeof(int64_t)};
382534
}
383535

384536
//////////////////////////////////////////////////////////////////////////
385537
// Float vector return example - returns a random float vector with 128 values
538+
// Optional seed parameter for deterministic results
386539

387540
// Init hook for MAKEFLOATVEC().
//
// Accepts zero arguments or one argument (the optional seed). On success it
// allocates a SeedData_t with seed and counter both zeroed and stores it in
// init->func_data; makefloatvec_deinit() frees it.
//
// Returns 0 on success. Returns 1 after writing a message into error_message
// when more than one argument is passed or when the allocation fails.
DLLEXPORT int makefloatvec_init ( SPH_UDF_INIT * init, SPH_UDF_ARGS * args, char * error_message )
{
	if ( args->arg_count > 1 )
	{
		snprintf ( error_message, SPH_UDF_ERROR_LEN, "MAKEFLOATVEC() takes 0 or 1 argument (optional seed)" );
		return 1;
	}

	// Storage for the seed value and the call counter
	SeedData_t * pData = (SeedData_t *) malloc ( sizeof ( *pData ) );
	if ( !pData )
	{
		snprintf ( error_message, SPH_UDF_ERROR_LEN, "malloc() failed" );
		return 1;
	}

	// seed==0 means "not set yet"; the seed value itself is read later, in makefloatvec()
	pData->seed = 0;
	pData->counter = 0;

	init->func_data = pData;
	return 0;
}
396563

564+
// Deinit hook for MAKEFLOATVEC(): frees the SeedData_t that makefloatvec_init()
// stored in init->func_data and clears the pointer so it cannot be freed twice.
DLLEXPORT void makefloatvec_deinit ( SPH_UDF_INIT * init )
{
	// free(NULL) is a no-op, so no guard is needed
	free ( init->func_data );
	init->func_data = NULL;
}
572+
397573
DLLEXPORT ByteBlob_t makefloatvec ( SPH_UDF_INIT * init, SPH_UDF_ARGS * args, char * error_flag )
398574
{
575+
SeedData_t * pData = (SeedData_t *)init->func_data;
576+
uint32_t state;
577+
578+
// Check if seed was explicitly provided (arg_values not available during _init)
579+
if ( args->arg_count == 1 && args->arg_values && args->arg_values[0] )
580+
{
581+
// Seed was provided - use it deterministically for all rows
582+
if ( pData->seed == 0 )
583+
{
584+
// First call with seed - initialize it
585+
if ( args->arg_types[0] == SPH_UDF_TYPE_UINT32 )
586+
pData->seed = *(uint32_t*)args->arg_values[0];
587+
else if ( args->arg_types[0] == SPH_UDF_TYPE_INT64 )
588+
pData->seed = (uint32_t)(*(sphinx_int64_t*)args->arg_values[0]);
589+
else
590+
pData->seed = (uint32_t)time(NULL);
591+
// Ensure seed is non-zero (0 is our "not initialized" marker)
592+
if ( pData->seed == 0 )
593+
pData->seed = 1;
594+
}
595+
// Use the stored seed for deterministic results
596+
state = pData->seed;
597+
}
598+
else
599+
{
600+
// No seed provided - generate new random seed for each row
601+
pData->counter++;
602+
state = (uint32_t)time(NULL) ^ (uint32_t)((uintptr_t)init->func_data) ^ pData->counter;
603+
}
604+
399605
// Constant array length of 128 for float vector
400606
const int count = 128;
401607

@@ -409,7 +615,7 @@ DLLEXPORT ByteBlob_t makefloatvec ( SPH_UDF_INIT * init, SPH_UDF_ARGS * args, ch
409615

410616
// Fill with random values
411617
for ( int i = 0; i < count; i++ )
412-
pValues[i] = (float)(rand() % 1000) / 10.0f; // Random values from 0.0 to 99.9
618+
pValues[i] = (float)(lcg_next(&state) % 1000) / 10.0f; // Random values from 0.0 to 99.9
413619

414620
// Return as ByteBlob_t
415621
return (ByteBlob_t){(const BYTE*)pValues, count * sizeof(float)};

0 commit comments

Comments
 (0)