Skip to content

Commit 05ea21c

Browse files
committed
WIP: Prototype of wrapping NRRD parsers in C++ locale
Used Claude to implement a prototype/test of using a string stream with a set locale instead of sscanf.
1 parent 1096547 commit 05ea21c

5 files changed

Lines changed: 288 additions & 23 deletions

File tree

Modules/IO/NRRD/test/CMakeLists.txt

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -356,3 +356,8 @@ itk_add_test(
356356
itkNrrdMetaDataTest
357357
${ITK_TEST_OUTPUT_DIR}
358358
)
359+
360+
# GTest tests
361+
set(ITKIONRRDGTests itkNrrdLocaleGTest.cxx)
362+
363+
creategoogletestdriver(ITKIONRRD "${ITKIONRRD-Test_LIBRARIES}" "${ITKIONRRDGTests}")
Lines changed: 176 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,176 @@
1+
/*=========================================================================
2+
*
3+
* Copyright NumFOCUS
4+
*
5+
* Licensed under the Apache License, Version 2.0 (the "License");
6+
* you may not use this file except in compliance with the License.
7+
* You may obtain a copy of the License at
8+
*
9+
* https://www.apache.org/licenses/LICENSE-2.0.txt
10+
*
11+
* Unless required by applicable law or agreed to in writing, software
12+
* distributed under the License is distributed on an "AS IS" BASIS,
13+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
* See the License for the specific language governing permissions and
15+
* limitations under the License.
16+
*
17+
*=========================================================================*/
18+
19+
#include <locale.h>
20+
#include "itkImage.h"
21+
#include "itkImageFileReader.h"
22+
#include "itkImageFileWriter.h"
23+
#include "itkNrrdImageIO.h"
24+
#include "itksys/SystemTools.hxx"
25+
26+
#include "itkGTest.h"
27+
28+
#define _STRING(s) #s
29+
#define TOSTRING(s) std::string(_STRING(s))
30+
31+
namespace
32+
{
33+
34+
constexpr unsigned int Dimension = 3;
35+
using PixelType = float;
36+
using ImageType = itk::Image<PixelType, Dimension>;
37+
38+
class NrrdLocaleTest : public ::testing::Test
39+
{
40+
public:
41+
void
42+
SetUp() override
43+
{
44+
itksys::SystemTools::ChangeDirectory(TOSTRING(ITK_TEST_OUTPUT_DIR));
45+
}
46+
47+
ImageType::Pointer
48+
CreateTestImage()
49+
{
50+
auto image = ImageType::New();
51+
52+
ImageType::SizeType size;
53+
size.Fill(16);
54+
55+
ImageType::IndexType start;
56+
start.Fill(0);
57+
58+
ImageType::RegionType region(start, size);
59+
image->SetRegions(region);
60+
image->Allocate();
61+
image->FillBuffer(42.0f);
62+
63+
// Set spacing with fractional values that would be misparsed
64+
// in locales using comma as decimal separator
65+
ImageType::SpacingType spacing;
66+
spacing[0] = 3.5; // Would be parsed correctly even with truncation
67+
spacing[1] = 0.878906; // Would become 0.0 in de_DE locale, causing error
68+
spacing[2] = 2.2; // Would be parsed incorrectly
69+
70+
image->SetSpacing(spacing);
71+
72+
// Set origin with fractional values
73+
ImageType::PointType origin;
74+
origin[0] = 1.5;
75+
origin[1] = 2.75;
76+
origin[2] = 0.5;
77+
image->SetOrigin(origin);
78+
79+
return image;
80+
}
81+
};
82+
83+
TEST_F(NrrdLocaleTest, ReadWriteWithCLocale)
84+
{
85+
auto image = CreateTestImage();
86+
const auto expectedSpacing = image->GetSpacing();
87+
const auto expectedOrigin = image->GetOrigin();
88+
89+
const std::string filename = "locale_test.nrrd";
90+
91+
// Write the image with C locale
92+
setlocale(LC_NUMERIC, "C");
93+
94+
auto writer = itk::ImageFileWriter<ImageType>::New();
95+
writer->SetFileName(filename);
96+
writer->SetInput(image);
97+
writer->SetImageIO(itk::NrrdImageIO::New());
98+
EXPECT_NO_THROW(writer->Update());
99+
100+
// Read with C locale
101+
auto reader = itk::ImageFileReader<ImageType>::New();
102+
reader->SetFileName(filename);
103+
reader->SetImageIO(itk::NrrdImageIO::New());
104+
EXPECT_NO_THROW(reader->Update());
105+
106+
const auto readImage = reader->GetOutput();
107+
const auto readSpacing = readImage->GetSpacing();
108+
const auto readOrigin = readImage->GetOrigin();
109+
110+
// Verify spacing
111+
for (unsigned int i = 0; i < Dimension; ++i)
112+
{
113+
EXPECT_NEAR(readSpacing[i], expectedSpacing[i], 1e-6) << "Spacing mismatch in C locale at index " << i;
114+
}
115+
116+
// Verify origin
117+
for (unsigned int i = 0; i < Dimension; ++i)
118+
{
119+
EXPECT_NEAR(readOrigin[i], expectedOrigin[i], 1e-6) << "Origin mismatch in C locale at index " << i;
120+
}
121+
}
122+
123+
TEST_F(NrrdLocaleTest, ReadWithGermanLocale)
124+
{
125+
auto image = CreateTestImage();
126+
const auto expectedSpacing = image->GetSpacing();
127+
const auto expectedOrigin = image->GetOrigin();
128+
129+
const std::string filename = "locale_test_de.nrrd";
130+
131+
// Write the image with C locale
132+
setlocale(LC_NUMERIC, "C");
133+
134+
auto writer = itk::ImageFileWriter<ImageType>::New();
135+
writer->SetFileName(filename);
136+
writer->SetInput(image);
137+
writer->SetImageIO(itk::NrrdImageIO::New());
138+
EXPECT_NO_THROW(writer->Update());
139+
140+
// Try to set German locale; skip test if not available
141+
if (setlocale(LC_NUMERIC, "de_DE.UTF-8") == nullptr)
142+
{
143+
GTEST_SKIP() << "de_DE.UTF-8 locale not available, skipping locale-specific test";
144+
}
145+
146+
// Read with de_DE locale - this is the critical test
147+
auto reader = itk::ImageFileReader<ImageType>::New();
148+
reader->SetFileName(filename);
149+
reader->SetImageIO(itk::NrrdImageIO::New());
150+
EXPECT_NO_THROW(reader->Update());
151+
152+
const auto readImage = reader->GetOutput();
153+
const auto readSpacing = readImage->GetSpacing();
154+
const auto readOrigin = readImage->GetOrigin();
155+
156+
// Verify spacing - this would fail without locale-independent parsing
157+
for (unsigned int i = 0; i < Dimension; ++i)
158+
{
159+
EXPECT_NEAR(readSpacing[i], expectedSpacing[i], 1e-6)
160+
<< "Spacing mismatch in de_DE locale at index " << i
161+
<< ". This indicates locale-dependent parsing is still occurring!";
162+
}
163+
164+
// Verify origin
165+
for (unsigned int i = 0; i < Dimension; ++i)
166+
{
167+
EXPECT_NEAR(readOrigin[i], expectedOrigin[i], 1e-6)
168+
<< "Origin mismatch in de_DE locale at index " << i
169+
<< ". This indicates locale-dependent parsing is still occurring!";
170+
}
171+
172+
// Restore C locale
173+
setlocale(LC_NUMERIC, "C");
174+
}
175+
176+
} // namespace

Modules/ThirdParty/NrrdIO/src/NrrdIO/CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ INCLUDE_REGULAR_EXPRESSION("^.*.h$")
88
# package. See http://teem.sourceforge.net for more information.
99
#
1010

11-
SET(nrrdio_SRCS 754.c mop.c array.c parseAir.c dio.c sane.c endianAir.c
11+
SET(nrrdio_SRCS 754.c mop.c array.c parseAir.cxx dio.c sane.c endianAir.c
1212
string.c enum.c miscAir.c biffbiff.c biffmsg.c accessors.c defaultsNrrd.c
1313
enumsNrrd.c arraysNrrd.c methodsNrrd.c reorder.c axis.c simple.c comment.c
1414
keyvalue.c endianNrrd.c parseNrrd.c gzio.c read.c write.c format.c

Modules/ThirdParty/NrrdIO/src/NrrdIO/parseAir.c renamed to Modules/ThirdParty/NrrdIO/src/NrrdIO/parseAir.cxx

Lines changed: 59 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,14 @@
2323
3. This notice may not be removed or altered from any source distribution.
2424
*/
2525

26+
#include <locale>
27+
#include <sstream>
28+
#include <string>
29+
#include <stdexcept>
30+
31+
extern "C" {
2632
#include "NrrdIO.h"
33+
}
2734

2835
static const char *
2936
_airBoolStr[] = {
@@ -105,10 +112,29 @@ airSingleSscanf(const char *str, const char *fmt, void *ptr) {
105112
val = AIR_POS_INF;
106113
}
107114
else {
108-
/* nothing special matched; pass it off to sscanf() */
109-
ret = sscanf(str, fmt, ptr);
115+
/* nothing special matched; use C++ stream with classic locale */
116+
std::istringstream iss(str);
117+
iss.imbue(std::locale::classic());
118+
119+
if (!strncmp(fmt, "%l", 2)) {
120+
/* we were given a double pointer */
121+
double dval;
122+
if (!(iss >> dval)) {
123+
free(tmp);
124+
return 0;
125+
}
126+
*((double *)(ptr)) = dval;
127+
} else {
128+
/* we were given a float pointer */
129+
float fval;
130+
if (!(iss >> fval)) {
131+
free(tmp);
132+
return 0;
133+
}
134+
*((float *)(ptr)) = fval;
135+
}
110136
free(tmp);
111-
return ret;
137+
return 1;
112138
}
113139
/* else we matched "nan", "-inf", or "inf", and set val accordingly */
114140
if (!strncmp(fmt, "%l", 2)) {
@@ -138,8 +164,36 @@ airSingleSscanf(const char *str, const char *fmt, void *ptr) {
138164
*((size_t *)(ptr)) = tsz;
139165
return 1;
140166
} else {
141-
/* not a float, double, or size_t, let sscanf handle it */
142-
return sscanf(str, fmt, ptr);
167+
/* not a float, double, or size_t, use C++ stream with classic locale */
168+
std::istringstream iss(str);
169+
iss.imbue(std::locale::classic());
170+
171+
if (!strcmp(fmt, "%d")) {
172+
int ival;
173+
if (!(iss >> ival)) return 0;
174+
*((int *)(ptr)) = ival;
175+
return 1;
176+
} else if (!strcmp(fmt, "%u")) {
177+
unsigned int uival;
178+
if (!(iss >> uival)) return 0;
179+
*((unsigned int *)(ptr)) = uival;
180+
return 1;
181+
} else if (!strcmp(fmt, "%ld")) {
182+
long lival;
183+
if (!(iss >> lival)) return 0;
184+
*((long *)(ptr)) = lival;
185+
return 1;
186+
} else if (!strcmp(fmt, "%lu")) {
187+
unsigned long ulval;
188+
if (!(iss >> ulval)) return 0;
189+
*((unsigned long *)(ptr)) = ulval;
190+
return 1;
191+
}
192+
/* fallback to sscanf for unknown formats - should never reach here! */
193+
std::string error_msg = std::string("airSingleSscanf: Unsupported format '") +
194+
fmt + "' for string '" + str +
195+
"' - fallback to locale-dependent sscanf would be used!";
196+
throw std::runtime_error(error_msg);
143197
}
144198
}
145199

Modules/ThirdParty/NrrdIO/src/NrrdIO/parseNrrd.c

Lines changed: 47 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -206,7 +206,7 @@ _nrrdReadNrrdParse_type(FILE *file, Nrrd *nrrd,
206206
}
207207

208208
#define _PARSE_ONE_VAL(FIELD, CONV, TYPE) \
209-
if (1 != sscanf(info, CONV, &(FIELD))) { \
209+
if (1 != airSingleSscanf(info, CONV, &(FIELD))) { \
210210
biffMaybeAddf(useBiff, NRRD, "%s: couldn't parse " TYPE \
211211
" from \"%s\"", me, info); \
212212
return 1; \
@@ -1299,24 +1299,54 @@ _nrrdReadNrrdParse_data_file(FILE *ffile, Nrrd *nrrd,
12991299
sspn = strspn(nums, _nrrdFieldSep);
13001300
nums[0] = 0; /* terminate so that format is now in info */
13011301
nums += sspn;
1302-
if (!( 3 == sscanf(nums, "%d %d %d",&(nio->dataFNMin),
1303-
&(nio->dataFNMax), &(nio->dataFNStep)) )) {
1304-
biffMaybeAddf(useBiff, NRRD,
1305-
"%s: couldn't parse three ints (min, max, step) after "
1306-
"data filename template", me);
1307-
airMopError(mop); return 1;
1308-
}
1309-
if ( 4 == sscanf(nums, "%d %d %d %u", &(nio->dataFNMin),
1310-
&(nio->dataFNMax), &(nio->dataFNStep),
1311-
&(nio->dataFileDim)) ) {
1312-
if (!AIR_IN_CL(1, nio->dataFileDim, nrrd->dim)) {
1302+
/* Parse three integers one at a time for locale independence */
1303+
{
1304+
char *numsCopy = airStrdup(nums);
1305+
char *ptr = numsCopy;
1306+
int parsed = 0;
1307+
if (1 == airSingleSscanf(ptr, "%d", &(nio->dataFNMin))) {
1308+
while (*ptr && !isspace(*ptr)) ptr++;
1309+
while (*ptr && isspace(*ptr)) ptr++;
1310+
if (1 == airSingleSscanf(ptr, "%d", &(nio->dataFNMax))) {
1311+
while (*ptr && !isspace(*ptr)) ptr++;
1312+
while (*ptr && isspace(*ptr)) ptr++;
1313+
if (1 == airSingleSscanf(ptr, "%d", &(nio->dataFNStep))) {
1314+
parsed = 3;
1315+
}
1316+
}
1317+
}
1318+
airFree(numsCopy);
1319+
if (3 != parsed) {
13131320
biffMaybeAddf(useBiff, NRRD,
1314-
"%s: datafile dimension %u outside valid range [1,%u]",
1315-
me, nio->dataFileDim, nrrd->dim);
1321+
"%s: couldn't parse three ints (min, max, step) after "
1322+
"data filename template", me);
13161323
airMopError(mop); return 1;
13171324
}
1318-
} else {
1319-
nio->dataFileDim = nrrd->dim-1;
1325+
}
1326+
/* Try to parse optional fourth integer */
1327+
{
1328+
char *numsCopy = airStrdup(nums);
1329+
char *ptr = numsCopy;
1330+
int parsed = 0;
1331+
/* Skip past the three already parsed integers */
1332+
for (int i = 0; i < 3 && *ptr; i++) {
1333+
while (*ptr && !isspace(*ptr)) ptr++;
1334+
while (*ptr && isspace(*ptr)) ptr++;
1335+
}
1336+
if (*ptr && 1 == airSingleSscanf(ptr, "%u", &(nio->dataFileDim))) {
1337+
parsed = 4;
1338+
if (!AIR_IN_CL(1, nio->dataFileDim, nrrd->dim)) {
1339+
biffMaybeAddf(useBiff, NRRD,
1340+
"%s: datafile dimension %u outside valid range [1,%u]",
1341+
me, nio->dataFileDim, nrrd->dim);
1342+
airFree(numsCopy);
1343+
airMopError(mop); return 1;
1344+
}
1345+
}
1346+
airFree(numsCopy);
1347+
if (4 != parsed) {
1348+
nio->dataFileDim = nrrd->dim-1;
1349+
}
13201350
}
13211351
if (0 == nio->dataFNStep) {
13221352
biffMaybeAddf(useBiff, NRRD,
@@ -1353,7 +1383,7 @@ _nrrdReadNrrdParse_data_file(FILE *ffile, Nrrd *nrrd,
13531383
}
13541384
info += strlen(NRRD_LIST_FLAG);
13551385
if (info[0]) {
1356-
if (1 == sscanf(info, "%u", &(nio->dataFileDim))) {
1386+
if (1 == airSingleSscanf(info, "%u", &(nio->dataFileDim))) {
13571387
if (!AIR_IN_CL(1, nio->dataFileDim, nrrd->dim)) {
13581388
biffMaybeAddf(useBiff, NRRD, "%s: datafile dimension %u outside "
13591389
"valid range [1,%u]",

0 commit comments

Comments
 (0)