Skip to content

Commit 23270d7

Browse files
committed
Add identify filter for ISO-8859-3 (Latin-3)
There are some bytes in this encoding which are not mapped to any character. Notably, Microsoft added their own mappings for these 'unused' bytes in their version of Latin-3, called CP28593.
1 parent 7b9bed0 commit 23270d7

File tree

1 file changed

+11
-1
lines changed

1 file changed

+11
-1
lines changed

ext/mbstring/libmbfl/filters/mbfilter_iso8859_3.c

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,8 @@
3131
#include "mbfilter_iso8859_3.h"
3232
#include "unicode_table_iso8859_3.h"
3333

34+
static int mbfl_filt_ident_iso8859_3(int c, mbfl_identify_filter *filter);
35+
3436
static const char *mbfl_encoding_8859_3_aliases[] = {"ISO8859-3", "latin3", NULL};
3537

3638
const mbfl_encoding mbfl_encoding_8859_3 = {
@@ -47,7 +49,7 @@ const mbfl_encoding mbfl_encoding_8859_3 = {
4749
const struct mbfl_identify_vtbl vtbl_identify_8859_3 = {
4850
mbfl_no_encoding_8859_3,
4951
mbfl_filt_ident_common_ctor,
50-
mbfl_filt_ident_true
52+
mbfl_filt_ident_iso8859_3
5153
};
5254

5355
const struct mbfl_convert_vtbl vtbl_8859_3_wchar = {
@@ -132,3 +134,11 @@ int mbfl_filt_conv_wchar_8859_3(int c, mbfl_convert_filter *filter)
132134

133135
return c;
134136
}
137+
138+
/*
 * Identify filter for ISO-8859-3: inspects one input value `c` and marks the
 * filter when `c` has no entry in the ISO-8859-3 conversion table.
 *
 * Values below 0xA0 are accepted without touching the filter. For values of
 * 0xA0 and above, the entry at index (c - 0xA0) of iso8859_3_ucs_table is
 * consulted; a zero entry is treated as "no character assigned to this byte"
 * and filter->status is set to 1. The filter is never reset here, so once it
 * has been marked it stays marked for subsequent calls.
 *
 * Always returns `c` unchanged.
 *
 * NOTE(review): there is no upper bound check on `c`; presumably callers only
 * pass single bytes (0x00-0xFF) and the table covers 0xA0-0xFF -- confirm.
 * NOTE(review): confirm that `status` (rather than some other field of
 * mbfl_identify_filter, e.g. a dedicated reject flag) is what the encoding
 * detector actually reads to discard a candidate encoding.
 */
static int mbfl_filt_ident_iso8859_3(int c, mbfl_identify_filter *filter)
139+
{
140+
if (c >= 0xA0 && !iso8859_3_ucs_table[c - 0xA0]) {
141+
filter->status = 1;
142+
}
143+
return c;
144+
}

0 commit comments

Comments
 (0)