Skip to content

Commit 505ef45

Browse files
committed
Merge pull request #242 from go-sql-driver/collation
Connection collation DSN parameter
2 parents acbe68a + a6f01ea commit 505ef45

File tree

9 files changed

+336
-40
lines changed

9 files changed

+336
-40
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ Changes:
1515

1616
New Features:
1717

18+
- Setting the connection collation is possible with the `collation` DSN parameter. This parameter should be preferred over the `charset` parameter.
1819
- Logging of critical errors is configurable with `SetLogger`
1920
- Google CloudSQL support
2021

README.md

Lines changed: 21 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -141,8 +141,22 @@ Valid Values: <name>
141141
Default: none
142142
```
143143

144-
Sets the charset used for client-server interaction (`"SET NAMES <value>"`). If multiple charsets are set (separated by a comma), the following charset is used if setting the charset failes. This enables support for `utf8mb4` ([introduced in MySQL 5.5.3](http://dev.mysql.com/doc/refman/5.5/en/charset-unicode-utf8mb4.html)) with fallback to `utf8` for older servers (`charset=utf8mb4,utf8`).
144+
Sets the charset used for client-server interaction (`"SET NAMES <value>"`). If multiple charsets are set (separated by a comma), the following charset is used if setting the charset fails. This enables, for example, support for `utf8mb4` ([introduced in MySQL 5.5.3](http://dev.mysql.com/doc/refman/5.5/en/charset-unicode-utf8mb4.html)) with fallback to `utf8` for older servers (`charset=utf8mb4,utf8`).
145145

146+
Usage of the `charset` parameter is discouraged because it issues additional queries to the server.
147+
Unless you need the fallback behavior, please use `collation` instead.
148+
149+
##### `collation`
150+
151+
```
152+
Type: string
153+
Valid Values: <name>
154+
Default: utf8_general_ci
155+
```
156+
157+
Sets the collation used for client-server interaction on connection. In contrast to `charset`, `collation` does not issue additional queries. If the specified collation is unavailable on the target server, the connection will fail.
158+
159+
A list of valid collations for a server is retrievable with `SHOW COLLATION`.
146160

147161
##### `clientFoundRows`
148162

@@ -243,7 +257,7 @@ user:password@/dbname?strict=true&sql_notes=false
243257

244258
TCP via IPv6:
245259
```
246-
user:password@tcp([de:ad:be:ef::ca:fe]:80)/dbname?timeout=90s
260+
user:password@tcp([de:ad:be:ef::ca:fe]:80)/dbname?timeout=90s&collation=utf8mb4_unicode_ci
247261
```
248262

249263
TCP on a remote host, e.g. Amazon RDS:
@@ -295,7 +309,11 @@ Alternatively you can use the [`NullTime`](http://godoc.org/github.com/go-sql-dr
295309

296310

297311
### Unicode support
298-
Since version 1.1 Go-MySQL-Driver automatically uses the collation `utf8_general_ci` by default. Adding `&charset=utf8` (alias for `SET NAMES utf8`) to the DSN is not necessary anymore in most cases.
312+
Since version 1.1 Go-MySQL-Driver automatically uses the collation `utf8_general_ci` by default.
313+
314+
Other collations / charsets can be set using the [`collation`](#collation) DSN parameter.
315+
316+
Version 1.0 of the driver recommended adding `&charset=utf8` (alias for `SET NAMES utf8`) to the DSN to enable proper UTF-8 support. This is no longer necessary. To set a collation / charset other than the default, the [`collation`](#collation) parameter should be preferred.
299317

300318
See http://dev.mysql.com/doc/refman/5.7/en/charset-unicode.html for more details on MySQL's Unicode support.
301319

collations.go

Lines changed: 236 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,236 @@
1+
// Go MySQL Driver - A MySQL-Driver for Go's database/sql package
2+
//
3+
// Copyright 2014 The Go-MySQL-Driver Authors. All rights reserved.
4+
//
5+
// This Source Code Form is subject to the terms of the Mozilla Public
6+
// License, v. 2.0. If a copy of the MPL was not distributed with this file,
7+
// You can obtain one at http://mozilla.org/MPL/2.0/.
8+
9+
package mysql
10+
11+
const defaultCollation byte = 33 // utf8_general_ci
12+
13+
// A list of available collations mapped to the internal ID.
14+
// To update this map use the following MySQL query:
15+
// SELECT COLLATION_NAME, ID FROM information_schema.COLLATIONS
16+
var collations = map[string]byte{
17+
"big5_chinese_ci": 1,
18+
"latin2_czech_cs": 2,
19+
"dec8_swedish_ci": 3,
20+
"cp850_general_ci": 4,
21+
"latin1_german1_ci": 5,
22+
"hp8_english_ci": 6,
23+
"koi8r_general_ci": 7,
24+
"latin1_swedish_ci": 8,
25+
"latin2_general_ci": 9,
26+
"swe7_swedish_ci": 10,
27+
"ascii_general_ci": 11,
28+
"ujis_japanese_ci": 12,
29+
"sjis_japanese_ci": 13,
30+
"cp1251_bulgarian_ci": 14,
31+
"latin1_danish_ci": 15,
32+
"hebrew_general_ci": 16,
33+
"tis620_thai_ci": 18,
34+
"euckr_korean_ci": 19,
35+
"latin7_estonian_cs": 20,
36+
"latin2_hungarian_ci": 21,
37+
"koi8u_general_ci": 22,
38+
"cp1251_ukrainian_ci": 23,
39+
"gb2312_chinese_ci": 24,
40+
"greek_general_ci": 25,
41+
"cp1250_general_ci": 26,
42+
"latin2_croatian_ci": 27,
43+
"gbk_chinese_ci": 28,
44+
"cp1257_lithuanian_ci": 29,
45+
"latin5_turkish_ci": 30,
46+
"latin1_german2_ci": 31,
47+
"armscii8_general_ci": 32,
48+
"utf8_general_ci": 33,
49+
"cp1250_czech_cs": 34,
50+
"ucs2_general_ci": 35,
51+
"cp866_general_ci": 36,
52+
"keybcs2_general_ci": 37,
53+
"macce_general_ci": 38,
54+
"macroman_general_ci": 39,
55+
"cp852_general_ci": 40,
56+
"latin7_general_ci": 41,
57+
"latin7_general_cs": 42,
58+
"macce_bin": 43,
59+
"cp1250_croatian_ci": 44,
60+
"utf8mb4_general_ci": 45,
61+
"utf8mb4_bin": 46,
62+
"latin1_bin": 47,
63+
"latin1_general_ci": 48,
64+
"latin1_general_cs": 49,
65+
"cp1251_bin": 50,
66+
"cp1251_general_ci": 51,
67+
"cp1251_general_cs": 52,
68+
"macroman_bin": 53,
69+
"utf16_general_ci": 54,
70+
"utf16_bin": 55,
71+
"utf16le_general_ci": 56,
72+
"cp1256_general_ci": 57,
73+
"cp1257_bin": 58,
74+
"cp1257_general_ci": 59,
75+
"utf32_general_ci": 60,
76+
"utf32_bin": 61,
77+
"utf16le_bin": 62,
78+
"binary": 63,
79+
"armscii8_bin": 64,
80+
"ascii_bin": 65,
81+
"cp1250_bin": 66,
82+
"cp1256_bin": 67,
83+
"cp866_bin": 68,
84+
"dec8_bin": 69,
85+
"greek_bin": 70,
86+
"hebrew_bin": 71,
87+
"hp8_bin": 72,
88+
"keybcs2_bin": 73,
89+
"koi8r_bin": 74,
90+
"koi8u_bin": 75,
91+
"latin2_bin": 77,
92+
"latin5_bin": 78,
93+
"latin7_bin": 79,
94+
"cp850_bin": 80,
95+
"cp852_bin": 81,
96+
"swe7_bin": 82,
97+
"utf8_bin": 83,
98+
"big5_bin": 84,
99+
"euckr_bin": 85,
100+
"gb2312_bin": 86,
101+
"gbk_bin": 87,
102+
"sjis_bin": 88,
103+
"tis620_bin": 89,
104+
"ucs2_bin": 90,
105+
"ujis_bin": 91,
106+
"geostd8_general_ci": 92,
107+
"geostd8_bin": 93,
108+
"latin1_spanish_ci": 94,
109+
"cp932_japanese_ci": 95,
110+
"cp932_bin": 96,
111+
"eucjpms_japanese_ci": 97,
112+
"eucjpms_bin": 98,
113+
"cp1250_polish_ci": 99,
114+
"utf16_unicode_ci": 101,
115+
"utf16_icelandic_ci": 102,
116+
"utf16_latvian_ci": 103,
117+
"utf16_romanian_ci": 104,
118+
"utf16_slovenian_ci": 105,
119+
"utf16_polish_ci": 106,
120+
"utf16_estonian_ci": 107,
121+
"utf16_spanish_ci": 108,
122+
"utf16_swedish_ci": 109,
123+
"utf16_turkish_ci": 110,
124+
"utf16_czech_ci": 111,
125+
"utf16_danish_ci": 112,
126+
"utf16_lithuanian_ci": 113,
127+
"utf16_slovak_ci": 114,
128+
"utf16_spanish2_ci": 115,
129+
"utf16_roman_ci": 116,
130+
"utf16_persian_ci": 117,
131+
"utf16_esperanto_ci": 118,
132+
"utf16_hungarian_ci": 119,
133+
"utf16_sinhala_ci": 120,
134+
"utf16_german2_ci": 121,
135+
"utf16_croatian_ci": 122,
136+
"utf16_unicode_520_ci": 123,
137+
"utf16_vietnamese_ci": 124,
138+
"ucs2_unicode_ci": 128,
139+
"ucs2_icelandic_ci": 129,
140+
"ucs2_latvian_ci": 130,
141+
"ucs2_romanian_ci": 131,
142+
"ucs2_slovenian_ci": 132,
143+
"ucs2_polish_ci": 133,
144+
"ucs2_estonian_ci": 134,
145+
"ucs2_spanish_ci": 135,
146+
"ucs2_swedish_ci": 136,
147+
"ucs2_turkish_ci": 137,
148+
"ucs2_czech_ci": 138,
149+
"ucs2_danish_ci": 139,
150+
"ucs2_lithuanian_ci": 140,
151+
"ucs2_slovak_ci": 141,
152+
"ucs2_spanish2_ci": 142,
153+
"ucs2_roman_ci": 143,
154+
"ucs2_persian_ci": 144,
155+
"ucs2_esperanto_ci": 145,
156+
"ucs2_hungarian_ci": 146,
157+
"ucs2_sinhala_ci": 147,
158+
"ucs2_german2_ci": 148,
159+
"ucs2_croatian_ci": 149,
160+
"ucs2_unicode_520_ci": 150,
161+
"ucs2_vietnamese_ci": 151,
162+
"ucs2_general_mysql500_ci": 159,
163+
"utf32_unicode_ci": 160,
164+
"utf32_icelandic_ci": 161,
165+
"utf32_latvian_ci": 162,
166+
"utf32_romanian_ci": 163,
167+
"utf32_slovenian_ci": 164,
168+
"utf32_polish_ci": 165,
169+
"utf32_estonian_ci": 166,
170+
"utf32_spanish_ci": 167,
171+
"utf32_swedish_ci": 168,
172+
"utf32_turkish_ci": 169,
173+
"utf32_czech_ci": 170,
174+
"utf32_danish_ci": 171,
175+
"utf32_lithuanian_ci": 172,
176+
"utf32_slovak_ci": 173,
177+
"utf32_spanish2_ci": 174,
178+
"utf32_roman_ci": 175,
179+
"utf32_persian_ci": 176,
180+
"utf32_esperanto_ci": 177,
181+
"utf32_hungarian_ci": 178,
182+
"utf32_sinhala_ci": 179,
183+
"utf32_german2_ci": 180,
184+
"utf32_croatian_ci": 181,
185+
"utf32_unicode_520_ci": 182,
186+
"utf32_vietnamese_ci": 183,
187+
"utf8_unicode_ci": 192,
188+
"utf8_icelandic_ci": 193,
189+
"utf8_latvian_ci": 194,
190+
"utf8_romanian_ci": 195,
191+
"utf8_slovenian_ci": 196,
192+
"utf8_polish_ci": 197,
193+
"utf8_estonian_ci": 198,
194+
"utf8_spanish_ci": 199,
195+
"utf8_swedish_ci": 200,
196+
"utf8_turkish_ci": 201,
197+
"utf8_czech_ci": 202,
198+
"utf8_danish_ci": 203,
199+
"utf8_lithuanian_ci": 204,
200+
"utf8_slovak_ci": 205,
201+
"utf8_spanish2_ci": 206,
202+
"utf8_roman_ci": 207,
203+
"utf8_persian_ci": 208,
204+
"utf8_esperanto_ci": 209,
205+
"utf8_hungarian_ci": 210,
206+
"utf8_sinhala_ci": 211,
207+
"utf8_german2_ci": 212,
208+
"utf8_croatian_ci": 213,
209+
"utf8_unicode_520_ci": 214,
210+
"utf8_vietnamese_ci": 215,
211+
"utf8_general_mysql500_ci": 223,
212+
"utf8mb4_unicode_ci": 224,
213+
"utf8mb4_icelandic_ci": 225,
214+
"utf8mb4_latvian_ci": 226,
215+
"utf8mb4_romanian_ci": 227,
216+
"utf8mb4_slovenian_ci": 228,
217+
"utf8mb4_polish_ci": 229,
218+
"utf8mb4_estonian_ci": 230,
219+
"utf8mb4_spanish_ci": 231,
220+
"utf8mb4_swedish_ci": 232,
221+
"utf8mb4_turkish_ci": 233,
222+
"utf8mb4_czech_ci": 234,
223+
"utf8mb4_danish_ci": 235,
224+
"utf8mb4_lithuanian_ci": 236,
225+
"utf8mb4_slovak_ci": 237,
226+
"utf8mb4_spanish2_ci": 238,
227+
"utf8mb4_roman_ci": 239,
228+
"utf8mb4_persian_ci": 240,
229+
"utf8mb4_esperanto_ci": 241,
230+
"utf8mb4_hungarian_ci": 242,
231+
"utf8mb4_sinhala_ci": 243,
232+
"utf8mb4_german2_ci": 244,
233+
"utf8mb4_croatian_ci": 245,
234+
"utf8mb4_unicode_520_ci": 246,
235+
"utf8mb4_vietnamese_ci": 247,
236+
}

connection.go

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -39,14 +39,15 @@ type config struct {
3939
dbname string
4040
params map[string]string
4141
loc *time.Location
42-
timeout time.Duration
4342
tls *tls.Config
43+
timeout time.Duration
44+
collation uint8
4445
allowAllFiles bool
4546
allowOldPasswords bool
4647
clientFoundRows bool
4748
}
4849

49-
// Handles parameters set in DSN
50+
// Handles parameters set in DSN after the connection is established
5051
func (mc *mysqlConn) handleParams() (err error) {
5152
for param, val := range mc.cfg.params {
5253
switch param {

const.go

Lines changed: 0 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -130,13 +130,3 @@ const (
130130
flagUnknown3
131131
flagUnknown4
132132
)
133-
134-
const (
135-
collation_ascii_general_ci byte = 11
136-
collation_utf8_general_ci byte = 33
137-
collation_utf8mb4_general_ci byte = 45
138-
collation_utf8mb4_bin byte = 46
139-
collation_latin1_general_ci byte = 48
140-
collation_binary byte = 63
141-
collation_utf8mb4_unicode_ci byte = 224
142-
)

driver_test.go

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -938,6 +938,44 @@ func TestFailingCharset(t *testing.T) {
938938
})
939939
}
940940

941+
func TestCollation(t *testing.T) {
942+
if !available {
943+
t.Skipf("MySQL-Server not running on %s", netAddr)
944+
}
945+
946+
defaultCollation := "utf8_general_ci"
947+
testCollations := []string{
948+
"", // do not set
949+
defaultCollation, // driver default
950+
"latin1_general_ci",
951+
"binary",
952+
"utf8_unicode_ci",
953+
"utf8mb4_general_ci",
954+
}
955+
956+
for _, collation := range testCollations {
957+
var expected, tdsn string
958+
if collation != "" {
959+
tdsn = dsn + "&collation=" + collation
960+
expected = collation
961+
} else {
962+
tdsn = dsn
963+
expected = defaultCollation
964+
}
965+
966+
runTests(t, tdsn, func(dbt *DBTest) {
967+
var got string
968+
if err := dbt.db.QueryRow("SELECT @@collation_connection").Scan(&got); err != nil {
969+
dbt.Fatal(err)
970+
}
971+
972+
if got != expected {
973+
dbt.Fatalf("Expected connection collation %s but got %s", expected, got)
974+
}
975+
})
976+
}
977+
}
978+
941979
func TestRawBytesResultExceedsBuffer(t *testing.T) {
942980
runTests(t, dsn, func(dbt *DBTest) {
943981
// defaultBufSize from buffer.go

packets.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -257,7 +257,7 @@ func (mc *mysqlConn) writeAuthPacket(cipher []byte) error {
257257
data[11] = 0x00
258258

259259
// Charset [1 byte]
260-
data[12] = collation_utf8_general_ci
260+
data[12] = mc.cfg.collation
261261

262262
// SSL Connection Request Packet
263263
// http://dev.mysql.com/doc/internals/en/connection-phase-packets.html#packet-Protocol::SSLRequest

0 commit comments

Comments
 (0)