-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathstring_utils.h
219 lines (193 loc) · 7.33 KB
/
string_utils.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
#pragma once
#include <string>
#include <vector>
#include <cassert>
#include <comutil.h>
#pragma comment(lib, "comsuppw.lib")
/*
// Remove the whitespace characters '\t', '\n', ' ' from both ends of a string
static std::string trim(std::string& s);
// Extract the text between two markers
// e.g.
auto x = sub_string(std::string("Hello Apple!"), "Hello", "!"); // x == " Apple"
static std::string sub_string(const std::string& src, const std::string& s, const std::string& e);
// Replace every occurrence
static std::string replace_all(std::string str, const std::string& from, const std::string& to);
// String startsWith / endsWith
static bool starts_with(const std::string& str, const std::string& with);
static bool ends_with(const std::string& str, const std::string& with);
// String encoding conversion
static bool is_utf8(const std::string& str);
static std::wstring mutibyte_to_unicode(const std::string& str, int cp);
static std::string unicode_to_mutibyte(const std::wstring& wstr, int cp);
static std::wstring utf8_to_unicode(const std::string& str);
static std::string unicode_to_utf8(const std::wstring& str);
static std::string ws2s(const std::wstring & s); // Use with care: depends on the system default encoding and may produce garbled text in multilingual environments
static std::wstring s2ws(const std::string & s); // Use with care: depends on the system default encoding and may produce garbled text in multilingual environments
// URI encoding / decoding
static std::string uri_encode(const std::string& str);
static std::string uri_decode(const std::string& str);
// Split a string on a delimiter
static std::vector<S> split(const S& s, const S& delim)
*/
namespace common {
struct string_utils {
// Removes leading and trailing '\t', '\n' and ' ' characters from `s` in place.
//
// T must provide value_type, erase, find_first_not_of and find_last_not_of
// (e.g. std::string or std::wstring).
// s      : string to trim; it is modified.
// return : a copy of the trimmed string.
template<class T>
static T trim(T& s) {
    // value_type is a dependent name, so `typename` is required by the standard;
    // without it only permissive compilers accept the declaration.
    typedef typename T::value_type char_type;
    const char_type blanks[] = { '\t', '\n', ' ', '\0' };
    // find_first_not_of yields npos for an all-blank string, which erases everything.
    s.erase(0, s.find_first_not_of(blanks));
    // find_last_not_of yields npos on an empty string; npos + 1 wraps to 0, a valid position.
    s.erase(s.find_last_not_of(blanks) + 1);
    return s;
}
// Returns the text of `src` located between the first occurrence of `s` and the
// next occurrence of `e` after it. Returns an empty T when either marker is missing.
template<class T>
static T sub_string(const T& src, const T& s, const T& e) {
    const auto marker_at = src.find(s);
    if (marker_at != T::npos) {
        // The wanted text begins right after the opening marker.
        const auto content_from = marker_at + s.length();
        const auto content_to = src.find(e, content_from);
        if (content_to != T::npos) {
            return src.substr(content_from, content_to - content_from);
        }
    }
    return T();
}
// Convenience overload: `s` and `e` may be anything T can be constructed from
// (for example string literals); they are converted and forwarded to the overload above.
template<class T, class C, class D>
static T sub_string(const T& src, C s, D e) {
    T opening(s);
    T closing(e);
    return sub_string(src, opening, closing);
}
// Returns a copy of `str` in which every non-overlapping occurrence of `from`
// is replaced by `to`. Text inserted by a replacement is never rescanned.
// An empty `from` matches nothing, so `str` is returned unchanged.
template<class T>
static T replace_all(T str, const T& from, const T& to) {
    // An empty pattern is found at every position; without this guard the loop
    // below would never terminate.
    if (from.empty()) {
        return str;
    }
    typename T::size_type pos = 0;
    while ((pos = str.find(from, pos)) != T::npos) {
        str.replace(pos, from.length(), to);
        // Continue after the inserted text so replacements are not re-matched.
        pos += to.length();
    }
    return str;
}
// Convenience overload: `from` and `to` may be anything T can be constructed from
// (for example string literals); they are converted and forwarded to the overload above.
template<class T, class C, class D>
static T replace_all(T str, C from, D to) {
    return replace_all(str, T(from), T(to));
}
// Returns true when `src` begins with `w`.
// `w` may be anything T can be constructed from (for example a string literal).
// An empty `w` is a prefix of every string.
template<class T, class C>
static bool starts_with(const T& src, C w) {
    // Convert once, and compare against the string object rather than c_str()
    // so a prefix containing embedded NUL characters is compared in full.
    const T prefix(w);
    return src.compare(0, prefix.length(), prefix) == 0;
}
// Returns true when `src` ends with `w`.
// `w` may be anything T can be constructed from (for example a string literal).
// An empty `w` is a suffix of every string.
template<class T, class C>
static bool ends_with(const T& src, C w) {
    const T suffix(w);
    // A suffix longer than the string cannot match; checking first also prevents
    // the unsigned subtraction below from wrapping (which made compare() throw).
    if (suffix.length() > src.length()) {
        return false;
    }
    return src.compare(src.length() - suffix.length(), suffix.length(), suffix) == 0;
}
// Reports whether `str` (up to its first NUL character) is valid UTF-8.
// The check asks MultiByteToWideChar to size a strict UTF-8 conversion
// (MB_ERR_INVALID_CHARS) without producing any output.
// Returns false only when the API fails with ERROR_NO_UNICODE_TRANSLATION;
// any other outcome is reported as true, as before.
static bool is_utf8(const std::string& str) {
    const int required = MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, str.c_str(), -1, NULL, 0);
    // The last-error value is only meaningful after a failure (return value 0);
    // on success it may still hold a stale code from an unrelated earlier call.
    if (required != 0) {
        return true;
    }
    return GetLastError() != ERROR_NO_UNICODE_TRANSLATION;
}
// Converts a UTF-8 encoded string to a wide string by delegating to
// mutibyte_to_unicode with the CP_UTF8 code page.
static std::wstring utf8_to_unicode(const std::string& str) {
    const int source_code_page = CP_UTF8;
    return mutibyte_to_unicode(str, source_code_page);
}
// Converts a wide string to a UTF-8 encoded string by delegating to
// unicode_to_mutibyte with the CP_UTF8 code page.
static std::string unicode_to_utf8(const std::wstring& str) {
    const int target_code_page = CP_UTF8;
    return unicode_to_mutibyte(str, target_code_page);
}
// Converts the multibyte string `str` (read up to its first NUL character) from
// code page `cp` to a wide string using MultiByteToWideChar.
// Returns an empty string when the size query fails.
// cp == 65001 CP_UTF8
// cp == 936 gb2312
static std::wstring mutibyte_to_unicode(const std::string& str, int cp = CP_ACP) {
    // First call only measures: with an input length of -1 the reported size
    // includes the terminating NUL.
    const int wide_len = MultiByteToWideChar(cp, 0, str.c_str(), -1, NULL, 0);
    if (wide_len <= 0) {
        return std::wstring();
    }
    // Value-initialized (all zero) and one element larger than requested, so the
    // buffer is always NUL-terminated; the vector releases it even if the
    // wstring construction below throws.
    std::vector<wchar_t> buffer(wide_len + 1);
    MultiByteToWideChar(cp, 0, str.c_str(), -1, buffer.data(), wide_len);
    return std::wstring(buffer.data());
}
// Converts the wide string `wstr` (read up to its first NUL character) to a
// multibyte string in code page `cp` using WideCharToMultiByte.
// Returns an empty string when the size query fails.
// cp == 65001 CP_UTF8
// cp == 936 gb2312
static std::string unicode_to_mutibyte(const std::wstring& wstr, int cp = CP_ACP) {
    // First call only measures: with an input length of -1 the reported size
    // includes the terminating NUL.
    const int byte_len = WideCharToMultiByte(cp, 0, wstr.c_str(), -1, NULL, 0, NULL, NULL);
    if (byte_len <= 0) {
        return std::string();
    }
    // Value-initialized (all zero) and one element larger than requested, so the
    // buffer is always NUL-terminated; the vector releases it even if the
    // string construction below throws.
    std::vector<char> buffer(byte_len + 1);
    WideCharToMultiByte(cp, 0, wstr.c_str(), -1, buffer.data(), byte_len, NULL, NULL);
    return std::string(buffer.data());
}
static std::string ws2s(const std::wstring & s) {
_bstr_t t = s.c_str();
return std::string(t);
}
static std::wstring s2ws(const std::string & s) {
_bstr_t t = s.c_str();
return std::wstring(t);
}
// Percent-encodes `str` byte by byte.
//  - ASCII letters, digits and '-', '_', '.', '~' are copied unchanged.
//  - A space becomes '+'.
//  - Every other byte becomes "%XX" with two uppercase hexadecimal digits.
static std::string uri_encode(const std::string& str) {
    const char hex_digits[] = "0123456789ABCDEF";
    std::string encoded;
    encoded.reserve(str.length());
    for (std::string::size_type i = 0; i < str.length(); ++i) {
        const unsigned char byte = static_cast<unsigned char>(str[i]);
        // Explicit ASCII ranges instead of isalnum(): the result must not depend
        // on the current C locale, which may classify bytes >= 0x80 as alphanumeric.
        const bool unreserved =
            (byte >= 'A' && byte <= 'Z') ||
            (byte >= 'a' && byte <= 'z') ||
            (byte >= '0' && byte <= '9') ||
            byte == '-' || byte == '_' || byte == '.' || byte == '~';
        if (unreserved) {
            encoded += static_cast<char>(byte);
        } else if (byte == ' ') {
            encoded += '+';
        } else {
            encoded += '%';
            encoded += hex_digits[byte >> 4];
            encoded += hex_digits[byte & 0x0F];
        }
    }
    return encoded;
}
// Decodes a percent-encoded string.
//  - '+' becomes a space.
//  - "%XX" with two hexadecimal digits (either case) becomes the byte 0xXX.
//  - A '%' that is not followed by two hexadecimal digits is malformed input and
//    is copied through unchanged, instead of asserting (debug builds) or reading
//    uninitialized / out-of-range data (builds with NDEBUG).
static std::string uri_decode(const std::string& str) {
    // Value of a hexadecimal digit, or -1 when `c` is not one.
    auto hex_value = [](unsigned char c) -> int {
        if (c >= '0' && c <= '9') return c - '0';
        if (c >= 'A' && c <= 'F') return c - 'A' + 10;
        if (c >= 'a' && c <= 'f') return c - 'a' + 10;
        return -1;
    };
    const std::string::size_type length = str.length();
    std::string decoded;
    decoded.reserve(length);
    for (std::string::size_type i = 0; i < length; ++i) {
        const char current = str[i];
        if (current == '+') {
            decoded += ' ';
            continue;
        }
        // i + 2 < length guarantees both digit positions exist.
        if (current == '%' && i + 2 < length) {
            const int high = hex_value(static_cast<unsigned char>(str[i + 1]));
            const int low = hex_value(static_cast<unsigned char>(str[i + 2]));
            if (high >= 0 && low >= 0) {
                decoded += static_cast<char>(high * 16 + low);
                i += 2;  // skip the two digits just consumed
                continue;
            }
        }
        decoded += current;
    }
    return decoded;
}
// Splits `s` at every occurrence of `delim` and returns the pieces in order.
// Empty pieces are kept, so the result always holds at least one element.
// An empty `delim` cannot split anything: the result is then just { s }.
// S should be std::string or std::wstring
template<class S>
static std::vector<S> split(const S& s, const S& delim)
{
    std::vector<S> parts;
    // An empty delimiter is found at every position; without this guard the
    // loop below would push empty strings forever.
    if (delim.empty()) {
        parts.push_back(s);
        return parts;
    }
    // size_type rather than unsigned int so offsets are not truncated.
    typename S::size_type start = 0;
    typename S::size_type end = s.find(delim);
    while (end != S::npos)
    {
        parts.push_back(s.substr(start, end - start));
        start = end + delim.length();
        end = s.find(delim, start);
    }
    // Remainder after the last delimiter (possibly empty).
    parts.push_back(s.substr(start));
    return parts;
}
// Convenience overload: `d` may be anything S can be constructed from
// (for example a string literal); it is converted and forwarded to the overload above.
template<class S, class D>
static std::vector<S> split(S s, D d) {
    return split(s, S(d));
}
}; // end string utils
}; // end common namespace