1 Windows/Linux跨平台gbk与utf8字符集编码转换
在 Windows 上使用系统 API(MultiByteToWideChar / WideCharToMultiByte),在 Linux 上使用 iconv,实现 GBK 与 UTF-8 字符集编码的相互转换。
我已将上述功能封装为 single header(单头文件)工具类,具体代码如下:
#ifndef _CHARACTERSET_CONVERT_H_
#define _CHARACTERSET_CONVERT_H_
#include <cstring>
#include <string>
#if defined(WIN32) || defined(_WIN32) || defined(__WIN32__) || defined(__NT__)
#include <Windows.h>
#elif defined(linux) || defined(__linux)
#include <iconv.h>
#include <malloc.h>
#endif
namespace stubbornhuang
{
/// Stateless helper that converts text between GBK and UTF-8.
///
/// On Windows the conversion goes through UTF-16 using MultiByteToWideChar /
/// WideCharToMultiByte; on Linux it uses iconv. All members are static.
///
/// The whole input string is converted by explicit length, so the result is
/// not cut short at an embedded NUL byte.
class CharactersetConvert
{
public:
#if defined(WIN32) || defined(_WIN32) || defined(__WIN32__) || defined(__NT__)
    /// Converts a GBK encoded string to UTF-8.
    ///
    /// @param gbk_str Bytes encoded as GBK (Windows code page 936).
    /// @return The UTF-8 encoded text, or an empty string if the input is
    ///         empty or a Win32 conversion call reports failure.
    static std::string gbk_to_utf8(const std::string& gbk_str)
    {
        return wide_to_multibyte(multibyte_to_wide(gbk_str, gbk_code_page()), CP_UTF8);
    }

    /// Converts a UTF-8 encoded string to GBK.
    ///
    /// @param utf8_str Bytes encoded as UTF-8.
    /// @return The GBK (code page 936) encoded text, or an empty string if the
    ///         input is empty or a Win32 conversion call reports failure.
    static std::string utf8_to_gbk(const std::string& utf8_str)
    {
        return wide_to_multibyte(multibyte_to_wide(utf8_str, CP_UTF8), gbk_code_page());
    }
#elif defined(linux) || defined(__linux)
    /// Converts a byte buffer from one character set to another with iconv.
    ///
    /// @param from_charset iconv name of the source encoding.
    /// @param to_charset   iconv name of the target encoding.
    /// @param inbuf        Source bytes (not modified).
    /// @param inlen        Number of source bytes.
    /// @param outbuf       Destination buffer; it is zero-filled first and is
    ///                     always NUL terminated on success.
    /// @param outlen       Size of @p outbuf in bytes, including the byte that
    ///                     is reserved for the terminating NUL.
    /// @return 0 on success, -1 if an argument is invalid, the encodings are
    ///         not supported, the input is malformed, or the output (plus its
    ///         terminator) does not fit.
    static int code_convert(
        const char* from_charset,
        const char* to_charset,
        char* inbuf, size_t inlen,
        char* outbuf, size_t outlen
    ) {
        if (from_charset == NULL || to_charset == NULL || outbuf == NULL || outlen == 0)
            return -1;

        std::memset(outbuf, 0, outlen);

        // Keep the last byte free so the result can always be terminated.
        size_t written = 0;
        if (!run_iconv(from_charset, to_charset, inbuf, inlen, outbuf, outlen - 1, &written))
            return -1;

        outbuf[written] = '\0';
        return 0;
    }

    /// UTF-8 -> GBK conversion into a caller supplied buffer; see code_convert().
    static int u2g(char* inbuf, size_t inlen, char* outbuf, size_t outlen) {
        return code_convert("UTF-8", "GBK", inbuf, inlen, outbuf, outlen);
    }

    /// GBK -> UTF-8 conversion into a caller supplied buffer; see code_convert().
    static int g2u(char* inbuf, size_t inlen, char* outbuf, size_t outlen) {
        return code_convert("GBK", "UTF-8", inbuf, inlen, outbuf, outlen);
    }

    /// Converts a GBK encoded string to UTF-8.
    ///
    /// @param gbk_str Bytes encoded as GBK.
    /// @return The UTF-8 encoded text, or an empty string if the input is
    ///         empty or iconv reports a failure (e.g. malformed input).
    static std::string gbk_to_utf8(const std::string& gbk_str)
    {
        // Three output bytes per input byte is a deliberately generous bound.
        return convert_string("GBK", "UTF-8", gbk_str, 3);
    }

    /// Converts a UTF-8 encoded string to GBK.
    ///
    /// @param utf8_str Bytes encoded as UTF-8.
    /// @return The GBK encoded text, or an empty string if the input is empty
    ///         or iconv reports a failure (e.g. malformed or unmappable input).
    static std::string utf8_to_gbk(const std::string& utf8_str)
    {
        // Two output bytes per input byte is a deliberately generous bound.
        return convert_string("UTF-8", "GBK", utf8_str, 2);
    }
#endif

private:
#if defined(WIN32) || defined(_WIN32) || defined(__WIN32__) || defined(__NT__)
    /// Windows code page identifier for GBK. Used instead of CP_ACP, which is
    /// whatever ANSI code page the system happens to be configured with.
    static UINT gbk_code_page()
    {
        return 936;
    }

    /// Decodes @p src from @p code_page into UTF-16.
    /// Returns an empty string for empty input or when the API call fails.
    static std::wstring multibyte_to_wide(const std::string& src, UINT code_page)
    {
        // The Win32 API takes the length as int.
        if (src.empty() || src.size() > static_cast<size_t>(0x7FFFFFFF))
            return std::wstring();

        const int src_len = static_cast<int>(src.size());
        const int wide_len = MultiByteToWideChar(code_page, 0, src.data(), src_len, NULL, 0);
        if (wide_len <= 0)
            return std::wstring();

        std::wstring wide(static_cast<size_t>(wide_len), L'\0');
        if (MultiByteToWideChar(code_page, 0, src.data(), src_len, &wide[0], wide_len) <= 0)
            return std::wstring();
        return wide;
    }

    /// Encodes UTF-16 text @p wide into @p code_page.
    /// Returns an empty string for empty input or when the API call fails.
    static std::string wide_to_multibyte(const std::wstring& wide, UINT code_page)
    {
        if (wide.empty() || wide.size() > static_cast<size_t>(0x7FFFFFFF))
            return std::string();

        const int wide_len = static_cast<int>(wide.size());
        const int byte_len = WideCharToMultiByte(code_page, 0, wide.data(), wide_len, NULL, 0, NULL, NULL);
        if (byte_len <= 0)
            return std::string();

        std::string bytes(static_cast<size_t>(byte_len), '\0');
        if (WideCharToMultiByte(code_page, 0, wide.data(), wide_len, &bytes[0], byte_len, NULL, NULL) <= 0)
            return std::string();
        return bytes;
    }
#elif defined(linux) || defined(__linux)
    /// Runs one complete iconv conversion.
    ///
    /// @param written Receives the number of bytes stored in @p outbuf; only
    ///                set when the function returns true.
    /// @return true when the whole input was converted into @p outbuf.
    static bool run_iconv(
        const char* from_charset,
        const char* to_charset,
        char* inbuf, size_t inlen,
        char* outbuf, size_t outcap,
        size_t* written
    ) {
        iconv_t cd = iconv_open(to_charset, from_charset);
        // iconv_open signals failure with (iconv_t)-1, not with a null handle.
        if (cd == (iconv_t)(-1))
            return false;

        char* in_ptr = inbuf;
        size_t in_left = inlen;
        char* out_ptr = outbuf;
        size_t out_left = outcap;
        const size_t failed = static_cast<size_t>(-1);

        bool ok = iconv(cd, &in_ptr, &in_left, &out_ptr, &out_left) != failed;
        if (ok)
        {
            // Flush any pending shift sequence (a no-op for stateless encodings).
            ok = iconv(cd, NULL, NULL, &out_ptr, &out_left) != failed;
        }
        iconv_close(cd);

        if (ok)
            *written = outcap - out_left;
        return ok;
    }

    /// Converts @p input between two encodings and returns the result as a
    /// string, or an empty string on failure.
    ///
    /// @param max_out_per_in Upper bound on output bytes produced per input
    ///                       byte; used to size the output buffer.
    static std::string convert_string(
        const char* from_charset,
        const char* to_charset,
        const std::string& input,
        size_t max_out_per_in
    ) {
        if (input.empty())
            return std::string();

        std::string output(input.size() * max_out_per_in, '\0');
        size_t written = 0;
        // iconv's signature wants a non-const pointer; this code never writes
        // through it.
        char* in = const_cast<char*>(input.data());
        if (!run_iconv(from_charset, to_charset, in, input.size(), &output[0], output.size(), &written))
            return std::string();

        output.resize(written);
        return output;
    }
#endif
};
}
#endif // !_CHARACTERSET_CONVERT_H_
使用示例
#include <iostream>
#include "CharactersetConvert.h"
using namespace std;
int main()
{
std::string nihao = "你好";
std::string nihao_utf8 = stubbornhuang::CharactersetConvert::gbk_to_utf8(nihao);
std::cout << nihao_utf8 << std::endl;
std::string nihao_gbk = stubbornhuang::CharactersetConvert::utf8_to_gbk(nihao_utf8);
std::cout << nihao_gbk << std::endl;
return 0;
}
参考链接
本文作者:StubbornHuang
版权声明:本文为站长原创文章,如果转载请注明原文链接!
原文标题:C++ – Windows/Linux跨平台gbk与utf8字符集编码转换
原文链接:https://www.stubbornhuang.com/2524/
发布于:2023年02月27日 17:37:17
修改于:2023年06月21日 17:08:26
声明:本站所有文章,如无特殊说明或标注,均为本站原创发布。任何个人或组织,在未征得本站同意时,禁止复制、盗用、采集、发布本站内容到任何网站、书籍等各类媒体平台。如若本站内容侵犯了原著者的合法权益,可联系我们进行处理。
评论
52