本文主要以实例形式讨论了VC编译环境下,实现字符串和文件编码方式转换的方法,在linux下请使用Strconv来实现。具体方法如下:
一、文件编码格式转换
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
|
// Converts an ANSI (GB2312 on a Simplified Chinese system) text file into a
// UTF-16 Little Endian file with a byte-order mark.
// NOTE(review): CP_ACP is the system's active ANSI code page; pass 936
// explicitly if the source must be decoded as GB2312 on any system.
// NOTE(review): only the first POOL_BUFF_SIZE bytes are read, so a longer file
// is truncated and a multi-byte character may be cut at the boundary.
if ((file_handle = fopen(filenam, "rb")) != NULL)
{
    // Read the source file as raw bytes.
    numread = fread(str_buf_pool, sizeof(char), POOL_BUFF_SIZE, file_handle);
    fclose(file_handle);

    // fread() does not NUL-terminate the buffer, so the byte count is passed
    // explicitly instead of -1. With an explicit length the returned count is
    // the exact number of wide characters, with no terminator included.
    nLen = 0;
    if (numread > 0)
    {
        nLen = MultiByteToWideChar(CP_ACP, 0, str_buf_pool, (int)numread, NULL, 0);
    }
    if (nLen > 0)
    {
        // NOTE(review): assumes str_unicode_buf_pool can hold nLen wide
        // characters -- its declaration is not visible here; confirm.
        nLen = MultiByteToWideChar(CP_ACP, 0, str_buf_pool, (int)numread,
                                   (LPWSTR)str_unicode_buf_pool, nLen);
    }

    // Byte-order mark of a UTF-16 Little Endian file: 0xFF 0xFE.
    // (A UTF-16 Big Endian file starts with 0xFE 0xFF instead.)
    unicode_little_file_header[0] = 0xFF;
    unicode_little_file_header[1] = 0xFE;

    // Write the target file: BOM first, then the converted text.
    if ((file_handle = fopen(filenewname, "wb+")) != NULL)
    {
        fwrite(unicode_little_file_header, sizeof(char), 2, file_handle);
        if (nLen > 0)
        {
            // The element size is one wide character, not the size of a pointer.
            numwrite = fwrite(str_unicode_buf_pool, sizeof(wchar_t), (size_t)nLen, file_handle);
        }
        fclose(file_handle);
    }
}
二、字符串编码格式转换
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
|
//GB2312 转换成 Unicode: wchar_t * GB2312ToUnicode( const char * szGBString) { UINT nCodePage = 936; //GB2312 int nLength=MultiByteToWideChar(nCodePage,0,szGBString,-1,NULL,0); wchar_t * pBuffer = new wchar_t [nLength+1]; MultiByteToWideChar(nCodePage,0,szGBString,-1,pBuffer,nLength); pBuffer[nLength]=0; return pBuffer; } //BIG5 转换成 Unicode: wchar_t * BIG5ToUnicode( const char * szBIG5String) { UINT nCodePage = 950; //BIG5 int nLength=MultiByteToWideChar(nCodePage,0,szBIG5String,-1,NULL,0); wchar_t * pBuffer = new wchar_t [nLength+1]; MultiByteToWideChar(nCodePage,0,szBIG5String,-1,pBuffer,nLength); pBuffer[nLength]=0; return pBuffer; } //Unicode 转换成 GB2312: char * UnicodeToGB2312( const wchar_t * szUnicodeString) { UINT nCodePage = 936; //GB2312 int nLength=WideCharToMultiByte(nCodePage,0,szUnicodeString,-1,NULL,0,NULL,NULL); char * pBuffer= new char [nLength+1]; WideCharToMultiByte(nCodePage,0,szUnicodeString,-1,pBuffer,nLength,NULL,NULL); pBuffer[nLength]=0; return pBuffer; } //Unicode 转换成 BIG5: char * UnicodeToBIG5( const wchar_t * szUnicodeString) { UINT nCodePage = 950; //BIG5 int nLength=WideCharToMultiByte(nCodePage,0,szUnicodeString,-1,NULL,0,NULL,NULL); char * pBuffer= new char [nLength+1]; WideCharToMultiByte(nCodePage,0,szUnicodeString,-1,pBuffer,nLength,NULL,NULL); pBuffer[nLength]=0; return pBuffer; } //繁体中文BIG5 转换成 简体中文 GB2312 char * BIG5ToGB2312( const char * szBIG5String) { LCID lcid = MAKELCID(MAKELANGID(LANG_CHINESE,SUBLANG_CHINESE_SIMPLIFIED),SORT_CHINESE_PRC); wchar_t * szUnicodeBuff = BIG5ToUnicode(szBIG5String); char * szGB2312Buff = UnicodeToGB2312(szUnicodeBuff); int nLength = LCMapString(lcid,LCMAP_SIMPLIFIED_CHINESE, szGB2312Buff,-1,NULL,0); char * pBuffer = new char [nLength + 1]; LCMapString(0x0804,LCMAP_SIMPLIFIED_CHINESE,szGB2312Buff,-1,pBuffer,nLength); pBuffer[nLength] = 0; delete [] szUnicodeBuff; delete [] szGB2312Buff; return pBuffer; } //简体中文 GB2312 转换成 繁体中文BIG5 char * GB2312ToBIG5( const char * szGBString) { LCID lcid = 
MAKELCID(MAKELANGID(LANG_CHINESE,SUBLANG_CHINESE_SIMPLIFIED),SORT_CHINESE_PRC); int nLength = LCMapString(lcid,LCMAP_TRADITIONAL_CHINESE,szGBString,-1,NULL,0); char * pBuffer= new char [nLength+1]; LCMapString(lcid,LCMAP_TRADITIONAL_CHINESE,szGBString,-1,pBuffer,nLength); pBuffer[nLength]=0; wchar_t * pUnicodeBuff = GB2312ToUnicode(pBuffer); char * pBIG5Buff = UnicodeToBIG5(pUnicodeBuff); delete [] pBuffer; delete [] pUnicodeBuff; return pBIG5Buff; } |
三、API 函数:MultiByteToWideChar参数说明
第一个参数为代码页, 用 GetLocaleInfo 函数获取当前系统的代码页,936: 简体中文, 950: 繁体中文
第二个参数为选项,一般用 0 就可以了
第三个参数为 ANSI 字符串的地址, 这个字符串是第一个参数指定的语言的 ANSI 字符串 (AnsiString)
第四个参数为 ANSI 字符串的长度(以字节为单位);如果用 -1,就表示这是以 0 作为结束符的字符串,此时函数返回的长度包含结束符
第五个参数为转化生成的 unicode 字符串 (WideString) 缓存的地址;当第六个参数为 0 时,函数不会写入该缓存(此时可以传 NULL),只返回转换所需的字符数
第六个参数为转化生成的 unicode 字符串缓存的容量,也就是能容纳多少个 UNICODE 字符(以 wchar_t 为单位,不是字节数);传 0 表示只计算所需的长度。