//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////////
// CodeConvert.h: interface for the CCodeConvert class.
//
//////////////////////////////////////////////////////////////////////
#if !defined(AFX_CODECONVERT_H__9ECD30AF_5D65_436C_95E4_E412B19529D2__INCLUDED_)
#define AFX_CODECONVERT_H__9ECD30AF_5D65_436C_95E4_E412B19529D2__INCLUDED_
#if _MSC_VER > 1000
#pragma once
#endif // _MSC_VER > 1000
// Helpers for converting text between character encodings
// (UTF-8, the ANSI code page used for GB2312/GBK, and UTF-16).
//
// The class declares no data members; every conversion routine is a static
// helper. The constructor and virtual destructor are kept so existing code
// that instantiates or derives from the class keeps compiling.
class CCodeConvert
{
public:
    // Direction selectors accepted by Convert().
    // (Declared without a storage-class specifier: `static enum { ... };`
    // is not valid standard C++.)
    enum {
        UTF8_TO_GB2312,   // source file is UTF-8, destination is GB2312
        GB2312_TO_UTF8    // source file is GB2312, destination is UTF-8
    };

    // Converts strGBK in place from the ANSI code page to UTF-8.
    // Static like its sibling ConvertUtf8ToGBK; calls through an instance
    // (obj.ConvertGBKToUtf8(s)) remain valid.
    static void ConvertGBKToUtf8(CString& strGBK);

    // Converts strUtf8 in place from UTF-8 to the ANSI code page.
    static void ConvertUtf8ToGBK(CString& strUtf8);

    // Reads srcfile, converts its contents in the direction selected by
    // dwFlag (one of the enumerators above) and writes the result to destfile.
    static void Convert(LPCTSTR srcfile, LPCTSTR destfile, DWORD dwFlag=UTF8_TO_GB2312);

    // Converts the pLen bytes at pText from UTF-8 to GB2312; result in pOut.
    static void UTF_8ToGB2312(std::string& pOut,char *pText, int pLen);

    // Converts the pLen bytes at pText from GB2312 to UTF-8; result in pOut.
    static void GB2312ToUTF_8(std::string& pOut,char *pText, int pLen);

    // Unicode (one UTF-16 code unit) -> UTF-8; writes three bytes to pOut.
    static void UnicodeToUTF_8(char* pOut,WCHAR* pText);

    // GB2312 (one double-byte character) -> Unicode.
    static void Gb2312ToUnicode(WCHAR* pOut,char *gbBuffer);

    // Unicode (one UTF-16 code unit) -> GB2312; pOut must hold two bytes.
    static void UnicodeToGB2312(char* pOut,unsigned short uData);

    // UTF-8 (one encoded character) -> Unicode.
    static void UTF_8ToUnicode(WCHAR* pOut,char* pText);

    CCodeConvert();
    virtual ~CCodeConvert();
};
#endif // !defined(AFX_CODECONVERT_H__9ECD30AF_5D65_436C_95E4_E412B19529D2__INCLUDED_)
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////////
// CodeConvert.cpp: implementation of the CCodeConvert class.
//
//////////////////////////////////////////////////////////////////////
#include "stdafx.h"
#include "popsvr.h"
#include "CodeConvert.h"
// Debug-build boilerplate: keep this translation unit's file name in
// THIS_FILE and route every `new` expression below through MFC's DEBUG_NEW
// (which, per the MFC documentation, records file/line for allocation
// tracking). Compiled out entirely when _DEBUG is not defined.
#ifdef _DEBUG
#undef THIS_FILE
static char THIS_FILE[]=__FILE__;
#define new DEBUG_NEW
#endif
//////////////////////////////////////////////////////////////////////
// Construction/Destruction
//////////////////////////////////////////////////////////////////////
// Default constructor. The class declares no data members, so there is
// nothing to initialise.
CCodeConvert::CCodeConvert()
{
    // Intentionally empty.
}
// Destructor (declared virtual in the class). No resources are owned, so
// there is nothing to release.
CCodeConvert::~CCodeConvert()
{
    // Intentionally empty.
}
// Decodes the single UTF-8 encoded character starting at pText into one
// UTF-16 code unit.
//
//   pOut  - receives the decoded code unit.
//   pText - points at the lead byte of the sequence; one, two or three
//           bytes are read depending on that lead byte.
//
// Three-byte sequences (lead byte 0xE0..0xEF) decode exactly as before.
// One-byte (ASCII) and two-byte sequences, which the old code ran through
// the three-byte formula and so turned into garbage, are now decoded
// properly. A stray continuation byte or a four-byte lead byte cannot be
// expressed as one code unit and yields U+FFFD (REPLACEMENT CHARACTER).
// Continuation bytes are masked, not validated, as in the original.
void CCodeConvert::UTF_8ToUnicode(WCHAR* pOut,char *pText)
{
    // Work on unsigned bytes so the bit operations do not depend on whether
    // plain char is signed.
    const unsigned char* bytes = reinterpret_cast<const unsigned char*>(pText);
    const unsigned int lead = bytes[0];
    unsigned int codeUnit;

    if (lead < 0x80)
    {
        // 0xxxxxxx : plain ASCII.
        codeUnit = lead;
    }
    else if ((lead & 0xE0) == 0xC0)
    {
        // 110xxxxx 10xxxxxx
        codeUnit = ((lead & 0x1F) << 6) | (bytes[1] & 0x3F);
    }
    else if ((lead & 0xF0) == 0xE0)
    {
        // 1110xxxx 10xxxxxx 10xxxxxx
        codeUnit = ((lead & 0x0F) << 12)
                 | ((bytes[1] & 0x3F) << 6)
                 |  (bytes[2] & 0x3F);
    }
    else
    {
        codeUnit = 0xFFFD;
    }

    // Store the value arithmetically instead of poking individual bytes, so
    // the result does not depend on the in-memory byte order of WCHAR.
    *pOut = static_cast<WCHAR>(codeUnit);
}
void CCodeConvert::UnicodeToGB2312(char* pOut,unsigned short uData)
{
//WideCharToMultiByte(CP_ACP,NULL,&uData,1,pOut,sizeof(WCHAR),NULL,NULL);
WideCharToMultiByte(CP_ACP,WC_COMPOSITECHECK,&uData,1,pOut,sizeof(WCHAR),NULL,NULL);
return;
}
// Converts one double-byte character in the system ANSI code page (CP_ACP)
// to a UTF-16 code unit.
//
//   pOut     - receives the decoded code unit.
//   gbBuffer - points at the two bytes of the character; both are read.
//
// If the two bytes do not decode to exactly one code unit, *pOut is set to
// U+FFFD (REPLACEMENT CHARACTER). The old code ignored the API result and
// left *pOut with an indeterminate value in that case.
void CCodeConvert::Gb2312ToUnicode(WCHAR* pOut,char *gbBuffer)
{
    WCHAR decoded = 0;
    const int unitsWritten =
        ::MultiByteToWideChar(CP_ACP, MB_PRECOMPOSED, gbBuffer, 2, &decoded, 1);

    *pOut = (unitsWritten == 1) ? decoded : static_cast<WCHAR>(0xFFFD);
}
// Encodes one UTF-16 code unit as a three-byte UTF-8 sequence.
//
//   pOut  - receives exactly three bytes (no terminator is written).
//   pText - points at the code unit to encode.
//
// Fix: the middle byte carries bits 11..6 of the code unit. The old code
// took bits 7..6 from the HIGH byte (pchar[1] & 0xC0) instead of the low
// byte, which corrupted most characters.
//
// NOTE(review): three bytes are always produced, so code units below U+0800
// come out in a non-shortest form. That is kept because this interface has
// no way to report a different length; verify whether callers need strict
// UTF-8 for such characters.
void CCodeConvert::UnicodeToUTF_8(char* pOut,WCHAR* pText)
{
    // Take the numeric value rather than aliasing the WCHAR as bytes, so the
    // encoding does not depend on the in-memory byte order.
    const unsigned int codeUnit = static_cast<unsigned int>(*pText) & 0xFFFF;

    pOut[0] = static_cast<char>(0xE0 |  (codeUnit >> 12));          // 1110xxxx
    pOut[1] = static_cast<char>(0x80 | ((codeUnit >> 6) & 0x3F));   // 10xxxxxx
    pOut[2] = static_cast<char>(0x80 |  (codeUnit       & 0x3F));   // 10xxxxxx
}
// GB2312 =>UTF-8
void CCodeConvert::GB2312ToUTF_8(string& pOut,char *pText, int pLen)
{
char buf[4];
char* rst = new char[pLen + (pLen >> 2) + 2];
memset(buf,0,4);
memset(rst,0,pLen + (pLen >> 2) + 2);
int i = 0;
int j = 0;
while(i {
//如果是英文直接复制就可以
if( *(pText + i) >= 0)
{
rst[j++] = pText[i++];
}
else
{
WCHAR pbuffer;
Gb2312ToUnicode(&pbuffer,pText+i);
UnicodeToUTF_8(buf,&pbuffer);
unsigned short int tmp = 0;
tmp = rst[j] = buf[0];
tmp = rst[j+1] = buf[1];
tmp = rst[j+2] = buf[2];
j += 3;
i += 2;
}
}
rst[j] = '/0';
//返回结果
pOut = rst;
delete []rst;
return;
}
//UTF-8 => GB2312
void CCodeConvert::UTF_8ToGB2312(string &pOut, char *pText, int pLen)
{
TRACE("/r/nCCodeConvert::UTF_8ToGB2312");
char * newBuf = new char[pLen+1];
newBuf[pLen]=0x00;
char Ctemp[4];
memset(Ctemp,0,4);
int i =0;
int j = 0;
while(i {
if(pText[i] > 0)
{
newBuf[j++] = pText[i++];
}
else
{
WCHAR Wtemp;
UTF_8ToUnicode(&Wtemp,pText + i);
UnicodeToGB2312(Ctemp,Wtemp);
newBuf[j] = Ctemp[0];
newBuf[j + 1] = Ctemp[1];
i += 3;
j += 2;
}
}//end while
newBuf[j] = '/0';
pOut = newBuf;
delete []newBuf;
return;
//////////////////////////////////////////////////////////////////////////
}
// UTF-8 => GBK
//
// Converts strUtf8 in place from UTF-8 to the system ANSI code page (CP_ACP).
//
//   strUtf8 - in: NUL-terminated UTF-8 text; out: the same text in the ANSI
//             code page. Left unchanged if either conversion step fails
//             (the old code silently replaced it with an empty string).
//
// Changes from the previous version: buffers are WCHAR / std::string instead
// of raw `unsigned short` / `char` arrays, so the code compiles with a native
// wchar_t and cannot leak when an allocation throws; API results are
// checked. The trace string's line break was garbled as "/r/n" and is
// restored.
// NOTE(review): the CString is handed to an API that takes a narrow string,
// so this assumes a non-UNICODE (MBCS/ANSI) build, as the original did.
void CCodeConvert::ConvertUtf8ToGBK(CString& strUtf8)
{
    TRACE("\r\nCCodeConvert::UTF_8ToGBK");

    LPCTSTR source = strUtf8;

    // Step 1: UTF-8 -> UTF-16. Passing -1 converts up to and including the
    // terminating NUL, so the returned length counts the terminator.
    const int wideLen = ::MultiByteToWideChar(CP_UTF8, 0, source, -1, NULL, 0);
    if (wideLen <= 0)
    {
        return;
    }
    std::wstring wide(wideLen, L'\0');
    if (::MultiByteToWideChar(CP_UTF8, 0, source, -1, &wide[0], wideLen) <= 0)
    {
        return;
    }

    // Step 2: UTF-16 -> ANSI code page, again including the terminator.
    const int gbkLen =
        ::WideCharToMultiByte(CP_ACP, 0, wide.c_str(), -1, NULL, 0, NULL, NULL);
    if (gbkLen <= 0)
    {
        return;
    }
    std::string gbk(gbkLen, '\0');
    if (::WideCharToMultiByte(CP_ACP, 0, wide.c_str(), -1,
                              &gbk[0], gbkLen, NULL, NULL) <= 0)
    {
        return;
    }

    // c_str() stops at the terminator written by the API.
    strUtf8 = gbk.c_str();
}
// Converts the contents of one file into another encoding.
//
//   srcfile  - path of the file to read (opened read-only).
//   destfile - path of the file to create or overwrite with the result.
//   dwFlag   - UTF8_TO_GB2312 (0) or GB2312_TO_UTF8 (1). Any other value
//              does nothing and creates no output file, as before.
//
// CFile reports failures by throwing (CFileException per the MFC docs);
// those exceptions propagate to the caller.
//
// Changes from the previous version:
//   * The source is opened with modeRead instead of modeReadWrite, so
//     read-only files can be converted.
//   * Buffers are std::string, so nothing leaks when CFile throws.
//   * GB2312_TO_UTF8 now performs GB2312 -> UTF-8. The old `case 1` ran a
//     UTF-8 -> GBK conversion, contradicting the enumerator's name.
//   * Read/Write replace the obsolete ReadHuge/WriteHuge, and the byte count
//     actually read is honoured.
//   * The trace string's line break was garbled as "/r/n" and is restored.
// NOTE(review): the file length is narrowed to UINT, so files of 4 GiB or
// more are not supported.
void CCodeConvert::Convert(LPCTSTR srcfile, LPCTSTR destfile, DWORD dwFlag)
{
    TRACE("\r\nCCodeConvert::Convert");

    // Load the whole source file into memory.
    std::string source;
    {
        CFile file(srcfile, CFile::modeRead);
        const UINT fileLen = static_cast<UINT>(file.GetLength());
        if (fileLen > 0)
        {
            source.resize(fileLen);
            const UINT bytesRead = file.Read(&source[0], fileLen);
            source.resize(bytesRead);
        }
        file.Close();
    }

    // Convert in memory. An empty source converts to an empty result.
    std::string converted;
    switch (dwFlag)
    {
    case UTF8_TO_GB2312:    // UTF-8 => GB2312
        if (!source.empty())
        {
            UTF_8ToGB2312(converted, &source[0], static_cast<int>(source.length()));
        }
        break;

    case GB2312_TO_UTF8:    // GB2312 => UTF-8
        if (!source.empty())
        {
            GB2312ToUTF_8(converted, &source[0], static_cast<int>(source.length()));
        }
        break;

    default:
        // Unknown direction: nothing to write.
        return;
    }

    // Write the result, replacing any existing destination file.
    CFile newfile(destfile, CFile::modeCreate | CFile::modeWrite);
    if (!converted.empty())
    {
        newfile.Write(converted.data(), static_cast<UINT>(converted.length()));
    }
    newfile.Close();
}
// GBK => UTF-8
//
// Converts strGBK in place from the system ANSI code page (CP_ACP) to UTF-8.
//
//   strGBK - in: NUL-terminated text in the ANSI code page; out: the same
//            text as UTF-8. Left unchanged if either conversion step fails
//            (the old code silently replaced it with an empty string).
//
// Changes from the previous version: buffers are WCHAR / std::string instead
// of raw `unsigned short` / `char` arrays, so the code compiles with a native
// wchar_t and cannot leak when an allocation throws; API results are checked.
// NOTE(review): the CString is handed to an API that takes a narrow string,
// so this assumes a non-UNICODE (MBCS/ANSI) build, as the original did.
void CCodeConvert::ConvertGBKToUtf8(CString &strGBK)
{
    LPCTSTR source = strGBK;

    // Step 1: ANSI code page -> UTF-16. Passing -1 converts up to and
    // including the terminating NUL, so the length counts the terminator.
    const int wideLen = ::MultiByteToWideChar(CP_ACP, 0, source, -1, NULL, 0);
    if (wideLen <= 0)
    {
        return;
    }
    std::wstring wide(wideLen, L'\0');
    if (::MultiByteToWideChar(CP_ACP, 0, source, -1, &wide[0], wideLen) <= 0)
    {
        return;
    }

    // Step 2: UTF-16 -> UTF-8, again including the terminator. The
    // default-char arguments must be NULL for CP_UTF8.
    const int utf8Len =
        ::WideCharToMultiByte(CP_UTF8, 0, wide.c_str(), -1, NULL, 0, NULL, NULL);
    if (utf8Len <= 0)
    {
        return;
    }
    std::string utf8(utf8Len, '\0');
    if (::WideCharToMultiByte(CP_UTF8, 0, wide.c_str(), -1,
                              &utf8[0], utf8Len, NULL, NULL) <= 0)
    {
        return;
    }

    // c_str() stops at the terminator written by the API.
    strGBK = utf8.c_str();
}