Home » U++ Library support » U++ Libraries and TheIDE: i18n, Unicode and Internationalization » 16 bits wchar
Re: 16 bits wchar [message #12302 is a reply to message #8036] |
Wed, 24 October 2007 11:58   |
cbpporter
Messages: 1428 Registered: September 2007
|
Ultimate Contributor |
|
|
I've been sick and I didn't leave the house so I couldn't post. But here is my code:
int utf8codepointEE(const byte *s, const byte *z, int &lmod, int & dep)
{
if (s < z)
{
dword code = (byte)*s++;
int codePoint = 0;
if(code < 0x80)
{
dep = 1;
lmod = 1;
return code;
}
else if (code < 0xC2)
{
dep = 1;
lmod = 3;
return 0xEE00 + code;
}
else if (code < 0xE0)
{
if(s >= z)
{
dep = 1;
lmod = 3;
return 0xEE00 + code;
}
if (s[0] < 0x80 || s[0] >= 0xC0)
{
dep = 1;
lmod = 3;
return 0xEE00 + code;
}
codePoint = ((code - 0xC0) << 6) + *s - 0x80;
if(codePoint < 0x80 || codePoint > 0x07FF)
{
dep = 1;
lmod = 3;
return 0xEE00 + code;
}
else
{
dep = 2;
lmod = 2;
return codePoint;
}
}
else if (code < 0xF0)
{
if(s + 1 >= z)
{
dep = 1;
lmod = 3;
return 0xEE00 + code;
}
if(s[0] < 0x80 || s[0] >= 0xC0 || s[1] < 0x80 || s[1] >= 0xC0)
{
dep = 1;
lmod = 3;
return 0xEE00 + code;
}
codePoint = ((code - 0xE0) << 12) + ((s[0] - 0x80) << 6) + s[1] - 0x80;
if(codePoint < 0x0800 || codePoint > 0xFFFF)
{
dep = 1;
lmod = 3;
return 0xEE00 + code;
}
else
{
dep = 3;
lmod = 3;
return codePoint;
}
}
else if (code < 0xF5)
{
if(s + 2 >= z)
{
dep = 1;
lmod = 3;
return 0xEE00 + code;
}
if(s[0] < 0x80 || s[0] >= 0xc0 || s[1] < 0x80 || s[1] >= 0xc0 ||
s[2] < 0x80 || s[2] >= 0xc0)
{
dep = 1;
lmod = 3;
return 0xEE00 + code;
}
codePoint = ((code - 0xf0) << 18) + ((s[0] - 0x80) << 12) +
((s[1] - 0x80) << 6) + s[2] - 0x80;
if(codePoint < 0x010000 || codePoint > 0x10FFFF)
{
dep = 1;
lmod = 3;
return 0xEE00 + code;
}
else
{
dep = 4;
lmod = 4;
return codePoint;
}
}
else
{
dep = 1;
lmod = 3;
return 0xEE00 + code;
}
}
else
return -1;
}
// Returns the number of bytes needed to re-encode the first len bytes of _s
// as UTF-8 when every byte that utf8codepointEE cannot decode is replaced by
// the code point 0xEE00 + byte (three bytes each).
//
// _s  - input bytes; need not be valid UTF-8.
// len - number of bytes to examine.
int utf8lenEE(const char *_s, int len)
{
	const byte *s = (const byte *)_s;
	const byte *lim = s + len;
	int length = 0;
	while(s < lim) {
		int lmod, dep;
		int cp = utf8codepointEE(s, lim, lmod, dep);
		// -1 is only returned for an empty range, which s < lim rules out.
		ASSERT(cp != -1);
		length += lmod;
		s += dep;
	}
	return length;
}
// Returns the number of bytes produced when the first len bytes of _s are
// decoded with utf8codepointEE and every value in 0xEE00..0xEEFF is written
// as one raw byte instead of as a UTF-8 sequence.
//
// _s  - input bytes.
// len - number of bytes to examine.
int utf8lenDeEE(const char *_s, int len)
{
	const byte *s = (const byte *)_s;
	const byte *lim = s + len;
	int length = 0;
	while(s < lim) {
		int lmod, dep;
		int cp = utf8codepointEE(s, lim, lmod, dep);
		// -1 is only returned for an empty range, which s < lim rules out.
		ASSERT(cp != -1);
		if((cp & 0xFFFFFF00) == 0xEE00)
			length++;        // value in 0xEE00..0xEEFF collapses to a single byte
		else
			length += lmod;  // everything else keeps its UTF-8 length
		s += dep;
	}
	return length;
}
// Writes codePoint at s as a UTF-8 sequence and returns the position just
// past the last byte written.
//
// s         - destination; the caller must provide room for 1 byte below
//             0x80, 2 below 0x800, 3 below 0x10000 and 4 otherwise.
// codePoint - value to encode; no validity checking is performed.
inline byte * putUtf8(byte *s, int codePoint)
{
	if(codePoint < 0x80)
		*s++ = (byte)codePoint;
	else if(codePoint < 0x0800) {
		*s++ = (byte)(0xC0 | (codePoint >> 6));
		*s++ = (byte)(0x80 | (codePoint & 0x3F));
	}
	else if(codePoint < 0x10000) {
		// The bound must be 0x10000, not 0xFFFF: U+FFFF is the last value of
		// the three-byte form. With the smaller bound it would be emitted as
		// the overlong four-byte sequence F0 8F BF BF, one byte more than a
		// three-byte length computation allows for.
		*s++ = (byte)(0xE0 | (codePoint >> 12));
		*s++ = (byte)(0x80 | ((codePoint >> 6) & 0x3F));
		*s++ = (byte)(0x80 | (codePoint & 0x3F));
	}
	else {
		*s++ = (byte)(0xF0 | (codePoint >> 18));
		*s++ = (byte)(0x80 | ((codePoint >> 12) & 0x3F));
		*s++ = (byte)(0x80 | ((codePoint >> 6) & 0x3F));
		*s++ = (byte)(0x80 | (codePoint & 0x3F));
	}
	return s;
}
// Re-encodes the first _len bytes of _s as UTF-8. Sequences that
// utf8codepointEE decodes are written back unchanged; each byte it cannot
// decode comes back as the code point 0xEE00 + byte and is written as such.
//
// _s   - input bytes; only read, never modified.
// _len - number of input bytes.
//
// Returns the converted string, or "" if the length pass or the decoder
// reports -1.
String ToUtf8EE(const char *_s, int _len)
{
	int tlen = utf8lenEE(_s, _len);
	// Defensive: the utf8lenEE visible in this file only accumulates
	// non-negative lengths, so this is not expected to trigger.
	if(tlen == -1)
		return "";
	StringBuffer result(tlen);
	const byte *s = (const byte *)_s;   // input is read-only; keep it const
	const byte *lim = s + _len;
	byte *z = (byte *)~result;
	int length = 0;                     // bytes written, cross-checked against tlen
	while(s < lim) {
		int lmod, dep;
		int cp = utf8codepointEE(s, lim, lmod, dep);
		if(cp == -1)
			return "";
		z = putUtf8(z, cp);
		length += lmod;
		s += dep;
	}
	ASSERT(length == tlen);
	return result;
}
// Inverse of the 0xEE00 escaping: decodes the first _len bytes of _s with
// utf8codepointEE and writes every value in 0xEE00..0xEEFF as the single
// byte (value - 0xEE00); all other code points are written back as UTF-8.
//
// _s   - input bytes; only read, never modified.
// _len - number of input bytes.
//
// Returns the converted string, or "" if the length pass or the decoder
// reports -1.
String FromUtf8EE(const char *_s, int _len)
{
	int tlen = utf8lenDeEE(_s, _len);
	// Defensive: the utf8lenDeEE visible in this file only accumulates
	// non-negative lengths, so this is not expected to trigger.
	if(tlen == -1)
		return "";
	StringBuffer result(tlen);
	const byte *s = (const byte *)_s;   // input is read-only; keep it const
	const byte *lim = s + _len;
	byte *z = (byte *)~result;
	int length = 0;                     // bytes written, cross-checked against tlen
	while(s < lim) {
		int lmod, dep;
		int cp = utf8codepointEE(s, lim, lmod, dep);
		if(cp == -1)
			return "";
		if((cp & 0xFFFFFF00) == 0xEE00) {
			// Escaped byte: restore the original raw byte.
			*z++ = (byte)(cp - 0xEE00);
			length += 1;
		}
		else {
			z = putUtf8(z, cp);
			length += lmod;
		}
		s += dep;
	}
	ASSERT(length == tlen);
	return result;
}
It is up to you to decide what exactly you want to do with Unicode. And if you let me know, I could help. So please decide, and if you want to leave it as it is, I will find something else to work on.
|
|
|
 |
|
16 bits wchar
By: riri on Mon, 05 February 2007 17:19
|
 |
|
Re: 16 bits wchar
By: mirek on Mon, 05 February 2007 23:07
|
 |
|
Re: 16 bits wchar
By: cbpporter on Tue, 25 September 2007 22:03
|
 |
|
Re: 16 bits wchar
By: mirek on Tue, 25 September 2007 23:18
|
 |
|
Re: 16 bits wchar
By: cbpporter on Wed, 26 September 2007 07:43
|
 |
|
Re: 16 bits wchar
By: mirek on Wed, 26 September 2007 08:48
|
 |
|
Re: 16 bits wchar
By: sergei on Wed, 26 September 2007 14:55
|
 |
|
Re: 16 bits wchar
By: cbpporter on Wed, 26 September 2007 15:37
|
 |
|
Re: 16 bits wchar
By: mirek on Wed, 26 September 2007 22:40
|
 |
|
Re: 16 bits wchar
|
 |
|
Re: 16 bits wchar
By: mirek on Mon, 01 October 2007 14:28
|
 |
|
Re: 16 bits wchar
|
 |
|
Re: 16 bits wchar
By: mirek on Wed, 03 October 2007 10:11
|
 |
|
Re: 16 bits wchar
|
 |
|
Re: 16 bits wchar
By: mirek on Wed, 03 October 2007 10:42
|
 |
|
Re: 16 bits wchar
By: mirek on Wed, 03 October 2007 10:26
|
 |
|
Re: 16 bits wchar
|
 |
|
Re: 16 bits wchar
By: mirek on Wed, 03 October 2007 12:10
|
 |
|
Re: 16 bits wchar
|
 |
|
Re: 16 bits wchar
By: mirek on Wed, 03 October 2007 21:40
|
 |
|
Re: 16 bits wchar
|
 |
|
Re: 16 bits wchar
By: mirek on Thu, 04 October 2007 17:33
|
 |
|
Re: 16 bits wchar
|
 |
|
Re: 16 bits wchar
|
 |
|
Re: 16 bits wchar
|
 |
|
Re: 16 bits wchar
By: mirek on Fri, 12 October 2007 11:52
|
 |
|
Re: 16 bits wchar
By: mirek on Fri, 12 October 2007 11:59
|
 |
|
Re: 16 bits wchar
|
 |
|
Re: 16 bits wchar
|
 |
|
Re: 16 bits wchar
By: mirek on Fri, 12 October 2007 17:03
|
 |
|
Re: 16 bits wchar
|
 |
|
Re: 16 bits wchar
|
 |
|
Re: 16 bits wchar
|
 |
|
Re: 16 bits wchar
By: mirek on Sun, 21 October 2007 20:19
|
 |
|
Re: 16 bits wchar
|
 |
|
Re: 16 bits wchar
By: mirek on Sun, 21 October 2007 23:57
|
 |
|
Re: 16 bits wchar
|
 |
|
Re: 16 bits wchar
By: mirek on Mon, 22 October 2007 10:47
|
 |
|
Re: 16 bits wchar
|
 |
|
Re: 16 bits wchar
By: mirek on Mon, 22 October 2007 19:37
|
 |
|
Re: 16 bits wchar
By: mirek on Sun, 21 October 2007 20:14
|
 |
|
Re: 16 bits wchar
By: sergei on Wed, 26 September 2007 01:56
|
 |
|
Re: 16 bits wchar
By: sergei on Wed, 26 September 2007 16:54
|
 |
|
Re: 16 bits wchar
By: cbpporter on Wed, 26 September 2007 19:11
|
 |
|
Re: 16 bits wchar
|
 |
|
Re: 16 bits wchar
By: mirek on Wed, 24 October 2007 13:27
|
 |
|
Re: 16 bits wchar
|
 |
|
Re: 16 bits wchar
|
 |
|
Re: 16 bits wchar
By: mirek on Sat, 27 October 2007 11:11
|
 |
|
Re: 16 bits wchar
|
 |
|
Re: 16 bits wchar
By: mirek on Fri, 09 November 2007 10:39
|
 |
|
Re: 16 bits wchar
|
 |
|
Re: 16 bits wchar
By: mirek on Sun, 11 November 2007 18:45
|
 |
|
Re: 16 bits wchar
|
 |
|
Re: 16 bits wchar
|
 |
|
Re: 16 bits wchar
By: mirek on Wed, 23 July 2008 22:04
|
 |
|
Re: 16 bits wchar
|
 |
|
Re: 16 bits wchar
|
 |
|
Re: 16 bits wchar
|
 |
|
Re: 16 bits wchar
|
 |
|
Re: 16 bits wchar
By: mirek on Mon, 04 August 2008 15:07
|
 |
|
Re: 16 bits wchar
|
 |
|
Re: 16 bits wchar
By: mirek on Mon, 04 August 2008 17:14
|
 |
|
Re: 16 bits wchar
|
 |
|
Re: 16 bits wchar
By: mirek on Tue, 05 August 2008 00:03
|
 |
|
Re: 16 bits wchar
|
 |
|
Re: 16 bits wchar
By: mirek on Tue, 05 August 2008 00:14
|
 |
|
Re: 16 bits wchar
|
 |
|
Re: 16 bits wchar
By: mirek on Tue, 05 August 2008 00:20
|
 |
|
Re: 16 bits wchar
|
 |
|
Re: 16 bits wchar
By: mirek on Tue, 05 August 2008 00:26
|
 |
|
Re: 16 bits wchar
|
 |
|
Re: 16 bits wchar
By: mirek on Tue, 05 August 2008 00:51
|
 |
|
Re: 16 bits wchar
By: mirek on Tue, 05 August 2008 10:42
|
 |
|
Re: 16 bits wchar
|
 |
|
Re: 16 bits wchar
By: mirek on Tue, 05 August 2008 15:12
|
 |
|
Re: 16 bits wchar
By: mirek on Tue, 05 August 2008 15:19
|
 |
|
Re: 16 bits wchar
|
 |
|
Re: 16 bits wchar
|
 |
|
Re: 16 bits wchar
|
 |
|
Re: 16 bits wchar
By: mirek on Thu, 07 August 2008 16:10
|
 |
|
Re: 16 bits wchar
|
 |
|
Re: 16 bits wchar
By: mirek on Thu, 07 August 2008 17:40
|
 |
|
Re: 16 bits wchar
|
 |
|
Re: 16 bits wchar
By: mirek on Thu, 07 August 2008 20:01
|
 |
|
Re: 16 bits wchar
|
 |
|
Re: 16 bits wchar
By: mirek on Fri, 08 August 2008 15:32
|
 |
|
Re: 16 bits wchar
|
 |
|
Re: 16 bits wchar
By: mirek on Fri, 08 August 2008 18:25
|
 |
|
Re: 16 bits wchar
|
 |
|
Re: 16 bits wchar
By: cbpporter on Fri, 05 September 2008 19:13
|
 |
|
Re: 16 bits wchar
By: mirek on Sun, 07 September 2008 13:24
|
 |
|
Re: 16 bits wchar
By: mirek on Mon, 04 August 2008 15:03
|
 |
|
Re: 16 bits wchar
By: mirek on Sat, 27 October 2007 11:01
|
Goto Forum:
Current Time: Mon Aug 25 03:17:20 CEST 2025
Total time taken to generate the page: 0.06278 seconds
|