Go Back   TechArena Community > Software > Software Development
Become a Member!
Forgot your username/password?
Register Tags Active Topics RSS Search Mark Forums Read

Sponsored Links



Convert UTF-8 to ANSI

Software Development


Reply
 
Thread Tools Search this Thread
  #1  
Old 04-04-2009
Member
 
Join Date: Nov 2008
Posts: 1,185
Convert UTF-8 to ANSI
  

I needed to convert UTF-8 to ANSI and after a lot of research I have found nothing conclusive, so I scrambled from the RFC. I put the code here in the hope that it will one day be used to someone. Source code free.

Test code:
Code:
Sub main () 
Debug.Print Encode_UTF8("" )
Debug.Print Decode_UTF8 (Encode_UTF8 ( "")) 
Debug.Print Decode_UTF8("a" )
Debug.Print isUTF8("a" )
Debug.Print isUTF8("abcde" )
End Sub

Main code:
Code:
Option Explicit 

'Char. number range | UTF-8 octet sequence 
'(Hexadecimal) | (binary) 
'--------------------+---------------------------- ----------------- 
"0000 0000-0000 007 F | 0xxxxxxx 
'0000 0080-0000 07FF | 110xxxxx 10xxxxxx 
'0000 0800-0000 FFFF | 1110xxxx 10xxxxxx 10xxxxxx 
'0001 0000-0010 FFFF | 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx 
Public Function Encode_UTF8 (astr) 
Dim c 
Dim n 
Dim utftext 

utftext = "" 
n = 1 
Do While n <= Len (astr) 
c = AscW (Mid (astr, n, 1)) 
If c <128 Then 
utftext = utftext + Chr (c) 
ElseIf ((c> = 128) And (c <2048)) Then 
utftext = utftext + Chr (((c \ 64) Or 192)) 
utftext = utftext + Chr (((c And 63) Or 128)) 
ElseIf ((c> = 2048) And (c <65536)) Then 
utftext = utftext + Chr (((c \ 4096) Or 224)) 
utftext = utftext + Chr ((((c \ 64) And 63) Or 128)) 
utftext = utftext + Chr (((c And 63) Or 128)) 
Else 'c> = 65536 
utftext = utftext + Chr (((c \ 262144) Or 240)) 
utftext = utftext + Chr(((((c \ 4096) And 63)) Or 128))
utftext = utftext + Chr ((((c \ 64) And 63) Or 128)) 
utftext = utftext + Chr (((c And 63) Or 128)) 
End If 
n = n + 1 
Loop 
Encode_UTF8 = utftext 
End Function 

'Char. number range | UTF-8 octet sequence 
'(Hexadecimal) | (binary) 
'--------------------+---------------------------- ----------------- 
"0000 0000-0000 007 F | 0xxxxxxx 
'0000 0080-0000 07FF | 110xxxxx 10xxxxxx 
'0000 0800-0000 FFFF | 1110xxxx 10xxxxxx 10xxxxxx 
'0001 0000-0010 FFFF | 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx 
Public Function Decode_UTF8 (astr) 
Dim c0, c1, c2, c3 
Dim n 
Dim unitext 

If isUTF8 (astr) = False Then 
Decode_UTF8 = astr
Exit Function 
End If 

unitext = "" 
n = 1 
Do While n <= Len (astr) 
c0 = Asc (Mid (astr, n, 1)) 
If n <= Len (astr) - 1 Then 
c1 = Asc (Mid (astr, n + 1, 1)) 
Else 
c1 = 0 
End If 
If n <= Len (astr) - 2 Then 
c2 = Asc (Mid (astr, n + 2, 1)) 
Else 
c2 = 0 
End If 
If n <= Len (astr) - 3 Then 
c3 = Asc (Mid (astr, n + 3, 1)) 
Else 
c3 = 0 
End If 

If (c0 And 240) = 240 And (c1 And 128) = 128 And (c2 And 128) = 128 And (C3 and 128) = 128 Then 
unitext = unitext + ChrW((c0 - 240) * 65536 + (c1 - 128) * 4096) + (c2 - 128) * 64 + (c3 - 128)
n = n + 4 
ElseIf (c0 And 224) = 224 And (c1 And 128) = 128 And (c2 And 128) = 128 Then 
unitext = unitext + CHRW ((c0 - 224) * 4096 + (c1 - 128) * 64 + (c2 - 128)) 
n = n + 3 
ElseIf (c0 And 192) = 192 And (c1 And 128) = 128 Then 
unitext = unitext + CHRW ((c0 - 192) * 64 + (c1 - 128)) 
n = n + 2 
ElseIf (c0 And 128) = 128 Then 
unitext = unitext + ChrW(c0 And 127)
n = n + 1 
Else 'c0 <128 
unitext = unitext + CHRW (c0) 
n = n + 1 
End If 
Loop 

Decode_UTF8 = unitext 
End Function 

'Char. number range | UTF-8 octet sequence 
'(Hexadecimal) | (binary) 
'--------------------+---------------------------- ----------------- 
"0000 0000-0000 007 F | 0xxxxxxx 
'0000 0080-0000 07FF | 110xxxxx 10xxxxxx 
'0000 0800-0000 FFFF | 1110xxxx 10xxxxxx 10xxxxxx 
'0001 0000-0010 FFFF | 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx 
Public Function isUTF8 (astr) 
Dim c0, c1, c2, c3 
Dim n 

isUTF8 = True 
n = 1 
Do While n <= Len (astr) 
c0 = Asc (Mid (astr, n, 1)) 
If n <= Len (astr) - 1 Then 
c1 = Asc (Mid (astr, n + 1, 1)) 
Else 
c1 = 0 
End If 
If n <= Len (astr) - 2 Then 
c2 = Asc (Mid (astr, n + 2, 1)) 
Else 
c2 = 0 
End If 
If n <= Len (astr) - 3 Then 
c3 = Asc (Mid (astr, n + 3, 1)) 
Else 
c3 = 0 
End If 

If (c0 And 240) = 240 Then 
If (c1 And 128) = 128 And (c2 And 128) = 128 And (C3 and 128) = 128 Then 
n = n + 4 
Else 
isUTF8 = False 
Exit Function 
End If 
ElseIf (c0 And 224) = 224 Then 
If (c1 And 128) = 128 And (c2 And 128) = 128 Then 
n = n + 3 
Else 
isUTF8 = False 
Exit Function 
End If 
ElseIf (c0 And 192) = 192 Then 
If (c1 And 128) = 128 Then 
n = n + 2 
Else 
isUTF8 = False 
Exit Function 
End If 
ElseIf (c0 And 128) = 0 Then 
n = n + 1 
Else 
isUTF8 = False 
Exit Function 
End If 
Loop 
End Function

Reply With Quote
  #2  
Old 04-04-2009
Member
 
Join Date: May 2008
Posts: 685
Re: Convert UTF-8 to ANSI

A big thank you for this code, it was well repaired. Yes, you just save me some time! Thank you
Reply With Quote
  #3  
Old 04-04-2009
XSI XSI is offline
Member
 
Join Date: May 2008
Posts: 271
Re: Convert UTF-8 to ANSI

A big thank you. That is exactly what I was looking for. Almost 3 years later (from my research), this code has been very useful. Thank you!
Reply With Quote
Reply

  TechArena Community > Software > Software Development
Tags: , ,



Thread Tools Search this Thread
Search this Thread:

Advanced Search


Similar Threads for: "Convert UTF-8 to ANSI"
Thread Thread Starter Forum Replies Last Post
How to use ANSI.SYS in Windows 7 NamJam Operating Systems 3 23-12-2010 06:22 AM
Programming in ANSI C by E Balagurusamy free download Mr Alfa Ebooks 1 16-07-2010 12:27 PM
Ansi.sys and Windows XP issue AMISH Windows Software 3 13-07-2009 07:07 PM
Join SQL Traditional or ANSI Deep23 Software Development 2 23-04-2009 12:07 AM
Convert video to iPod, Convert iTunes video, Convert DivX to MP4 tommyhills MediaCenter 1 26-07-2008 02:30 AM


All times are GMT +5.5. The time now is 10:39 AM.