Results 1 to 3 of 3

Thread: Convert UTF-8 to ANSI

  1. #1
    Join Date
    Nov 2008
    Posts
    1,185

    Convert UTF-8 to ANSI

    I needed to convert UTF-8 to ANSI and after a lot of research I have found nothing conclusive, so I scrambled from the RFC. I put the code here in the hope that it will one day be used to someone. Source code free.

    Test code:
    Code:
    Sub main () 
    Debug.Print Encode_UTF8("" )
    Debug.Print Decode_UTF8 (Encode_UTF8 ( "")) 
    Debug.Print Decode_UTF8("a" )
    Debug.Print isUTF8("a" )
    Debug.Print isUTF8("abcde" )
    End Sub

    Main code:
    Code:
    Option Explicit 
    
    'Char. number range | UTF-8 octet sequence 
    '(Hexadecimal) | (binary) 
    '--------------------+---------------------------- ----------------- 
    "0000 0000-0000 007 F | 0xxxxxxx 
    '0000 0080-0000 07FF | 110xxxxx 10xxxxxx 
    '0000 0800-0000 FFFF | 1110xxxx 10xxxxxx 10xxxxxx 
    '0001 0000-0010 FFFF | 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx 
    Public Function Encode_UTF8 (astr) 
    Dim c 
    Dim n 
    Dim utftext 
    
    utftext = "" 
    n = 1 
    Do While n <= Len (astr) 
    c = AscW (Mid (astr, n, 1)) 
    If c <128 Then 
    utftext = utftext + Chr (c) 
    ElseIf ((c> = 128) And (c <2048)) Then 
    utftext = utftext + Chr (((c \ 64) Or 192)) 
    utftext = utftext + Chr (((c And 63) Or 128)) 
    ElseIf ((c> = 2048) And (c <65536)) Then 
    utftext = utftext + Chr (((c \ 4096) Or 224)) 
    utftext = utftext + Chr ((((c \ 64) And 63) Or 128)) 
    utftext = utftext + Chr (((c And 63) Or 128)) 
    Else 'c> = 65536 
    utftext = utftext + Chr (((c \ 262144) Or 240)) 
    utftext = utftext + Chr(((((c \ 4096) And 63)) Or 128))
    utftext = utftext + Chr ((((c \ 64) And 63) Or 128)) 
    utftext = utftext + Chr (((c And 63) Or 128)) 
    End If 
    n = n + 1 
    Loop 
    Encode_UTF8 = utftext 
    End Function 
    
    'Char. number range | UTF-8 octet sequence 
    '(Hexadecimal) | (binary) 
    '--------------------+---------------------------- ----------------- 
    "0000 0000-0000 007 F | 0xxxxxxx 
    '0000 0080-0000 07FF | 110xxxxx 10xxxxxx 
    '0000 0800-0000 FFFF | 1110xxxx 10xxxxxx 10xxxxxx 
    '0001 0000-0010 FFFF | 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx 
    Public Function Decode_UTF8 (astr) 
    Dim c0, c1, c2, c3 
    Dim n 
    Dim unitext 
    
    If isUTF8 (astr) = False Then 
    Decode_UTF8 = astr
    Exit Function 
    End If 
    
    unitext = "" 
    n = 1 
    Do While n <= Len (astr) 
    c0 = Asc (Mid (astr, n, 1)) 
    If n <= Len (astr) - 1 Then 
    c1 = Asc (Mid (astr, n + 1, 1)) 
    Else 
    c1 = 0 
    End If 
    If n <= Len (astr) - 2 Then 
    c2 = Asc (Mid (astr, n + 2, 1)) 
    Else 
    c2 = 0 
    End If 
    If n <= Len (astr) - 3 Then 
    c3 = Asc (Mid (astr, n + 3, 1)) 
    Else 
    c3 = 0 
    End If 
    
    If (c0 And 240) = 240 And (c1 And 128) = 128 And (c2 And 128) = 128 And (C3 and 128) = 128 Then 
    unitext = unitext + ChrW((c0 - 240) * 65536 + (c1 - 128) * 4096) + (c2 - 128) * 64 + (c3 - 128)
    n = n + 4 
    ElseIf (c0 And 224) = 224 And (c1 And 128) = 128 And (c2 And 128) = 128 Then 
    unitext = unitext + CHRW ((c0 - 224) * 4096 + (c1 - 128) * 64 + (c2 - 128)) 
    n = n + 3 
    ElseIf (c0 And 192) = 192 And (c1 And 128) = 128 Then 
    unitext = unitext + CHRW ((c0 - 192) * 64 + (c1 - 128)) 
    n = n + 2 
    ElseIf (c0 And 128) = 128 Then 
    unitext = unitext + ChrW(c0 And 127)
    n = n + 1 
    Else 'c0 <128 
    unitext = unitext + CHRW (c0) 
    n = n + 1 
    End If 
    Loop 
    
    Decode_UTF8 = unitext 
    End Function 
    
    'Char. number range | UTF-8 octet sequence 
    '(Hexadecimal) | (binary) 
    '--------------------+---------------------------- ----------------- 
    "0000 0000-0000 007 F | 0xxxxxxx 
    '0000 0080-0000 07FF | 110xxxxx 10xxxxxx 
    '0000 0800-0000 FFFF | 1110xxxx 10xxxxxx 10xxxxxx 
    '0001 0000-0010 FFFF | 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx 
    Public Function isUTF8 (astr) 
    Dim c0, c1, c2, c3 
    Dim n 
    
    isUTF8 = True 
    n = 1 
    Do While n <= Len (astr) 
    c0 = Asc (Mid (astr, n, 1)) 
    If n <= Len (astr) - 1 Then 
    c1 = Asc (Mid (astr, n + 1, 1)) 
    Else 
    c1 = 0 
    End If 
    If n <= Len (astr) - 2 Then 
    c2 = Asc (Mid (astr, n + 2, 1)) 
    Else 
    c2 = 0 
    End If 
    If n <= Len (astr) - 3 Then 
    c3 = Asc (Mid (astr, n + 3, 1)) 
    Else 
    c3 = 0 
    End If 
    
    If (c0 And 240) = 240 Then 
    If (c1 And 128) = 128 And (c2 And 128) = 128 And (C3 and 128) = 128 Then 
    n = n + 4 
    Else 
    isUTF8 = False 
    Exit Function 
    End If 
    ElseIf (c0 And 224) = 224 Then 
    If (c1 And 128) = 128 And (c2 And 128) = 128 Then 
    n = n + 3 
    Else 
    isUTF8 = False 
    Exit Function 
    End If 
    ElseIf (c0 And 192) = 192 Then 
    If (c1 And 128) = 128 Then 
    n = n + 2 
    Else 
    isUTF8 = False 
    Exit Function 
    End If 
    ElseIf (c0 And 128) = 0 Then 
    n = n + 1 
    Else 
    isUTF8 = False 
    Exit Function 
    End If 
    Loop 
    End Function

  2. #2
    Join Date
    May 2008
    Posts
    685

    Re: Convert UTF-8 to ANSI

    A big thank you for this code, it was well repaired. Yes, you just save me some time! Thank you

  3. #3
    Join Date
    May 2008
    Posts
    271

    Re: Convert UTF-8 to ANSI

    A big thank you. That is exactly what I was looking for. Almost 3 years later (from my research), this code has been very useful. Thank you!

Similar Threads

  1. How to use ANSI.SYS in Windows 7
    By NamJam in forum Operating Systems
    Replies: 3
    Last Post: 23-12-2010, 06:22 AM
  2. Replies: 1
    Last Post: 16-07-2010, 12:27 PM
  3. Ansi.sys and Windows XP issue
    By AMISH in forum Windows Software
    Replies: 3
    Last Post: 13-07-2009, 07:07 PM
  4. Join SQL Traditional or ANSI
    By Deep23 in forum Software Development
    Replies: 2
    Last Post: 23-04-2009, 12:07 AM
  5. Replies: 1
    Last Post: 26-07-2008, 02:30 AM

Posting Permissions

  • You may not post new threads
  • You may not post replies
  • You may not post attachments
  • You may not edit your posts
  •