I needed to convert UTF-8 to ANSI and after a lot of research I have found nothing conclusive, so I scrambled from the RFC. I put the code here in the hope that it will one day be used to someone. Source code free.
Test code:
Code:
Sub main ()
Debug.Print Encode_UTF8("œ" )
Debug.Print Decode_UTF8 (Encode_UTF8 ( "œ"))
Debug.Print Decode_UTF8("éa" )
Debug.Print isUTF8("éa" )
Debug.Print isUTF8("abcde" )
End Sub
Main code:
Code:
Option Explicit
'Char. number range | UTF-8 octet sequence
'(Hexadecimal) | (binary)
'--------------------+---------------------------- -----------------
"0000 0000-0000 007 F | 0xxxxxxx
'0000 0080-0000 07FF | 110xxxxx 10xxxxxx
'0000 0800-0000 FFFF | 1110xxxx 10xxxxxx 10xxxxxx
'0001 0000-0010 FFFF | 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
Public Function Encode_UTF8 (astr)
Dim c
Dim n
Dim utftext
utftext = ""
n = 1
Do While n <= Len (astr)
c = AscW (Mid (astr, n, 1))
If c <128 Then
utftext = utftext + Chr (c)
ElseIf ((c> = 128) And (c <2048)) Then
utftext = utftext + Chr (((c \ 64) Or 192))
utftext = utftext + Chr (((c And 63) Or 128))
ElseIf ((c> = 2048) And (c <65536)) Then
utftext = utftext + Chr (((c \ 4096) Or 224))
utftext = utftext + Chr ((((c \ 64) And 63) Or 128))
utftext = utftext + Chr (((c And 63) Or 128))
Else 'c> = 65536
utftext = utftext + Chr (((c \ 262144) Or 240))
utftext = utftext + Chr(((((c \ 4096) And 63)) Or 128))
utftext = utftext + Chr ((((c \ 64) And 63) Or 128))
utftext = utftext + Chr (((c And 63) Or 128))
End If
n = n + 1
Loop
Encode_UTF8 = utftext
End Function
'Char. number range | UTF-8 octet sequence
'(Hexadecimal) | (binary)
'--------------------+---------------------------- -----------------
"0000 0000-0000 007 F | 0xxxxxxx
'0000 0080-0000 07FF | 110xxxxx 10xxxxxx
'0000 0800-0000 FFFF | 1110xxxx 10xxxxxx 10xxxxxx
'0001 0000-0010 FFFF | 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
Public Function Decode_UTF8 (astr)
Dim c0, c1, c2, c3
Dim n
Dim unitext
If isUTF8 (astr) = False Then
Decode_UTF8 = astr
Exit Function
End If
unitext = ""
n = 1
Do While n <= Len (astr)
c0 = Asc (Mid (astr, n, 1))
If n <= Len (astr) - 1 Then
c1 = Asc (Mid (astr, n + 1, 1))
Else
c1 = 0
End If
If n <= Len (astr) - 2 Then
c2 = Asc (Mid (astr, n + 2, 1))
Else
c2 = 0
End If
If n <= Len (astr) - 3 Then
c3 = Asc (Mid (astr, n + 3, 1))
Else
c3 = 0
End If
If (c0 And 240) = 240 And (c1 And 128) = 128 And (c2 And 128) = 128 And (C3 and 128) = 128 Then
unitext = unitext + ChrW((c0 - 240) * 65536 + (c1 - 128) * 4096) + (c2 - 128) * 64 + (c3 - 128)
n = n + 4
ElseIf (c0 And 224) = 224 And (c1 And 128) = 128 And (c2 And 128) = 128 Then
unitext = unitext + CHRW ((c0 - 224) * 4096 + (c1 - 128) * 64 + (c2 - 128))
n = n + 3
ElseIf (c0 And 192) = 192 And (c1 And 128) = 128 Then
unitext = unitext + CHRW ((c0 - 192) * 64 + (c1 - 128))
n = n + 2
ElseIf (c0 And 128) = 128 Then
unitext = unitext + ChrW(c0 And 127)
n = n + 1
Else 'c0 <128
unitext = unitext + CHRW (c0)
n = n + 1
End If
Loop
Decode_UTF8 = unitext
End Function
'Char. number range | UTF-8 octet sequence
'(Hexadecimal) | (binary)
'--------------------+---------------------------- -----------------
"0000 0000-0000 007 F | 0xxxxxxx
'0000 0080-0000 07FF | 110xxxxx 10xxxxxx
'0000 0800-0000 FFFF | 1110xxxx 10xxxxxx 10xxxxxx
'0001 0000-0010 FFFF | 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
Public Function isUTF8 (astr)
Dim c0, c1, c2, c3
Dim n
isUTF8 = True
n = 1
Do While n <= Len (astr)
c0 = Asc (Mid (astr, n, 1))
If n <= Len (astr) - 1 Then
c1 = Asc (Mid (astr, n + 1, 1))
Else
c1 = 0
End If
If n <= Len (astr) - 2 Then
c2 = Asc (Mid (astr, n + 2, 1))
Else
c2 = 0
End If
If n <= Len (astr) - 3 Then
c3 = Asc (Mid (astr, n + 3, 1))
Else
c3 = 0
End If
If (c0 And 240) = 240 Then
If (c1 And 128) = 128 And (c2 And 128) = 128 And (C3 and 128) = 128 Then
n = n + 4
Else
isUTF8 = False
Exit Function
End If
ElseIf (c0 And 224) = 224 Then
If (c1 And 128) = 128 And (c2 And 128) = 128 Then
n = n + 3
Else
isUTF8 = False
Exit Function
End If
ElseIf (c0 And 192) = 192 Then
If (c1 And 128) = 128 Then
n = n + 2
Else
isUTF8 = False
Exit Function
End If
ElseIf (c0 And 128) = 0 Then
n = n + 1
Else
isUTF8 = False
Exit Function
End If
Loop
End Function
Bookmarks