Back on 3/28/2008, we published a blog post titled
How to check for hexadecimal characters. I used that code as the basis for a method that ensures a string contains only characters that are valid in XML. If an XML document contains invalid characters, applications cannot consume it, which can cause a complete website outage (depending on how the document is used). The following method looks for invalid XML characters.
/// <summary>
/// Determines whether every character in <paramref name="input"/> is allowed
/// by the XML 1.0 <c>Char</c> production:
/// #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF].
/// </summary>
/// <param name="input">The string to validate. May be null or empty.</param>
/// <returns>
/// <c>true</c> if the string is empty or contains only valid XML characters;
/// <c>false</c> if it is null, contains a forbidden character, or contains an
/// unpaired UTF-16 surrogate.
/// </returns>
private static bool IsValidString(string input)
{
    // Null was reported as invalid by the previous implementation (through a
    // catch-all handler); keep that result, but make it explicit.
    if (input == null)
    {
        return false;
    }

    // The string is deliberately NOT trimmed: Trim() removes \v (0xB) and
    // \f (0xC), which are illegal in XML, and would hide them from the check
    // even though the caller's string still contains them.
    for (int i = 0; i < input.Length; i++)
    {
        char currentChar = input[i];

        // A .NET char is a 16-bit UTF-16 code unit, so code points in the
        // range 0x10000-0x10FFFF never appear as a single char. They arrive
        // as a high surrogate immediately followed by a low surrogate.
        if (char.IsHighSurrogate(currentChar))
        {
            if (i + 1 < input.Length && char.IsLowSurrogate(input[i + 1]))
            {
                // Well-formed pair: consume the low surrogate as well.
                i++;
                continue;
            }

            // High surrogate with no partner.
            return false;
        }

        bool isValid =
            currentChar == 0x9 ||                              // \t = 9
            currentChar == 0xA ||                              // \n = 10
            currentChar == 0xD ||                              // \r = 13
            (currentChar >= 0x20 && currentChar <= 0xD7FF) ||
            (currentChar >= 0xE000 && currentChar <= 0xFFFD);

        // Anything else is invalid: control characters, 0xFFFE/0xFFFF, or a
        // low surrogate that was not preceded by a high surrogate.
        if (!isValid)
        {
            return false;
        }
    }

    return true;
}
34
Comments
Leave a Comment