The code below checks whether a string starts with the UTF-8 byte order mark (BOM).
If the BOM is present, it prints the string with the BOM skipped and then prints the full byte sequence decoded as UTF-8.
import java.io.UnsupportedEncodingException;

/**
 * File: BOM.java
 *
 * <p>Checks whether the UTF-8 byte order mark (BOM) is present at the start of a
 * string's bytes. If it is, prints the string with the BOM skipped, then prints
 * the complete byte sequence decoded as UTF-8 (intended for a UTF-8 console).
 */
public class BOM {

    /**
     * Sample input: the characters U+00EF, U+00BB, U+00BF followed by "Hello World".
     * Encoded as ISO-8859-1 those three characters become the bytes EF BB BF, i.e. the
     * UTF-8 BOM. They are written as Unicode escapes so the literal survives any
     * source-file encoding or copy/paste step that would otherwise drop them.
     */
    private static final String BOM_STRING = "\u00EF\u00BB\u00BFHello World";

    /** Charset name used to turn the sample string into raw bytes and back, one byte per char. */
    private static final String ISO_ENCODING = "ISO-8859-1";

    /** Charset name used when decoding the complete byte sequence, BOM included. */
    private static final String UTF8_ENCODING = "UTF-8";

    /** Number of bytes in the UTF-8 BOM (EF BB BF). */
    private static final int UTF8_BOM_LENGTH = 3;

    /**
     * Encodes the sample string as ISO-8859-1 and, if the resulting bytes start with
     * the UTF-8 BOM, prints the text without the BOM followed by the whole byte
     * sequence decoded as UTF-8. Prints nothing when no BOM is found.
     *
     * @param args ignored
     * @throws UnsupportedEncodingException if a charset name used here is not supported
     */
    public static void main(String[] args) throws UnsupportedEncodingException {
        final byte[] bytes = BOM_STRING.getBytes(ISO_ENCODING);
        if (hasUtf8Bom(bytes)) {
            printSkippedBomString(bytes);
            printUTF8String(bytes);
        }
    }

    /**
     * Prints the bytes decoded as ISO-8859-1, skipping a leading UTF-8 BOM when one is present.
     *
     * @param bytes raw bytes to print
     * @throws UnsupportedEncodingException if ISO-8859-1 is not supported
     */
    private static void printSkippedBomString(final byte[] bytes) throws UnsupportedEncodingException {
        // Decode straight from the offset instead of copying into a temporary array;
        // only skip when the BOM is really there so short input cannot yield a negative length.
        final int offset = hasUtf8Bom(bytes) ? UTF8_BOM_LENGTH : 0;
        System.out.println(new String(bytes, offset, bytes.length - offset, ISO_ENCODING));
    }

    /**
     * Prints the complete byte sequence (BOM included) decoded as UTF-8.
     *
     * @param bytes raw bytes to print
     * @throws UnsupportedEncodingException if UTF-8 is not supported
     */
    private static void printUTF8String(final byte[] bytes) throws UnsupportedEncodingException {
        System.out.println(new String(bytes, UTF8_ENCODING));
    }

    /**
     * Tells whether the byte array starts with the UTF-8 BOM (EF BB BF).
     *
     * @param bytes bytes to inspect; may be {@code null} or shorter than the BOM
     * @return {@code true} only if the first three bytes are EF BB BF
     */
    private static boolean hasUtf8Bom(final byte[] bytes) {
        // Guard first: indexing bytes[0..2] on a shorter array would throw.
        if (bytes == null || bytes.length < UTF8_BOM_LENGTH) {
            return false;
        }
        // Mask with 0xFF because Java bytes are signed.
        return (bytes[0] & 0xFF) == 0xEF
                && (bytes[1] & 0xFF) == 0xBB
                && (bytes[2] & 0xFF) == 0xBF;
    }
}
The following Stack Overflow article discusses detecting and skipping the BOM in detail.
No comments :
Post a Comment