package net.argius.frui.io;

import java.io.*;

/**
 * Japanese encoding type detection.
 */
public class JapaneseEncodingTypeDetection extends EncodingTypeDetection {

    private static final String ISO8859_1 = "ISO8859-1";
    private static final String ISO2022JP = "ISO2022JP";
    private static final String EUC_JP = "EUC_JP";
    private static final String SHIFT_JIS = "Shift_JIS";
    private static final String MS932 = "MS932";
    private static final String UTF_8 = "UTF-8";
    private static final String UTF_16 = "UTF-16";

    /** Maximum number of bytes sampled from the stream (2048 * 1024). */
    private static final int LIMIT = 2048 * 1024;

    /**
     * Number of trailing characters dropped from a decoded sample when the
     * input was cut off at {@link #LIMIT}. Presumably this keeps a character
     * split by the cut from being counted as a decoding failure.
     */
    private static final int TRUNCATED_TAIL_LENGTH = 16;

    /**
     * Guesses the encoding of the data read from the given stream.
     * <p>
     * Up to {@link #LIMIT} bytes are read in 64 KiB chunks; the chunk that
     * would exceed the limit is discarded and reading stops. If the sample
     * passes {@link #isAscii(byte[])}, ISO8859-1 is reported. Otherwise the
     * sample is decoded with each candidate encoding in a fixed order
     * (ISO2022JP, EUC_JP, Shift_JIS, MS932, UTF-8, UTF-16) and the first one
     * whose decoded text passes {@link #hasUnassignedCode(CharSequence)}
     * as clean is reported. The stream is not closed by this method.
     *
     * @param is the stream to examine
     * @return the detected encoding type, or {@code EncodingType.DEFAULT}
     *         when no candidate decodes cleanly
     * @throws IOException if reading fails, or (as
     *         {@code UnsupportedEncodingException}) if a candidate charset
     *         name is not supported by the runtime
     * @see net.argius.frui.io.EncodingTypeDetection#detect(java.io.InputStream)
     */
    public EncodingType detect(InputStream is) throws IOException {
        ByteArrayOutputStream bos = new ByteArrayOutputStream();
        boolean isOverLimit = false;
        final int bufferSize = 0x10000;
        byte[] buffer = new byte[bufferSize];
        for (int length; (length = is.read(buffer)) >= 0;) {
            if (bos.size() + length > LIMIT) {
                // Enough data sampled; the current chunk is not appended.
                isOverLimit = true;
                break;
            }
            bos.write(buffer, 0, length);
        }
        byte[] bytes = bos.toByteArray();
        if (isAscii(bytes)) {
            return new EncodingType(ISO8859_1, true);
        }
        // Order matters: the first candidate that decodes cleanly wins.
        String[] encs = {ISO2022JP, EUC_JP, SHIFT_JIS, MS932, UTF_8, UTF_16,};
        for (int i = 0; i < encs.length; i++) {
            String enc = encs[i];
            String s = new String(bytes, enc);
            if (isOverLimit) {
                // Clamp at zero: a sample may decode to fewer characters than
                // the tail length (for example, input made up of escape
                // sequences), and a negative end index would make
                // substring throw StringIndexOutOfBoundsException.
                int end = Math.max(0, s.length() - TRUNCATED_TAIL_LENGTH);
                s = s.substring(0, end);
            }
            if (!hasUnassignedCode(s)) {
                return new EncodingType(enc, true);
            }
        }
        return EncodingType.DEFAULT;
    }

    /**
     * Checks whether the data consists only of ASCII text bytes.
     * <p>
     * Accepted bytes are 0x20 through 0x7E plus tab (0x09), line feed (0x0A),
     * form feed (0x0C) and carriage return (0x0D). An empty array is accepted.
     *
     * @param bytes the byte array to examine
     * @return <code>true</code> if every byte is accepted,
     *         <code>false</code> otherwise
     */
    private boolean isAscii(byte[] bytes) {
        for (int i = 0; i < bytes.length; i++) {
            // Work on the unsigned value so that bytes 0x80-0xFF are rejected
            // by the upper bound rather than by signed-byte comparison.
            int value = bytes[i] & 0xFF;
            if (value > 0x7E) {
                return false;
            }
            if (value < 0x20
                && value != 0x09
                && value != 0x0A
                && value != 0x0C
                && value != 0x0D) {
                return false;
            }
        }
        return true;
    }

    /**
     * Checks whether the character sequence contains a character that marks
     * it as not cleanly decoded.
     * <p>
     * Such characters are control characters below U+0020 other than tab,
     * line feed, form feed and carriage return, and
     * <code>REPLACEMENT_CHARACTER</code> (not declared in this class;
     * presumably inherited from the superclass).
     *
     * @param cs the character sequence to examine
     * @return <code>true</code> if such a character is found,
     *         <code>false</code> otherwise
     */
    private static boolean hasUnassignedCode(CharSequence cs) {
        for (int i = 0, n = cs.length(); i < n; i++) {
            char c = cs.charAt(i);
            if (c < 0x0020
                && c != 0x0009
                && c != 0x000A
                && c != 0x000C
                && c != 0x000D) {
                return true;
            }
            if (c == REPLACEMENT_CHARACTER) {
                return true;
            }
        }
        return false;
    }

}