从PHP迁移到Java遇到的问题

有一段PHP代码是这样的：

1
2
3


$s='%Qw%e1%16%11%4';
$r = urldecode($s);
$result = md5($r);

乍看之下迁移到Java好像很容易嘛，可是实际操作了一下才发现，这个问题并不好解决。

为什么呢？

问题就出在PHP的urldecode函数上：它并不要求输入字符串符合urlencode规范。

比如我示例中的字符串，%后面的Qw不是合法的十六进制数字，Java的java.net.URLDecoder遇到这种情况会抛出IllegalArgumentException，中止decode；但是PHP并不会，它把不符合条件的部分都当作普通字符原样保留，比如%Qw就不再是一个转义序列了，而是三个单独的字符。

因此，这段urldecode逻辑迁移到Java后，需要自己实现一个与PHP行为一致的解码器，代码如下：

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127



package me.shrp.blog;

import java.io.ByteArrayOutputStream;
import java.io.UnsupportedEncodingException;
import java.nio.charset.StandardCharsets;
import java.util.LinkedList;
import java.util.List;

/**
 * URL decoder that mimics the lenient behaviour of PHP's {@code urldecode()}.
 *
 * <p>Unlike {@code java.net.URLDecoder}, a {@code %} that is not followed by
 * two ASCII hexadecimal digits is not an error: it is copied to the output
 * as a literal {@code %} and decoding continues with the next character.
 *
 * <p>Created by shangrenpeng on 16/2/18.
 */
public class PHPUrlDecoder {

    /** Shared zero-length result, returned for inputs that decode to nothing. */
    public static final byte[] EMPTY_BYTE_ARRAY = new byte[0];

    /**
     * Decodes a URL-encoded string into raw bytes.
     *
     * <ul>
     *   <li>{@code +} becomes a single space byte (0x20).</li>
     *   <li>{@code %xy}, where {@code x} and {@code y} are both ASCII hex
     *       digits, becomes the single byte with that value.</li>
     *   <li>Any other {@code %} is emitted literally.</li>
     *   <li>All remaining characters are emitted as their UTF-8 bytes.</li>
     * </ul>
     *
     * @param s   the string to decode; must not be {@code null}
     * @param enc kept for signature compatibility; it is only checked for
     *            being non-empty, literal characters are always written as
     *            UTF-8 regardless of its value
     * @return the decoded bytes, never {@code null}
     * @throws UnsupportedEncodingException if {@code enc} is the empty string
     * @throws NullPointerException         if {@code s} or {@code enc} is {@code null}
     */
    public static byte[] decode(String s, String enc)
            throws UnsupportedEncodingException {

        if (enc.length() == 0) {
            throw new UnsupportedEncodingException("URLDecoder: empty string enc parameter");
        }

        final int numChars = s.length();
        // The decoded form is never longer than the UTF-16 length times the
        // UTF-8 expansion factor; numChars is a reasonable starting capacity.
        final ByteArrayOutputStream out = new ByteArrayOutputStream(numChars);

        int i = 0;
        while (i < numChars) {
            final char c = s.charAt(i);
            if (c == '+') {
                out.write(' ');
                i++;
            } else if (c == '%') {
                // Every escape is validated on its own, so a malformed escape
                // that directly follows a valid one is passed through instead
                // of aborting the whole decode.
                if (i + 2 < numChars
                        && isHexDigit(s.charAt(i + 1))
                        && isHexDigit(s.charAt(i + 2))) {
                    final int hi = Character.digit(s.charAt(i + 1), 16);
                    final int lo = Character.digit(s.charAt(i + 2), 16);
                    out.write((hi << 4) | lo);
                    i += 3;
                } else {
                    out.write('%');
                    i++;
                }
            } else {
                // Encode the whole run of ordinary characters at once so that
                // surrogate pairs are converted as one code point rather than
                // as two unmappable halves.
                int end = i + 1;
                while (end < numChars && s.charAt(end) != '+' && s.charAt(end) != '%') {
                    end++;
                }
                final byte[] literal = s.substring(i, end).getBytes(StandardCharsets.UTF_8);
                out.write(literal, 0, literal.length);
                i = end;
            }
        }

        return out.size() == 0 ? EMPTY_BYTE_ARRAY : out.toByteArray();
    }

    /**
     * Unboxes a {@code Byte[]} into a {@code byte[]}.
     *
     * @param array the boxed array; may be {@code null}
     * @return {@code null} for a {@code null} input, {@link #EMPTY_BYTE_ARRAY}
     *         for an empty input, otherwise a new array with the same values
     * @throws NullPointerException if any element of {@code array} is {@code null}
     */
    public static byte[] toPrimitive(Byte[] array) {
        if (array == null) {
            return null;
        }
        if (array.length == 0) {
            return EMPTY_BYTE_ARRAY;
        }
        final byte[] result = new byte[array.length];
        for (int i = 0; i < array.length; i++) {
            result[i] = array[i].byteValue();
        }
        return result;
    }

    /**
     * Tells whether {@code ch} is an ASCII hexadecimal digit.
     *
     * <p>Deliberately stricter than {@code Integer.parseInt(.., 16)}, which
     * also accepts a leading sign and non-ASCII Unicode digits.
     */
    private static boolean isHexDigit(final char ch) {
        return (ch >= '0' && ch <= '9')
                || (ch >= 'a' && ch <= 'f')
                || (ch >= 'A' && ch <= 'F');
    }
}

文章目录