【注意】最后更新于 June 12, 2021,文中内容可能已过时,请谨慎使用。
有一段PHP代码是这样的:
1
2
3
|
// Deliberately malformed input: "%Qw" and the trailing "%4" are not valid %xy escapes.
$s='%Qw%e1%16%11%4';
// PHP's urldecode() accepts this input without raising an error.
$r = urldecode($s);
// MD5 digest of the decoded string.
$result = md5($r);
|
乍看之下迁移到Java好像很容易嘛,可是实际操作了一下才发现,这个问题并不好解决。
为什么呢?
问题就出在PHP的urldecode函数上:它并不要求输入的字符串符合urlencode规范。
比如我示例中的字符串,%Qw里的Qw不是合法的十六进制数字,Java的URLDecoder.decode遇到它会抛出IllegalArgumentException,中止decode;但是PHP并不会报错,它把不符合%xy格式的内容原样保留,比如%Qw就不再是一个转义序列了,而是%、Q、w三个单独的字符。
那么这么一段urldecode代码迁移到Java后就换成下述代码:
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
|
package me.shrp.blog;
import java.io.ByteArrayOutputStream;
import java.io.UnsupportedEncodingException;
import java.nio.charset.StandardCharsets;
import java.util.LinkedList;
import java.util.List;
/**
 * Lenient, byte-oriented URL decoder that mimics PHP's {@code urldecode()}.
 *
 * <p>Unlike {@link java.net.URLDecoder}, a malformed escape never aborts decoding:
 * a {@code '%'} that is not followed by two ASCII hexadecimal digits is copied to
 * the output as a literal character, exactly like any other character.
 *
 * Created by shangrenpeng on 16/2/18.
 */
public class PHPUrlDecoder {

    /** Shared zero-length result, returned whenever there are no bytes to return. */
    public static final byte[] EMPTY_BYTE_ARRAY = new byte[0];

    /**
     * Decodes {@code s} into raw bytes using PHP {@code urldecode()} rules.
     *
     * <ul>
     *   <li>{@code '+'} becomes a single space byte;</li>
     *   <li>{@code %xy}, where {@code x} and {@code y} are both ASCII hex digits,
     *       becomes the byte {@code 0xXY};</li>
     *   <li>everything else, including a stray {@code '%'}, is copied through as its
     *       UTF-8 encoding.</li>
     * </ul>
     *
     * @param s   the string to decode; must not be {@code null}
     * @param enc encoding name; it is only checked for being non-empty (kept for
     *            signature compatibility) and literal characters are always
     *            emitted as UTF-8; must not be {@code null}
     * @return the decoded bytes, never {@code null}
     * @throws UnsupportedEncodingException if {@code enc} is the empty string
     * @throws NullPointerException if {@code s} or {@code enc} is {@code null}
     */
    public static byte[] decode(String s, String enc)
            throws UnsupportedEncodingException {
        final int numChars = s.length();
        if (enc.length() == 0) {
            throw new UnsupportedEncodingException("URLDecoder: empty string enc parameter");
        }

        final ByteArrayOutputStream out = new ByteArrayOutputStream(numChars);
        // Literal characters are buffered as a run [literalStart, i) and encoded in one
        // go, so that surrogate pairs are turned into proper 4-byte UTF-8 sequences
        // instead of being encoded one half at a time.
        int literalStart = 0;
        int i = 0;
        while (i < numChars) {
            final char c = s.charAt(i);
            if (c == '+') {
                flushLiteral(s, literalStart, i, out);
                out.write(' ');
                i++;
                literalStart = i;
            } else if (c == '%' && i + 2 < numChars
                    && hexValue(s.charAt(i + 1)) >= 0
                    && hexValue(s.charAt(i + 2)) >= 0) {
                flushLiteral(s, literalStart, i, out);
                out.write((hexValue(s.charAt(i + 1)) << 4) | hexValue(s.charAt(i + 2)));
                i += 3;
                literalStart = i;
            } else {
                // Ordinary character or malformed escape: stays part of the literal run.
                i++;
            }
        }
        flushLiteral(s, literalStart, numChars, out);

        return out.size() == 0 ? EMPTY_BYTE_ARRAY : out.toByteArray();
    }

    /**
     * Unboxes a {@code Byte[]} into a {@code byte[]}.
     *
     * @param array the boxed array, may be {@code null}; elements must not be {@code null}
     * @return {@code null} for a {@code null} input, {@link #EMPTY_BYTE_ARRAY} for an
     *         empty input, otherwise a new array with the unboxed values
     */
    public static byte[] toPrimitive(Byte[] array) {
        if (array == null) {
            return null;
        }
        if (array.length == 0) {
            return EMPTY_BYTE_ARRAY;
        }
        final byte[] result = new byte[array.length];
        for (int i = 0; i < array.length; i++) {
            result[i] = array[i].byteValue();
        }
        return result;
    }

    /** Appends the UTF-8 encoding of {@code s[from, to)} to {@code out}; no-op for an empty range. */
    private static void flushLiteral(final String s, final int from, final int to,
                                     final ByteArrayOutputStream out) {
        if (from < to) {
            final byte[] encoded = s.substring(from, to).getBytes(StandardCharsets.UTF_8);
            out.write(encoded, 0, encoded.length);
        }
    }

    /**
     * Returns the value (0-15) of an ASCII hexadecimal digit, or -1 if {@code c} is not one.
     * Signs and non-ASCII digits are rejected on purpose, unlike {@code Integer.parseInt}.
     */
    private static int hexValue(final char c) {
        if (c >= '0' && c <= '9') {
            return c - '0';
        }
        if (c >= 'a' && c <= 'f') {
            return c - 'a' + 10;
        }
        if (c >= 'A' && c <= 'F') {
            return c - 'A' + 10;
        }
        return -1;
    }
}
|
文章作者
尚弟
上次更新
2021-06-12
(7e4096a)
许可协议
© Copyright 2021 by 尚弟