You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
1341 lines
39 KiB
1341 lines
39 KiB
|
|
import java.io.IOException;
|
|
|
|
import org.jruby.Ruby;
|
|
import org.jruby.RubyClass;
|
|
import org.jruby.RubyHash;
|
|
import org.jruby.RubyModule;
|
|
import org.jruby.RubyNumeric;
|
|
import org.jruby.RubyString;
|
|
import org.jruby.runtime.Block;
|
|
import org.jruby.runtime.CallbackFactory;
|
|
import org.jruby.runtime.builtin.IRubyObject;
|
|
import org.jruby.exceptions.RaiseException;
|
|
import org.jruby.runtime.load.BasicLibraryService;
|
|
|
|
public class HpricotScanService implements BasicLibraryService {
|
|
/** Canned "internal error, please report" message text; it is not referenced in the visible part of this file. */
public static String NO_WAY_SERIOUSLY="*** This should not happen, please send a bug report with the HTML you're parsing to why@whytheluckystiff.net. So sorry!";
|
|
|
|
public void ELE(IRubyObject N) {
|
|
if (tokend > tokstart || text) {
|
|
IRubyObject raw_string = runtime.getNil();
|
|
ele_open = false; text = false;
|
|
if (tokstart != -1 && N != cdata && N != sym_text && N != procins && N != comment) {
|
|
raw_string = runtime.newString(new String(buf,tokstart,tokend-tokstart));
|
|
}
|
|
rb_yield_tokens(N, tag[0], attr, raw_string, taint);
|
|
}
|
|
}
|
|
|
|
public void SET(IRubyObject[] N, int E) {
|
|
int mark = 0;
|
|
if(N == tag) {
|
|
if(mark_tag == -1 || E == mark_tag) {
|
|
tag[0] = runtime.newString("");
|
|
} else if(E > mark_tag) {
|
|
tag[0] = runtime.newString(new String(buf,mark_tag, E-mark_tag));
|
|
}
|
|
} else if(N == akey) {
|
|
if(mark_akey == -1 || E == mark_akey) {
|
|
akey[0] = runtime.newString("");
|
|
} else if(E > mark_akey) {
|
|
akey[0] = runtime.newString(new String(buf,mark_akey, E-mark_akey));
|
|
}
|
|
} else if(N == aval) {
|
|
if(mark_aval == -1 || E == mark_aval) {
|
|
aval[0] = runtime.newString("");
|
|
} else if(E > mark_aval) {
|
|
aval[0] = runtime.newString(new String(buf,mark_aval, E-mark_aval));
|
|
}
|
|
}
|
|
}
|
|
|
|
public void CAT(IRubyObject[] N, int E) {
|
|
if(N[0].isNil()) {
|
|
SET(N,E);
|
|
} else {
|
|
int mark = 0;
|
|
if(N == tag) {
|
|
mark = mark_tag;
|
|
} else if(N == akey) {
|
|
mark = mark_akey;
|
|
} else if(N == aval) {
|
|
mark = mark_aval;
|
|
}
|
|
((RubyString)(N[0])).append(runtime.newString(new String(buf, mark, E-mark)));
|
|
}
|
|
}
|
|
|
|
public void SLIDE(Object N) {
|
|
int mark = 0;
|
|
if(N == tag) {
|
|
mark = mark_tag;
|
|
} else if(N == akey) {
|
|
mark = mark_akey;
|
|
} else if(N == aval) {
|
|
mark = mark_aval;
|
|
}
|
|
if(mark > tokstart) {
|
|
if(N == tag) {
|
|
mark_tag -= tokstart;
|
|
} else if(N == akey) {
|
|
mark_akey -= tokstart;
|
|
} else if(N == aval) {
|
|
mark_aval -= tokstart;
|
|
}
|
|
}
|
|
}
|
|
|
|
public void ATTR(IRubyObject K, IRubyObject V) {
|
|
if(!K.isNil()) {
|
|
if(attr.isNil()) {
|
|
attr = RubyHash.newHash(runtime);
|
|
}
|
|
((RubyHash)attr).aset(K,V);
|
|
}
|
|
}
|
|
|
|
public void ATTR(IRubyObject[] K, IRubyObject V) {
|
|
ATTR(K[0],V);
|
|
}
|
|
|
|
public void ATTR(IRubyObject K, IRubyObject[] V) {
|
|
ATTR(K,V[0]);
|
|
}
|
|
|
|
public void ATTR(IRubyObject[] K, IRubyObject[] V) {
|
|
ATTR(K[0],V[0]);
|
|
}
|
|
|
|
public void TEXT_PASS() {
|
|
if(!text) {
|
|
if(ele_open) {
|
|
ele_open = false;
|
|
if(tokstart > -1) {
|
|
mark_tag = tokstart;
|
|
}
|
|
} else {
|
|
mark_tag = p;
|
|
}
|
|
attr = runtime.getNil();
|
|
tag[0] = runtime.getNil();
|
|
text = true;
|
|
}
|
|
}
|
|
|
|
public void EBLK(IRubyObject N, int T) {
|
|
CAT(tag, p - T + 1);
|
|
ELE(N);
|
|
}
|
|
|
|
|
|
public void rb_raise(RubyClass error, String message) {
|
|
throw new RaiseException(runtime, error, message, true);
|
|
}
|
|
|
|
public IRubyObject rb_str_new2(String s) {
|
|
return runtime.newString(s);
|
|
}
|
|
|
|
|
|
|
|
|
|
/*
 * Flattened, length-prefixed action lists for the scanner.
 * An offset into this array (taken e.g. from _hpricot_scan_from_state_actions[cs]) points at a
 * count byte; that many action ids follow, and each id is dispatched through a switch in
 * hpricot_scan. Offset 0 holds the count 0, i.e. "no actions".
 * NOTE(review): presumably emitted by a state-machine generator (Ragel-style naming) - confirm,
 * and regenerate rather than editing by hand.
 */
static final byte[] _hpricot_scan_actions = {
|
|
0, 1, 1, 1, 2, 1, 4, 1,
|
|
5, 1, 6, 1, 7, 1, 8, 1,
|
|
9, 1, 10, 1, 11, 1, 12, 1,
|
|
14, 1, 16, 1, 20, 1, 21, 1,
|
|
22, 1, 24, 1, 25, 1, 26, 1,
|
|
28, 1, 29, 1, 30, 1, 32, 1,
|
|
33, 1, 38, 1, 39, 1, 40, 1,
|
|
41, 1, 42, 1, 43, 1, 44, 1,
|
|
45, 1, 46, 1, 47, 1, 48, 1,
|
|
49, 1, 50, 2, 2, 5, 2, 2,
|
|
6, 2, 2, 11, 2, 2, 12, 2,
|
|
2, 14, 2, 4, 39, 2, 4, 40,
|
|
2, 4, 41, 2, 5, 2, 2, 6,
|
|
14, 2, 7, 6, 2, 7, 14, 2,
|
|
11, 12, 2, 13, 3, 2, 14, 6,
|
|
2, 14, 40, 2, 15, 24, 2, 15,
|
|
28, 2, 15, 32, 2, 15, 45, 2,
|
|
17, 23, 2, 18, 27, 2, 19, 31,
|
|
2, 22, 34, 2, 22, 36, 3, 2,
|
|
6, 14, 3, 2, 14, 6, 3, 6,
|
|
7, 14, 3, 6, 14, 40, 3, 7,
|
|
14, 40, 3, 14, 6, 40, 3, 14,
|
|
13, 3, 3, 22, 0, 37, 3, 22,
|
|
2, 34, 3, 22, 14, 35, 4, 2,
|
|
14, 13, 3, 4, 6, 7, 14, 40,
|
|
4, 22, 2, 14, 35, 4, 22, 6,
|
|
14, 35, 4, 22, 7, 14, 35, 4,
|
|
22, 14, 6, 35, 5, 22, 2, 6,
|
|
14, 35, 5, 22, 2, 14, 6, 35,
|
|
5, 22, 6, 7, 14, 35
|
|
};
|
|
|
|
/*
 * Indexed by the current state (cs): position in _hpricot_scan_trans_keys where that state's
 * keys begin. hpricot_scan starts its binary search for the input character at this offset.
 */
static final short[] _hpricot_scan_key_offsets = {
|
|
0, 3, 4, 5, 6, 7, 8, 9,
|
|
10, 13, 22, 37, 44, 45, 46, 47,
|
|
48, 49, 52, 57, 69, 81, 86, 93,
|
|
94, 95, 100, 101, 105, 106, 107, 121,
|
|
135, 152, 169, 186, 203, 210, 212, 214,
|
|
220, 222, 227, 232, 238, 240, 245, 251,
|
|
265, 266, 267, 268, 269, 270, 271, 272,
|
|
273, 274, 275, 276, 282, 296, 300, 313,
|
|
326, 340, 354, 355, 366, 375, 388, 405,
|
|
423, 441, 450, 461, 480, 499, 510, 521,
|
|
536, 538, 540, 556, 572, 575, 587, 599,
|
|
619, 639, 658, 677, 697, 717, 728, 739,
|
|
751, 763, 775, 791, 794, 809, 811, 813,
|
|
829, 845, 848, 860, 871, 890, 910, 930,
|
|
941, 952, 964, 984, 1004, 1016, 1036, 1057,
|
|
1074, 1091, 1095, 1098, 1110, 1122, 1142, 1162,
|
|
1182, 1194, 1206, 1226, 1242, 1258, 1270, 1291,
|
|
1310, 1313, 1328, 1340, 1355, 1358, 1369, 1371,
|
|
1373, 1384, 1391, 1404, 1418, 1432, 1445, 1446,
|
|
1447, 1448, 1449, 1450, 1451, 1455, 1460, 1469,
|
|
1479, 1484, 1491, 1492, 1493, 1494, 1495, 1496,
|
|
1497, 1498, 1499, 1503, 1508, 1512, 1522, 1527,
|
|
1533, 1534, 1535, 1536, 1537, 1538, 1539, 1540,
|
|
1541, 1542, 1546, 1551, 1553, 1554, 1555, 1560,
|
|
1561, 1562, 1564, 1565, 1566, 1567, 1568, 1572,
|
|
1582, 1591, 1601, 1602, 1603, 1605, 1614, 1615,
|
|
1616, 1617, 1619, 1621, 1624, 1627, 1631, 1633,
|
|
1634, 1636, 1637, 1640
|
|
};
|
|
|
|
static final char[] _hpricot_scan_trans_keys = {
|
|
45, 68, 91, 45, 79, 67, 84, 89,
|
|
80, 69, 32, 9, 13, 32, 58, 95,
|
|
9, 13, 65, 90, 97, 122, 32, 62,
|
|
63, 91, 95, 9, 13, 45, 46, 48,
|
|
58, 65, 90, 97, 122, 32, 62, 80,
|
|
83, 91, 9, 13, 85, 66, 76, 73,
|
|
67, 32, 9, 13, 32, 34, 39, 9,
|
|
13, 9, 34, 61, 95, 32, 37, 39,
|
|
59, 63, 90, 97, 122, 9, 34, 61,
|
|
95, 32, 37, 39, 59, 63, 90, 97,
|
|
122, 32, 62, 91, 9, 13, 32, 34,
|
|
39, 62, 91, 9, 13, 34, 34, 32,
|
|
62, 91, 9, 13, 93, 32, 62, 9,
|
|
13, 39, 39, 9, 39, 61, 95, 32,
|
|
33, 35, 37, 40, 59, 63, 90, 97,
|
|
122, 9, 39, 61, 95, 32, 33, 35,
|
|
37, 40, 59, 63, 90, 97, 122, 9,
|
|
32, 33, 39, 62, 91, 95, 10, 13,
|
|
35, 37, 40, 59, 61, 90, 97, 122,
|
|
9, 32, 34, 39, 62, 91, 95, 10,
|
|
13, 33, 37, 40, 59, 61, 90, 97,
|
|
122, 9, 32, 33, 39, 62, 91, 95,
|
|
10, 13, 35, 37, 40, 59, 61, 90,
|
|
97, 122, 9, 32, 34, 39, 62, 91,
|
|
95, 10, 13, 33, 37, 40, 59, 61,
|
|
90, 97, 122, 32, 34, 39, 62, 91,
|
|
9, 13, 34, 39, 34, 39, 32, 39,
|
|
62, 91, 9, 13, 39, 93, 32, 62,
|
|
93, 9, 13, 32, 39, 62, 9, 13,
|
|
32, 34, 62, 91, 9, 13, 34, 93,
|
|
32, 34, 62, 9, 13, 32, 39, 62,
|
|
91, 9, 13, 9, 39, 61, 95, 32,
|
|
33, 35, 37, 40, 59, 63, 90, 97,
|
|
122, 89, 83, 84, 69, 77, 67, 68,
|
|
65, 84, 65, 91, 58, 95, 65, 90,
|
|
97, 122, 32, 62, 63, 95, 9, 13,
|
|
45, 46, 48, 58, 65, 90, 97, 122,
|
|
32, 62, 9, 13, 32, 47, 62, 63,
|
|
95, 9, 13, 45, 58, 65, 90, 97,
|
|
122, 32, 47, 62, 63, 95, 9, 13,
|
|
45, 58, 65, 90, 97, 122, 32, 47,
|
|
61, 62, 63, 95, 9, 13, 45, 58,
|
|
65, 90, 97, 122, 32, 47, 61, 62,
|
|
63, 95, 9, 13, 45, 58, 65, 90,
|
|
97, 122, 62, 13, 32, 34, 39, 47,
|
|
60, 62, 9, 10, 11, 12, 13, 32,
|
|
47, 60, 62, 9, 10, 11, 12, 32,
|
|
47, 62, 63, 95, 9, 13, 45, 58,
|
|
65, 90, 97, 122, 13, 32, 47, 60,
|
|
62, 63, 95, 9, 10, 11, 12, 45,
|
|
58, 65, 90, 97, 122, 13, 32, 47,
|
|
60, 61, 62, 63, 95, 9, 10, 11,
|
|
12, 45, 58, 65, 90, 97, 122, 13,
|
|
32, 47, 60, 61, 62, 63, 95, 9,
|
|
10, 11, 12, 45, 58, 65, 90, 97,
|
|
122, 13, 32, 47, 60, 62, 9, 10,
|
|
11, 12, 13, 32, 34, 39, 47, 60,
|
|
62, 9, 10, 11, 12, 13, 32, 34,
|
|
39, 47, 60, 62, 63, 95, 9, 10,
|
|
11, 12, 45, 58, 65, 90, 97, 122,
|
|
13, 32, 34, 39, 47, 60, 62, 63,
|
|
95, 9, 10, 11, 12, 45, 58, 65,
|
|
90, 97, 122, 13, 32, 34, 47, 60,
|
|
62, 92, 9, 10, 11, 12, 13, 32,
|
|
34, 47, 60, 62, 92, 9, 10, 11,
|
|
12, 32, 34, 47, 62, 63, 92, 95,
|
|
9, 13, 45, 58, 65, 90, 97, 122,
|
|
34, 92, 34, 92, 32, 34, 47, 61,
|
|
62, 63, 92, 95, 9, 13, 45, 58,
|
|
65, 90, 97, 122, 32, 34, 47, 61,
|
|
62, 63, 92, 95, 9, 13, 45, 58,
|
|
65, 90, 97, 122, 34, 62, 92, 13,
|
|
32, 34, 39, 47, 60, 62, 92, 9,
|
|
10, 11, 12, 13, 32, 34, 39, 47,
|
|
60, 62, 92, 9, 10, 11, 12, 13,
|
|
32, 34, 39, 47, 60, 62, 63, 92,
|
|
95, 9, 10, 11, 12, 45, 58, 65,
|
|
90, 97, 122, 13, 32, 34, 39, 47,
|
|
60, 62, 63, 92, 95, 9, 10, 11,
|
|
12, 45, 58, 65, 90, 97, 122, 13,
|
|
32, 34, 47, 60, 62, 63, 92, 95,
|
|
9, 10, 11, 12, 45, 58, 65, 90,
|
|
97, 122, 13, 32, 34, 47, 60, 62,
|
|
63, 92, 95, 9, 10, 11, 12, 45,
|
|
58, 65, 90, 97, 122, 13, 32, 34,
|
|
47, 60, 61, 62, 63, 92, 95, 9,
|
|
10, 11, 12, 45, 58, 65, 90, 97,
|
|
122, 13, 32, 34, 47, 60, 61, 62,
|
|
63, 92, 95, 9, 10, 11, 12, 45,
|
|
58, 65, 90, 97, 122, 13, 32, 34,
|
|
47, 60, 62, 92, 9, 10, 11, 12,
|
|
13, 32, 34, 47, 60, 62, 92, 9,
|
|
10, 11, 12, 13, 32, 34, 39, 47,
|
|
60, 62, 92, 9, 10, 11, 12, 13,
|
|
32, 34, 39, 47, 60, 62, 92, 9,
|
|
10, 11, 12, 13, 32, 34, 39, 47,
|
|
60, 62, 92, 9, 10, 11, 12, 32,
|
|
34, 39, 47, 62, 63, 92, 95, 9,
|
|
13, 45, 58, 65, 90, 97, 122, 34,
|
|
39, 92, 32, 39, 47, 62, 63, 92,
|
|
95, 9, 13, 45, 58, 65, 90, 97,
|
|
122, 39, 92, 39, 92, 32, 39, 47,
|
|
61, 62, 63, 92, 95, 9, 13, 45,
|
|
58, 65, 90, 97, 122, 32, 39, 47,
|
|
61, 62, 63, 92, 95, 9, 13, 45,
|
|
58, 65, 90, 97, 122, 39, 62, 92,
|
|
13, 32, 34, 39, 47, 60, 62, 92,
|
|
9, 10, 11, 12, 13, 32, 39, 47,
|
|
60, 62, 92, 9, 10, 11, 12, 13,
|
|
32, 39, 47, 60, 62, 63, 92, 95,
|
|
9, 10, 11, 12, 45, 58, 65, 90,
|
|
97, 122, 13, 32, 39, 47, 60, 61,
|
|
62, 63, 92, 95, 9, 10, 11, 12,
|
|
45, 58, 65, 90, 97, 122, 13, 32,
|
|
39, 47, 60, 61, 62, 63, 92, 95,
|
|
9, 10, 11, 12, 45, 58, 65, 90,
|
|
97, 122, 13, 32, 39, 47, 60, 62,
|
|
92, 9, 10, 11, 12, 13, 32, 39,
|
|
47, 60, 62, 92, 9, 10, 11, 12,
|
|
13, 32, 34, 39, 47, 60, 62, 92,
|
|
9, 10, 11, 12, 13, 32, 34, 39,
|
|
47, 60, 62, 63, 92, 95, 9, 10,
|
|
11, 12, 45, 58, 65, 90, 97, 122,
|
|
13, 32, 34, 39, 47, 60, 62, 63,
|
|
92, 95, 9, 10, 11, 12, 45, 58,
|
|
65, 90, 97, 122, 13, 32, 34, 39,
|
|
47, 60, 62, 92, 9, 10, 11, 12,
|
|
13, 32, 34, 39, 47, 60, 62, 63,
|
|
92, 95, 9, 10, 11, 12, 45, 58,
|
|
65, 90, 97, 122, 13, 32, 34, 39,
|
|
47, 60, 61, 62, 63, 92, 95, 9,
|
|
10, 11, 12, 45, 58, 65, 90, 97,
|
|
122, 32, 34, 39, 47, 61, 62, 63,
|
|
92, 95, 9, 13, 45, 58, 65, 90,
|
|
97, 122, 32, 34, 39, 47, 61, 62,
|
|
63, 92, 95, 9, 13, 45, 58, 65,
|
|
90, 97, 122, 34, 39, 62, 92, 34,
|
|
39, 92, 13, 32, 34, 39, 47, 60,
|
|
62, 92, 9, 10, 11, 12, 13, 32,
|
|
34, 39, 47, 60, 62, 92, 9, 10,
|
|
11, 12, 13, 32, 34, 39, 47, 60,
|
|
62, 63, 92, 95, 9, 10, 11, 12,
|
|
45, 58, 65, 90, 97, 122, 13, 32,
|
|
34, 39, 47, 60, 62, 63, 92, 95,
|
|
9, 10, 11, 12, 45, 58, 65, 90,
|
|
97, 122, 13, 32, 34, 39, 47, 60,
|
|
62, 63, 92, 95, 9, 10, 11, 12,
|
|
45, 58, 65, 90, 97, 122, 13, 32,
|
|
34, 39, 47, 60, 62, 92, 9, 10,
|
|
11, 12, 13, 32, 34, 39, 47, 60,
|
|
62, 92, 9, 10, 11, 12, 13, 32,
|
|
34, 39, 47, 60, 62, 63, 92, 95,
|
|
9, 10, 11, 12, 45, 58, 65, 90,
|
|
97, 122, 32, 34, 39, 47, 62, 63,
|
|
92, 95, 9, 13, 45, 58, 65, 90,
|
|
97, 122, 32, 34, 39, 47, 62, 63,
|
|
92, 95, 9, 13, 45, 58, 65, 90,
|
|
97, 122, 13, 32, 34, 39, 47, 60,
|
|
62, 92, 9, 10, 11, 12, 13, 32,
|
|
34, 39, 47, 60, 61, 62, 63, 92,
|
|
95, 9, 10, 11, 12, 45, 58, 65,
|
|
90, 97, 122, 13, 32, 39, 47, 60,
|
|
62, 63, 92, 95, 9, 10, 11, 12,
|
|
45, 58, 65, 90, 97, 122, 34, 39,
|
|
92, 32, 39, 47, 62, 63, 92, 95,
|
|
9, 13, 45, 58, 65, 90, 97, 122,
|
|
13, 32, 34, 39, 47, 60, 62, 92,
|
|
9, 10, 11, 12, 32, 34, 47, 62,
|
|
63, 92, 95, 9, 13, 45, 58, 65,
|
|
90, 97, 122, 34, 39, 92, 13, 32,
|
|
39, 47, 60, 62, 92, 9, 10, 11,
|
|
12, 34, 92, 39, 92, 13, 32, 34,
|
|
39, 47, 60, 62, 9, 10, 11, 12,
|
|
58, 95, 120, 65, 90, 97, 122, 32,
|
|
63, 95, 9, 13, 45, 46, 48, 58,
|
|
65, 90, 97, 122, 32, 63, 95, 109,
|
|
9, 13, 45, 46, 48, 58, 65, 90,
|
|
97, 122, 32, 63, 95, 108, 9, 13,
|
|
45, 46, 48, 58, 65, 90, 97, 122,
|
|
32, 63, 95, 9, 13, 45, 46, 48,
|
|
58, 65, 90, 97, 122, 101, 114, 115,
|
|
105, 111, 110, 32, 61, 9, 13, 32,
|
|
34, 39, 9, 13, 95, 45, 46, 48,
|
|
58, 65, 90, 97, 122, 34, 95, 45,
|
|
46, 48, 58, 65, 90, 97, 122, 32,
|
|
62, 63, 9, 13, 32, 62, 63, 101,
|
|
115, 9, 13, 62, 110, 99, 111, 100,
|
|
105, 110, 103, 32, 61, 9, 13, 32,
|
|
34, 39, 9, 13, 65, 90, 97, 122,
|
|
34, 95, 45, 46, 48, 57, 65, 90,
|
|
97, 122, 32, 62, 63, 9, 13, 32,
|
|
62, 63, 115, 9, 13, 116, 97, 110,
|
|
100, 97, 108, 111, 110, 101, 32, 61,
|
|
9, 13, 32, 34, 39, 9, 13, 110,
|
|
121, 111, 34, 32, 62, 63, 9, 13,
|
|
101, 115, 110, 121, 111, 39, 101, 115,
|
|
65, 90, 97, 122, 39, 95, 45, 46,
|
|
48, 57, 65, 90, 97, 122, 95, 45,
|
|
46, 48, 58, 65, 90, 97, 122, 39,
|
|
95, 45, 46, 48, 58, 65, 90, 97,
|
|
122, 62, 62, 10, 60, 33, 47, 58,
|
|
63, 95, 65, 90, 97, 122, 39, 93,
|
|
34, 34, 92, 39, 92, 34, 39, 92,
|
|
32, 9, 13, 32, 118, 9, 13, 10,
|
|
45, 45, 10, 93, 93, 10, 62, 63,
|
|
62, 0
|
|
};
|
|
|
|
/*
 * Indexed by the current state (cs): number of single-character keys that state owns in
 * _hpricot_scan_trans_keys. hpricot_scan binary-searches exactly this many keys, starting at
 * _hpricot_scan_key_offsets[cs]; when none matches it skips past them (both the key cursor and
 * the transition index advance by this count).
 */
static final byte[] _hpricot_scan_single_lengths = {
|
|
3, 1, 1, 1, 1, 1, 1, 1,
|
|
1, 3, 5, 5, 1, 1, 1, 1,
|
|
1, 1, 3, 4, 4, 3, 5, 1,
|
|
1, 3, 1, 2, 1, 1, 4, 4,
|
|
7, 7, 7, 7, 5, 2, 2, 4,
|
|
2, 3, 3, 4, 2, 3, 4, 4,
|
|
1, 1, 1, 1, 1, 1, 1, 1,
|
|
1, 1, 1, 2, 4, 2, 5, 5,
|
|
6, 6, 1, 7, 5, 5, 7, 8,
|
|
8, 5, 7, 9, 9, 7, 7, 7,
|
|
2, 2, 8, 8, 3, 8, 8, 10,
|
|
10, 9, 9, 10, 10, 7, 7, 8,
|
|
8, 8, 8, 3, 7, 2, 2, 8,
|
|
8, 3, 8, 7, 9, 10, 10, 7,
|
|
7, 8, 10, 10, 8, 10, 11, 9,
|
|
9, 4, 3, 8, 8, 10, 10, 10,
|
|
8, 8, 10, 8, 8, 8, 11, 9,
|
|
3, 7, 8, 7, 3, 7, 2, 2,
|
|
7, 3, 3, 4, 4, 3, 1, 1,
|
|
1, 1, 1, 1, 2, 3, 1, 2,
|
|
3, 5, 1, 1, 1, 1, 1, 1,
|
|
1, 1, 2, 3, 0, 2, 3, 4,
|
|
1, 1, 1, 1, 1, 1, 1, 1,
|
|
1, 2, 3, 2, 1, 1, 3, 1,
|
|
1, 2, 1, 1, 1, 1, 0, 2,
|
|
1, 2, 1, 1, 2, 5, 1, 1,
|
|
1, 2, 2, 3, 1, 2, 2, 1,
|
|
2, 1, 3, 1
|
|
};
|
|
|
|
/*
 * Indexed by the current state (cs). hpricot_scan reads this value after the single-key search
 * for the state has failed and only continues with it when it is greater than zero.
 * NOTE(review): presumably the number of [low, high] key ranges stored after the state's single
 * keys in _hpricot_scan_trans_keys - the code consuming it lies outside the visible part of the
 * file, confirm there.
 */
static final byte[] _hpricot_scan_range_lengths = {
|
|
0, 0, 0, 0, 0, 0, 0, 0,
|
|
1, 3, 5, 1, 0, 0, 0, 0,
|
|
0, 1, 1, 4, 4, 1, 1, 0,
|
|
0, 1, 0, 1, 0, 0, 5, 5,
|
|
5, 5, 5, 5, 1, 0, 0, 1,
|
|
0, 1, 1, 1, 0, 1, 1, 5,
|
|
0, 0, 0, 0, 0, 0, 0, 0,
|
|
0, 0, 0, 2, 5, 1, 4, 4,
|
|
4, 4, 0, 2, 2, 4, 5, 5,
|
|
5, 2, 2, 5, 5, 2, 2, 4,
|
|
0, 0, 4, 4, 0, 2, 2, 5,
|
|
5, 5, 5, 5, 5, 2, 2, 2,
|
|
2, 2, 4, 0, 4, 0, 0, 4,
|
|
4, 0, 2, 2, 5, 5, 5, 2,
|
|
2, 2, 5, 5, 2, 5, 5, 4,
|
|
4, 0, 0, 2, 2, 5, 5, 5,
|
|
2, 2, 5, 4, 4, 2, 5, 5,
|
|
0, 4, 2, 4, 0, 2, 0, 0,
|
|
2, 2, 5, 5, 5, 5, 0, 0,
|
|
0, 0, 0, 0, 1, 1, 4, 4,
|
|
1, 1, 0, 0, 0, 0, 0, 0,
|
|
0, 0, 1, 1, 2, 4, 1, 1,
|
|
0, 0, 0, 0, 0, 0, 0, 0,
|
|
0, 1, 1, 0, 0, 0, 1, 0,
|
|
0, 0, 0, 0, 0, 0, 2, 4,
|
|
4, 4, 0, 0, 0, 2, 0, 0,
|
|
0, 0, 0, 0, 1, 1, 0, 0,
|
|
0, 0, 0, 0
|
|
};
|
|
|
|
/*
 * Indexed by the current state (cs): base transition index for that state. When a single key
 * matches, hpricot_scan adds the matched key's position within the state's key list to this
 * base to obtain the transition index.
 * NOTE(review): the table that the resulting index selects from is read outside the visible
 * part of the file (presumably _hpricot_scan_indicies) - confirm there.
 */
static final short[] _hpricot_scan_index_offsets = {
|
|
0, 4, 6, 8, 10, 12, 14, 16,
|
|
18, 21, 28, 39, 46, 48, 50, 52,
|
|
54, 56, 59, 64, 73, 82, 87, 94,
|
|
96, 98, 103, 105, 109, 111, 113, 123,
|
|
133, 146, 159, 172, 185, 192, 195, 198,
|
|
204, 207, 212, 217, 223, 226, 231, 237,
|
|
247, 249, 251, 253, 255, 257, 259, 261,
|
|
263, 265, 267, 269, 274, 284, 288, 298,
|
|
308, 319, 330, 332, 342, 350, 360, 373,
|
|
387, 401, 409, 419, 434, 449, 459, 469,
|
|
481, 484, 487, 500, 513, 517, 528, 539,
|
|
555, 571, 586, 601, 617, 633, 643, 653,
|
|
664, 675, 686, 699, 703, 715, 718, 721,
|
|
734, 747, 751, 762, 772, 787, 803, 819,
|
|
829, 839, 850, 866, 882, 893, 909, 926,
|
|
940, 954, 959, 963, 974, 985, 1001, 1017,
|
|
1033, 1044, 1055, 1071, 1084, 1097, 1108, 1125,
|
|
1140, 1144, 1156, 1167, 1179, 1183, 1193, 1196,
|
|
1199, 1209, 1215, 1224, 1234, 1244, 1253, 1255,
|
|
1257, 1259, 1261, 1263, 1265, 1269, 1274, 1280,
|
|
1287, 1292, 1299, 1301, 1303, 1305, 1307, 1309,
|
|
1311, 1313, 1315, 1319, 1324, 1327, 1334, 1339,
|
|
1345, 1347, 1349, 1351, 1353, 1355, 1357, 1359,
|
|
1361, 1363, 1367, 1372, 1375, 1377, 1379, 1384,
|
|
1386, 1388, 1391, 1393, 1395, 1397, 1399, 1402,
|
|
1409, 1415, 1422, 1424, 1426, 1429, 1437, 1439,
|
|
1441, 1443, 1446, 1449, 1453, 1456, 1460, 1463,
|
|
1465, 1468, 1470, 1474
|
|
};
|
|
|
|
static final short[] _hpricot_scan_indicies = {
|
|
335, 336, 337, 296, 356, 296, 349, 296,
|
|
399, 296, 401, 296, 354, 296, 350, 296,
|
|
400, 296, 308, 308, 296, 308, 309, 309,
|
|
308, 309, 309, 296, 328, 330, 329, 331,
|
|
329, 328, 329, 329, 329, 329, 296, 310,
|
|
302, 311, 312, 0, 310, 296, 353, 296,
|
|
342, 296, 347, 296, 346, 296, 343, 296,
|
|
304, 304, 296, 304, 305, 306, 304, 296,
|
|
321, 320, 321, 321, 321, 321, 321, 321,
|
|
296, 319, 320, 319, 319, 319, 319, 319,
|
|
319, 296, 298, 302, 0, 298, 296, 298,
|
|
300, 307, 302, 0, 298, 296, 6, 222,
|
|
6, 13, 358, 302, 0, 358, 69, 1,
|
|
0, 1, 302, 1, 69, 6, 182, 6,
|
|
5, 322, 323, 322, 322, 322, 322, 322,
|
|
322, 322, 296, 299, 303, 299, 299, 299,
|
|
299, 299, 299, 299, 296, 297, 297, 299,
|
|
303, 302, 0, 299, 298, 299, 299, 299,
|
|
299, 296, 297, 297, 300, 301, 302, 0,
|
|
299, 298, 299, 299, 299, 299, 296, 186,
|
|
186, 188, 42, 184, 185, 188, 187, 188,
|
|
188, 188, 188, 182, 43, 43, 38, 44,
|
|
40, 34, 41, 37, 41, 41, 41, 41,
|
|
5, 37, 38, 39, 40, 34, 37, 5,
|
|
63, 224, 223, 63, 64, 62, 371, 6,
|
|
40, 34, 371, 5, 35, 36, 34, 26,
|
|
27, 1, 26, 0, 36, 6, 40, 36,
|
|
5, 60, 6, 61, 58, 60, 13, 35,
|
|
59, 58, 59, 6, 61, 59, 13, 183,
|
|
6, 184, 185, 183, 182, 41, 42, 41,
|
|
41, 41, 41, 41, 41, 41, 5, 403,
|
|
296, 351, 296, 352, 296, 345, 296, 348,
|
|
296, 398, 296, 344, 296, 341, 296, 402,
|
|
296, 397, 296, 355, 296, 338, 338, 338,
|
|
338, 296, 332, 334, 333, 333, 332, 333,
|
|
333, 333, 333, 296, 313, 314, 313, 296,
|
|
324, 326, 327, 325, 325, 324, 325, 325,
|
|
325, 296, 315, 317, 318, 316, 316, 315,
|
|
316, 316, 316, 296, 364, 366, 367, 368,
|
|
365, 365, 364, 365, 365, 365, 69, 359,
|
|
361, 362, 162, 360, 360, 359, 360, 360,
|
|
360, 69, 369, 69, 157, 157, 159, 160,
|
|
161, 69, 162, 157, 158, 156, 66, 66,
|
|
68, 69, 70, 66, 67, 65, 363, 361,
|
|
162, 360, 360, 363, 360, 360, 360, 69,
|
|
66, 66, 74, 69, 76, 73, 73, 66,
|
|
67, 73, 73, 73, 65, 132, 132, 135,
|
|
69, 136, 137, 134, 134, 132, 133, 134,
|
|
134, 134, 65, 71, 71, 74, 69, 75,
|
|
76, 73, 73, 71, 72, 73, 73, 73,
|
|
65, 66, 66, 68, 69, 70, 66, 67,
|
|
65, 226, 226, 228, 229, 230, 69, 70,
|
|
226, 227, 156, 163, 163, 159, 160, 161,
|
|
69, 162, 165, 165, 163, 164, 165, 165,
|
|
165, 156, 226, 226, 228, 229, 231, 69,
|
|
76, 165, 165, 226, 227, 165, 165, 165,
|
|
156, 248, 248, 84, 246, 199, 250, 195,
|
|
248, 249, 189, 92, 92, 84, 95, 7,
|
|
96, 97, 92, 93, 91, 372, 3, 48,
|
|
50, 47, 8, 47, 372, 47, 47, 47,
|
|
7, 3, 8, 7, 11, 8, 7, 122,
|
|
3, 124, 125, 126, 123, 8, 123, 122,
|
|
123, 123, 123, 7, 46, 3, 48, 49,
|
|
50, 47, 8, 47, 46, 47, 47, 47,
|
|
7, 3, 45, 8, 7, 190, 190, 192,
|
|
193, 194, 7, 50, 195, 190, 191, 189,
|
|
196, 196, 192, 193, 194, 7, 50, 195,
|
|
196, 197, 189, 196, 196, 192, 193, 194,
|
|
7, 50, 198, 195, 198, 196, 197, 198,
|
|
198, 198, 189, 242, 242, 244, 245, 247,
|
|
7, 103, 198, 195, 198, 242, 243, 198,
|
|
198, 198, 189, 248, 248, 84, 247, 199,
|
|
251, 198, 195, 198, 248, 249, 198, 198,
|
|
198, 189, 92, 92, 84, 101, 7, 103,
|
|
100, 97, 100, 92, 93, 100, 100, 100,
|
|
91, 144, 144, 84, 147, 7, 148, 149,
|
|
146, 97, 146, 144, 145, 146, 146, 146,
|
|
91, 98, 98, 84, 101, 7, 102, 103,
|
|
100, 97, 100, 98, 99, 100, 100, 100,
|
|
91, 92, 92, 84, 95, 7, 96, 97,
|
|
92, 93, 91, 92, 92, 94, 95, 7,
|
|
96, 97, 92, 93, 91, 242, 242, 244,
|
|
245, 246, 7, 96, 195, 242, 243, 189,
|
|
258, 258, 263, 94, 256, 215, 261, 211,
|
|
258, 259, 205, 105, 105, 80, 94, 108,
|
|
9, 109, 110, 105, 106, 104, 373, 10,
|
|
11, 55, 57, 54, 12, 54, 373, 54,
|
|
54, 54, 9, 10, 11, 12, 9, 370,
|
|
3, 31, 33, 30, 4, 30, 370, 30,
|
|
30, 30, 2, 3, 4, 2, 10, 4,
|
|
2, 117, 3, 119, 120, 121, 118, 4,
|
|
118, 117, 118, 118, 118, 2, 29, 3,
|
|
31, 32, 33, 30, 4, 30, 29, 30,
|
|
30, 30, 2, 3, 28, 4, 2, 167,
|
|
167, 169, 170, 171, 2, 33, 172, 167,
|
|
168, 166, 78, 78, 84, 81, 2, 82,
|
|
83, 78, 79, 77, 78, 78, 84, 88,
|
|
2, 90, 87, 83, 87, 78, 79, 87,
|
|
87, 87, 77, 138, 138, 84, 141, 2,
|
|
142, 143, 140, 83, 140, 138, 139, 140,
|
|
140, 140, 77, 85, 85, 84, 88, 2,
|
|
89, 90, 87, 83, 87, 85, 86, 87,
|
|
87, 87, 77, 78, 78, 84, 81, 2,
|
|
82, 83, 78, 79, 77, 78, 78, 80,
|
|
81, 2, 82, 83, 78, 79, 77, 232,
|
|
232, 234, 235, 236, 2, 82, 172, 232,
|
|
233, 166, 173, 173, 169, 170, 171, 2,
|
|
33, 175, 172, 175, 173, 174, 175, 175,
|
|
175, 166, 232, 232, 234, 235, 237, 2,
|
|
90, 175, 172, 175, 232, 233, 175, 175,
|
|
175, 166, 258, 258, 80, 260, 256, 215,
|
|
261, 211, 258, 259, 205, 105, 105, 80,
|
|
94, 114, 9, 116, 113, 110, 113, 105,
|
|
106, 113, 113, 113, 104, 150, 150, 80,
|
|
94, 153, 9, 154, 155, 152, 110, 152,
|
|
150, 151, 152, 152, 152, 104, 53, 10,
|
|
11, 55, 56, 57, 54, 12, 54, 53,
|
|
54, 54, 54, 9, 127, 10, 11, 129,
|
|
130, 131, 128, 12, 128, 127, 128, 128,
|
|
128, 9, 10, 11, 52, 12, 9, 51,
|
|
51, 12, 9, 206, 206, 208, 209, 210,
|
|
9, 57, 211, 206, 207, 205, 212, 212,
|
|
208, 209, 210, 9, 57, 211, 212, 213,
|
|
205, 212, 212, 208, 209, 210, 9, 57,
|
|
214, 211, 214, 212, 213, 214, 214, 214,
|
|
205, 252, 252, 254, 255, 257, 9, 116,
|
|
214, 211, 214, 252, 253, 214, 214, 214,
|
|
205, 258, 258, 80, 260, 257, 215, 262,
|
|
214, 211, 214, 258, 259, 214, 214, 214,
|
|
205, 105, 105, 80, 94, 108, 9, 109,
|
|
110, 105, 106, 104, 105, 105, 107, 107,
|
|
108, 9, 109, 110, 105, 106, 104, 258,
|
|
258, 263, 94, 257, 215, 262, 214, 211,
|
|
214, 258, 259, 214, 214, 214, 205, 218,
|
|
10, 216, 220, 221, 219, 217, 219, 218,
|
|
219, 219, 219, 215, 218, 225, 11, 220,
|
|
221, 219, 217, 219, 218, 219, 219, 219,
|
|
215, 252, 252, 254, 255, 256, 9, 109,
|
|
211, 252, 253, 205, 111, 111, 80, 94,
|
|
114, 9, 115, 116, 113, 110, 113, 111,
|
|
112, 113, 113, 113, 104, 238, 238, 84,
|
|
237, 176, 241, 175, 172, 175, 238, 239,
|
|
175, 175, 175, 166, 10, 216, 217, 215,
|
|
178, 3, 180, 181, 179, 177, 179, 178,
|
|
179, 179, 179, 176, 173, 173, 169, 170,
|
|
171, 2, 33, 172, 173, 174, 166, 201,
|
|
3, 203, 204, 202, 200, 202, 201, 202,
|
|
202, 202, 199, 225, 11, 217, 215, 238,
|
|
238, 84, 236, 176, 240, 172, 238, 239,
|
|
166, 3, 200, 199, 3, 177, 176, 163,
|
|
163, 159, 160, 161, 69, 162, 163, 164,
|
|
156, 339, 339, 340, 339, 339, 296, 15,
|
|
357, 357, 15, 357, 357, 357, 357, 296,
|
|
15, 357, 357, 408, 15, 357, 357, 357,
|
|
357, 296, 15, 357, 357, 404, 15, 357,
|
|
357, 357, 357, 296, 16, 357, 357, 16,
|
|
357, 357, 357, 357, 296, 287, 264, 294,
|
|
264, 396, 264, 387, 264, 393, 264, 268,
|
|
264, 268, 265, 268, 264, 265, 266, 267,
|
|
265, 264, 282, 282, 282, 282, 282, 264,
|
|
275, 276, 276, 276, 276, 276, 264, 269,
|
|
270, 271, 269, 264, 269, 270, 271, 272,
|
|
273, 269, 264, 270, 264, 388, 264, 285,
|
|
264, 394, 264, 385, 264, 289, 264, 390,
|
|
264, 288, 264, 288, 374, 288, 264, 374,
|
|
375, 376, 374, 264, 283, 283, 264, 277,
|
|
278, 278, 278, 278, 278, 264, 274, 270,
|
|
271, 274, 264, 274, 270, 271, 273, 274,
|
|
264, 295, 264, 384, 264, 389, 264, 286,
|
|
264, 284, 264, 290, 264, 395, 264, 391,
|
|
264, 380, 264, 380, 377, 380, 264, 377,
|
|
378, 379, 377, 264, 291, 292, 264, 293,
|
|
264, 279, 264, 381, 270, 271, 381, 264,
|
|
386, 264, 293, 264, 405, 406, 264, 392,
|
|
264, 279, 264, 407, 264, 392, 264, 383,
|
|
383, 264, 277, 281, 281, 281, 281, 281,
|
|
264, 382, 382, 382, 382, 382, 264, 275,
|
|
280, 280, 280, 280, 280, 264, 415, 414,
|
|
422, 421, 24, 25, 23, 19, 20, 21,
|
|
22, 21, 21, 21, 18, 6, 5, 1,
|
|
0, 6, 13, 3, 8, 7, 3, 4,
|
|
2, 10, 11, 12, 9, 15, 15, 14,
|
|
16, 17, 16, 14, 412, 413, 411, 410,
|
|
409, 419, 420, 418, 417, 416, 426, 424,
|
|
427, 425, 424, 423, 0
|
|
};
|
|
|
|
static final short[] _hpricot_scan_trans_targs_wi = {
|
|
26, 27, 101, 69, 102, 29, 25, 80,
|
|
81, 99, 100, 79, 122, 24, 204, 212,
|
|
213, 150, 204, 0, 59, 62, 145, 204,
|
|
204, 205, 41, 207, 210, 104, 103, 105,
|
|
106, 210, 40, 41, 42, 36, 37, 46,
|
|
206, 47, 32, 35, 34, 209, 83, 82,
|
|
84, 85, 209, 98, 211, 119, 120, 121,
|
|
123, 211, 44, 45, 43, 208, 38, 39,
|
|
43, 68, 69, 70, 73, 204, 204, 65,
|
|
72, 71, 73, 74, 204, 107, 100, 108,
|
|
108, 111, 210, 112, 70, 104, 110, 109,
|
|
111, 113, 210, 78, 79, 90, 90, 93,
|
|
209, 94, 83, 92, 91, 93, 95, 209,
|
|
97, 98, 117, 117, 128, 211, 129, 119,
|
|
134, 118, 128, 133, 211, 104, 103, 105,
|
|
106, 210, 83, 82, 84, 85, 209, 119,
|
|
120, 121, 123, 211, 65, 72, 71, 73,
|
|
74, 204, 104, 110, 109, 111, 113, 210,
|
|
83, 92, 91, 93, 95, 209, 119, 134,
|
|
118, 128, 133, 211, 68, 144, 74, 142,
|
|
143, 73, 204, 75, 76, 71, 107, 138,
|
|
113, 136, 137, 111, 112, 114, 115, 109,
|
|
101, 102, 100, 103, 105, 210, 29, 39,
|
|
206, 40, 35, 36, 47, 78, 86, 95,
|
|
139, 140, 93, 94, 87, 88, 91, 80,
|
|
81, 79, 82, 84, 209, 97, 124, 133,
|
|
131, 132, 128, 129, 125, 126, 118, 99,
|
|
79, 122, 98, 120, 121, 211, 24, 38,
|
|
43, 100, 75, 76, 77, 141, 73, 73,
|
|
114, 115, 116, 135, 111, 111, 100, 108,
|
|
210, 210, 87, 88, 89, 96, 93, 93,
|
|
79, 90, 209, 209, 125, 126, 127, 130,
|
|
128, 128, 98, 117, 90, 211, 211, 108,
|
|
204, 157, 158, 200, 156, 161, 204, 162,
|
|
163, 176, 175, 160, 159, 174, 173, 190,
|
|
201, 199, 159, 173, 181, 165, 180, 151,
|
|
170, 168, 182, 188, 191, 189, 152, 177,
|
|
204, 33, 22, 31, 23, 34, 204, 32,
|
|
18, 19, 30, 28, 9, 10, 11, 12,
|
|
48, 61, 204, 63, 64, 66, 204, 20,
|
|
21, 20, 31, 32, 63, 62, 66, 204,
|
|
11, 10, 204, 26, 61, 60, 204, 1,
|
|
2, 53, 60, 146, 147, 56, 14, 17,
|
|
55, 52, 16, 15, 21, 3, 7, 50,
|
|
51, 13, 6, 204, 204, 146, 25, 65,
|
|
64, 66, 67, 69, 65, 64, 66, 67,
|
|
204, 204, 100, 39, 79, 98, 171, 172,
|
|
198, 186, 187, 193, 185, 190, 201, 199,
|
|
178, 167, 192, 154, 164, 179, 169, 184,
|
|
195, 155, 166, 183, 153, 58, 54, 4,
|
|
8, 5, 57, 49, 149, 194, 196, 197,
|
|
148, 214, 202, 214, 214, 215, 214, 214,
|
|
216, 203, 216, 216, 217, 216, 216, 218,
|
|
218, 218, 218, 219
|
|
};
|
|
|
|
static final short[] _hpricot_scan_trans_actions_wi = {
|
|
0, 0, 0, 7, 0, 0, 21, 0,
|
|
0, 0, 7, 7, 0, 0, 65, 0,
|
|
31, 0, 67, 0, 0, 1, 0, 63,
|
|
132, 178, 0, 144, 147, 0, 174, 23,
|
|
0, 186, 0, 21, 0, 0, 0, 21,
|
|
144, 0, 111, 0, 111, 147, 0, 174,
|
|
23, 0, 186, 7, 147, 0, 174, 23,
|
|
0, 186, 0, 0, 0, 144, 0, 21,
|
|
21, 0, 9, 9, 102, 73, 162, 9,
|
|
9, 174, 117, 0, 170, 0, 9, 9,
|
|
7, 102, 205, 0, 7, 9, 9, 174,
|
|
117, 0, 215, 0, 9, 9, 7, 102,
|
|
205, 0, 9, 9, 174, 117, 0, 215,
|
|
0, 9, 9, 7, 102, 205, 0, 9,
|
|
9, 174, 117, 0, 215, 11, 0, 108,
|
|
11, 210, 11, 0, 108, 11, 210, 11,
|
|
0, 108, 11, 210, 105, 105, 0, 158,
|
|
11, 195, 105, 105, 0, 158, 11, 232,
|
|
105, 105, 0, 158, 11, 232, 105, 105,
|
|
0, 158, 11, 232, 3, 3, 3, 0,
|
|
0, 87, 120, 3, 3, 190, 3, 3,
|
|
3, 0, 7, 87, 3, 3, 3, 190,
|
|
3, 3, 3, 190, 87, 200, 3, 3,
|
|
182, 3, 3, 3, 3, 3, 3, 3,
|
|
7, 0, 87, 3, 3, 3, 190, 3,
|
|
3, 3, 190, 87, 200, 3, 3, 3,
|
|
7, 7, 87, 3, 3, 3, 190, 3,
|
|
75, 3, 3, 190, 87, 200, 3, 3,
|
|
84, 99, 78, 78, 0, 0, 150, 154,
|
|
78, 78, 0, 7, 150, 154, 78, 78,
|
|
220, 226, 78, 78, 7, 0, 150, 154,
|
|
78, 78, 220, 226, 78, 78, 7, 7,
|
|
150, 154, 78, 78, 75, 220, 226, 99,
|
|
69, 0, 0, 0, 0, 0, 49, 0,
|
|
0, 0, 0, 13, 0, 15, 0, 17,
|
|
0, 0, 3, 3, 0, 0, 0, 0,
|
|
0, 0, 0, 3, 3, 0, 0, 0,
|
|
71, 0, 0, 0, 0, 19, 51, 19,
|
|
0, 0, 0, 0, 0, 1, 0, 0,
|
|
0, 0, 55, 0, 114, 0, 53, 0,
|
|
19, 3, 3, 81, 5, 0, 5, 93,
|
|
5, 0, 90, 5, 5, 0, 96, 0,
|
|
0, 0, 1, 25, 25, 0, 0, 0,
|
|
0, 0, 0, 0, 0, 0, 0, 0,
|
|
0, 0, 0, 61, 59, 0, 0, 0,
|
|
174, 23, 0, 0, 11, 0, 108, 11,
|
|
166, 57, 0, 0, 0, 0, 0, 0,
|
|
0, 0, 0, 0, 0, 0, 3, 3,
|
|
0, 0, 0, 0, 0, 0, 0, 0,
|
|
0, 0, 0, 0, 0, 0, 0, 0,
|
|
0, 0, 0, 0, 0, 3, 3, 0,
|
|
0, 35, 0, 33, 123, 31, 37, 135,
|
|
41, 0, 39, 126, 31, 43, 138, 47,
|
|
141, 45, 129, 0
|
|
};
|
|
|
|
/*
 * One entry per scanner state; each value is an offset into _hpricot_scan_actions
 * (0 = the empty action list). Only states 204, 214, 216 and 218 have a non-zero entry (27,
 * which selects the single action id 20).
 * NOTE(review): by its name this is presumably consulted when a state is entered; the code that
 * reads it is outside the visible part of the file - confirm there.
 */
static final short[] _hpricot_scan_to_state_actions = {
|
|
0, 0, 0, 0, 0, 0, 0, 0,
|
|
0, 0, 0, 0, 0, 0, 0, 0,
|
|
0, 0, 0, 0, 0, 0, 0, 0,
|
|
0, 0, 0, 0, 0, 0, 0, 0,
|
|
0, 0, 0, 0, 0, 0, 0, 0,
|
|
0, 0, 0, 0, 0, 0, 0, 0,
|
|
0, 0, 0, 0, 0, 0, 0, 0,
|
|
0, 0, 0, 0, 0, 0, 0, 0,
|
|
0, 0, 0, 0, 0, 0, 0, 0,
|
|
0, 0, 0, 0, 0, 0, 0, 0,
|
|
0, 0, 0, 0, 0, 0, 0, 0,
|
|
0, 0, 0, 0, 0, 0, 0, 0,
|
|
0, 0, 0, 0, 0, 0, 0, 0,
|
|
0, 0, 0, 0, 0, 0, 0, 0,
|
|
0, 0, 0, 0, 0, 0, 0, 0,
|
|
0, 0, 0, 0, 0, 0, 0, 0,
|
|
0, 0, 0, 0, 0, 0, 0, 0,
|
|
0, 0, 0, 0, 0, 0, 0, 0,
|
|
0, 0, 0, 0, 0, 0, 0, 0,
|
|
0, 0, 0, 0, 0, 0, 0, 0,
|
|
0, 0, 0, 0, 0, 0, 0, 0,
|
|
0, 0, 0, 0, 0, 0, 0, 0,
|
|
0, 0, 0, 0, 0, 0, 0, 0,
|
|
0, 0, 0, 0, 0, 0, 0, 0,
|
|
0, 0, 0, 0, 0, 0, 0, 0,
|
|
0, 0, 0, 0, 27, 0, 0, 0,
|
|
0, 0, 0, 0, 0, 0, 27, 0,
|
|
27, 0, 27, 0
|
|
};
|
|
|
|
/*
 * One entry per scanner state; each value is an offset into _hpricot_scan_actions
 * (0 = the empty action list). hpricot_scan runs the selected list for the current state
 * before it looks up a transition for the input character. Only states 204, 214, 216 and 218
 * have a non-zero entry (29, which selects the single action id 21: "tokstart = p").
 */
static final short[] _hpricot_scan_from_state_actions = {
|
|
0, 0, 0, 0, 0, 0, 0, 0,
|
|
0, 0, 0, 0, 0, 0, 0, 0,
|
|
0, 0, 0, 0, 0, 0, 0, 0,
|
|
0, 0, 0, 0, 0, 0, 0, 0,
|
|
0, 0, 0, 0, 0, 0, 0, 0,
|
|
0, 0, 0, 0, 0, 0, 0, 0,
|
|
0, 0, 0, 0, 0, 0, 0, 0,
|
|
0, 0, 0, 0, 0, 0, 0, 0,
|
|
0, 0, 0, 0, 0, 0, 0, 0,
|
|
0, 0, 0, 0, 0, 0, 0, 0,
|
|
0, 0, 0, 0, 0, 0, 0, 0,
|
|
0, 0, 0, 0, 0, 0, 0, 0,
|
|
0, 0, 0, 0, 0, 0, 0, 0,
|
|
0, 0, 0, 0, 0, 0, 0, 0,
|
|
0, 0, 0, 0, 0, 0, 0, 0,
|
|
0, 0, 0, 0, 0, 0, 0, 0,
|
|
0, 0, 0, 0, 0, 0, 0, 0,
|
|
0, 0, 0, 0, 0, 0, 0, 0,
|
|
0, 0, 0, 0, 0, 0, 0, 0,
|
|
0, 0, 0, 0, 0, 0, 0, 0,
|
|
0, 0, 0, 0, 0, 0, 0, 0,
|
|
0, 0, 0, 0, 0, 0, 0, 0,
|
|
0, 0, 0, 0, 0, 0, 0, 0,
|
|
0, 0, 0, 0, 0, 0, 0, 0,
|
|
0, 0, 0, 0, 0, 0, 0, 0,
|
|
0, 0, 0, 0, 29, 0, 0, 0,
|
|
0, 0, 0, 0, 0, 0, 29, 0,
|
|
29, 0, 29, 0
|
|
};
|
|
|
|
/** State the scanner starts in; hpricot_scan assigns it to {@code cs} before scanning. */
static final int hpricot_scan_start = 204;
|
|
|
|
/** Error-state constant. NOTE(review): not referenced in the visible part of this file - confirm usage. */
static final int hpricot_scan_error = -1;
|
|
|
|
|
|
/** Default scan buffer size in chars; used unless the receiver has a non-nil {@code @buffer_size}. */
public final static int BUFSIZE=16384;
|
|
|
|
private void rb_yield_tokens(IRubyObject sym, IRubyObject tag, IRubyObject attr, IRubyObject raw, boolean taint) {
|
|
IRubyObject ary;
|
|
if (sym == runtime.newSymbol("text")) {
|
|
raw = tag;
|
|
}
|
|
ary = runtime.newArray(new IRubyObject[]{sym, tag, attr, raw});
|
|
if (taint) {
|
|
ary.setTaint(true);
|
|
tag.setTaint(true);
|
|
attr.setTaint(true);
|
|
raw.setTaint(true);
|
|
}
|
|
block.yield(runtime.getCurrentContext(), ary, null, null, false);
|
|
}
|
|
|
|
|
|
/*
 * Scanner state shared by hpricot_scan and the helper methods above.
 *  cs      - current state; reset to hpricot_scan_start when a scan begins
 *  act     - reset to 0 when a scan begins (further use is outside the visible code)
 *  have    - number of chars already in buf when the next read starts (reads land at buf[have])
 *  nread   - running total of chars read; also the substring offset when the port is a String
 *  curline - line number quoted in the "ran out of buffer space" error
 *  p       - index into buf of the character being examined
 */
int cs, act, have = 0, nread = 0, curline = 1, p=-1;
|
|
// True while a text run is pending: set by TEXT_PASS, cleared by ELE.
boolean text = false;
|
|
// Start/end indexes of the current token in buf; -1 means "no token" (both reset to -1 per scan).
int tokstart=-1, tokend;
|
|
// Input buffer; allocated with buffer_size chars by hpricot_scan.
char[] buf;
|
|
// JRuby runtime used to create Ruby objects (nil, strings, hashes, arrays).
Ruby runtime;
|
|
// attr: attribute hash of the current element, or nil; bufsize: value of the receiver's @buffer_size.
IRubyObject attr, bufsize;
|
|
// One-element holders: SET/CAT/SLIDE identify them by reference and read/write slot 0.
IRubyObject[] tag, akey, aval;
|
|
// buf indexes where the current tag / attribute key / attribute value starts; SET treats -1 as unset.
int mark_tag, mark_akey, mark_aval;
|
|
// done: set once a read returns fewer chars than requested; ele_open: cleared by ELE and TEXT_PASS.
boolean done = false, ele_open = false;
|
|
// Size of buf in chars: BUFSIZE unless overridden through @buffer_size.
int buffer_size = 0;
|
|
// Copied from port.isTaint(); passed on to rb_yield_tokens.
boolean taint = false;
|
|
// Block that receives each token array; assigned outside the visible code.
Block block = null;
|
|
|
|
|
|
|
|
// Token-kind markers passed to ELE/EBLK; ELE compares against cdata, sym_text, procins and comment.
// NOTE(review): presumably Ruby symbols initialised during library load - confirm where they are assigned.
IRubyObject xmldecl, doctype, procins, stag, etag, emptytag, comment,
|
|
cdata, sym_text;
|
|
|
|
IRubyObject hpricot_scan(IRubyObject recv, IRubyObject port) {
|
|
attr = bufsize = runtime.getNil();
|
|
tag = new IRubyObject[]{runtime.getNil()};
|
|
akey = new IRubyObject[]{runtime.getNil()};
|
|
aval = new IRubyObject[]{runtime.getNil()};
|
|
|
|
RubyClass rb_eHpricotParseError = runtime.getModule("Hpricot").getClass("ParseError");
|
|
|
|
taint = port.isTaint();
|
|
if ( !port.respondsTo("read")) {
|
|
if ( port.respondsTo("to_str")) {
|
|
port = port.callMethod(runtime.getCurrentContext(),"to_str");
|
|
} else {
|
|
throw runtime.newArgumentError("bad Hpricot argument, String or IO only please.");
|
|
}
|
|
}
|
|
|
|
buffer_size = BUFSIZE;
|
|
if (recv.getInstanceVariable("@buffer_size") != null) {
|
|
bufsize = recv.getInstanceVariable("@buffer_size");
|
|
if (!bufsize.isNil()) {
|
|
buffer_size = RubyNumeric.fix2int(bufsize);
|
|
}
|
|
}
|
|
buf = new char[buffer_size];
|
|
|
|
|
|
{
|
|
cs = hpricot_scan_start;
|
|
tokstart = -1;
|
|
tokend = -1;
|
|
act = 0;
|
|
}
|
|
|
|
while( !done ) {
|
|
IRubyObject str;
|
|
p = have;
|
|
int pe;
|
|
int len, space = buffer_size - have;
|
|
|
|
if ( space == 0 ) {
|
|
/* We've used up the entire buffer storing an already-parsed token
|
|
* prefix that must be preserved. Likely caused by super-long attributes.
|
|
* See ticket #13. */
|
|
rb_raise(rb_eHpricotParseError, "ran out of buffer space on element <" + tag.toString() + ">, starting on line "+curline+".");
|
|
}
|
|
|
|
if (port.respondsTo("read")) {
|
|
str = port.callMethod(runtime.getCurrentContext(),"read",runtime.newFixnum(space));
|
|
} else {
|
|
str = ((RubyString)port).substr(nread,space);
|
|
}
|
|
|
|
str = str.convertToString();
|
|
String sss = str.toString();
|
|
char[] chars = sss.toCharArray();
|
|
System.arraycopy(chars,0,buf,p,chars.length);
|
|
|
|
len = sss.length();
|
|
nread += len;
|
|
|
|
if ( len < space ) {
|
|
len++;
|
|
done = true;
|
|
}
|
|
|
|
pe = p + len;
|
|
char[] data = buf;
|
|
|
|
|
|
{
|
|
int _klen;
|
|
int _trans;
|
|
int _acts;
|
|
int _nacts;
|
|
int _keys;
|
|
|
|
if ( p != pe ) {
|
|
_resume: while ( true ) {
|
|
_again: do {
|
|
_acts = _hpricot_scan_from_state_actions[cs];
|
|
_nacts = (int) _hpricot_scan_actions[_acts++];
|
|
while ( _nacts-- > 0 ) {
|
|
switch ( _hpricot_scan_actions[_acts++] ) {
|
|
case 21:
|
|
{tokstart = p;}
|
|
break;
|
|
}
|
|
}
|
|
|
|
_match: do {
|
|
_keys = _hpricot_scan_key_offsets[cs];
|
|
_trans = _hpricot_scan_index_offsets[cs];
|
|
_klen = _hpricot_scan_single_lengths[cs];
|
|
if ( _klen > 0 ) {
|
|
int _lower = _keys;
|
|
int _mid;
|
|
int _upper = _keys + _klen - 1;
|
|
while (true) {
|
|
if ( _upper < _lower )
|
|
break;
|
|
|
|
_mid = _lower + ((_upper-_lower) >> 1);
|
|
if ( data[p] < _hpricot_scan_trans_keys[_mid] )
|
|
_upper = _mid - 1;
|
|
else if ( data[p] > _hpricot_scan_trans_keys[_mid] )
|
|
_lower = _mid + 1;
|
|
else {
|
|
_trans += (_mid - _keys);
|
|
break _match;
|
|
}
|
|
}
|
|
_keys += _klen;
|
|
_trans += _klen;
|
|
}
|
|
|
|
_klen = _hpricot_scan_range_lengths[cs];
|
|
if ( _klen > 0 ) {
|
|
int _lower = _keys;
|
|
int _mid;
|
|
int _upper = _keys + (_klen<<1) - 2;
|
|
while (true) {
|
|
if ( _upper < _lower )
|
|
break;
|
|
|
|
_mid = _lower + (((_upper-_lower) >> 1) & ~1);
|
|
if ( data[p] < _hpricot_scan_trans_keys[_mid] )
|
|
_upper = _mid - 2;
|
|
else if ( data[p] > _hpricot_scan_trans_keys[_mid+1] )
|
|
_lower = _mid + 2;
|
|
else {
|
|
_trans += ((_mid - _keys)>>1);
|
|
break _match;
|
|
}
|
|
}
|
|
_trans += _klen;
|
|
}
|
|
} while (false);
|
|
|
|
_trans = _hpricot_scan_indicies[_trans];
|
|
cs = _hpricot_scan_trans_targs_wi[_trans];
|
|
|
|
if ( _hpricot_scan_trans_actions_wi[_trans] == 0 )
|
|
break _again;
|
|
|
|
_acts = _hpricot_scan_trans_actions_wi[_trans];
|
|
_nacts = (int) _hpricot_scan_actions[_acts++];
|
|
while ( _nacts-- > 0 )
|
|
{
|
|
switch ( _hpricot_scan_actions[_acts++] )
|
|
{
|
|
case 0:
|
|
{
|
|
if (text) {
|
|
CAT(tag, p);
|
|
ELE(sym_text);
|
|
text = false;
|
|
}
|
|
attr = runtime.getNil();
|
|
tag[0] = runtime.getNil();
|
|
mark_tag = -1;
|
|
ele_open = true;
|
|
}
|
|
break;
|
|
case 1:
|
|
{ mark_tag = p; }
|
|
break;
|
|
case 2:
|
|
{ mark_aval = p; }
|
|
break;
|
|
case 3:
|
|
{ mark_akey = p; }
|
|
break;
|
|
case 4:
|
|
{ SET(tag, p); }
|
|
break;
|
|
case 5:
|
|
{ SET(aval, p); }
|
|
break;
|
|
case 6:
|
|
{
|
|
if (buf[p-1] == '"' || buf[p-1] == '\'') { SET(aval, p-1); }
|
|
else { SET(aval, p); }
|
|
}
|
|
break;
|
|
case 7:
|
|
{ SET(akey, p); }
|
|
break;
|
|
case 8:
|
|
{ SET(aval, p); ATTR(rb_str_new2("version"), aval); }
|
|
break;
|
|
case 9:
|
|
{ SET(aval, p); ATTR(rb_str_new2("encoding"), aval); }
|
|
break;
|
|
case 10:
|
|
{ SET(aval, p); ATTR(rb_str_new2("standalone"), aval); }
|
|
break;
|
|
case 11:
|
|
{ SET(aval, p); ATTR(rb_str_new2("public_id"), aval); }
|
|
break;
|
|
case 12:
|
|
{ SET(aval, p); ATTR(rb_str_new2("system_id"), aval); }
|
|
break;
|
|
case 13:
|
|
{
|
|
akey[0] = runtime.getNil();
|
|
aval[0] = runtime.getNil();
|
|
mark_akey = -1;
|
|
mark_aval = -1;
|
|
}
|
|
break;
|
|
case 14:
|
|
{
|
|
ATTR(akey, aval);
|
|
}
|
|
break;
|
|
case 15:
|
|
{curline += 1;}
|
|
break;
|
|
case 16:
|
|
{ TEXT_PASS(); }
|
|
break;
|
|
case 17:
|
|
{ EBLK(comment, 3); {cs = 204; if (true) break _again;} }
|
|
break;
|
|
case 18:
|
|
{ EBLK(cdata, 3); {cs = 204; if (true) break _again;} }
|
|
break;
|
|
case 19:
|
|
{ EBLK(procins, 2); {cs = 204; if (true) break _again;} }
|
|
break;
|
|
case 22:
|
|
{tokend = p+1;}
|
|
break;
|
|
case 23:
|
|
{tokend = p+1;{p = ((tokend))-1;}}
|
|
break;
|
|
case 24:
|
|
{tokend = p+1;{ TEXT_PASS(); }{p = ((tokend))-1;}}
|
|
break;
|
|
case 25:
|
|
{tokend = p;{ TEXT_PASS(); }{p = ((tokend))-1;}}
|
|
break;
|
|
case 26:
|
|
{{ TEXT_PASS(); }{p = ((tokend))-1;}}
|
|
break;
|
|
case 27:
|
|
{tokend = p+1;{p = ((tokend))-1;}}
|
|
break;
|
|
case 28:
|
|
{tokend = p+1;{ TEXT_PASS(); }{p = ((tokend))-1;}}
|
|
break;
|
|
case 29:
|
|
{tokend = p;{ TEXT_PASS(); }{p = ((tokend))-1;}}
|
|
break;
|
|
case 30:
|
|
{{ TEXT_PASS(); }{p = ((tokend))-1;}}
|
|
break;
|
|
case 31:
|
|
{tokend = p+1;{p = ((tokend))-1;}}
|
|
break;
|
|
case 32:
|
|
{tokend = p+1;{ TEXT_PASS(); }{p = ((tokend))-1;}}
|
|
break;
|
|
case 33:
|
|
{tokend = p;{ TEXT_PASS(); }{p = ((tokend))-1;}}
|
|
break;
|
|
case 34:
|
|
{act = 8;}
|
|
break;
|
|
case 35:
|
|
{act = 10;}
|
|
break;
|
|
case 36:
|
|
{act = 12;}
|
|
break;
|
|
case 37:
|
|
{act = 15;}
|
|
break;
|
|
case 38:
|
|
{tokend = p+1;{ ELE(xmldecl); }{p = ((tokend))-1;}}
|
|
break;
|
|
case 39:
|
|
{tokend = p+1;{ ELE(doctype); }{p = ((tokend))-1;}}
|
|
break;
|
|
case 40:
|
|
{tokend = p+1;{ ELE(stag); }{p = ((tokend))-1;}}
|
|
break;
|
|
case 41:
|
|
{tokend = p+1;{ ELE(etag); }{p = ((tokend))-1;}}
|
|
break;
|
|
case 42:
|
|
{tokend = p+1;{ ELE(emptytag); }{p = ((tokend))-1;}}
|
|
break;
|
|
case 43:
|
|
{tokend = p+1;{ {{p = ((tokend))-1;}{cs = 214; if (true) break _again;}} }{p = ((tokend))-1;}}
|
|
break;
|
|
case 44:
|
|
{tokend = p+1;{ {{p = ((tokend))-1;}{cs = 216; if (true) break _again;}} }{p = ((tokend))-1;}}
|
|
break;
|
|
case 45:
|
|
{tokend = p+1;{ TEXT_PASS(); }{p = ((tokend))-1;}}
|
|
break;
|
|
case 46:
|
|
{tokend = p;{ {{p = ((tokend))-1;}{cs = 218; if (true) break _again;}} }{p = ((tokend))-1;}}
|
|
break;
|
|
case 47:
|
|
{tokend = p;{ TEXT_PASS(); }{p = ((tokend))-1;}}
|
|
break;
|
|
case 48:
|
|
{{ {{p = ((tokend))-1;}{cs = 218; if (true) break _again;}} }{p = ((tokend))-1;}}
|
|
break;
|
|
case 49:
|
|
{{ TEXT_PASS(); }{p = ((tokend))-1;}}
|
|
break;
|
|
case 50:
|
|
{ switch( act ) {
|
|
case 8:
|
|
{ ELE(doctype); }
|
|
break;
|
|
case 10:
|
|
{ ELE(stag); }
|
|
break;
|
|
case 12:
|
|
{ ELE(emptytag); }
|
|
break;
|
|
case 15:
|
|
{ TEXT_PASS(); }
|
|
break;
|
|
default: break;
|
|
}
|
|
{p = ((tokend))-1;}}
|
|
break;
|
|
}
|
|
}
|
|
|
|
} while (false);
|
|
_acts = _hpricot_scan_to_state_actions[cs];
|
|
_nacts = (int) _hpricot_scan_actions[_acts++];
|
|
while ( _nacts-- > 0 ) {
|
|
switch ( _hpricot_scan_actions[_acts++] ) {
|
|
case 20:
|
|
{tokstart = -1;}
|
|
break;
|
|
}
|
|
}
|
|
|
|
if ( ++p == pe )
|
|
break _resume;
|
|
}
|
|
}
|
|
}
|
|
|
|
if ( cs == hpricot_scan_error ) {
|
|
if(!tag[0].isNil()) {
|
|
rb_raise(rb_eHpricotParseError, "parse error on element <"+tag.toString()+">, starting on line "+curline+".\n" + NO_WAY_SERIOUSLY);
|
|
} else {
|
|
rb_raise(rb_eHpricotParseError, "parse error on line "+curline+".\n" + NO_WAY_SERIOUSLY);
|
|
}
|
|
}
|
|
|
|
if ( done && ele_open ) {
|
|
ele_open = false;
|
|
if(tokstart > -1) {
|
|
mark_tag = tokstart;
|
|
tokstart = -1;
|
|
text = true;
|
|
}
|
|
}
|
|
|
|
if(tokstart == -1) {
|
|
have = 0;
|
|
/* text nodes have no tokstart because each byte is parsed alone */
|
|
if(mark_tag != -1 && text) {
|
|
if (done) {
|
|
if(mark_tag < p-1) {
|
|
CAT(tag, p-1);
|
|
ELE(sym_text);
|
|
}
|
|
} else {
|
|
CAT(tag, p);
|
|
}
|
|
}
|
|
mark_tag = 0;
|
|
} else {
|
|
have = pe - tokstart;
|
|
System.arraycopy(buf,tokstart,buf,0,have);
|
|
SLIDE(tag);
|
|
SLIDE(akey);
|
|
SLIDE(aval);
|
|
tokend = (tokend - tokstart);
|
|
tokstart = 0;
|
|
}
|
|
}
|
|
return runtime.getNil();
|
|
}
|
|
|
|
public static IRubyObject __hpricot_scan(IRubyObject recv, IRubyObject port, Block block) {
|
|
Ruby runtime = recv.getRuntime();
|
|
HpricotScanService service = new HpricotScanService();
|
|
service.runtime = runtime;
|
|
service.xmldecl = runtime.newSymbol("xmldecl");
|
|
service.doctype = runtime.newSymbol("doctype");
|
|
service.procins = runtime.newSymbol("procins");
|
|
service.stag = runtime.newSymbol("stag");
|
|
service.etag = runtime.newSymbol("etag");
|
|
service.emptytag = runtime.newSymbol("emptytag");
|
|
service.comment = runtime.newSymbol("comment");
|
|
service.cdata = runtime.newSymbol("cdata");
|
|
service.sym_text = runtime.newSymbol("text");
|
|
service.block = block;
|
|
return service.hpricot_scan(recv, port);
|
|
}
|
|
|
|
|
|
public boolean basicLoad(final Ruby runtime) throws IOException {
|
|
Init_hpricot_scan(runtime);
|
|
return true;
|
|
}
|
|
|
|
public static void Init_hpricot_scan(Ruby runtime) {
|
|
RubyModule mHpricot = runtime.defineModule("Hpricot");
|
|
mHpricot.getMetaClass().attr_accessor(new IRubyObject[]{runtime.newSymbol("buffer_size")});
|
|
CallbackFactory fact = runtime.callbackFactory(HpricotScanService.class);
|
|
mHpricot.getMetaClass().defineMethod("scan",fact.getSingletonMethod("__hpricot_scan",IRubyObject.class));
|
|
mHpricot.defineClassUnder("ParseError",runtime.getClass("Exception"),runtime.getClass("Exception").getAllocator());
|
|
}
|
|
}
|