You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
This repo is archived. You can view files and clone it, but cannot push or open issues/pull-requests.

1341 lines
39 KiB

import java.io.IOException;
import org.jruby.Ruby;
import org.jruby.RubyClass;
import org.jruby.RubyHash;
import org.jruby.RubyModule;
import org.jruby.RubyNumeric;
import org.jruby.RubyString;
import org.jruby.runtime.Block;
import org.jruby.runtime.CallbackFactory;
import org.jruby.runtime.builtin.IRubyObject;
import org.jruby.exceptions.RaiseException;
import org.jruby.runtime.load.BasicLibraryService;
public class HpricotScanService implements BasicLibraryService {
/**
 * Apology text asking the user to file a bug report with the HTML being parsed.
 * Declared {@code final} so that no caller can reassign this shared message.
 * NOTE(review): no use is visible in this part of the file; presumably it is raised
 * when the scanner reaches a state that should be impossible - confirm.
 */
public static final String NO_WAY_SERIOUSLY="*** This should not happen, please send a bug report with the HTML you're parsing to why@whytheluckystiff.net. So sorry!";
/**
 * Yields the current token to the block as an element of kind {@code N}.
 * Does nothing unless a non-empty token span exists or a text run is pending.
 * Clears the {@code ele_open} and {@code text} flags before yielding.
 *
 * @param N token kind marker; for cdata, text, procins and comment kinds no raw
 *          source string is attached (nil is passed instead)
 */
public void ELE(IRubyObject N) {
    final boolean spanPresent = tokend > tokstart;
    if (!spanPresent && !text) {
        return;
    }
    ele_open = false;
    text = false;

    // The raw source slice is only captured for kinds that are not opaque blocks/text.
    final boolean opaqueKind = N == cdata || N == sym_text || N == procins || N == comment;
    IRubyObject raw_string;
    if (tokstart == -1 || opaqueKind) {
        raw_string = runtime.getNil();
    } else {
        raw_string = runtime.newString(new String(buf, tokstart, tokend - tokstart));
    }
    rb_yield_tokens(N, tag[0], attr, raw_string, taint);
}
/**
 * Stores into slot 0 of holder {@code N} the buffer text from that holder's mark up to
 * (but excluding) index {@code E}.
 * An unset mark (-1) or an empty span stores the empty string; if {@code E} lies before
 * the mark the holder is left untouched. Holders other than {@code tag}, {@code akey}
 * and {@code aval} are ignored.
 *
 * @param N one of the {@code tag}, {@code akey} or {@code aval} holders (matched by identity)
 * @param E exclusive end index in {@code buf}
 */
public void SET(IRubyObject[] N, int E) {
    final int start;
    if (N == tag) {
        start = mark_tag;
    } else if (N == akey) {
        start = mark_akey;
    } else if (N == aval) {
        start = mark_aval;
    } else {
        return;
    }

    if (start == -1 || start == E) {
        N[0] = runtime.newString("");
    } else if (E > start) {
        N[0] = runtime.newString(new String(buf, start, E - start));
    }
}
/**
 * Appends the buffer text between holder {@code N}'s mark and index {@code E} to the
 * string already stored in the holder; when the holder still contains nil this
 * delegates to {@link #SET(IRubyObject[], int)} instead.
 *
 * @param N one of the {@code tag}, {@code akey} or {@code aval} holders (matched by identity);
 *          any other holder uses a mark of 0
 * @param E exclusive end index in {@code buf}
 */
public void CAT(IRubyObject[] N, int E) {
    if (N[0].isNil()) {
        SET(N, E);
        return;
    }
    final int start = (N == tag) ? mark_tag
            : (N == akey) ? mark_akey
            : (N == aval) ? mark_aval
            : 0;
    final RubyString target = (RubyString) N[0];
    target.append(runtime.newString(new String(buf, start, E - start)));
}
/**
 * Rebases the mark that belongs to holder {@code N} by subtracting {@code tokstart},
 * but only when that mark currently lies beyond {@code tokstart}.
 *
 * @param N one of the {@code tag}, {@code akey} or {@code aval} holders (matched by identity);
 *          anything else is a no-op
 */
public void SLIDE(Object N) {
    if (N == tag) {
        if (mark_tag > tokstart) {
            mark_tag -= tokstart;
        }
    } else if (N == akey) {
        if (mark_akey > tokstart) {
            mark_akey -= tokstart;
        }
    } else if (N == aval) {
        if (mark_aval > tokstart) {
            mark_aval -= tokstart;
        }
    }
}
/**
 * Records attribute {@code K => V} on the element being scanned, creating the
 * attribute hash on first use. A nil key is ignored.
 *
 * @param K attribute name (nil means "nothing to record")
 * @param V attribute value
 */
public void ATTR(IRubyObject K, IRubyObject V) {
    if (K.isNil()) {
        return;
    }
    if (attr.isNil()) {
        // Lazily allocate the hash so elements without attributes keep nil.
        attr = RubyHash.newHash(runtime);
    }
    final RubyHash attributes = (RubyHash) attr;
    attributes.aset(K, V);
}
/**
 * Overload taking the key from slot 0 of a holder array.
 *
 * @param K holder whose first slot is the attribute name
 * @param V attribute value
 */
public void ATTR(IRubyObject[] K, IRubyObject V) {
    final IRubyObject key = K[0];
    ATTR(key, V);
}
/**
 * Overload taking the value from slot 0 of a holder array.
 *
 * @param K attribute name
 * @param V holder whose first slot is the attribute value
 */
public void ATTR(IRubyObject K, IRubyObject[] V) {
    final IRubyObject value = V[0];
    ATTR(K, value);
}
/**
 * Overload taking both key and value from slot 0 of their holder arrays.
 *
 * @param K holder whose first slot is the attribute name
 * @param V holder whose first slot is the attribute value
 */
public void ATTR(IRubyObject[] K, IRubyObject[] V) {
    final IRubyObject key = K[0];
    final IRubyObject value = V[0];
    ATTR(key, value);
}
/**
 * Begins a text run if one is not already in progress.
 * The tag mark is placed at the token start when an element was open (and a token
 * start is known), otherwise at the current position; the attribute hash and tag
 * are then reset to nil and the {@code text} flag is raised.
 */
public void TEXT_PASS() {
    if (text) {
        return;
    }
    if (!ele_open) {
        mark_tag = p;
    } else {
        ele_open = false;
        if (tokstart > -1) {
            mark_tag = tokstart;
        }
    }
    attr = runtime.getNil();
    tag[0] = runtime.getNil();
    text = true;
}
/**
 * Closes a block-style token: appends the text up to {@code p - T + 1} to the tag
 * and then emits it as an element of kind {@code N}.
 *
 * @param N token kind marker handed to {@link #ELE(IRubyObject)}
 * @param T amount subtracted from the current position to find the end of the content
 */
public void EBLK(IRubyObject N, int T) {
    final int contentEnd = p - T + 1;
    CAT(tag, contentEnd);
    ELE(N);
}
/**
 * Throws a Ruby exception of the given class with the given message.
 *
 * @param error   Ruby exception class to raise
 * @param message text of the exception
 * @throws RaiseException always
 */
public void rb_raise(RubyClass error, String message) {
    final RaiseException failure = new RaiseException(runtime, error, message, true);
    throw failure;
}
/**
 * Wraps a Java string in a new Ruby string owned by this scanner's runtime.
 *
 * @param s text to wrap
 * @return the new Ruby string
 */
public IRubyObject rb_str_new2(String s) {
    final IRubyObject wrapped = runtime.newString(s);
    return wrapped;
}
/*
 * Flattened action lists for the scanner state machine.
 * Each list is a count followed by that many action ids; the scan loop reads the
 * count at a given offset and dispatches every following id through its action switch.
 * Offset 0 holds a count of 0, i.e. the empty list.
 * NOTE(review): the table layout matches Ragel-generated output - presumably not
 * meant to be edited by hand; confirm.
 */
static final byte[] _hpricot_scan_actions = {
0, 1, 1, 1, 2, 1, 4, 1,
5, 1, 6, 1, 7, 1, 8, 1,
9, 1, 10, 1, 11, 1, 12, 1,
14, 1, 16, 1, 20, 1, 21, 1,
22, 1, 24, 1, 25, 1, 26, 1,
28, 1, 29, 1, 30, 1, 32, 1,
33, 1, 38, 1, 39, 1, 40, 1,
41, 1, 42, 1, 43, 1, 44, 1,
45, 1, 46, 1, 47, 1, 48, 1,
49, 1, 50, 2, 2, 5, 2, 2,
6, 2, 2, 11, 2, 2, 12, 2,
2, 14, 2, 4, 39, 2, 4, 40,
2, 4, 41, 2, 5, 2, 2, 6,
14, 2, 7, 6, 2, 7, 14, 2,
11, 12, 2, 13, 3, 2, 14, 6,
2, 14, 40, 2, 15, 24, 2, 15,
28, 2, 15, 32, 2, 15, 45, 2,
17, 23, 2, 18, 27, 2, 19, 31,
2, 22, 34, 2, 22, 36, 3, 2,
6, 14, 3, 2, 14, 6, 3, 6,
7, 14, 3, 6, 14, 40, 3, 7,
14, 40, 3, 14, 6, 40, 3, 14,
13, 3, 3, 22, 0, 37, 3, 22,
2, 34, 3, 22, 14, 35, 4, 2,
14, 13, 3, 4, 6, 7, 14, 40,
4, 22, 2, 14, 35, 4, 22, 6,
14, 35, 4, 22, 7, 14, 35, 4,
22, 14, 6, 35, 5, 22, 2, 6,
14, 35, 5, 22, 2, 14, 6, 35,
5, 22, 6, 7, 14, 35
};
/*
 * Indexed by the current state: the offset in _hpricot_scan_trans_keys at which that
 * state's keys begin.
 */
static final short[] _hpricot_scan_key_offsets = {
0, 3, 4, 5, 6, 7, 8, 9,
10, 13, 22, 37, 44, 45, 46, 47,
48, 49, 52, 57, 69, 81, 86, 93,
94, 95, 100, 101, 105, 106, 107, 121,
135, 152, 169, 186, 203, 210, 212, 214,
220, 222, 227, 232, 238, 240, 245, 251,
265, 266, 267, 268, 269, 270, 271, 272,
273, 274, 275, 276, 282, 296, 300, 313,
326, 340, 354, 355, 366, 375, 388, 405,
423, 441, 450, 461, 480, 499, 510, 521,
536, 538, 540, 556, 572, 575, 587, 599,
619, 639, 658, 677, 697, 717, 728, 739,
751, 763, 775, 791, 794, 809, 811, 813,
829, 845, 848, 860, 871, 890, 910, 930,
941, 952, 964, 984, 1004, 1016, 1036, 1057,
1074, 1091, 1095, 1098, 1110, 1122, 1142, 1162,
1182, 1194, 1206, 1226, 1242, 1258, 1270, 1291,
1310, 1313, 1328, 1340, 1355, 1358, 1369, 1371,
1373, 1384, 1391, 1404, 1418, 1432, 1445, 1446,
1447, 1448, 1449, 1450, 1451, 1455, 1460, 1469,
1479, 1484, 1491, 1492, 1493, 1494, 1495, 1496,
1497, 1498, 1499, 1503, 1508, 1512, 1522, 1527,
1533, 1534, 1535, 1536, 1537, 1538, 1539, 1540,
1541, 1542, 1546, 1551, 1553, 1554, 1555, 1560,
1561, 1562, 1564, 1565, 1566, 1567, 1568, 1572,
1582, 1591, 1601, 1602, 1603, 1605, 1614, 1615,
1616, 1617, 1619, 1621, 1624, 1627, 1631, 1633,
1634, 1636, 1637, 1640
};
/*
 * Transition keys compared against the current input character.
 * For each state the single keys come first (their count is in
 * _hpricot_scan_single_lengths), followed by low/high range pairs (their count is in
 * _hpricot_scan_range_lengths). The scan loop binary-searches both groups, so the
 * entries of each group need to stay in ascending order.
 */
static final char[] _hpricot_scan_trans_keys = {
45, 68, 91, 45, 79, 67, 84, 89,
80, 69, 32, 9, 13, 32, 58, 95,
9, 13, 65, 90, 97, 122, 32, 62,
63, 91, 95, 9, 13, 45, 46, 48,
58, 65, 90, 97, 122, 32, 62, 80,
83, 91, 9, 13, 85, 66, 76, 73,
67, 32, 9, 13, 32, 34, 39, 9,
13, 9, 34, 61, 95, 32, 37, 39,
59, 63, 90, 97, 122, 9, 34, 61,
95, 32, 37, 39, 59, 63, 90, 97,
122, 32, 62, 91, 9, 13, 32, 34,
39, 62, 91, 9, 13, 34, 34, 32,
62, 91, 9, 13, 93, 32, 62, 9,
13, 39, 39, 9, 39, 61, 95, 32,
33, 35, 37, 40, 59, 63, 90, 97,
122, 9, 39, 61, 95, 32, 33, 35,
37, 40, 59, 63, 90, 97, 122, 9,
32, 33, 39, 62, 91, 95, 10, 13,
35, 37, 40, 59, 61, 90, 97, 122,
9, 32, 34, 39, 62, 91, 95, 10,
13, 33, 37, 40, 59, 61, 90, 97,
122, 9, 32, 33, 39, 62, 91, 95,
10, 13, 35, 37, 40, 59, 61, 90,
97, 122, 9, 32, 34, 39, 62, 91,
95, 10, 13, 33, 37, 40, 59, 61,
90, 97, 122, 32, 34, 39, 62, 91,
9, 13, 34, 39, 34, 39, 32, 39,
62, 91, 9, 13, 39, 93, 32, 62,
93, 9, 13, 32, 39, 62, 9, 13,
32, 34, 62, 91, 9, 13, 34, 93,
32, 34, 62, 9, 13, 32, 39, 62,
91, 9, 13, 9, 39, 61, 95, 32,
33, 35, 37, 40, 59, 63, 90, 97,
122, 89, 83, 84, 69, 77, 67, 68,
65, 84, 65, 91, 58, 95, 65, 90,
97, 122, 32, 62, 63, 95, 9, 13,
45, 46, 48, 58, 65, 90, 97, 122,
32, 62, 9, 13, 32, 47, 62, 63,
95, 9, 13, 45, 58, 65, 90, 97,
122, 32, 47, 62, 63, 95, 9, 13,
45, 58, 65, 90, 97, 122, 32, 47,
61, 62, 63, 95, 9, 13, 45, 58,
65, 90, 97, 122, 32, 47, 61, 62,
63, 95, 9, 13, 45, 58, 65, 90,
97, 122, 62, 13, 32, 34, 39, 47,
60, 62, 9, 10, 11, 12, 13, 32,
47, 60, 62, 9, 10, 11, 12, 32,
47, 62, 63, 95, 9, 13, 45, 58,
65, 90, 97, 122, 13, 32, 47, 60,
62, 63, 95, 9, 10, 11, 12, 45,
58, 65, 90, 97, 122, 13, 32, 47,
60, 61, 62, 63, 95, 9, 10, 11,
12, 45, 58, 65, 90, 97, 122, 13,
32, 47, 60, 61, 62, 63, 95, 9,
10, 11, 12, 45, 58, 65, 90, 97,
122, 13, 32, 47, 60, 62, 9, 10,
11, 12, 13, 32, 34, 39, 47, 60,
62, 9, 10, 11, 12, 13, 32, 34,
39, 47, 60, 62, 63, 95, 9, 10,
11, 12, 45, 58, 65, 90, 97, 122,
13, 32, 34, 39, 47, 60, 62, 63,
95, 9, 10, 11, 12, 45, 58, 65,
90, 97, 122, 13, 32, 34, 47, 60,
62, 92, 9, 10, 11, 12, 13, 32,
34, 47, 60, 62, 92, 9, 10, 11,
12, 32, 34, 47, 62, 63, 92, 95,
9, 13, 45, 58, 65, 90, 97, 122,
34, 92, 34, 92, 32, 34, 47, 61,
62, 63, 92, 95, 9, 13, 45, 58,
65, 90, 97, 122, 32, 34, 47, 61,
62, 63, 92, 95, 9, 13, 45, 58,
65, 90, 97, 122, 34, 62, 92, 13,
32, 34, 39, 47, 60, 62, 92, 9,
10, 11, 12, 13, 32, 34, 39, 47,
60, 62, 92, 9, 10, 11, 12, 13,
32, 34, 39, 47, 60, 62, 63, 92,
95, 9, 10, 11, 12, 45, 58, 65,
90, 97, 122, 13, 32, 34, 39, 47,
60, 62, 63, 92, 95, 9, 10, 11,
12, 45, 58, 65, 90, 97, 122, 13,
32, 34, 47, 60, 62, 63, 92, 95,
9, 10, 11, 12, 45, 58, 65, 90,
97, 122, 13, 32, 34, 47, 60, 62,
63, 92, 95, 9, 10, 11, 12, 45,
58, 65, 90, 97, 122, 13, 32, 34,
47, 60, 61, 62, 63, 92, 95, 9,
10, 11, 12, 45, 58, 65, 90, 97,
122, 13, 32, 34, 47, 60, 61, 62,
63, 92, 95, 9, 10, 11, 12, 45,
58, 65, 90, 97, 122, 13, 32, 34,
47, 60, 62, 92, 9, 10, 11, 12,
13, 32, 34, 47, 60, 62, 92, 9,
10, 11, 12, 13, 32, 34, 39, 47,
60, 62, 92, 9, 10, 11, 12, 13,
32, 34, 39, 47, 60, 62, 92, 9,
10, 11, 12, 13, 32, 34, 39, 47,
60, 62, 92, 9, 10, 11, 12, 32,
34, 39, 47, 62, 63, 92, 95, 9,
13, 45, 58, 65, 90, 97, 122, 34,
39, 92, 32, 39, 47, 62, 63, 92,
95, 9, 13, 45, 58, 65, 90, 97,
122, 39, 92, 39, 92, 32, 39, 47,
61, 62, 63, 92, 95, 9, 13, 45,
58, 65, 90, 97, 122, 32, 39, 47,
61, 62, 63, 92, 95, 9, 13, 45,
58, 65, 90, 97, 122, 39, 62, 92,
13, 32, 34, 39, 47, 60, 62, 92,
9, 10, 11, 12, 13, 32, 39, 47,
60, 62, 92, 9, 10, 11, 12, 13,
32, 39, 47, 60, 62, 63, 92, 95,
9, 10, 11, 12, 45, 58, 65, 90,
97, 122, 13, 32, 39, 47, 60, 61,
62, 63, 92, 95, 9, 10, 11, 12,
45, 58, 65, 90, 97, 122, 13, 32,
39, 47, 60, 61, 62, 63, 92, 95,
9, 10, 11, 12, 45, 58, 65, 90,
97, 122, 13, 32, 39, 47, 60, 62,
92, 9, 10, 11, 12, 13, 32, 39,
47, 60, 62, 92, 9, 10, 11, 12,
13, 32, 34, 39, 47, 60, 62, 92,
9, 10, 11, 12, 13, 32, 34, 39,
47, 60, 62, 63, 92, 95, 9, 10,
11, 12, 45, 58, 65, 90, 97, 122,
13, 32, 34, 39, 47, 60, 62, 63,
92, 95, 9, 10, 11, 12, 45, 58,
65, 90, 97, 122, 13, 32, 34, 39,
47, 60, 62, 92, 9, 10, 11, 12,
13, 32, 34, 39, 47, 60, 62, 63,
92, 95, 9, 10, 11, 12, 45, 58,
65, 90, 97, 122, 13, 32, 34, 39,
47, 60, 61, 62, 63, 92, 95, 9,
10, 11, 12, 45, 58, 65, 90, 97,
122, 32, 34, 39, 47, 61, 62, 63,
92, 95, 9, 13, 45, 58, 65, 90,
97, 122, 32, 34, 39, 47, 61, 62,
63, 92, 95, 9, 13, 45, 58, 65,
90, 97, 122, 34, 39, 62, 92, 34,
39, 92, 13, 32, 34, 39, 47, 60,
62, 92, 9, 10, 11, 12, 13, 32,
34, 39, 47, 60, 62, 92, 9, 10,
11, 12, 13, 32, 34, 39, 47, 60,
62, 63, 92, 95, 9, 10, 11, 12,
45, 58, 65, 90, 97, 122, 13, 32,
34, 39, 47, 60, 62, 63, 92, 95,
9, 10, 11, 12, 45, 58, 65, 90,
97, 122, 13, 32, 34, 39, 47, 60,
62, 63, 92, 95, 9, 10, 11, 12,
45, 58, 65, 90, 97, 122, 13, 32,
34, 39, 47, 60, 62, 92, 9, 10,
11, 12, 13, 32, 34, 39, 47, 60,
62, 92, 9, 10, 11, 12, 13, 32,
34, 39, 47, 60, 62, 63, 92, 95,
9, 10, 11, 12, 45, 58, 65, 90,
97, 122, 32, 34, 39, 47, 62, 63,
92, 95, 9, 13, 45, 58, 65, 90,
97, 122, 32, 34, 39, 47, 62, 63,
92, 95, 9, 13, 45, 58, 65, 90,
97, 122, 13, 32, 34, 39, 47, 60,
62, 92, 9, 10, 11, 12, 13, 32,
34, 39, 47, 60, 61, 62, 63, 92,
95, 9, 10, 11, 12, 45, 58, 65,
90, 97, 122, 13, 32, 39, 47, 60,
62, 63, 92, 95, 9, 10, 11, 12,
45, 58, 65, 90, 97, 122, 34, 39,
92, 32, 39, 47, 62, 63, 92, 95,
9, 13, 45, 58, 65, 90, 97, 122,
13, 32, 34, 39, 47, 60, 62, 92,
9, 10, 11, 12, 32, 34, 47, 62,
63, 92, 95, 9, 13, 45, 58, 65,
90, 97, 122, 34, 39, 92, 13, 32,
39, 47, 60, 62, 92, 9, 10, 11,
12, 34, 92, 39, 92, 13, 32, 34,
39, 47, 60, 62, 9, 10, 11, 12,
58, 95, 120, 65, 90, 97, 122, 32,
63, 95, 9, 13, 45, 46, 48, 58,
65, 90, 97, 122, 32, 63, 95, 109,
9, 13, 45, 46, 48, 58, 65, 90,
97, 122, 32, 63, 95, 108, 9, 13,
45, 46, 48, 58, 65, 90, 97, 122,
32, 63, 95, 9, 13, 45, 46, 48,
58, 65, 90, 97, 122, 101, 114, 115,
105, 111, 110, 32, 61, 9, 13, 32,
34, 39, 9, 13, 95, 45, 46, 48,
58, 65, 90, 97, 122, 34, 95, 45,
46, 48, 58, 65, 90, 97, 122, 32,
62, 63, 9, 13, 32, 62, 63, 101,
115, 9, 13, 62, 110, 99, 111, 100,
105, 110, 103, 32, 61, 9, 13, 32,
34, 39, 9, 13, 65, 90, 97, 122,
34, 95, 45, 46, 48, 57, 65, 90,
97, 122, 32, 62, 63, 9, 13, 32,
62, 63, 115, 9, 13, 116, 97, 110,
100, 97, 108, 111, 110, 101, 32, 61,
9, 13, 32, 34, 39, 9, 13, 110,
121, 111, 34, 32, 62, 63, 9, 13,
101, 115, 110, 121, 111, 39, 101, 115,
65, 90, 97, 122, 39, 95, 45, 46,
48, 57, 65, 90, 97, 122, 95, 45,
46, 48, 58, 65, 90, 97, 122, 39,
95, 45, 46, 48, 58, 65, 90, 97,
122, 62, 62, 10, 60, 33, 47, 58,
63, 95, 65, 90, 97, 122, 39, 93,
34, 34, 92, 39, 92, 34, 39, 92,
32, 9, 13, 32, 118, 9, 13, 10,
45, 45, 10, 93, 93, 10, 62, 63,
62, 0
};
/*
 * Indexed by the current state: how many single (exact-match) keys that state has in
 * _hpricot_scan_trans_keys. A value of 0 makes the scan loop skip the single-key search.
 */
static final byte[] _hpricot_scan_single_lengths = {
3, 1, 1, 1, 1, 1, 1, 1,
1, 3, 5, 5, 1, 1, 1, 1,
1, 1, 3, 4, 4, 3, 5, 1,
1, 3, 1, 2, 1, 1, 4, 4,
7, 7, 7, 7, 5, 2, 2, 4,
2, 3, 3, 4, 2, 3, 4, 4,
1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 2, 4, 2, 5, 5,
6, 6, 1, 7, 5, 5, 7, 8,
8, 5, 7, 9, 9, 7, 7, 7,
2, 2, 8, 8, 3, 8, 8, 10,
10, 9, 9, 10, 10, 7, 7, 8,
8, 8, 8, 3, 7, 2, 2, 8,
8, 3, 8, 7, 9, 10, 10, 7,
7, 8, 10, 10, 8, 10, 11, 9,
9, 4, 3, 8, 8, 10, 10, 10,
8, 8, 10, 8, 8, 8, 11, 9,
3, 7, 8, 7, 3, 7, 2, 2,
7, 3, 3, 4, 4, 3, 1, 1,
1, 1, 1, 1, 2, 3, 1, 2,
3, 5, 1, 1, 1, 1, 1, 1,
1, 1, 2, 3, 0, 2, 3, 4,
1, 1, 1, 1, 1, 1, 1, 1,
1, 2, 3, 2, 1, 1, 3, 1,
1, 2, 1, 1, 1, 1, 0, 2,
1, 2, 1, 1, 2, 5, 1, 1,
1, 2, 2, 3, 1, 2, 2, 1,
2, 1, 3, 1
};
/*
 * Indexed by the current state: how many low/high range pairs that state has in
 * _hpricot_scan_trans_keys (each pair occupies two consecutive entries).
 * A value of 0 makes the scan loop skip the range search.
 */
static final byte[] _hpricot_scan_range_lengths = {
0, 0, 0, 0, 0, 0, 0, 0,
1, 3, 5, 1, 0, 0, 0, 0,
0, 1, 1, 4, 4, 1, 1, 0,
0, 1, 0, 1, 0, 0, 5, 5,
5, 5, 5, 5, 1, 0, 0, 1,
0, 1, 1, 1, 0, 1, 1, 5,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 2, 5, 1, 4, 4,
4, 4, 0, 2, 2, 4, 5, 5,
5, 2, 2, 5, 5, 2, 2, 4,
0, 0, 4, 4, 0, 2, 2, 5,
5, 5, 5, 5, 5, 2, 2, 2,
2, 2, 4, 0, 4, 0, 0, 4,
4, 0, 2, 2, 5, 5, 5, 2,
2, 2, 5, 5, 2, 5, 5, 4,
4, 0, 0, 2, 2, 5, 5, 5,
2, 2, 5, 4, 4, 2, 5, 5,
0, 4, 2, 4, 0, 2, 0, 0,
2, 2, 5, 5, 5, 5, 0, 0,
0, 0, 0, 0, 1, 1, 4, 4,
1, 1, 0, 0, 0, 0, 0, 0,
0, 0, 1, 1, 2, 4, 1, 1,
0, 0, 0, 0, 0, 0, 0, 0,
0, 1, 1, 0, 0, 0, 1, 0,
0, 0, 0, 0, 0, 0, 2, 4,
4, 4, 0, 0, 0, 2, 0, 0,
0, 0, 0, 0, 1, 1, 0, 0,
0, 0, 0, 0
};
/*
 * Indexed by the current state: the base offset in _hpricot_scan_indicies for that
 * state. The scan loop adds the matched key slot (or the key counts, when nothing
 * matched) to this base before looking up the transition.
 */
static final short[] _hpricot_scan_index_offsets = {
0, 4, 6, 8, 10, 12, 14, 16,
18, 21, 28, 39, 46, 48, 50, 52,
54, 56, 59, 64, 73, 82, 87, 94,
96, 98, 103, 105, 109, 111, 113, 123,
133, 146, 159, 172, 185, 192, 195, 198,
204, 207, 212, 217, 223, 226, 231, 237,
247, 249, 251, 253, 255, 257, 259, 261,
263, 265, 267, 269, 274, 284, 288, 298,
308, 319, 330, 332, 342, 350, 360, 373,
387, 401, 409, 419, 434, 449, 459, 469,
481, 484, 487, 500, 513, 517, 528, 539,
555, 571, 586, 601, 617, 633, 643, 653,
664, 675, 686, 699, 703, 715, 718, 721,
734, 747, 751, 762, 772, 787, 803, 819,
829, 839, 850, 866, 882, 893, 909, 926,
940, 954, 959, 963, 974, 985, 1001, 1017,
1033, 1044, 1055, 1071, 1084, 1097, 1108, 1125,
1140, 1144, 1156, 1167, 1179, 1183, 1193, 1196,
1199, 1209, 1215, 1224, 1234, 1244, 1253, 1255,
1257, 1259, 1261, 1263, 1265, 1269, 1274, 1280,
1287, 1292, 1299, 1301, 1303, 1305, 1307, 1309,
1311, 1313, 1315, 1319, 1324, 1327, 1334, 1339,
1345, 1347, 1349, 1351, 1353, 1355, 1357, 1359,
1361, 1363, 1367, 1372, 1375, 1377, 1379, 1384,
1386, 1388, 1391, 1393, 1395, 1397, 1399, 1402,
1409, 1415, 1422, 1424, 1426, 1429, 1437, 1439,
1441, 1443, 1446, 1449, 1453, 1456, 1460, 1463,
1465, 1468, 1470, 1474
};
/*
 * Maps a per-state key slot (base from _hpricot_scan_index_offsets plus the matched
 * slot) to a transition index. That index is then used to read the target state from
 * _hpricot_scan_trans_targs_wi and the action list offset from
 * _hpricot_scan_trans_actions_wi.
 */
static final short[] _hpricot_scan_indicies = {
335, 336, 337, 296, 356, 296, 349, 296,
399, 296, 401, 296, 354, 296, 350, 296,
400, 296, 308, 308, 296, 308, 309, 309,
308, 309, 309, 296, 328, 330, 329, 331,
329, 328, 329, 329, 329, 329, 296, 310,
302, 311, 312, 0, 310, 296, 353, 296,
342, 296, 347, 296, 346, 296, 343, 296,
304, 304, 296, 304, 305, 306, 304, 296,
321, 320, 321, 321, 321, 321, 321, 321,
296, 319, 320, 319, 319, 319, 319, 319,
319, 296, 298, 302, 0, 298, 296, 298,
300, 307, 302, 0, 298, 296, 6, 222,
6, 13, 358, 302, 0, 358, 69, 1,
0, 1, 302, 1, 69, 6, 182, 6,
5, 322, 323, 322, 322, 322, 322, 322,
322, 322, 296, 299, 303, 299, 299, 299,
299, 299, 299, 299, 296, 297, 297, 299,
303, 302, 0, 299, 298, 299, 299, 299,
299, 296, 297, 297, 300, 301, 302, 0,
299, 298, 299, 299, 299, 299, 296, 186,
186, 188, 42, 184, 185, 188, 187, 188,
188, 188, 188, 182, 43, 43, 38, 44,
40, 34, 41, 37, 41, 41, 41, 41,
5, 37, 38, 39, 40, 34, 37, 5,
63, 224, 223, 63, 64, 62, 371, 6,
40, 34, 371, 5, 35, 36, 34, 26,
27, 1, 26, 0, 36, 6, 40, 36,
5, 60, 6, 61, 58, 60, 13, 35,
59, 58, 59, 6, 61, 59, 13, 183,
6, 184, 185, 183, 182, 41, 42, 41,
41, 41, 41, 41, 41, 41, 5, 403,
296, 351, 296, 352, 296, 345, 296, 348,
296, 398, 296, 344, 296, 341, 296, 402,
296, 397, 296, 355, 296, 338, 338, 338,
338, 296, 332, 334, 333, 333, 332, 333,
333, 333, 333, 296, 313, 314, 313, 296,
324, 326, 327, 325, 325, 324, 325, 325,
325, 296, 315, 317, 318, 316, 316, 315,
316, 316, 316, 296, 364, 366, 367, 368,
365, 365, 364, 365, 365, 365, 69, 359,
361, 362, 162, 360, 360, 359, 360, 360,
360, 69, 369, 69, 157, 157, 159, 160,
161, 69, 162, 157, 158, 156, 66, 66,
68, 69, 70, 66, 67, 65, 363, 361,
162, 360, 360, 363, 360, 360, 360, 69,
66, 66, 74, 69, 76, 73, 73, 66,
67, 73, 73, 73, 65, 132, 132, 135,
69, 136, 137, 134, 134, 132, 133, 134,
134, 134, 65, 71, 71, 74, 69, 75,
76, 73, 73, 71, 72, 73, 73, 73,
65, 66, 66, 68, 69, 70, 66, 67,
65, 226, 226, 228, 229, 230, 69, 70,
226, 227, 156, 163, 163, 159, 160, 161,
69, 162, 165, 165, 163, 164, 165, 165,
165, 156, 226, 226, 228, 229, 231, 69,
76, 165, 165, 226, 227, 165, 165, 165,
156, 248, 248, 84, 246, 199, 250, 195,
248, 249, 189, 92, 92, 84, 95, 7,
96, 97, 92, 93, 91, 372, 3, 48,
50, 47, 8, 47, 372, 47, 47, 47,
7, 3, 8, 7, 11, 8, 7, 122,
3, 124, 125, 126, 123, 8, 123, 122,
123, 123, 123, 7, 46, 3, 48, 49,
50, 47, 8, 47, 46, 47, 47, 47,
7, 3, 45, 8, 7, 190, 190, 192,
193, 194, 7, 50, 195, 190, 191, 189,
196, 196, 192, 193, 194, 7, 50, 195,
196, 197, 189, 196, 196, 192, 193, 194,
7, 50, 198, 195, 198, 196, 197, 198,
198, 198, 189, 242, 242, 244, 245, 247,
7, 103, 198, 195, 198, 242, 243, 198,
198, 198, 189, 248, 248, 84, 247, 199,
251, 198, 195, 198, 248, 249, 198, 198,
198, 189, 92, 92, 84, 101, 7, 103,
100, 97, 100, 92, 93, 100, 100, 100,
91, 144, 144, 84, 147, 7, 148, 149,
146, 97, 146, 144, 145, 146, 146, 146,
91, 98, 98, 84, 101, 7, 102, 103,
100, 97, 100, 98, 99, 100, 100, 100,
91, 92, 92, 84, 95, 7, 96, 97,
92, 93, 91, 92, 92, 94, 95, 7,
96, 97, 92, 93, 91, 242, 242, 244,
245, 246, 7, 96, 195, 242, 243, 189,
258, 258, 263, 94, 256, 215, 261, 211,
258, 259, 205, 105, 105, 80, 94, 108,
9, 109, 110, 105, 106, 104, 373, 10,
11, 55, 57, 54, 12, 54, 373, 54,
54, 54, 9, 10, 11, 12, 9, 370,
3, 31, 33, 30, 4, 30, 370, 30,
30, 30, 2, 3, 4, 2, 10, 4,
2, 117, 3, 119, 120, 121, 118, 4,
118, 117, 118, 118, 118, 2, 29, 3,
31, 32, 33, 30, 4, 30, 29, 30,
30, 30, 2, 3, 28, 4, 2, 167,
167, 169, 170, 171, 2, 33, 172, 167,
168, 166, 78, 78, 84, 81, 2, 82,
83, 78, 79, 77, 78, 78, 84, 88,
2, 90, 87, 83, 87, 78, 79, 87,
87, 87, 77, 138, 138, 84, 141, 2,
142, 143, 140, 83, 140, 138, 139, 140,
140, 140, 77, 85, 85, 84, 88, 2,
89, 90, 87, 83, 87, 85, 86, 87,
87, 87, 77, 78, 78, 84, 81, 2,
82, 83, 78, 79, 77, 78, 78, 80,
81, 2, 82, 83, 78, 79, 77, 232,
232, 234, 235, 236, 2, 82, 172, 232,
233, 166, 173, 173, 169, 170, 171, 2,
33, 175, 172, 175, 173, 174, 175, 175,
175, 166, 232, 232, 234, 235, 237, 2,
90, 175, 172, 175, 232, 233, 175, 175,
175, 166, 258, 258, 80, 260, 256, 215,
261, 211, 258, 259, 205, 105, 105, 80,
94, 114, 9, 116, 113, 110, 113, 105,
106, 113, 113, 113, 104, 150, 150, 80,
94, 153, 9, 154, 155, 152, 110, 152,
150, 151, 152, 152, 152, 104, 53, 10,
11, 55, 56, 57, 54, 12, 54, 53,
54, 54, 54, 9, 127, 10, 11, 129,
130, 131, 128, 12, 128, 127, 128, 128,
128, 9, 10, 11, 52, 12, 9, 51,
51, 12, 9, 206, 206, 208, 209, 210,
9, 57, 211, 206, 207, 205, 212, 212,
208, 209, 210, 9, 57, 211, 212, 213,
205, 212, 212, 208, 209, 210, 9, 57,
214, 211, 214, 212, 213, 214, 214, 214,
205, 252, 252, 254, 255, 257, 9, 116,
214, 211, 214, 252, 253, 214, 214, 214,
205, 258, 258, 80, 260, 257, 215, 262,
214, 211, 214, 258, 259, 214, 214, 214,
205, 105, 105, 80, 94, 108, 9, 109,
110, 105, 106, 104, 105, 105, 107, 107,
108, 9, 109, 110, 105, 106, 104, 258,
258, 263, 94, 257, 215, 262, 214, 211,
214, 258, 259, 214, 214, 214, 205, 218,
10, 216, 220, 221, 219, 217, 219, 218,
219, 219, 219, 215, 218, 225, 11, 220,
221, 219, 217, 219, 218, 219, 219, 219,
215, 252, 252, 254, 255, 256, 9, 109,
211, 252, 253, 205, 111, 111, 80, 94,
114, 9, 115, 116, 113, 110, 113, 111,
112, 113, 113, 113, 104, 238, 238, 84,
237, 176, 241, 175, 172, 175, 238, 239,
175, 175, 175, 166, 10, 216, 217, 215,
178, 3, 180, 181, 179, 177, 179, 178,
179, 179, 179, 176, 173, 173, 169, 170,
171, 2, 33, 172, 173, 174, 166, 201,
3, 203, 204, 202, 200, 202, 201, 202,
202, 202, 199, 225, 11, 217, 215, 238,
238, 84, 236, 176, 240, 172, 238, 239,
166, 3, 200, 199, 3, 177, 176, 163,
163, 159, 160, 161, 69, 162, 163, 164,
156, 339, 339, 340, 339, 339, 296, 15,
357, 357, 15, 357, 357, 357, 357, 296,
15, 357, 357, 408, 15, 357, 357, 357,
357, 296, 15, 357, 357, 404, 15, 357,
357, 357, 357, 296, 16, 357, 357, 16,
357, 357, 357, 357, 296, 287, 264, 294,
264, 396, 264, 387, 264, 393, 264, 268,
264, 268, 265, 268, 264, 265, 266, 267,
265, 264, 282, 282, 282, 282, 282, 264,
275, 276, 276, 276, 276, 276, 264, 269,
270, 271, 269, 264, 269, 270, 271, 272,
273, 269, 264, 270, 264, 388, 264, 285,
264, 394, 264, 385, 264, 289, 264, 390,
264, 288, 264, 288, 374, 288, 264, 374,
375, 376, 374, 264, 283, 283, 264, 277,
278, 278, 278, 278, 278, 264, 274, 270,
271, 274, 264, 274, 270, 271, 273, 274,
264, 295, 264, 384, 264, 389, 264, 286,
264, 284, 264, 290, 264, 395, 264, 391,
264, 380, 264, 380, 377, 380, 264, 377,
378, 379, 377, 264, 291, 292, 264, 293,
264, 279, 264, 381, 270, 271, 381, 264,
386, 264, 293, 264, 405, 406, 264, 392,
264, 279, 264, 407, 264, 392, 264, 383,
383, 264, 277, 281, 281, 281, 281, 281,
264, 382, 382, 382, 382, 382, 264, 275,
280, 280, 280, 280, 280, 264, 415, 414,
422, 421, 24, 25, 23, 19, 20, 21,
22, 21, 21, 21, 18, 6, 5, 1,
0, 6, 13, 3, 8, 7, 3, 4,
2, 10, 11, 12, 9, 15, 15, 14,
16, 17, 16, 14, 412, 413, 411, 410,
409, 419, 420, 418, 417, 416, 426, 424,
427, 425, 424, 423, 0
};
/*
 * Indexed by transition index (as produced by _hpricot_scan_indicies): the state the
 * scanner moves to when that transition is taken.
 */
static final short[] _hpricot_scan_trans_targs_wi = {
26, 27, 101, 69, 102, 29, 25, 80,
81, 99, 100, 79, 122, 24, 204, 212,
213, 150, 204, 0, 59, 62, 145, 204,
204, 205, 41, 207, 210, 104, 103, 105,
106, 210, 40, 41, 42, 36, 37, 46,
206, 47, 32, 35, 34, 209, 83, 82,
84, 85, 209, 98, 211, 119, 120, 121,
123, 211, 44, 45, 43, 208, 38, 39,
43, 68, 69, 70, 73, 204, 204, 65,
72, 71, 73, 74, 204, 107, 100, 108,
108, 111, 210, 112, 70, 104, 110, 109,
111, 113, 210, 78, 79, 90, 90, 93,
209, 94, 83, 92, 91, 93, 95, 209,
97, 98, 117, 117, 128, 211, 129, 119,
134, 118, 128, 133, 211, 104, 103, 105,
106, 210, 83, 82, 84, 85, 209, 119,
120, 121, 123, 211, 65, 72, 71, 73,
74, 204, 104, 110, 109, 111, 113, 210,
83, 92, 91, 93, 95, 209, 119, 134,
118, 128, 133, 211, 68, 144, 74, 142,
143, 73, 204, 75, 76, 71, 107, 138,
113, 136, 137, 111, 112, 114, 115, 109,
101, 102, 100, 103, 105, 210, 29, 39,
206, 40, 35, 36, 47, 78, 86, 95,
139, 140, 93, 94, 87, 88, 91, 80,
81, 79, 82, 84, 209, 97, 124, 133,
131, 132, 128, 129, 125, 126, 118, 99,
79, 122, 98, 120, 121, 211, 24, 38,
43, 100, 75, 76, 77, 141, 73, 73,
114, 115, 116, 135, 111, 111, 100, 108,
210, 210, 87, 88, 89, 96, 93, 93,
79, 90, 209, 209, 125, 126, 127, 130,
128, 128, 98, 117, 90, 211, 211, 108,
204, 157, 158, 200, 156, 161, 204, 162,
163, 176, 175, 160, 159, 174, 173, 190,
201, 199, 159, 173, 181, 165, 180, 151,
170, 168, 182, 188, 191, 189, 152, 177,
204, 33, 22, 31, 23, 34, 204, 32,
18, 19, 30, 28, 9, 10, 11, 12,
48, 61, 204, 63, 64, 66, 204, 20,
21, 20, 31, 32, 63, 62, 66, 204,
11, 10, 204, 26, 61, 60, 204, 1,
2, 53, 60, 146, 147, 56, 14, 17,
55, 52, 16, 15, 21, 3, 7, 50,
51, 13, 6, 204, 204, 146, 25, 65,
64, 66, 67, 69, 65, 64, 66, 67,
204, 204, 100, 39, 79, 98, 171, 172,
198, 186, 187, 193, 185, 190, 201, 199,
178, 167, 192, 154, 164, 179, 169, 184,
195, 155, 166, 183, 153, 58, 54, 4,
8, 5, 57, 49, 149, 194, 196, 197,
148, 214, 202, 214, 214, 215, 214, 214,
216, 203, 216, 216, 217, 216, 216, 218,
218, 218, 218, 219
};
/*
 * Indexed by transition index (as produced by _hpricot_scan_indicies): the offset in
 * _hpricot_scan_actions of the action list to run for that transition.
 * An entry of 0 means the transition has no actions and the scan loop skips dispatch.
 */
static final short[] _hpricot_scan_trans_actions_wi = {
0, 0, 0, 7, 0, 0, 21, 0,
0, 0, 7, 7, 0, 0, 65, 0,
31, 0, 67, 0, 0, 1, 0, 63,
132, 178, 0, 144, 147, 0, 174, 23,
0, 186, 0, 21, 0, 0, 0, 21,
144, 0, 111, 0, 111, 147, 0, 174,
23, 0, 186, 7, 147, 0, 174, 23,
0, 186, 0, 0, 0, 144, 0, 21,
21, 0, 9, 9, 102, 73, 162, 9,
9, 174, 117, 0, 170, 0, 9, 9,
7, 102, 205, 0, 7, 9, 9, 174,
117, 0, 215, 0, 9, 9, 7, 102,
205, 0, 9, 9, 174, 117, 0, 215,
0, 9, 9, 7, 102, 205, 0, 9,
9, 174, 117, 0, 215, 11, 0, 108,
11, 210, 11, 0, 108, 11, 210, 11,
0, 108, 11, 210, 105, 105, 0, 158,
11, 195, 105, 105, 0, 158, 11, 232,
105, 105, 0, 158, 11, 232, 105, 105,
0, 158, 11, 232, 3, 3, 3, 0,
0, 87, 120, 3, 3, 190, 3, 3,
3, 0, 7, 87, 3, 3, 3, 190,
3, 3, 3, 190, 87, 200, 3, 3,
182, 3, 3, 3, 3, 3, 3, 3,
7, 0, 87, 3, 3, 3, 190, 3,
3, 3, 190, 87, 200, 3, 3, 3,
7, 7, 87, 3, 3, 3, 190, 3,
75, 3, 3, 190, 87, 200, 3, 3,
84, 99, 78, 78, 0, 0, 150, 154,
78, 78, 0, 7, 150, 154, 78, 78,
220, 226, 78, 78, 7, 0, 150, 154,
78, 78, 220, 226, 78, 78, 7, 7,
150, 154, 78, 78, 75, 220, 226, 99,
69, 0, 0, 0, 0, 0, 49, 0,
0, 0, 0, 13, 0, 15, 0, 17,
0, 0, 3, 3, 0, 0, 0, 0,
0, 0, 0, 3, 3, 0, 0, 0,
71, 0, 0, 0, 0, 19, 51, 19,
0, 0, 0, 0, 0, 1, 0, 0,
0, 0, 55, 0, 114, 0, 53, 0,
19, 3, 3, 81, 5, 0, 5, 93,
5, 0, 90, 5, 5, 0, 96, 0,
0, 0, 1, 25, 25, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 61, 59, 0, 0, 0,
174, 23, 0, 0, 11, 0, 108, 11,
166, 57, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 3, 3,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 3, 3, 0,
0, 35, 0, 33, 123, 31, 37, 135,
41, 0, 39, 126, 31, 43, 138, 47,
141, 45, 129, 0
};
/*
 * Holds as many entries as the other per-state tables; the non-zero entries (27) are
 * offsets into _hpricot_scan_actions.
 * NOTE(review): the code that reads this table is not visible in this part of the
 * file - presumably these are the actions run on arriving in a state; confirm.
 */
static final short[] _hpricot_scan_to_state_actions = {
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 27, 0, 0, 0,
0, 0, 0, 0, 0, 0, 27, 0,
27, 0, 27, 0
};
/*
 * Indexed by the current state: the offset in _hpricot_scan_actions of the action list
 * the scan loop runs before it looks for a transition out of that state.
 * 0 selects the empty list; the non-zero entries (29) select the list whose action
 * id 21 records the token start (tokstart = p).
 */
static final short[] _hpricot_scan_from_state_actions = {
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 29, 0, 0, 0,
0, 0, 0, 0, 0, 0, 29, 0,
29, 0, 29, 0
};
// State the scanner starts in; hpricot_scan assigns it to cs before reading any input.
static final int hpricot_scan_start = 204;
// NOTE(review): no use is visible in this part of the file; presumably the state
// value that marks a scan error - confirm.
static final int hpricot_scan_error = -1;
// Default length of the scan buffer, in chars, used when the receiver has no
// @buffer_size instance variable or it is nil.
public final static int BUFSIZE=16384;
/**
 * Packs one scanned token into a four-element Ruby array {@code [sym, tag, attr, raw]}
 * and yields it to the block.
 * For text tokens the tag itself is used as the raw value. When {@code taint} is set,
 * the array and each of its tag/attr/raw members are marked tainted before yielding.
 *
 * @param sym   token kind marker
 * @param tag   tag name or text content
 * @param attr  attribute hash, or nil
 * @param raw   raw source text of the token, or nil
 * @param taint whether the yielded objects must be marked tainted
 */
private void rb_yield_tokens(IRubyObject sym, IRubyObject tag, IRubyObject attr, IRubyObject raw, boolean taint) {
    final boolean isText = sym == runtime.newSymbol("text");
    final IRubyObject rawValue = isText ? tag : raw;

    final IRubyObject ary = runtime.newArray(new IRubyObject[]{sym, tag, attr, rawValue});
    if (taint) {
        final IRubyObject[] toTaint = {ary, tag, attr, rawValue};
        for (int i = 0; i < toTaint.length; i++) {
            toTaint[i].setTaint(true);
        }
    }
    block.yield(runtime.getCurrentContext(), ary, null, null, false);
}
// Scanner bookkeeping:
//   cs      - current state of the state machine
//   act     - reset to 0 when a scan starts (other uses are not visible in this part of the file)
//   have    - offset in buf at which each pass starts writing and scanning; the remaining
//             space is buffer_size - have
//   nread   - running count of chars taken from the port; also the substr offset for string input
//   curline - line counter, starts at 1 and is incremented by a scanner action; used in error messages
//   p       - current scan position in buf
int cs, act, have = 0, nread = 0, curline = 1, p=-1;
// True while a text run is being accumulated (raised by TEXT_PASS, cleared by ELE).
boolean text = false;
// Bounds of the current token in buf: tokstart inclusive (-1 when unset), tokend exclusive.
int tokstart=-1, tokend;
// Input buffer the scanner reads from; allocated with buffer_size chars per scan.
char[] buf;
// JRuby runtime used to create Ruby objects and exceptions.
Ruby runtime;
// attr: attribute hash of the element being scanned, or nil until the first attribute.
// bufsize: value of the receiver's @buffer_size instance variable, or nil.
IRubyObject attr, bufsize;
// One-element holders for the current tag, attribute key and attribute value.
// The helpers identify them by reference and replace slot 0.
IRubyObject[] tag, akey, aval;
// Start indices in buf for the tag / attribute key / attribute value; -1 means unset.
int mark_tag, mark_akey, mark_aval;
// done: set once a read returns fewer chars than were requested.
// ele_open: set when an element starts, cleared by ELE and TEXT_PASS.
boolean done = false, ele_open = false;
// Length of buf: BUFSIZE unless overridden through @buffer_size.
int buffer_size = 0;
// Whether the input port was tainted; propagated to every yielded token.
boolean taint = false;
// Block that receives each yielded token array.
Block block = null;
// Token kind markers, compared by identity in ELE.
// NOTE(review): they are assigned outside the part of the file seen here.
IRubyObject xmldecl, doctype, procins, stag, etag, emptytag, comment,
cdata, sym_text;
IRubyObject hpricot_scan(IRubyObject recv, IRubyObject port) {
attr = bufsize = runtime.getNil();
tag = new IRubyObject[]{runtime.getNil()};
akey = new IRubyObject[]{runtime.getNil()};
aval = new IRubyObject[]{runtime.getNil()};
RubyClass rb_eHpricotParseError = runtime.getModule("Hpricot").getClass("ParseError");
taint = port.isTaint();
if ( !port.respondsTo("read")) {
if ( port.respondsTo("to_str")) {
port = port.callMethod(runtime.getCurrentContext(),"to_str");
} else {
throw runtime.newArgumentError("bad Hpricot argument, String or IO only please.");
}
}
buffer_size = BUFSIZE;
if (recv.getInstanceVariable("@buffer_size") != null) {
bufsize = recv.getInstanceVariable("@buffer_size");
if (!bufsize.isNil()) {
buffer_size = RubyNumeric.fix2int(bufsize);
}
}
buf = new char[buffer_size];
{
cs = hpricot_scan_start;
tokstart = -1;
tokend = -1;
act = 0;
}
while( !done ) {
IRubyObject str;
p = have;
int pe;
int len, space = buffer_size - have;
if ( space == 0 ) {
/* We've used up the entire buffer storing an already-parsed token
* prefix that must be preserved. Likely caused by super-long attributes.
* See ticket #13. */
rb_raise(rb_eHpricotParseError, "ran out of buffer space on element <" + tag.toString() + ">, starting on line "+curline+".");
}
if (port.respondsTo("read")) {
str = port.callMethod(runtime.getCurrentContext(),"read",runtime.newFixnum(space));
} else {
str = ((RubyString)port).substr(nread,space);
}
str = str.convertToString();
String sss = str.toString();
char[] chars = sss.toCharArray();
System.arraycopy(chars,0,buf,p,chars.length);
len = sss.length();
nread += len;
if ( len < space ) {
len++;
done = true;
}
pe = p + len;
char[] data = buf;
{
int _klen;
int _trans;
int _acts;
int _nacts;
int _keys;
if ( p != pe ) {
_resume: while ( true ) {
_again: do {
_acts = _hpricot_scan_from_state_actions[cs];
_nacts = (int) _hpricot_scan_actions[_acts++];
while ( _nacts-- > 0 ) {
switch ( _hpricot_scan_actions[_acts++] ) {
case 21:
{tokstart = p;}
break;
}
}
_match: do {
_keys = _hpricot_scan_key_offsets[cs];
_trans = _hpricot_scan_index_offsets[cs];
_klen = _hpricot_scan_single_lengths[cs];
if ( _klen > 0 ) {
int _lower = _keys;
int _mid;
int _upper = _keys + _klen - 1;
while (true) {
if ( _upper < _lower )
break;
_mid = _lower + ((_upper-_lower) >> 1);
if ( data[p] < _hpricot_scan_trans_keys[_mid] )
_upper = _mid - 1;
else if ( data[p] > _hpricot_scan_trans_keys[_mid] )
_lower = _mid + 1;
else {
_trans += (_mid - _keys);
break _match;
}
}
_keys += _klen;
_trans += _klen;
}
_klen = _hpricot_scan_range_lengths[cs];
if ( _klen > 0 ) {
int _lower = _keys;
int _mid;
int _upper = _keys + (_klen<<1) - 2;
while (true) {
if ( _upper < _lower )
break;
_mid = _lower + (((_upper-_lower) >> 1) & ~1);
if ( data[p] < _hpricot_scan_trans_keys[_mid] )
_upper = _mid - 2;
else if ( data[p] > _hpricot_scan_trans_keys[_mid+1] )
_lower = _mid + 2;
else {
_trans += ((_mid - _keys)>>1);
break _match;
}
}
_trans += _klen;
}
} while (false);
_trans = _hpricot_scan_indicies[_trans];
cs = _hpricot_scan_trans_targs_wi[_trans];
if ( _hpricot_scan_trans_actions_wi[_trans] == 0 )
break _again;
_acts = _hpricot_scan_trans_actions_wi[_trans];
_nacts = (int) _hpricot_scan_actions[_acts++];
while ( _nacts-- > 0 )
{
switch ( _hpricot_scan_actions[_acts++] )
{
case 0:
{
if (text) {
CAT(tag, p);
ELE(sym_text);
text = false;
}
attr = runtime.getNil();
tag[0] = runtime.getNil();
mark_tag = -1;
ele_open = true;
}
break;
case 1:
{ mark_tag = p; }
break;
case 2:
{ mark_aval = p; }
break;
case 3:
{ mark_akey = p; }
break;
case 4:
{ SET(tag, p); }
break;
case 5:
{ SET(aval, p); }
break;
case 6:
{
if (buf[p-1] == '"' || buf[p-1] == '\'') { SET(aval, p-1); }
else { SET(aval, p); }
}
break;
case 7:
{ SET(akey, p); }
break;
case 8:
{ SET(aval, p); ATTR(rb_str_new2("version"), aval); }
break;
case 9:
{ SET(aval, p); ATTR(rb_str_new2("encoding"), aval); }
break;
case 10:
{ SET(aval, p); ATTR(rb_str_new2("standalone"), aval); }
break;
case 11:
{ SET(aval, p); ATTR(rb_str_new2("public_id"), aval); }
break;
case 12:
{ SET(aval, p); ATTR(rb_str_new2("system_id"), aval); }
break;
case 13:
{
akey[0] = runtime.getNil();
aval[0] = runtime.getNil();
mark_akey = -1;
mark_aval = -1;
}
break;
case 14:
{
ATTR(akey, aval);
}
break;
case 15:
{curline += 1;}
break;
case 16:
{ TEXT_PASS(); }
break;
case 17:
{ EBLK(comment, 3); {cs = 204; if (true) break _again;} }
break;
case 18:
{ EBLK(cdata, 3); {cs = 204; if (true) break _again;} }
break;
case 19:
{ EBLK(procins, 2); {cs = 204; if (true) break _again;} }
break;
case 22:
{tokend = p+1;}
break;
case 23:
{tokend = p+1;{p = ((tokend))-1;}}
break;
case 24:
{tokend = p+1;{ TEXT_PASS(); }{p = ((tokend))-1;}}
break;
case 25:
{tokend = p;{ TEXT_PASS(); }{p = ((tokend))-1;}}
break;
case 26:
{{ TEXT_PASS(); }{p = ((tokend))-1;}}
break;
case 27:
{tokend = p+1;{p = ((tokend))-1;}}
break;
case 28:
{tokend = p+1;{ TEXT_PASS(); }{p = ((tokend))-1;}}
break;
case 29:
{tokend = p;{ TEXT_PASS(); }{p = ((tokend))-1;}}
break;
case 30:
{{ TEXT_PASS(); }{p = ((tokend))-1;}}
break;
case 31:
{tokend = p+1;{p = ((tokend))-1;}}
break;
case 32:
{tokend = p+1;{ TEXT_PASS(); }{p = ((tokend))-1;}}
break;
case 33:
{tokend = p;{ TEXT_PASS(); }{p = ((tokend))-1;}}
break;
case 34:
{act = 8;}
break;
case 35:
{act = 10;}
break;
case 36:
{act = 12;}
break;
case 37:
{act = 15;}
break;
case 38:
{tokend = p+1;{ ELE(xmldecl); }{p = ((tokend))-1;}}
break;
case 39:
{tokend = p+1;{ ELE(doctype); }{p = ((tokend))-1;}}
break;
case 40:
{tokend = p+1;{ ELE(stag); }{p = ((tokend))-1;}}
break;
case 41:
{tokend = p+1;{ ELE(etag); }{p = ((tokend))-1;}}
break;
case 42:
{tokend = p+1;{ ELE(emptytag); }{p = ((tokend))-1;}}
break;
case 43:
{tokend = p+1;{ {{p = ((tokend))-1;}{cs = 214; if (true) break _again;}} }{p = ((tokend))-1;}}
break;
case 44:
{tokend = p+1;{ {{p = ((tokend))-1;}{cs = 216; if (true) break _again;}} }{p = ((tokend))-1;}}
break;
case 45:
{tokend = p+1;{ TEXT_PASS(); }{p = ((tokend))-1;}}
break;
case 46:
{tokend = p;{ {{p = ((tokend))-1;}{cs = 218; if (true) break _again;}} }{p = ((tokend))-1;}}
break;
case 47:
{tokend = p;{ TEXT_PASS(); }{p = ((tokend))-1;}}
break;
case 48:
{{ {{p = ((tokend))-1;}{cs = 218; if (true) break _again;}} }{p = ((tokend))-1;}}
break;
case 49:
{{ TEXT_PASS(); }{p = ((tokend))-1;}}
break;
case 50:
{ switch( act ) {
case 8:
{ ELE(doctype); }
break;
case 10:
{ ELE(stag); }
break;
case 12:
{ ELE(emptytag); }
break;
case 15:
{ TEXT_PASS(); }
break;
default: break;
}
{p = ((tokend))-1;}}
break;
}
}
} while (false);
_acts = _hpricot_scan_to_state_actions[cs];
_nacts = (int) _hpricot_scan_actions[_acts++];
while ( _nacts-- > 0 ) {
switch ( _hpricot_scan_actions[_acts++] ) {
case 20:
{tokstart = -1;}
break;
}
}
if ( ++p == pe )
break _resume;
}
}
}
if ( cs == hpricot_scan_error ) {
if(!tag[0].isNil()) {
rb_raise(rb_eHpricotParseError, "parse error on element <"+tag.toString()+">, starting on line "+curline+".\n" + NO_WAY_SERIOUSLY);
} else {
rb_raise(rb_eHpricotParseError, "parse error on line "+curline+".\n" + NO_WAY_SERIOUSLY);
}
}
if ( done && ele_open ) {
ele_open = false;
if(tokstart > -1) {
mark_tag = tokstart;
tokstart = -1;
text = true;
}
}
if(tokstart == -1) {
have = 0;
/* text nodes have no tokstart because each byte is parsed alone */
if(mark_tag != -1 && text) {
if (done) {
if(mark_tag < p-1) {
CAT(tag, p-1);
ELE(sym_text);
}
} else {
CAT(tag, p);
}
}
mark_tag = 0;
} else {
have = pe - tokstart;
System.arraycopy(buf,tokstart,buf,0,have);
SLIDE(tag);
SLIDE(akey);
SLIDE(aval);
tokend = (tokend - tokstart);
tokstart = 0;
}
}
return runtime.getNil();
}
/**
 * Static entry point for a scan: builds a fresh {@code HpricotScanService},
 * hands it the runtime, the token-type symbols and the caller's block, and
 * then delegates to {@code hpricot_scan(recv, port)}.
 *
 * @param recv  receiver of the call; its runtime is used for the whole scan
 * @param port  input source, passed through unchanged to {@code hpricot_scan}
 * @param block block stored on the service before scanning starts
 * @return whatever {@code hpricot_scan(recv, port)} returns
 */
public static IRubyObject __hpricot_scan(IRubyObject recv, IRubyObject port, Block block) {
    final Ruby rt = recv.getRuntime();
    final HpricotScanService scanner = new HpricotScanService();

    scanner.runtime = rt;

    // Token-type symbols, created in the same order as before.
    scanner.xmldecl  = rt.newSymbol("xmldecl");
    scanner.doctype  = rt.newSymbol("doctype");
    scanner.procins  = rt.newSymbol("procins");
    scanner.stag     = rt.newSymbol("stag");
    scanner.etag     = rt.newSymbol("etag");
    scanner.emptytag = rt.newSymbol("emptytag");
    scanner.comment  = rt.newSymbol("comment");
    scanner.cdata    = rt.newSymbol("cdata");
    scanner.sym_text = rt.newSymbol("text");

    scanner.block = block;

    return scanner.hpricot_scan(recv, port);
}
/**
 * {@code BasicLibraryService} hook: installs the Hpricot bindings into the
 * given runtime by calling {@link #Init_hpricot_scan(Ruby)}.
 *
 * @param runtime runtime to install the bindings into
 * @return always {@code true}
 * @throws IOException declared by the signature; this body does not throw it itself
 */
public boolean basicLoad(final Ruby runtime) throws IOException {
    HpricotScanService.Init_hpricot_scan(runtime);
    return true;
}
/**
 * Registers the Hpricot bindings in the given runtime:
 * <ul>
 *   <li>defines the {@code Hpricot} module;</li>
 *   <li>adds a {@code buffer_size} accessor on the module's metaclass;</li>
 *   <li>binds the metaclass method {@code scan} to {@code __hpricot_scan};</li>
 *   <li>defines {@code ParseError} under the module, with {@code Exception}
 *       as superclass and {@code Exception}'s allocator.</li>
 * </ul>
 *
 * @param runtime runtime in which the module, method and class are defined
 */
public static void Init_hpricot_scan(Ruby runtime) {
    final RubyModule hpricotModule = runtime.defineModule("Hpricot");
    final RubyClass hpricotMeta = hpricotModule.getMetaClass();

    final IRubyObject[] accessorNames = { runtime.newSymbol("buffer_size") };
    hpricotMeta.attr_accessor(accessorNames);

    final CallbackFactory callbacks = runtime.callbackFactory(HpricotScanService.class);
    hpricotMeta.defineMethod("scan", callbacks.getSingletonMethod("__hpricot_scan", IRubyObject.class));

    final RubyClass exceptionClass = runtime.getClass("Exception");
    hpricotModule.defineClassUnder("ParseError", exceptionClass, exceptionClass.getAllocator());
}
}