Subversion Repositories Applications.annuaire

Rev

Rev 42 | Details | Compare with Previous | Last modification | View Log | RSS feed

Rev Author Line No. Line
42 aurelien 1
<?php
2
 
3
/**
4
 * This module implements a VERY limited parser that finds <link> tags
5
 * in the head of HTML or XHTML documents and parses out their
6
 * attributes according to the OpenID spec. It is a liberal parser,
7
 * but it requires these things from the data in order to work:
8
 *
9
 * - There must be an open <html> tag
10
 *
11
 * - There must be an open <head> tag inside of the <html> tag
12
 *
13
 * - Only <link>s that are found inside of the <head> tag are parsed
14
 *   (this is by design)
15
 *
16
 * - The parser follows the OpenID specification in resolving the
17
 *   attributes of the link tags. This means that the attributes DO
18
 *   NOT get resolved as they would by an XML or HTML parser. In
19
 *   particular, only certain entities get replaced, and href
20
 *   attributes do not get resolved relative to a base URL.
21
 *
22
 * From http://openid.net/specs.bml:
23
 *
24
 * - The openid.server URL MUST be an absolute URL. OpenID consumers
25
 *   MUST NOT attempt to resolve relative URLs.
26
 *
27
 * - The openid.server URL MUST NOT include entities other than &amp;,
28
 *   &lt;, &gt;, and &quot;.
29
 *
30
 * The parser ignores SGML comments and <![CDATA[blocks]]>. Both kinds
31
 * of quoting are allowed for attributes.
32
 *
33
 * The parser deals with invalid markup in these ways:
34
 *
35
 * - Tag names are not case-sensitive
36
 *
37
 * - The <html> tag is accepted even when it is not at the top level
38
 *
39
 * - The <head> tag is accepted even when it is not a direct child of
40
 *   the <html> tag, but a <html> tag must be an ancestor of the
41
 *   <head> tag
42
 *
43
 * - <link> tags are accepted even when they are not direct children
44
 *   of the <head> tag, but a <head> tag must be an ancestor of the
45
 *   <link> tag
46
 *
47
 * - If there is no closing tag for an open <html> or <head> tag, the
48
 *   remainder of the document is viewed as being inside of the
49
 *   tag. If there is no closing tag for a <link> tag, the link tag is
50
 *   treated as a short tag. Exceptions to this rule are that <html>
51
 *   closes <html> and <body> or <head> closes <head>
52
 *
53
 * - Attributes of the <link> tag are not required to be quoted.
54
 *
55
 * - In the case of duplicated attribute names, the attribute coming
56
 *   last in the tag will be the value returned.
57
 *
58
 * - Any text that does not parse as an attribute within a link tag
59
 *   will be ignored. (e.g. <link pumpkin rel='openid.server' /> will
60
 *   ignore pumpkin)
61
 *
62
 * - If there is more than one <html> or <head> tag, the parser only
63
 *   looks inside of the first one.
64
 *
65
 * - The contents of <script> tags are ignored entirely, except
66
 *   unclosed <script> tags. Unclosed <script> tags are ignored.
67
 *
68
 * - Any other invalid markup is ignored, including unclosed SGML
69
 *   comments and unclosed <![CDATA[blocks.
70
 *
71
 * PHP versions 4 and 5
72
 *
73
 * LICENSE: See the COPYING file included in this distribution.
74
 *
75
 * @access private
76
 * @package OpenID
77
 * @author JanRain, Inc. <openid@janrain.com>
78
 * @copyright 2005 Janrain, Inc.
79
 * @license http://www.gnu.org/copyleft/lesser.html LGPL
80
 */
81
 
82
/**
83
 * Require Auth_OpenID::arrayGet().
84
 */
85
require_once "Auth/OpenID.php";
86
 
87
/**
 * Liberal, regex-based extractor for the <link> tags found in the
 * <head> of an HTML or XHTML document.
 *
 * Typical use: construct an instance, call parseLinkAttrs() on the
 * document text and then query the result with findFirstHref().
 */
class Auth_OpenID_Parse {

    /**
     * Specify some flags for use with regex matching.
     */
    var $_re_flags = "si";

    /**
     * Stuff to remove before we start looking for tags. Holds the raw
     * pattern until the constructor wraps it in delimiters and flags.
     */
    var $_removed_re =
           "<!--.*?-->|<!\[CDATA\[.*?\]\]>|<script\b(?!:)[^>]*>.*?<\/script>";

    /**
     * Starts with the tag name at a word boundary, where the tag name
     * is not a namespace. This is a sprintf() template taking the tag
     * name and the alternation of names that may close it.
     */
    var $_tag_expr = "<%s\b(?!:)([^>]*?)(?:\/>|>(.*?)(?:<\/?%s\s*>|\Z))";

    /**
     * Matches one name=value attribute (double quoted, single quoted
     * or unquoted). Holds the raw pattern until the constructor wraps
     * it in delimiters and flags.
     */
    var $_attr_find = '\b(\w+)=("[^"]*"|\'[^\']*\'|[^\'"\s\/<>]+)';

    /**
     * Complete regular expression matching a <link> tag; built by the
     * constructor. Null means the constructor has not run yet.
     */
    var $_link_find = null;

    /**
     * Map of entity name => replacement character; filled in by the
     * constructor.
     */
    var $_entity_replacements = array();

    /**
     * Pattern fragment (no delimiters) matching any supported entity;
     * built by the constructor.
     */
    var $_ent_replace = null;

    /**
     * Compile the regular expressions and set up the entity table.
     *
     * Running this a second time on the same instance is a no-op, so
     * the patterns are never wrapped in delimiters twice.
     */
    function __construct()
    {
        if ($this->_link_find !== null) {
            return;
        }

        $this->_link_find = sprintf("/<link\b(?!:)([^>]*)(?!<)>/%s",
                                    $this->_re_flags);

        $this->_entity_replacements = array(
                                            'amp' => '&',
                                            'lt' => '<',
                                            'gt' => '>',
                                            'quot' => '"'
                                            );

        $this->_attr_find = sprintf("/%s/%s",
                                    $this->_attr_find,
                                    $this->_re_flags);

        $this->_removed_re = sprintf("/%s/%s",
                                     $this->_removed_re,
                                     $this->_re_flags);

        // The alternation has to list the entity NAMES (the array
        // keys), not the characters they decode to.
        $this->_ent_replace =
            sprintf("&(%s);", implode("|",
                                      array_keys($this->_entity_replacements)));
    }

    /**
     * Legacy class-named constructor. PHP 8 no longer calls a method
     * named after the class on instantiation, so the real work lives
     * in __construct(); this remains for code that invokes it
     * explicitly.
     */
    function Auth_OpenID_Parse()
    {
        $this->__construct();
    }

    /**
     * Returns a regular expression that will match a given tag in an
     * SGML string.
     *
     * @param string $tag_name The tag to match
     * @param array $close_tags Optional names of other tags whose
     * appearance also ends the matched tag
     * @return string A complete regular expression, including
     * delimiters and flags
     */
    function tagMatcher($tag_name, $close_tags = null)
    {
        if ($close_tags) {
            $options = implode("|", array_merge(array($tag_name), $close_tags));
            $closer = sprintf("(?:%s)", $options);
        } else {
            $closer = $tag_name;
        }

        $expr = sprintf($this->_tag_expr, $tag_name, $closer);
        return sprintf("/%s/%s", $expr, $this->_re_flags);
    }

    /**
     * Returns the regular expression used to locate the <html> tag.
     */
    function htmlFind()
    {
        return $this->tagMatcher('html');
    }

    /**
     * Returns the regular expression used to locate the <head> tag;
     * a <body> tag also terminates the head.
     */
    function headFind()
    {
        return $this->tagMatcher('head', array('body'));
    }

    /**
     * Decode the entities listed in $this->_entity_replacements.
     *
     * The replacement is done in a single pass so that the output of
     * one substitution is never decoded again (e.g. "&amp;lt;"
     * becomes "&lt;", not "<").
     *
     * @param string $str The text to decode
     * @return string The decoded text
     */
    function replaceEntities($str)
    {
        $pairs = array();
        foreach ($this->_entity_replacements as $name => $char) {
            $pairs['&' . $name . ';'] = $char;
        }

        if (!$pairs) {
            return $str;
        }

        return strtr($str, $pairs);
    }

    /**
     * Strip one pair of matching surrounding quotes (single or
     * double) from a string; anything else is returned untouched.
     *
     * @param string $str The possibly quoted value
     * @return string The value without its surrounding quotes
     */
    function removeQuotes($str)
    {
        $matches = array();

        // The "s" modifier lets "." span newlines, so quoted values
        // that contain line breaks are unquoted too.
        if (preg_match('/^(["\'])(.*)\1$/s', $str, $matches)) {
            return $matches[2];
        }

        return $str;
    }

    /**
     * Find all link tags in a string representing a HTML document and
     * return a list of their attributes.
     *
     * @param string $html The text to parse
     * @return array $list An array of arrays of attributes, one for each
     * link tag; empty when no <html> tag, <head> tag or <link> tag is
     * found
     */
    function parseLinkAttrs($html)
    {
        $stripped = preg_replace($this->_removed_re,
                                 "",
                                 $html);

        // preg_replace() yields null when the regex engine fails;
        // treat that the same as a document without links.
        if ($stripped === null) {
            return array();
        }

        // Try to find the <HTML> tag.
        $html_re = $this->htmlFind();
        $html_matches = array();
        if (!preg_match($html_re, $stripped, $html_matches)) {
            return array();
        }

        // Try to find the <HEAD> tag.
        $head_re = $this->headFind();
        $head_matches = array();
        if (!preg_match($head_re, $html_matches[0], $head_matches)) {
            return array();
        }

        $link_data = array();
        $link_matches = array();

        if (!preg_match_all($this->_link_find, $head_matches[0],
                            $link_matches)) {
            return array();
        }

        foreach ($link_matches[0] as $link) {
            $attr_matches = array();
            preg_match_all($this->_attr_find, $link, $attr_matches);
            $link_attrs = array();
            foreach ($attr_matches[0] as $index => $full_match) {
                $name = $attr_matches[1][$index];
                $value = $this->replaceEntities(
                              $this->removeQuotes($attr_matches[2][$index]));

                // Later duplicates overwrite earlier ones.
                $link_attrs[strtolower($name)] = $value;
            }
            $link_data[] = $link_attrs;
        }

        return $link_data;
    }

    /**
     * Does $target_rel appear among the whitespace-separated values
     * of a rel attribute? Values from the attribute are lower-cased
     * before comparison; $target_rel is compared as given.
     *
     * @param string $rel_attr The value of a rel attribute
     * @param string $target_rel The relationship to look for
     * @return integer 1 if found, 0 otherwise
     */
    function relMatches($rel_attr, $target_rel)
    {
        $wanted = (string) $target_rel;
        $rels = preg_split("/\s+/", trim($rel_attr));
        foreach ($rels as $rel) {
            // Strict comparison: "==" would compare numeric-looking
            // strings by numeric value.
            if (strtolower($rel) === $wanted) {
                return 1;
            }
        }

        return 0;
    }

    /**
     * Does this link have target_rel as a relationship?
     *
     * @param array $link_attrs Attributes of one link tag
     * @param string $target_rel The relationship to look for
     * @return bool
     */
    function linkHasRel($link_attrs, $target_rel)
    {
        $rel_attr = Auth_OpenID::arrayGet($link_attrs, 'rel', null);
        return ($rel_attr && $this->relMatches($rel_attr,
                                               $target_rel));
    }

    /**
     * Filter the list of link attributes on whether it has
     * target_rel as a relationship.
     *
     * @param array $link_attrs_list List of attribute arrays
     * @param string $target_rel The relationship to look for
     * @return array The matching attribute arrays, in original order
     */
    function findLinksRel($link_attrs_list, $target_rel)
    {
        $result = array();
        foreach ($link_attrs_list as $attr) {
            if ($this->linkHasRel($attr, $target_rel)) {
                $result[] = $attr;
            }
        }

        return $result;
    }

    /**
     * Return the value of the href attribute for the first link tag
     * in the list that has target_rel as a relationship.
     *
     * @param array $link_attrs_list List of attribute arrays
     * @param string $target_rel The relationship to look for
     * @return mixed null when no link matches; otherwise whatever
     * Auth_OpenID::arrayGet() yields for the 'href' key
     */
    function findFirstHref($link_attrs_list, $target_rel)
    {
        $matches = $this->findLinksRel($link_attrs_list,
                                       $target_rel);
        if (!$matches) {
            return null;
        }
        $first = $matches[0];
        return Auth_OpenID::arrayGet($first, 'href', null);
    }
}
289
 
290
/**
 * Look for the OpenID link tags in an HTML document.
 *
 * @param string $html_text The text of the document to inspect
 * @return mixed false when no link with rel "openid.server" yields an
 * href; otherwise array(delegate URL, server URL), where the delegate
 * entry is whatever the lookup for "openid.delegate" returned
 */
function Auth_OpenID_legacy_discover($html_text)
{
    $parser = new Auth_OpenID_Parse();
    $links = $parser->parseLinkAttrs($html_text);

    $server = $parser->findFirstHref($links, 'openid.server');

    // Without a server URL there is nothing to report.
    if ($server === null) {
        return false;
    }

    $delegate = $parser->findFirstHref($links, 'openid.delegate');

    return array($delegate, $server);
}
307
 
308
?>