| 42 | aurelien | 1 | <?php
 | 
        
           |  |  | 2 |   | 
        
           |  |  | 3 | /**
 | 
        
           |  |  | 4 |  * This module implements a VERY limited parser that finds <link> tags
 | 
        
           |  |  | 5 |  * in the head of HTML or XHTML documents and parses out their
 | 
        
           |  |  | 6 |  * attributes according to the OpenID spec. It is a liberal parser,
 | 
        
           |  |  | 7 |  * but it requires these things from the data in order to work:
 | 
        
           |  |  | 8 |  *
 | 
        
           |  |  | 9 |  * - There must be an open <html> tag
 | 
        
           |  |  | 10 |  *
 | 
        
           |  |  | 11 |  * - There must be an open <head> tag inside of the <html> tag
 | 
        
           |  |  | 12 |  *
 | 
        
           |  |  | 13 |  * - Only <link>s that are found inside of the <head> tag are parsed
 | 
        
           |  |  | 14 |  *   (this is by design)
 | 
        
           |  |  | 15 |  *
 | 
        
           |  |  | 16 |  * - The parser follows the OpenID specification in resolving the
 | 
        
           |  |  | 17 |  *   attributes of the link tags. This means that the attributes DO
 | 
        
           |  |  | 18 |  *   NOT get resolved as they would by an XML or HTML parser. In
 | 
        
           |  |  | 19 |  *   particular, only certain entities get replaced, and href
 | 
        
           |  |  | 20 |  *   attributes do not get resolved relative to a base URL.
 | 
        
           |  |  | 21 |  *
 | 
        
           |  |  | 22 |  * From http://openid.net/specs.bml:
 | 
        
           |  |  | 23 |  *
 | 
        
           |  |  | 24 |  * - The openid.server URL MUST be an absolute URL. OpenID consumers
 | 
        
           |  |  | 25 |  *   MUST NOT attempt to resolve relative URLs.
 | 
        
           |  |  | 26 |  *
 | 
        
           |  |  | 27 |  * - The openid.server URL MUST NOT include entities other than &,
 | 
        
           |  |  | 28 |  *   <, >, and ".
 | 
        
           |  |  | 29 |  *
 | 
        
           |  |  | 30 |  * The parser ignores SGML comments and <![CDATA[blocks]]>. Both kinds
 | 
        
           |  |  | 31 |  * of quoting are allowed for attributes.
 | 
        
           |  |  | 32 |  *
 | 
        
           |  |  | 33 |  * The parser deals with invalid markup in these ways:
 | 
        
           |  |  | 34 |  *
 | 
        
           |  |  | 35 |  * - Tag names are not case-sensitive
 | 
        
           |  |  | 36 |  *
 | 
        
           |  |  | 37 |  * - The <html> tag is accepted even when it is not at the top level
 | 
        
           |  |  | 38 |  *
 | 
        
           |  |  | 39 |  * - The <head> tag is accepted even when it is not a direct child of
 | 
        
           |  |  | 40 |  *   the <html> tag, but a <html> tag must be an ancestor of the
 | 
        
           |  |  | 41 |  *   <head> tag
 | 
        
           |  |  | 42 |  *
 | 
        
           |  |  | 43 |  * - <link> tags are accepted even when they are not direct children
 | 
        
           |  |  | 44 |  *   of the <head> tag, but a <head> tag must be an ancestor of the
 | 
        
           |  |  | 45 |  *   <link> tag
 | 
        
           |  |  | 46 |  *
 | 
        
           |  |  | 47 |  * - If there is no closing tag for an open <html> or <head> tag, the
 | 
        
           |  |  | 48 |  *   remainder of the document is viewed as being inside of the
 | 
        
           |  |  | 49 |  *   tag. If there is no closing tag for a <link> tag, the link tag is
 | 
        
           |  |  | 50 |  *   treated as a short tag. Exceptions to this rule are that <html>
 | 
        
           |  |  | 51 |  *   closes <html> and <body> or <head> closes <head>
 | 
        
           |  |  | 52 |  *
 | 
        
           |  |  | 53 |  * - Attributes of the <link> tag are not required to be quoted.
 | 
        
           |  |  | 54 |  *
 | 
        
           |  |  | 55 |  * - In the case of duplicated attribute names, the attribute coming
 | 
        
           |  |  | 56 |  *   last in the tag will be the value returned.
 | 
        
           |  |  | 57 |  *
 | 
        
           |  |  | 58 |  * - Any text that does not parse as an attribute within a link tag
 | 
        
           |  |  | 59 |  *   will be ignored. (e.g. <link pumpkin rel='openid.server' /> will
 | 
        
           |  |  | 60 |  *   ignore pumpkin)
 | 
        
           |  |  | 61 |  *
 | 
        
           |  |  | 62 |  * - If there is more than one <html> or <head> tag, the parser only
 | 
        
           |  |  | 63 |  *   looks inside of the first one.
 | 
        
           |  |  | 64 |  *
 | 
        
           |  |  | 65 |  * - The contents of <script> tags are ignored entirely, except
 | 
        
           |  |  | 66 |  *   unclosed <script> tags. Unclosed <script> tags are ignored.
 | 
        
           |  |  | 67 |  *
 | 
        
           |  |  | 68 |  * - Any other invalid markup is ignored, including unclosed SGML
 | 
        
           |  |  | 69 |  *   comments and unclosed <![CDATA[blocks.
 | 
        
           |  |  | 70 |  *
 | 
        
           |  |  | 71 |  * PHP versions 4 and 5
 | 
        
           |  |  | 72 |  *
 | 
        
           |  |  | 73 |  * LICENSE: See the COPYING file included in this distribution.
 | 
        
           |  |  | 74 |  *
 | 
        
           |  |  | 75 |  * @access private
 | 
        
           |  |  | 76 |  * @package OpenID
 | 
        
           |  |  | 77 |  * @author JanRain, Inc. <openid@janrain.com>
 | 
        
           |  |  | 78 |  * @copyright 2005 Janrain, Inc.
 | 
        
           |  |  | 79 |  * @license http://www.gnu.org/copyleft/lesser.html LGPL
 | 
        
           |  |  | 80 |  */
 | 
        
           |  |  | 81 |   | 
        
           |  |  | 82 | /**
 | 
        
           |  |  | 83 |  * Require Auth_OpenID::arrayGet().
 | 
        
           |  |  | 84 |  */
 | 
        
           |  |  | 85 | require_once "Auth/OpenID.php";
 | 
        
           |  |  | 86 |   | 
        
           |  |  | 87 | class Auth_OpenID_Parse {
 | 
        
           |  |  | 88 |   | 
        
           |  |  | 89 |     /**
 | 
        
           |  |  | 90 |      * Specify some flags for use with regex matching.
 | 
        
           |  |  | 91 |      */
 | 
        
           |  |  | 92 |     var $_re_flags = "si";
 | 
        
           |  |  | 93 |   | 
        
           |  |  | 94 |     /**
 | 
        
           |  |  | 95 |      * Stuff to remove before we start looking for tags
 | 
        
           |  |  | 96 |      */
 | 
        
           |  |  | 97 |     var $_removed_re =
 | 
        
           |  |  | 98 |            "<!--.*?-->|<!\[CDATA\[.*?\]\]>|<script\b(?!:)[^>]*>.*?<\/script>";
 | 
        
           |  |  | 99 |   | 
        
           |  |  | 100 |     /**
 | 
        
           |  |  | 101 |      * Starts with the tag name at a word boundary, where the tag name
 | 
        
           |  |  | 102 |      * is not a namespace
 | 
        
           |  |  | 103 |      */
 | 
        
           |  |  | 104 |     var $_tag_expr = "<%s\b(?!:)([^>]*?)(?:\/>|>(.*?)(?:<\/?%s\s*>|\Z))";
 | 
        
           |  |  | 105 |   | 
        
           |  |  | 106 |     var $_attr_find = '\b(\w+)=("[^"]*"|\'[^\']*\'|[^\'"\s\/<>]+)';
 | 
        
           |  |  | 107 |   | 
        
           |  |  | 108 |     function Auth_OpenID_Parse()
 | 
        
           |  |  | 109 |     {
 | 
        
           |  |  | 110 |         $this->_link_find = sprintf("/<link\b(?!:)([^>]*)(?!<)>/%s",
 | 
        
           |  |  | 111 |                                     $this->_re_flags);
 | 
        
           |  |  | 112 |   | 
        
           |  |  | 113 |         $this->_entity_replacements = array(
 | 
        
           |  |  | 114 |                                             'amp' => '&',
 | 
        
           |  |  | 115 |                                             'lt' => '<',
 | 
        
           |  |  | 116 |                                             'gt' => '>',
 | 
        
           |  |  | 117 |                                             'quot' => '"'
 | 
        
           |  |  | 118 |                                             );
 | 
        
           |  |  | 119 |   | 
        
           |  |  | 120 |         $this->_attr_find = sprintf("/%s/%s",
 | 
        
           |  |  | 121 |                                     $this->_attr_find,
 | 
        
           |  |  | 122 |                                     $this->_re_flags);
 | 
        
           |  |  | 123 |   | 
        
           |  |  | 124 |         $this->_removed_re = sprintf("/%s/%s",
 | 
        
           |  |  | 125 |                                      $this->_removed_re,
 | 
        
           |  |  | 126 |                                      $this->_re_flags);
 | 
        
           |  |  | 127 |   | 
        
           |  |  | 128 |         $this->_ent_replace =
 | 
        
           |  |  | 129 |             sprintf("&(%s);", implode("|",
 | 
        
           |  |  | 130 |                                       $this->_entity_replacements));
 | 
        
           |  |  | 131 |     }
 | 
        
           |  |  | 132 |   | 
        
           |  |  | 133 |     /**
 | 
        
           |  |  | 134 |      * Returns a regular expression that will match a given tag in an
 | 
        
           |  |  | 135 |      * SGML string.
 | 
        
           |  |  | 136 |      */
 | 
        
           |  |  | 137 |     function tagMatcher($tag_name, $close_tags = null)
 | 
        
           |  |  | 138 |     {
 | 
        
           |  |  | 139 |         if ($close_tags) {
 | 
        
           |  |  | 140 |             $options = implode("|", array_merge(array($tag_name), $close_tags));
 | 
        
           |  |  | 141 |             $closer = sprintf("(?:%s)", $options);
 | 
        
           |  |  | 142 |         } else {
 | 
        
           |  |  | 143 |             $closer = $tag_name;
 | 
        
           |  |  | 144 |         }
 | 
        
           |  |  | 145 |   | 
        
           |  |  | 146 |         $expr = sprintf($this->_tag_expr, $tag_name, $closer);
 | 
        
           |  |  | 147 |         return sprintf("/%s/%s", $expr, $this->_re_flags);
 | 
        
           |  |  | 148 |     }
 | 
        
           |  |  | 149 |   | 
        
           |  |  | 150 |     function htmlFind()
 | 
        
           |  |  | 151 |     {
 | 
        
           |  |  | 152 |         return $this->tagMatcher('html');
 | 
        
           |  |  | 153 |     }
 | 
        
           |  |  | 154 |   | 
        
           |  |  | 155 |     function headFind()
 | 
        
           |  |  | 156 |     {
 | 
        
           |  |  | 157 |         return $this->tagMatcher('head', array('body'));
 | 
        
           |  |  | 158 |     }
 | 
        
           |  |  | 159 |   | 
        
           |  |  | 160 |     function replaceEntities($str)
 | 
        
           |  |  | 161 |     {
 | 
        
           |  |  | 162 |         foreach ($this->_entity_replacements as $old => $new) {
 | 
        
           |  |  | 163 |             $str = preg_replace(sprintf("/&%s;/", $old), $new, $str);
 | 
        
           |  |  | 164 |         }
 | 
        
           |  |  | 165 |         return $str;
 | 
        
           |  |  | 166 |     }
 | 
        
           |  |  | 167 |   | 
        
           |  |  | 168 |     function removeQuotes($str)
 | 
        
           |  |  | 169 |     {
 | 
        
           |  |  | 170 |         $matches = array();
 | 
        
           |  |  | 171 |         $double = '/^"(.*)"$/';
 | 
        
           |  |  | 172 |         $single = "/^\'(.*)\'$/";
 | 
        
           |  |  | 173 |   | 
        
           |  |  | 174 |         if (preg_match($double, $str, $matches)) {
 | 
        
           |  |  | 175 |             return $matches[1];
 | 
        
           |  |  | 176 |         } else if (preg_match($single, $str, $matches)) {
 | 
        
           |  |  | 177 |             return $matches[1];
 | 
        
           |  |  | 178 |         } else {
 | 
        
           |  |  | 179 |             return $str;
 | 
        
           |  |  | 180 |         }
 | 
        
           |  |  | 181 |     }
 | 
        
           |  |  | 182 |   | 
        
           |  |  | 183 |     /**
 | 
        
           |  |  | 184 |      * Find all link tags in a string representing a HTML document and
 | 
        
           |  |  | 185 |      * return a list of their attributes.
 | 
        
           |  |  | 186 |      *
 | 
        
           |  |  | 187 |      * @param string $html The text to parse
 | 
        
           |  |  | 188 |      * @return array $list An array of arrays of attributes, one for each
 | 
        
           |  |  | 189 |      * link tag
 | 
        
           |  |  | 190 |      */
 | 
        
           |  |  | 191 |     function parseLinkAttrs($html)
 | 
        
           |  |  | 192 |     {
 | 
        
           |  |  | 193 |         $stripped = preg_replace($this->_removed_re,
 | 
        
           |  |  | 194 |                                  "",
 | 
        
           |  |  | 195 |                                  $html);
 | 
        
           |  |  | 196 |   | 
        
           |  |  | 197 |         // Try to find the <HTML> tag.
 | 
        
           |  |  | 198 |         $html_re = $this->htmlFind();
 | 
        
           |  |  | 199 |         $html_matches = array();
 | 
        
           |  |  | 200 |         if (!preg_match($html_re, $stripped, $html_matches)) {
 | 
        
           |  |  | 201 |             return array();
 | 
        
           |  |  | 202 |         }
 | 
        
           |  |  | 203 |   | 
        
           |  |  | 204 |         // Try to find the <HEAD> tag.
 | 
        
           |  |  | 205 |         $head_re = $this->headFind();
 | 
        
           |  |  | 206 |         $head_matches = array();
 | 
        
           |  |  | 207 |         if (!preg_match($head_re, $html_matches[0], $head_matches)) {
 | 
        
           |  |  | 208 |             return array();
 | 
        
           |  |  | 209 |         }
 | 
        
           |  |  | 210 |   | 
        
           |  |  | 211 |         $link_data = array();
 | 
        
           |  |  | 212 |         $link_matches = array();
 | 
        
           |  |  | 213 |   | 
        
           |  |  | 214 |         if (!preg_match_all($this->_link_find, $head_matches[0],
 | 
        
           |  |  | 215 |                             $link_matches)) {
 | 
        
           |  |  | 216 |             return array();
 | 
        
           |  |  | 217 |         }
 | 
        
           |  |  | 218 |   | 
        
           |  |  | 219 |         foreach ($link_matches[0] as $link) {
 | 
        
           |  |  | 220 |             $attr_matches = array();
 | 
        
           |  |  | 221 |             preg_match_all($this->_attr_find, $link, $attr_matches);
 | 
        
           |  |  | 222 |             $link_attrs = array();
 | 
        
           |  |  | 223 |             foreach ($attr_matches[0] as $index => $full_match) {
 | 
        
           |  |  | 224 |                 $name = $attr_matches[1][$index];
 | 
        
           |  |  | 225 |                 $value = $this->replaceEntities(
 | 
        
           |  |  | 226 |                               $this->removeQuotes($attr_matches[2][$index]));
 | 
        
           |  |  | 227 |   | 
        
           |  |  | 228 |                 $link_attrs[strtolower($name)] = $value;
 | 
        
           |  |  | 229 |             }
 | 
        
           |  |  | 230 |             $link_data[] = $link_attrs;
 | 
        
           |  |  | 231 |         }
 | 
        
           |  |  | 232 |   | 
        
           |  |  | 233 |         return $link_data;
 | 
        
           |  |  | 234 |     }
 | 
        
           |  |  | 235 |   | 
        
           |  |  | 236 |     function relMatches($rel_attr, $target_rel)
 | 
        
           |  |  | 237 |     {
 | 
        
           |  |  | 238 |         // Does this target_rel appear in the rel_str?
 | 
        
           |  |  | 239 |         // XXX: TESTME
 | 
        
           |  |  | 240 |         $rels = preg_split("/\s+/", trim($rel_attr));
 | 
        
           |  |  | 241 |         foreach ($rels as $rel) {
 | 
        
           |  |  | 242 |             $rel = strtolower($rel);
 | 
        
           |  |  | 243 |             if ($rel == $target_rel) {
 | 
        
           |  |  | 244 |                 return 1;
 | 
        
           |  |  | 245 |             }
 | 
        
           |  |  | 246 |         }
 | 
        
           |  |  | 247 |   | 
        
           |  |  | 248 |         return 0;
 | 
        
           |  |  | 249 |     }
 | 
        
           |  |  | 250 |   | 
        
           |  |  | 251 |     function linkHasRel($link_attrs, $target_rel)
 | 
        
           |  |  | 252 |     {
 | 
        
           |  |  | 253 |         // Does this link have target_rel as a relationship?
 | 
        
           |  |  | 254 |         // XXX: TESTME
 | 
        
           |  |  | 255 |         $rel_attr = Auth_OpeniD::arrayGet($link_attrs, 'rel', null);
 | 
        
           |  |  | 256 |         return ($rel_attr && $this->relMatches($rel_attr,
 | 
        
           |  |  | 257 |                                                $target_rel));
 | 
        
           |  |  | 258 |     }
 | 
        
           |  |  | 259 |   | 
        
           |  |  | 260 |     function findLinksRel($link_attrs_list, $target_rel)
 | 
        
           |  |  | 261 |     {
 | 
        
           |  |  | 262 |         // Filter the list of link attributes on whether it has
 | 
        
           |  |  | 263 |         // target_rel as a relationship.
 | 
        
           |  |  | 264 |         // XXX: TESTME
 | 
        
           |  |  | 265 |         $result = array();
 | 
        
           |  |  | 266 |         foreach ($link_attrs_list as $attr) {
 | 
        
           |  |  | 267 |             if ($this->linkHasRel($attr, $target_rel)) {
 | 
        
           |  |  | 268 |                 $result[] = $attr;
 | 
        
           |  |  | 269 |             }
 | 
        
           |  |  | 270 |         }
 | 
        
           |  |  | 271 |   | 
        
           |  |  | 272 |         return $result;
 | 
        
           |  |  | 273 |     }
 | 
        
           |  |  | 274 |   | 
        
           |  |  | 275 |     function findFirstHref($link_attrs_list, $target_rel)
 | 
        
           |  |  | 276 |     {
 | 
        
           |  |  | 277 |         // Return the value of the href attribute for the first link
 | 
        
           |  |  | 278 |         // tag in the list that has target_rel as a relationship.
 | 
        
           |  |  | 279 |         // XXX: TESTME
 | 
        
           |  |  | 280 |         $matches = $this->findLinksRel($link_attrs_list,
 | 
        
           |  |  | 281 |                                        $target_rel);
 | 
        
           |  |  | 282 |         if (!$matches) {
 | 
        
           |  |  | 283 |             return null;
 | 
        
           |  |  | 284 |         }
 | 
        
           |  |  | 285 |         $first = $matches[0];
 | 
        
           |  |  | 286 |         return Auth_OpenID::arrayGet($first, 'href', null);
 | 
        
           |  |  | 287 |     }
 | 
        
           |  |  | 288 | }
 | 
        
           |  |  | 289 |   | 
        
           |  |  | 290 | function Auth_OpenID_legacy_discover($html_text)
 | 
        
           |  |  | 291 | {
 | 
        
           |  |  | 292 |     $p = new Auth_OpenID_Parse();
 | 
        
           |  |  | 293 |   | 
        
           |  |  | 294 |     $link_attrs = $p->parseLinkAttrs($html_text);
 | 
        
           |  |  | 295 |   | 
        
           |  |  | 296 |     $server_url = $p->findFirstHref($link_attrs,
 | 
        
           |  |  | 297 |                                     'openid.server');
 | 
        
           |  |  | 298 |   | 
        
           |  |  | 299 |     if ($server_url === null) {
 | 
        
           |  |  | 300 |         return false;
 | 
        
           |  |  | 301 |     } else {
 | 
        
           |  |  | 302 |         $delegate_url = $p->findFirstHref($link_attrs,
 | 
        
           |  |  | 303 |                                           'openid.delegate');
 | 
        
           |  |  | 304 |         return array($delegate_url, $server_url);
 | 
        
           |  |  | 305 |     }
 | 
        
           |  |  | 306 | }
 | 
        
           |  |  | 307 |   | 
        
           |  |  | 308 | ?>
 |