[ Index ] |
PHP Cross Reference of Unnamed Project |
[Summary view] [Print] [Text view]
<?php

/**
 * HTML Purifier's internal representation of a URI.
 * @note
 *      Internal data-structures are completely escaped. If the data needs
 *      to be used in a non-URI context (which is very unlikely), be sure
 *      to decode it first. The URI may not necessarily be well-formed until
 *      validate() is called.
 */
class HTMLPurifier_URI
{
    /**
     * Scheme component; lower-cased by the constructor, null when absent.
     * @type string
     */
    public $scheme;

    /**
     * Userinfo component (the part before '@' in the authority), null when absent.
     * @type string
     */
    public $userinfo;

    /**
     * Host component. Null means "no authority"; an empty string means an
     * empty authority (see toString() for the rendering difference).
     * @type string
     */
    public $host;

    /**
     * Port component; cast to int by the constructor, null when absent.
     * @type int
     */
    public $port;

    /**
     * Path component.
     * @type string
     */
    public $path;

    /**
     * Query component (without the leading '?'), null when absent.
     * @type string
     */
    public $query;

    /**
     * Fragment component (without the leading '#'), null when absent.
     * @type string
     */
    public $fragment;

    /**
     * @param string $scheme
     * @param string $userinfo
     * @param string $host
     * @param int $port
     * @param string $path
     * @param string $query
     * @param string $fragment
     * @note Automatically normalizes scheme and port
     */
    public function __construct($scheme, $userinfo, $host, $port, $path, $query, $fragment)
    {
        // Only pay for strtolower() when the scheme is not already lower-case.
        $this->scheme = is_null($scheme) || ctype_lower($scheme) ? $scheme : strtolower($scheme);
        $this->userinfo = $userinfo;
        $this->host = $host;
        // A null port must stay null ("no port"); anything else becomes an int.
        $this->port = is_null($port) ? $port : (int)$port;
        $this->path = $path;
        $this->query = $query;
        $this->fragment = $fragment;
    }

    /**
     * Retrieves a scheme object corresponding to the URI's scheme/default
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return HTMLPurifier_URIScheme|bool Scheme object appropriate for
     *         validating this URI, or false if none could be obtained. When
     *         the default scheme object cannot be obtained, an
     *         E_USER_WARNING is also triggered.
     */
    public function getSchemeObj($config, $context)
    {
        $registry = HTMLPurifier_URISchemeRegistry::instance();
        if ($this->scheme !== null) {
            $scheme_obj = $registry->getScheme($this->scheme, $config, $context);
            if (!$scheme_obj) {
                return false;
            } // invalid scheme, clean it out
        } else {
            // no scheme: retrieve the default one
            $def = $config->getDefinition('URI');
            $scheme_obj = $def->getDefaultScheme($config, $context);
            if (!$scheme_obj) {
                // something funky happened to the default scheme object
                trigger_error(
                    'Default scheme object "' . $def->defaultScheme . '" was not readable',
                    E_USER_WARNING
                );
                return false;
            }
        }
        return $scheme_obj;
    }

    /**
     * Generic validation method applicable for all schemes. May modify
     * this URI in order to get it into a compliant form.
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return bool True if validation/filtering succeeds, false if failure.
     *         The current implementation never fails: invalid components are
     *         nulled out or percent-encoded and true is always returned.
     */
    public function validate($config, $context)
    {
        // ABNF definitions from RFC 3986
        // (gen-delims are ':/?#[]@'; they are not needed as a set below)
        $chars_sub_delims = '!$&\'()*+,;=';
        $chars_pchar = $chars_sub_delims . ':@';

        // validate host
        if (!is_null($this->host)) {
            $host_def = new HTMLPurifier_AttrDef_URI_Host();
            $this->host = $host_def->validate($this->host, $config, $context);
            if ($this->host === false) {
                $this->host = null;
            }
        }

        // validate scheme
        // NOTE: It's not appropriate to check whether or not this
        // scheme is in our registry, since a URIFilter may convert a
        // URI that we don't allow into one we do.  So instead, we just
        // check if the scheme can be dropped because there is no host
        // and it is our default scheme.
        //
        // The parentheses are deliberate: "scheme present AND (host absent
        // OR host empty)". Without them && binds tighter than ||, so an
        // empty host with a null scheme would enter the branch, where the
        // only possible effect is assigning null to an already-null scheme.
        if (!is_null($this->scheme) && (is_null($this->host) || $this->host === '')) {
            // support for relative paths is pretty abysmal when the
            // scheme is present, so axe it when possible
            $def = $config->getDefinition('URI');
            if ($def->defaultScheme === $this->scheme) {
                $this->scheme = null;
            }
        }

        // validate username
        if (!is_null($this->userinfo)) {
            $encoder = new HTMLPurifier_PercentEncoder($chars_sub_delims . ':');
            $this->userinfo = $encoder->encode($this->userinfo);
        }

        // validate port
        if (!is_null($this->port)) {
            if ($this->port < 1 || $this->port > 65535) {
                $this->port = null;
            }
        }

        // validate path
        $segments_encoder = new HTMLPurifier_PercentEncoder($chars_pchar . '/');
        if (!is_null($this->host)) { // this catches $this->host === ''
            // path-abempty (hier and relative)
            // http://www.example.com/my/path
            // //www.example.com/my/path (looks odd, but works, and
            //                            recognized by most browsers)
            // (this set is valid or invalid on a scheme by scheme
            // basis, so we'll deal with it later)
            // file:///my/path
            // ///my/path
            $this->path = $segments_encoder->encode($this->path);
        } elseif ($this->path !== '') {
            if ($this->path[0] === '/') {
                // path-absolute (hier and relative)
                // http:/my/path
                // /my/path
                if (strlen($this->path) >= 2 && $this->path[1] === '/') {
                    // This could happen if the host gets stripped out:
                    // http://my/path
                    // //my/path
                    // A path starting with '//' and no host would be
                    // re-read as an authority, so the path is dropped.
                    $this->path = '';
                } else {
                    $this->path = $segments_encoder->encode($this->path);
                }
            } elseif (!is_null($this->scheme)) {
                // path-rootless (hier)
                // http:my/path
                // Short circuit evaluation means we don't need to check nz
                $this->path = $segments_encoder->encode($this->path);
            } else {
                // path-noscheme (relative)
                // my/path
                // (once again, not checking nz)
                // The first segment gets its own encoder whose character
                // list omits ':'; the remainder (from the first '/')
                // uses the regular segments encoder.
                $segment_nc_encoder = new HTMLPurifier_PercentEncoder($chars_sub_delims . '@');
                $c = strpos($this->path, '/');
                if ($c !== false) {
                    $this->path =
                        $segment_nc_encoder->encode(substr($this->path, 0, $c)) .
                        $segments_encoder->encode(substr($this->path, $c));
                } else {
                    $this->path = $segment_nc_encoder->encode($this->path);
                }
            }
        } else {
            // path-empty (hier and relative)
            $this->path = ''; // just to be safe
        }

        // qf = query and fragment
        $qf_encoder = new HTMLPurifier_PercentEncoder($chars_pchar . '/?');

        if (!is_null($this->query)) {
            $this->query = $qf_encoder->encode($this->query);
        }

        if (!is_null($this->fragment)) {
            $this->fragment = $qf_encoder->encode($this->fragment);
        }
        return true;
    }

    /**
     * Convert URI back to string
     * @return string URI appropriate for output
     */
    public function toString()
    {
        // reconstruct authority
        $authority = null;
        // there is a rendering difference between a null authority
        // (http:foo-bar) and an empty string authority
        // (http:///foo-bar).
        if (!is_null($this->host)) {
            $authority = '';
            if (!is_null($this->userinfo)) {
                $authority .= $this->userinfo . '@';
            }
            $authority .= $this->host;
            if (!is_null($this->port)) {
                $authority .= ':' . $this->port;
            }
        }

        // Reconstruct the result
        // One might wonder about parsing quirks from browsers after
        // this reconstruction.  Unfortunately, parsing behavior depends
        // on what *scheme* was employed (file:///foo is handled *very*
        // differently than http:///foo), so unfortunately we have to
        // defer to the schemes to do the right thing.
        $result = '';
        if (!is_null($this->scheme)) {
            $result .= $this->scheme . ':';
        }
        if (!is_null($authority)) {
            $result .= '//' . $authority;
        }
        $result .= $this->path;
        if (!is_null($this->query)) {
            $result .= '?' . $this->query;
        }
        if (!is_null($this->fragment)) {
            $result .= '#' . $this->fragment;
        }

        return $result;
    }

    /**
     * Returns true if this URL might be considered a 'local' URL given
     * the current context.  This is true when the host is null, or
     * when it matches the host supplied to the configuration.
     *
     * Note that this does not do any scheme checking, so it is mostly
     * only appropriate for metadata that doesn't care about protocol
     * security.  isBenign is probably what you actually want.
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return bool
     */
    public function isLocal($config, $context)
    {
        if ($this->host === null) {
            return true;
        }
        $uri_def = $config->getDefinition('URI');
        if ($uri_def->host === $this->host) {
            return true;
        }
        return false;
    }

    /**
     * Returns true if this URL should be considered a 'benign' URL,
     * that is:
     *
     *      - It is a local URL (isLocal), and
     *      - It has a equal or better level of security
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return bool
     */
    public function isBenign($config, $context)
    {
        if (!$this->isLocal($config, $context)) {
            return false;
        }

        $scheme_obj = $this->getSchemeObj($config, $context);
        if (!$scheme_obj) {
            return false;
        } // conservative approach

        $current_scheme_obj = $config->getDefinition('URI')->getDefaultScheme($config, $context);
        // The default scheme object may be unavailable; guard the property
        // read so that case does not raise a non-object property warning.
        // The outcome is unchanged: a missing default scheme is treated as
        // "not secure", so no downgrade check is applied.
        if ($current_scheme_obj && $current_scheme_obj->secure) {
            if (!$scheme_obj->secure) {
                return false;
            }
        }
        return true;
    }
}

// vim: et sw=4 sts=4
title
Description
Body
title
Description
Body
title
Description
Body
title
Body
Generated: Tue Mar 17 22:47:18 2015 | Cross-referenced by PHPXref 0.7.1 |