本文实例讲述了php解析字符串里所有URL地址的方法。分享给大家供大家参考。具体如下:
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
|
<?php
/**
 * Extracts the href targets of all <a> tags found in an HTML string.
 *
 * Links whose target starts with "javascript:" or "#" are skipped.
 * When $repath is true and $current_url is a non-empty absolute URL, every
 * relative link is converted to a full URL, resolved against $current_url:
 *   - "//host/path"        -> scheme of $current_url is prepended
 *   - "scheme:..."         -> already absolute, returned unchanged
 *   - "/path"              -> resolved against the origin of $current_url
 *   - "?query"             -> appended to the path of $current_url
 *   - "x", "./x", "../x"   -> resolved against the directory of $current_url
 *
 * If $current_url has no scheme or host, the links are returned unresolved.
 *
 * @param string $html        The HTML of the page.
 * @param string $current_url The full URL the HTML came from (only needed for $repath).
 * @param bool   $repath      Whether to convert relative links to full URLs.
 *
 * @return array List of link targets in document order.
 */
function pageLinks($html, $current_url = "", $repath = false)
{
    // "[^>]*?" keeps the match inside a single <a ...> tag (and lets attributes
    // span several lines); "\1" forces the closing quote to equal the opening one.
    preg_match_all(
        '/<a\s[^>]*?(?<=\s)href\s*=\s*(["\'])(?!javascript:|#)(.+?)\1/i',
        (string) $html,
        $matches
    );

    $links = isset($matches[2]) ? $matches[2] : array();

    if (!$repath || count($links) === 0 || strlen($current_url) === 0) {
        return $links;
    }

    $base = parse_url($current_url);
    if ($base === false || !isset($base["scheme"]) || !isset($base["host"])) {
        // Without a scheme and a host there is nothing to resolve against.
        return $links;
    }

    // Origin = scheme://[user[:pass]@]host[:port]
    $origin = $base["scheme"] . "://";
    if (isset($base["user"])) {
        $origin .= $base["user"];
        if (isset($base["pass"])) {
            $origin .= ":" . $base["pass"];
        }
        $origin .= "@";
    }
    $origin .= $base["host"];
    if (isset($base["port"])) {
        $origin .= ":" . $base["port"];
    }

    // Directory of the current document: its path up to and including the last "/".
    $basePath = (isset($base["path"]) && $base["path"] !== "") ? $base["path"] : "/";
    $lastSlash = strrpos($basePath, "/");
    $baseDir = ($lastSlash === false) ? "/" : substr($basePath, 0, $lastSlash + 1);

    foreach ($links as $k => $link) {
        if (strncmp($link, "//", 2) === 0) {
            // Protocol-relative URL.
            $links[$k] = $base["scheme"] . ":" . $link;
        } elseif (preg_match('/^[a-z][a-z0-9+.\-]*:/i', $link)) {
            // Already absolute (http:, https:, mailto:, tel:, ...): leave untouched.
            continue;
        } elseif ($link[0] === "/") {
            // Root-relative URL.
            $links[$k] = $origin . $link;
        } elseif ($link[0] === "?") {
            // Query-only reference: same document, different query string.
            $links[$k] = $origin . $basePath . $link;
        } else {
            // Path-relative URL. Only the path part is normalized so that dots
            // appearing in the query string or fragment are left alone.
            $pathLength = strcspn($link, "?#");
            $relativePath = (string) substr($link, 0, $pathLength);
            $suffix = (string) substr($link, $pathLength);
            $links[$k] = $origin . pageLinksNormalizePath($baseDir . $relativePath) . $suffix;
        }
    }

    return $links;
}

/**
 * Removes "." and ".." segments from an absolute URL path.
 *
 * ".." never climbs above the root, and a trailing "." or ".." leaves a
 * trailing slash (the result still denotes a directory).
 *
 * @param string $path Absolute path starting with "/".
 *
 * @return string The normalized path.
 */
function pageLinksNormalizePath($path)
{
    $segments = explode("/", $path);
    $last = end($segments);
    $out = array();

    foreach ($segments as $segment) {
        if ($segment === ".") {
            continue;
        }
        if ($segment === "..") {
            // Element 0 is the empty string in front of the leading "/"; keep it.
            if (count($out) > 1) {
                array_pop($out);
            }
            continue;
        }
        $out[] = $segment;
    }

    if ($last === "." || $last === "..") {
        $out[] = "";
    }

    return implode("/", $out);
}

// Demo: runs only when the including script has set $url to the page to fetch.
if (isset($url) && is_string($url) && $url !== "") {
    if (!headers_sent()) {
        header("content-type: text/plain");
    }

    $html = file_get_contents($url);

    if ($html !== false) {
        // Gets links from the page:
        print_r(pageLinks($html));

        // Gets links from the page and formats them to a full valid url:
        print_r(pageLinks($html, $url, true));
    }
}
希望本文所述对大家的php程序设计有所帮助。