source: branches/ray/plugins/SuperClean/tidy.php @ 762

Last change on this file since 762 was 762, checked in by ray, 13 years ago

updated branch

  • Property svn:eol-style set to native
  • Property svn:keywords set to LastChangedDate LastChangedRevision LastChangedBy HeadURL Id
File size: 4.6 KB
Line 
1<?php
2  /** This PHP file is intended for use with XMLHTTPRequest from Xinha
3   * it returns javascript to set the Xinha html with tidied html that is
4   * submitted in a $_POST parameter called 'content'
5   */
6
7  if(get_magic_quotes_gpc())
8  {
9    // trigger_error('Magic Quotes GPC is on, cleaning GPC.', E_USER_NOTICE);
10    $to_clean = array(&$_GET, &$_POST, &$_REQUEST, &$_COOKIE);
11    while(count($to_clean))
12    {
13      $cleaning =& $to_clean[array_pop(array_keys($to_clean))];
14      unset($to_clean[array_pop(array_keys($to_clean))]);
15      foreach(array_keys($cleaning) as $k)
16      {
17        if(is_array($cleaning[$k]))
18        {
19          $to_clean[] =& $cleaning[$k];
20        }
21        else
22        {
23          $cleaning[$k] = stripslashes($cleaning[$k]);
24        }
25      }
26    }
27  }
28
29  header('Content-Type: text/javascript; charset=utf-8');
30
31  /** Function to POST some data to a URL */
32  function PostIt($DataStream, $URL)
33  {
34
35//  Strip http:// from the URL if present
36    $URL = ereg_replace("^http://", "", $URL);
37
38//  Separate into Host and URI
39    $Host = substr($URL, 0, strpos($URL, "/"));
40    $URI = strstr($URL, "/");
41
42//  Form up the request body
43    $ReqBody = "";
44    while (list($key, $val) = each($DataStream)) {
45      if ($ReqBody) $ReqBody.= "&";
46      $ReqBody.= $key."=".urlencode($val);
47    }
48    $ContentLength = strlen($ReqBody);
49
50//  Generate the request header
51    $ReqHeader =
52      "POST $URI HTTP/1.0\n".
53      "Host: $Host\n".
54      "User-Agent: PostIt\n".
55      "Content-Type: application/x-www-form-urlencoded\n".
56      "Content-Length: $ContentLength\n\n".
57      "$ReqBody\n";
58
59//     echo $ReqHeader;
60
61
62//  Open the connection to the host
63    $socket = fsockopen($Host, 80, &$errno, &$errstr);
64    if (!$socket) {
65      $result = "($errno) $errstr";
66      return $Result;
67    }
68
69    fputs($socket, $ReqHeader);
70
71    $result = '';
72    while(!feof($socket))
73    {
74      $result .= fgets($socket);
75    }
76    return $result;
77  }
78
79
80  function js_encode($string)
81  {
82    static $strings = "\\,\",',%,&,<,>,{,},@,\n,\r";
83
84    if(!is_array($strings))
85    {
86      $tr = array();
87      foreach(explode(',', $strings) as $chr)
88      {
89        $tr[$chr] = sprintf('\x%02X', ord($chr));
90      }
91      $strings = $tr;
92    }
93
94    return strtr($string, $strings);
95  }
96
97  // Any errors would screq up our javascript
98  error_reporting(E_NONE);
99  ini_set('display_errors', false);
100
101  if(trim(@$_REQUEST['content']))
102  {
103    // PHP's urldecode doesn't understand %uHHHH for unicode
104    $_REQUEST['content'] = preg_replace('/%u([a-f0-9]{4,4})/ei', 'utf8_chr(0x$1)', $_REQUEST['content']);
105    function utf8_chr($num)
106    {
107      if($num<128)return chr($num);
108      if($num<1024)return chr(($num>>6)+192).chr(($num&63)+128);
109      if($num<32768)return chr(($num>>12)+224).chr((($num>>6)&63)+128).chr(($num&63)+128);
110      if($num<2097152)return chr(($num>>18)+240).chr((($num>>12)&63)+128).chr((($num>>6)&63)+128) .chr(($num&63)+128);
111      return '';
112    }
113    ob_start();
114      passthru("echo " .  escapeshellarg($_REQUEST['content']) . " | tidy -q -i -u -wrap 9999 -utf8 -bare -asxhtml 2>/dev/null", $result);
115      $content = ob_get_contents();
116    ob_end_clean();
117
118    if(strlen($content) < 5)
119    {
120      // Tidy on the local machine failed, try a post
121      $res_1
122        = PostIt(
123          array
124            (
125              '_function' => 'tidy',
126              '_html'   => $_REQUEST['content'],
127              'char-encoding' => 'utf8',
128              '_output'       => 'warn',
129              'indent'        => 'auto',
130              'wrap'          => 9999,
131              'break-before-br' => 'y',
132              'bare'          => 'n',
133              'word-2000'     => 'n',
134              'drop-empty-paras' => 'y',
135              'drop-font-tags' => 'n',
136
137            ),
138          'http://infohound.net/tidy/tidy.pl');
139
140      if(preg_match('/<a href="([^"]+)" title="Save the tidied HTML/', $res_1, $m))
141      {
142        $tgt = strtr($m[1], array_flip(get_html_translation_table(HTML_ENTITIES)));
143        $content = implode('', file('http://infohound.net/tidy/' . $tgt));
144      }
145    }
146
147    if(strlen($content) && ! preg_match('/<\/body>/i', $_REQUEST['content']))
148    {
149      if( preg_match('/<body[^>]*>(.*)<\/body>/is', $content, $matches) )
150      {
151        $content = $matches[1];
152      }
153    }
154    elseif(!strlen($content))
155    {
156      $content = $_REQUEST['content'];
157    }
158
159    if($content)
160    {
161      ?>
162      {action:'setHTML',value:'<?php echo js_encode($content) ?>'};
163      <?php
164    }
165    else
166    {
167      ?>
168      {action:'alert',value:'Tidy failed.  Check your HTML for syntax errors.'};
169      <?php
170    }
171  }
172  else
173  {
174    ?>
175    {action:'alert',value:"You don't have anything to tidy!"}
176    <?php
177  }
178
179?>
Note: See TracBrowser for help on using the repository browser.