我一直在尝试输入不整洁的HTML并通过删除不需要的标签和属性来清理它。我只输出空白HTML作为输出,没有任何行。在下面发布我的代码..请帮帮我。
的index.php
<?php
//for converting tga to png or jpg
ini_set("memory_limit", "150M");
?>
<? ob_start(); ?>
<?php
/*
@name: Raja Gopal
@author: Raja Gopal
@discribe: Set Infotech Tool
@version: 1.2 Beta
*/
//file downloading after saving in "recodeit" folder
function download($file) {
if(file_exists($file)) {
if(ob_get_level()) {
ob_end_clean();
}
header('Content-Description: File Transfer');
header('Content-Type: application/octet-stream');
header('Content-Disposition: attachment; filename=' . basename($file));
header('Content-Transfer-Encoding: binary');
header('Expires: 0');
header('Cache-Control: must-revalidate');
header('Pragma: public');
header('Content-Length: ' . filesize($file));
readfile($file);
}
}
/*
MAIN
*/
if(count($_POST) && isset($_POST['submit'])) {
if(!empty($_POST['type'])) {
//upload file to the server
if(!empty($_FILES['files']['tmp_name'])) {
$dir = 'Original/';
$upload_dir = $dir . basename($_FILES['files']['name']);
$upload_dir = str_replace(" ", "", $upload_dir);
if(copy($_FILES['files']['tmp_name'], $upload_dir)) {
$flag = true;
} else {
$flag = false;
}
}
//get file format
$str[] = $_FILES['files']['name'];
$str = htmlspecialchars(implode('', $str));
$str_arr = explode('.', $str);
$result[] = $str_arr[count($str_arr)-1];
$result = implode('', $result);
$result = strtolower($result);
$result = str_replace(" ", "", $result);
//$result = trim($result);
//get file name with dots
$st[] = $_FILES['files']['name'];
$st = htmlspecialchars(implode('', $st));
$str_arr = explode('.', $st);
$i = 0;
$flname = '';
while($i < count($str_arr)-1) {
if($i == count($str_arr)-2) {
$flname .= $str_arr[$i];
} else {
$flname .= $str_arr[$i].'.';
}
$i++;
}
$flname = str_replace(" ", "", $flname);
//array with extensions
$arr = array();
$arr['html'] = array('html');
//format check
if($flag && array_key_exists($result, $arr)) {
$from = trim($result);
$to = $_POST['type'];
$format_res = $flname . '.' . $from;
$class = $from . $to;
//add classes from "classes" folder
if(require_once('classes/' . $class . '.php')) {
$fileclass = new $class($format_res, $flname);
//use our function to save the file
download('Recode/' . $flname . '.' . $to);
//delete Original folder
if(file_exists('Original/' . $format_res))
unlink('Original/' . $format_res);
//clear Recode folder
if(file_exists('Recode/'.$flname . '.' . $to))
unlink('Recode/'.$flname . '.' . $to);
exit();
}
}
}
}
?>
<? ob_start(); ?>
<!DOCTYPE html>
<html>
<head>
<title>HTML Cleaning Tool : Set Infotech Pvt Ltd</title>
<link href="style/style.css" rel="stylesheet">
<meta http-equiv="Content-Type" content="text/html; charset=utf-8">
<!--works faster becouse file is cached-->
<script type="text/javascript" src="http://ajax.googleapis.com/ajax/libs/jquery/1.7.2/jquery.min.js"></script>
<script type="text/javascript" src="js/jquery-1.8.1.js"></script>
<script type="text/javascript">
//extensions select
$('.activated').live('click', function() {
$('.type_selected').removeClass('type_selected');
$('.input_type[value="' + $(this).val() + '"]').prop('checked',true);
$(this).addClass('type_selected');
});
</script>
<script type="text/javascript">
$(document).ready(function() {
var filename;
var extensions = [];
var extension;
//array with our extensions
extensions['html'] = ['html'];
$('input[type="file"]').change(function(e) {
// Deselect the error message \ successful recoding
$('.type_selected').removeClass('type_selected');
$("#message").removeClass("visible").addClass("hidden");
//get file name and extention
var filepath = e.target.value.split('\\');
filename = filepath[filepath.length-1].split('.');
extension = filename[filename.length-1];
$('.file_type').not('.deactivated').removeClass('activated').addClass('deactivated');
//show possible extensions
if(extensions[extension.toLowerCase()] !== undefined) {
$.each(extensions[extension.toLowerCase()], function(k,v) {
$('.' + v).removeClass('deactivated').addClass('activated');
});
}
//show tick
$("#validation").css({
"background": "url('img/true.png') no-repeat"
});
//show cross
if(filename.length == 1) {
$("#validation").css({
"background-image": "url('img/false.png')"
});
}
});
//submit event
$('.submit').click(function() {
var text;
//message about the wrong extension
if(filename === undefined) {
$("#message").removeClass("hidden").addClass("visible");
$("#message").css({
"border": "2px solid #9c3232",
"background-color": "#d59e9e"
});
text = "<center>Your book is not loaded! Please select a book to start conversion!</center>";
$("#message").html(text);
$('.file_type').not('.deactivated').removeClass('activated').addClass('deactivated');
return false;
}
//message that there is no extension
if($('input[type="radio"]:checked').length==0) {
$("#message").removeClass("hidden").addClass("visible");
$("#message").css({
"border": "2px solid #9c3232",
"background-color": "#d59e9e"
});
text = "<center>You have to select the extension!</center>";
$("#message").html(text);
return false;
}
//message about successful conversion
if(filename !== undefined && $('input[type="radio"]:checked').length>0) {
$("#message").css({
"border": "2px solid #2e8856",
"background-color": "#5abd68"
})
$("#message").removeClass("hidden").addClass("visible");
text = "<center>Success ! Pleat Wait</center>";
$("#message").html(text);
$('.type_selected').removeClass('type_selected');
$.each(extensions[extension.toLowerCase()], function(k,v) {
$('.' + v).removeClass('activated').addClass('deactivated');
});
$("#validation").css({
"background-image": "url('img/false.png')"
});
setTimeout(function(){
$('input[type="file"]').val('');
}, 3000);
return true;
}
});
});
</script>
</head>
<body>
<div class="hole_wrap">
<!--WRAP CONTENT-->
<div id="content_wrap">
<!--BEGIN CONTENT-->
<div id="content">
<!--HEADER-->
<div id="header">
<div id="block" class="f_l">
<div id="head_text"><h1>HTML Cleaning Tool</h1></div>
<div id="text"><p>Set Infotech Pvt Ltd</p></div>
</div>
<div id="logo" class="f_r"></div>
<div class="clearfix"></div>
</div>
<!--END HEADER-->
<!--FORM BEGIN-->
<form class="form" name="f" method="POST" enctype="multipart/form-data" target="_blank">
<div id="loading_block" class="f_l">
<div id="head_text"><h1><center>1. Load Untidy HTML to Clean:</center></h1></div>
<!-- file load -->
<div class="upload_b f_l">
<input id="loading_f" class="files_load" type="file" name="files" size="10">
</div>
<div id="validation" class="f_r"></div>
<div class="clearfix"></div>
<div class="gr_line"></div>
<div id="head_text"><h1><center>2. Select HTML Below:</center></h1></div>
<!-- radios -->
<div class="formats f_l">
<center><input class="input_type" type="radio" name="type" value="html">
<!-- buttons -->
<button class="file_type html deactivated" type="button" value="html"></button></center>
</div>
<div class="clearfix"></div>
<!-- exec -->
<div class="gr_line"></div>
<div id="head_text"><h1><center>3. Clean HTML !</center></h1></div>
<input class="submit f_l" id="loader" type="submit" name="submit" value="Convert">
<div id="message" class="hidden f_l"><center></center></div>
</div>
<div class="clearfix"></div>
</form>
<!--END OF FORM-->
</div>
<!--END OF CONTENT-->
</div>
<!--END OF CONTENT WRAP-->
</div>
</body>
</html>
htmlhtml.php
<?php
class htmlhtml
{
/** @var string */
private $tag;
/** @var string */
private $attribute;
private $dom;
public function __construct($format_res, $flname)
{
// Turn up error reporting
error_reporting(E_ALL | E_STRICT);
// Upload template
$this->data = file_get_contents('Original/' . $format_res);
$this->dom = new DOMDocument();
$this->dom->strictErrorChecking = false;
$this->dom->formatOutput = true;
$this->dom->loadHTML(base64_decode($this->data));
$exceptions = array(
'a' => array('href'),
'img' => array('src')
);
$this->stripAttributes($exceptions);
$this->stripSpanTags();
$decoded = base64_decode($this->data);
$decoded = $this->stripNonBreakingSpaces($decoded);
file_put_contents('Recode/' . $flname . '.html', $decoded);
}
public function stripAttributes(array $exceptions)
{
$xpath = new DOMXPath($this->dom);
if (false === ($elements = $xpath->query("//*"))) die('Xpath error!');
/** @var $element DOMElement */
foreach ($elements as $element) {
for ($i = $element->attributes->length; --$i >= 0;) {
$this->tag = $element->nodeName;
$this->attribute = $element->attributes->item($i)->nodeName;
if ($this->checkAttrExceptions($exceptions)) continue;
$element->removeAttribute($this->attribute);
}
}
$this->data = base64_encode($this->dom->saveHTML());
}
public function checkAttrExceptions(array $exceptions)
{
foreach ($exceptions as $tag => $attributes) {
if (empty($attributes) || !is_array($attributes)) {
die('Attributes not set!');
}
foreach ($attributes as $attribute) {
if ($tag === $this->tag && $attribute === $this->attribute) {
return true;
}
}
}
return false;
}
/**
* Strip SPAN tags from current DOM document
*
* @return void
*/
/**
* Strip SPAN tags from current DOM document
*
* @return void
*/
protected function stripSpanTags ()
{
$nodes = $this->dom->getElementsByTagName('span');
while ($span = $nodes->item(0)) {
$replacement = $this->dom->createDocumentFragment();
while ($inner = $span->childNodes->item(0)) {
$replacement->appendChild($inner);
}
$span->parentNode->replaceChild($replacement, $span);
}
$this->data = base64_encode($this->dom->saveHTML());
}
/**
* Replace all entities within a string with a regular space
*
* @param string $string Input string
*
* @return string
*/
protected function stripNonBreakingSpaces ($string)
{
return str_replace(' ', ' ', $string);
}
}