我正在使用 Simple HTML DOM Parser 解析网站,并且需要把解析出的数据保存到数据库中。解析过程本身工作得很好,但有时结果里会出现一些不需要的符号,例如:€、â 等等。我不希望这些字符出现在保存的值中。以下是我目前使用的代码:
<?php
// Walks every row of `event_urls`, downloads the pages that match the target
// site, extracts title / subtitle / description with Simple HTML DOM and
// stores them in `articles`.
//
// NOTE(review): the mysql_* extension used here was removed in PHP 7. The
// connection is opened outside this snippet, so the calls are kept, but the
// script should be migrated to mysqli or PDO with prepared statements.
set_time_limit(10800);
ini_set('memory_limit', '512M');

// The stray "â", "€", "œ" characters are the UTF-8 bytes of typographic
// quotes being interpreted as a single-byte charset. Stripping them with
// str_replace() only hides the symptom and also destroys legitimate text
// (a real "â" or "€"), so the encoding is declared end to end instead.
// NOTE(review): assumes the scraped pages are UTF-8 and the target columns
// use a utf8 character set - confirm against the site and the table schema.
if (!headers_sent()) {
    header('Content-Type: text/html; charset=utf-8');
}
// Must run before mysql_real_escape_string(): escaping depends on the
// connection character set.
if (!mysql_set_charset('utf8')) {
    trigger_error('Could not switch the MySQL connection to utf8: ' . mysql_error(), E_USER_WARNING);
}

$select = ("SELECT * FROM `event_urls` ORDER BY `id`");
$query = mysql_query($select);
if ($query === false) {
    // Without a result set there is nothing to iterate over.
    trigger_error('Could not read event_urls: ' . mysql_error(), E_USER_WARNING);
    return;
}
while ($r = mysql_fetch_array($query)) {
    $url = $r['event_url'];
    // $timeURL and $site are not used in the visible part of the script; they
    // are kept because the elided code below may read them.
    if (preg_match('/.+xyz\.abc\/.+/', $url, $timeURL)) {
        $site = $url;
        $html = file_get_html($site);
        if (!$html) {
            // Download or parse failed: skip this URL instead of calling
            // find() on a non-object.
            continue;
        }
        foreach ($html->find('body') as $body) {
            foreach ($body->find('div.box-header') as $headBox) {
                foreach ($headBox->find('h3') as $heading) {
                    $heading = $heading->innertext;
                    // Escaped for the INSERT below; echoed as progress output.
                    $title = mysql_real_escape_string($heading);
                    echo $title;
                }
            }
            foreach ($body->find('div.page-subtitle span') as $subtitle) {
                $subtitle = $subtitle->innertext;
                $subtitle = mysql_real_escape_string($subtitle);
            }
            foreach ($body->find('div.description') as $description) {
                $details = $description->innertext;
                $details = mysql_real_escape_string($details);
                // Stored description is the bold subtitle followed by the details.
                echo $description = '<b>' . $subtitle . '</b><br><br>' . $details;
                echo '<br>';
            }
......
            $insert = ("INSERT INTO `articles`(`contact`, `date_added`, `organizer_name`, `profile_image`, `start_date`, `start_month`, `end_date`, `end_month`, `year`, `city`, `time`, `description`, `event_name`, `full_address`) VALUES ('$contact', now(), '$organizer', '$pimage', '$startDate', '$startMonth', '$endDate', '$endMonth', '$year', '$city', '$time', '$description', '$title', '$address')");
            mysql_query($insert);
...}
?>