mirror of
https://github.com/wallabag/wallabag.git
synced 2025-09-15 18:57:05 +00:00
Use DomCrawler in HtmlImport
This commit is contained in:
parent
b9796cce41
commit
a9394f6d4f
1 changed file with 11 additions and 14 deletions
|
@@ -2,6 +2,7 @@
|
||||||
|
|
||||||
namespace Wallabag\Import;
|
namespace Wallabag\Import;
|
||||||
|
|
||||||
|
use Symfony\Component\DomCrawler\Crawler;
|
||||||
use Wallabag\Entity\Entry;
|
use Wallabag\Entity\Entry;
|
||||||
use Wallabag\Event\EntrySavedEvent;
|
use Wallabag\Event\EntrySavedEvent;
|
||||||
|
|
||||||
|
@@ -29,27 +30,23 @@ abstract class HtmlImport extends AbstractImport
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
$html = new \DOMDocument();
|
$crawler = new Crawler(file_get_contents($this->filepath));
|
||||||
|
|
||||||
libxml_use_internal_errors(true);
|
$hrefs = $crawler->filterXPath('//a');
|
||||||
$html->loadHTMLFile($this->filepath);
|
|
||||||
$hrefs = $html->getElementsByTagName('a');
|
|
||||||
libxml_use_internal_errors(false);
|
|
||||||
|
|
||||||
if (0 === $hrefs->length) {
|
if (0 === $hrefs->count()) {
|
||||||
$this->logger->error('Wallabag HTML: no entries in imported file');
|
$this->logger->error('Wallabag HTML: no entries in imported file');
|
||||||
|
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
$entries = [];
|
$entries = $hrefs->each(function (Crawler $node) {
|
||||||
foreach ($hrefs as $href) {
|
return [
|
||||||
$entry = [];
|
'url' => $node->attr('href'),
|
||||||
$entry['url'] = $href->getAttribute('href');
|
'tags' => $node->attr('tags'),
|
||||||
$entry['tags'] = $href->getAttribute('tags');
|
'created_at' => $node->attr('add_date'),
|
||||||
$entry['created_at'] = $href->getAttribute('add_date');
|
];
|
||||||
$entries[] = $entry;
|
});
|
||||||
}
|
|
||||||
|
|
||||||
if ($this->producer) {
|
if ($this->producer) {
|
||||||
$this->parseEntriesForProducer($entries);
|
$this->parseEntriesForProducer($entries);
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue