1
0
Fork 0
mirror of https://github.com/wallabag/wallabag.git synced 2025-09-15 18:57:05 +00:00

Use DomCrawler in HtmlImport

This commit is contained in:
Yassine Guedidi 2024-11-19 23:30:28 +01:00
parent b9796cce41
commit a9394f6d4f

View file

@@ -2,6 +2,7 @@
namespace Wallabag\Import; namespace Wallabag\Import;
use Symfony\Component\DomCrawler\Crawler;
use Wallabag\Entity\Entry; use Wallabag\Entity\Entry;
use Wallabag\Event\EntrySavedEvent; use Wallabag\Event\EntrySavedEvent;
@@ -29,27 +30,23 @@ abstract class HtmlImport extends AbstractImport
return false; return false;
} }
$html = new \DOMDocument(); $crawler = new Crawler(file_get_contents($this->filepath));
libxml_use_internal_errors(true); $hrefs = $crawler->filterXPath('//a');
$html->loadHTMLFile($this->filepath);
$hrefs = $html->getElementsByTagName('a');
libxml_use_internal_errors(false);
if (0 === $hrefs->length) { if (0 === $hrefs->count()) {
$this->logger->error('Wallabag HTML: no entries in imported file'); $this->logger->error('Wallabag HTML: no entries in imported file');
return false; return false;
} }
$entries = []; $entries = $hrefs->each(function (Crawler $node) {
foreach ($hrefs as $href) { return [
$entry = []; 'url' => $node->attr('href'),
$entry['url'] = $href->getAttribute('href'); 'tags' => $node->attr('tags'),
$entry['tags'] = $href->getAttribute('tags'); 'created_at' => $node->attr('add_date'),
$entry['created_at'] = $href->getAttribute('add_date'); ];
$entries[] = $entry; });
}
if ($this->producer) { if ($this->producer) {
$this->parseEntriesForProducer($entries); $this->parseEntriesForProducer($entries);