1
0
Fork 0
mirror of https://github.com/wallabag/wallabag.git synced 2025-07-27 17:28:39 +00:00

Fix srcset attribute on images downloaded

This commit is contained in:
Simounet 2018-05-31 23:42:06 +02:00
parent 9707ac4661
commit c15bb5ad72
2 changed files with 54 additions and 3 deletions

View file

@ -42,14 +42,17 @@ class DownloadImages
public function processHtml($entryId, $html, $url)
{
$crawler = new Crawler($html);
$result = $crawler
->filterXpath('//img')
$imagesCrawler = $crawler
->filterXpath('//img');
$imagesUrls = $imagesCrawler
->extract(['src']);
$imagesSrcsetUrls = $this->getSrcsetUrls($imagesCrawler);
$imagesUrls = array_unique(array_merge($imagesUrls, $imagesSrcsetUrls));
$relativePath = $this->getRelativePath($entryId);
// download and save the image to the folder
foreach ($result as $image) {
foreach ($imagesUrls as $image) {
$imagePath = $this->processSingleImage($entryId, $image, $url, $relativePath);
if (false === $imagePath) {
@ -171,6 +174,33 @@ class DownloadImages
@rmdir($folderPath);
}
/**
* Get images urls from the srcset image attribute.
*
* @param Crawler $imagesCrawler
*
* @return array An array of urls
*/
protected function getSrcsetUrls(Crawler $imagesCrawler)
{
$urls = [];
$iterator = $imagesCrawler
->getIterator();
while ($iterator->valid()) {
$srcsetAttribute = $iterator->current()->getAttribute('srcset');
if ('' !== $srcsetAttribute) {
$srcset = array_map('trim', explode(',', $srcsetAttribute));
$srcsetUrls = array_map(function ($src) {
return explode(' ', $src)[0];
}, $srcset);
$urls = array_merge($srcsetUrls, $urls);
}
$iterator->next();
}
return $urls;
}
/**
* Setup base folder where all images are going to be saved.
*/