1
0
Fork 0
mirror of https://github.com/wallabag/wallabag.git synced 2025-07-27 17:28:39 +00:00

Merge pull request #3965 from nicofrand/previewPic

Preview picture: use the 1st pic retrieved if no og:image set
This commit is contained in:
Kevin Decherf 2019-05-26 17:47:44 +02:00 committed by GitHub
commit 5c0701ba41
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
4 changed files with 123 additions and 14 deletions

View file

@ -12,8 +12,8 @@ use Wallabag\CoreBundle\Entity\Entry;
use Wallabag\CoreBundle\Tools\Utils;
/**
* This kind of proxy class take care of getting the content from an url
* and update the entry with what it found.
* This kind of proxy class takes care of getting the content from a URL
* and updates the entry with what it found.
*/
class ContentProxy
{
@ -289,13 +289,25 @@ class ContentProxy
$this->updateLanguage($entry, $content['language']);
}
$previewPictureUrl = '';
if (!empty($content['open_graph']['og_image'])) {
$this->updatePreviewPicture($entry, $content['open_graph']['og_image']);
$previewPictureUrl = $content['open_graph']['og_image'];
}
// if content is an image, define it as a preview too
if (!empty($content['content_type']) && \in_array($this->mimeGuesser->guess($content['content_type']), ['jpeg', 'jpg', 'gif', 'png'], true)) {
$this->updatePreviewPicture($entry, $content['url']);
$previewPictureUrl = $content['url'];
} elseif (empty($previewPictureUrl)) {
$this->logger->debug('Extracting images from content to provide a default preview picture');
$imagesUrls = DownloadImages::extractImagesUrlsFromHtml($content['html']);
$this->logger->debug(\count($imagesUrls) . ' pictures found');
if (!empty($imagesUrls)) {
$previewPictureUrl = $imagesUrls[0];
}
}
if (!empty($previewPictureUrl)) {
$this->updatePreviewPicture($entry, $previewPictureUrl);
}
if (!empty($content['content_type'])) {

View file

@ -30,6 +30,25 @@ class DownloadImages
$this->setFolder();
}
/**
 * Process the html and extract images URLs from it.
 *
 * Collects the "src" attribute of every <img> element, adds the URLs that
 * getSrcsetUrls() returns for the same elements, then removes duplicates.
 * Empty strings are dropped and the result is re-indexed from 0, so a caller
 * reading the first element with [0] always gets a usable URL when at least
 * one exists.
 *
 * @param string $html
 *
 * @return string[] List of unique, non-empty image URLs (sequential keys)
 */
public static function extractImagesUrlsFromHtml($html)
{
    $crawler = new Crawler($html);
    $imagesCrawler = $crawler->filterXpath('//img');

    // extract() gives an empty string for an <img> that has no "src" attribute
    // (e.g. lazy-loaded or srcset-only images)
    $imagesUrls = $imagesCrawler->extract(['src']);
    $imagesSrcsetUrls = self::getSrcsetUrls($imagesCrawler);

    $nonEmptyUrls = array_filter(
        array_merge($imagesUrls, $imagesSrcsetUrls),
        static function ($url) {
            // strict comparison so that only the empty string is discarded
            return '' !== $url;
        }
    );

    // array_filter() and array_unique() both preserve keys: re-index to return a plain list
    return array_values(array_unique($nonEmptyUrls));
}
/**
* Process the html and extract images from it, save them locally and return the updated html.
*
@ -41,13 +60,7 @@ class DownloadImages
*/
public function processHtml($entryId, $html, $url)
{
$crawler = new Crawler($html);
$imagesCrawler = $crawler
->filterXpath('//img');
$imagesUrls = $imagesCrawler
->extract(['src']);
$imagesSrcsetUrls = $this->getSrcsetUrls($imagesCrawler);
$imagesUrls = array_unique(array_merge($imagesUrls, $imagesSrcsetUrls));
$imagesUrls = self::extractImagesUrlsFromHtml($html);
$relativePath = $this->getRelativePath($entryId);
@ -199,7 +212,7 @@ class DownloadImages
*
* @return array An array of urls
*/
private function getSrcsetUrls(Crawler $imagesCrawler)
private static function getSrcsetUrls(Crawler $imagesCrawler)
{
$urls = [];
$iterator = $imagesCrawler