mirror of
https://github.com/wallabag/wallabag.git
synced 2025-08-26 18:21:02 +00:00
Move source files directly under src/ directory
This commit is contained in:
parent
804261bc26
commit
a37b385c23
190 changed files with 19 additions and 21 deletions
410
src/Helper/ContentProxy.php
Normal file
410
src/Helper/ContentProxy.php
Normal file
|
@ -0,0 +1,410 @@
|
|||
<?php
|
||||
|
||||
namespace Wallabag\CoreBundle\Helper;
|
||||
|
||||
use Graby\Graby;
|
||||
use Psr\Log\LoggerInterface;
|
||||
use Symfony\Component\Mime\MimeTypes;
|
||||
use Symfony\Component\Validator\Constraints\Locale as LocaleConstraint;
|
||||
use Symfony\Component\Validator\Constraints\Url as UrlConstraint;
|
||||
use Symfony\Component\Validator\Validator\ValidatorInterface;
|
||||
use Wallabag\CoreBundle\Entity\Entry;
|
||||
use Wallabag\CoreBundle\Tools\Utils;
|
||||
|
||||
/**
|
||||
* This kind of proxy class takes care of getting the content from an url
|
||||
* and updates the entry with what it found.
|
||||
*/
|
||||
class ContentProxy
|
||||
{
|
||||
protected $graby;
|
||||
protected $tagger;
|
||||
protected $ignoreOriginProcessor;
|
||||
protected $validator;
|
||||
protected $logger;
|
||||
protected $mimeTypes;
|
||||
protected $fetchingErrorMessage;
|
||||
protected $eventDispatcher;
|
||||
protected $storeArticleHeaders;
|
||||
|
||||
/**
 * Wire the collaborators used to fetch, validate and tag entries.
 *
 * @param string $fetchingErrorMessage Html stored as entry content when nothing could be fetched
 * @param bool   $storeArticleHeaders  Kept on the instance as-is
 */
public function __construct(Graby $graby, RuleBasedTagger $tagger, RuleBasedIgnoreOriginProcessor $ignoreOriginProcessor, ValidatorInterface $validator, LoggerInterface $logger, $fetchingErrorMessage, $storeArticleHeaders = false)
{
    // plain configuration values
    $this->fetchingErrorMessage = $fetchingErrorMessage;
    $this->storeArticleHeaders = $storeArticleHeaders;

    // injected services
    $this->logger = $logger;
    $this->validator = $validator;
    $this->ignoreOriginProcessor = $ignoreOriginProcessor;
    $this->tagger = $tagger;
    $this->graby = $graby;

    // locally built helper used to map mime types to extensions
    $this->mimeTypes = new MimeTypes();
}
|
||||
|
||||
/**
 * Fill an entry from either the provided content or from what Graby fetches at the given url.
 *
 * @param Entry  $entry                Entry to update
 * @param string $url                  Url of the content
 * @param array  $content              Imported content; it needs AT LEAST the keys title, html and url to avoid fetching the url
 * @param bool   $disableContentUpdate When true, Graby is never asked to fetch the url
 */
public function updateEntry(Entry $entry, $url, array $content = [], $disableContentUpdate = false)
{
    $this->graby->toggleImgNoReferrer(true);

    if (!empty($content['html'])) {
        $content['html'] = $this->graby->cleanupHtml($content['html'], $url);
    }

    $mustFetch = false === $disableContentUpdate
        && (empty($content) || false === $this->validateContent($content));

    if ($mustFetch) {
        $fetched = $this->graby->fetchContent($url);
        $fetched['title'] = $this->sanitizeContentTitle(
            $fetched['title'],
            $fetched['headers']['content-type'] ?? ''
        );

        // imported information is only overridden when the fetch really produced something:
        // a fetch ending with the error message must not erase what the import gave us
        $fetchFailed = $fetched['html'] === $this->fetchingErrorMessage;
        if (empty($content) || !$fetchFailed) {
            $content = $fetched;
        }
    }

    // always keep an url in the content, so the entry can be refetched later even after an error
    if (empty($content['url'])) {
        $content['url'] = $url;
    }

    // the entry may come without an url (seen at least in tests): use the one given to this call
    if (!empty($url) && empty($entry->getUrl())) {
        $entry->setUrl($url);
    }

    $entry->setGivenUrl($url);

    $this->stockEntry($entry, $content);
}
|
||||
|
||||
/**
 * Store the language on the entry when the Symfony validator accepts it as a locale.
 * A rejected value is only logged as a warning; the entry is left untouched.
 *
 * @param string $value Language to validate and save
 */
public function updateLanguage(Entry $entry, $value)
{
    // languages such as fr-FR or es-ES are turned into fr_FR / es_ES before validation
    $locale = str_replace('-', '_', $value);

    $violations = $this->validator->validate(
        $locale,
        new LocaleConstraint(['canonicalize' => true])
    );

    if (\count($violations) > 0) {
        $this->logger->warning('Language validation failed. ' . (string) $violations);

        return;
    }

    $entry->setLanguage($locale);
}
|
||||
|
||||
/**
 * Store the preview picture on the entry when the Symfony validator accepts it as an url.
 * A rejected value is only logged as a warning; the entry is left untouched.
 *
 * @param string $value URL to validate and save
 */
public function updatePreviewPicture(Entry $entry, $value)
{
    $violations = $this->validator->validate($value, new UrlConstraint());

    if (\count($violations) > 0) {
        $this->logger->warning('PreviewPicture validation failed. ' . (string) $violations);

        return;
    }

    $entry->setPreviewPicture($value);
}
|
||||
|
||||
/**
 * Set the publication date of the entry.
 *
 * Accepts a \DateTime, an integer timestamp or any string understood by the \DateTime constructor.
 * A value that can not be converted is logged as a warning and the entry is left untouched.
 *
 * @param string $value Date to validate and save
 */
public function updatePublishedAt(Entry $entry, $value)
{
    // integer-looking values are timestamps: prefix them with "@" so \DateTime reads them as such
    $isTimestamp = false !== filter_var($value, \FILTER_VALIDATE_INT);
    $date = $isTimestamp ? '@' . $value : $value;

    try {
        // the conversion stays inside the try/catch because parsing the string may fail
        $publishedAt = $date instanceof \DateTime ? $date : new \DateTime($date);

        $entry->setPublishedAt($publishedAt);
    } catch (\Exception $e) {
        $this->logger->warning('Error while defining date', ['e' => $e, 'url' => $entry->getUrl(), 'date' => $value]);
    }
}
|
||||
|
||||
/**
 * Extract the host from the entry url and save it as the entry domain name.
 *
 * Nothing is saved when parse_url() returns false; every other result is passed to the entry.
 */
public function setEntryDomainName(Entry $entry)
{
    $host = parse_url($entry->getUrl(), \PHP_URL_HOST);

    if (false === $host) {
        return;
    }

    $entry->setDomainName($host);
}
|
||||
|
||||
/**
 * Helper to set a default title using:
 * - url basename, if applicable
 * - hostname.
 *
 * Urls without a path (http://example.com) or urls parse_url() can not handle no longer
 * trigger "undefined index" notices: missing parts are simply treated as absent.
 */
public function setDefaultEntryTitle(Entry $entry)
{
    $urlParts = parse_url($entry->getUrl());
    if (!\is_array($urlParts)) {
        // parse_url() gives false on seriously malformed urls
        $urlParts = [];
    }

    $title = pathinfo($urlParts['path'] ?? '', \PATHINFO_BASENAME);

    if (empty($title)) {
        // no usable basename: fall back to the host (null when the url has none, as before)
        $title = $urlParts['host'] ?? null;
    }

    $entry->setTitle($title);
}
|
||||
|
||||
/**
 * Clean the title of the fetched content: fix the character encoding of PDF titles,
 * then drop whatever is still not valid UTF-8.
 *
 * @param string $title
 * @param string $contentType Only the exact value "application/pdf" triggers the encoding conversion
 *
 * @return string
 */
private function sanitizeContentTitle($title, $contentType)
{
    $decodedTitle = 'application/pdf' === $contentType
        ? $this->convertPdfEncodingToUTF8($title)
        : $title;

    return $this->sanitizeUTF8Text($decodedTitle);
}
|
||||
|
||||
/**
 * Titles coming from a PDF are often not encoded in UTF-8. Try a short list of encodings,
 * in order, and convert the title to UTF-8 from the first one the title is valid in.
 * The title is returned unchanged when none of them matches.
 *
 * @return string (maybe contains invalid UTF-8 character)
 */
private function convertPdfEncodingToUTF8($title)
{
    // UTF-8 comes first because its presence/absence is the easiest to detect
    $candidates = ['UTF-8', 'UTF-16BE', 'WINDOWS-1252'];

    foreach ($candidates as $candidate) {
        if (!mb_check_encoding($title, $candidate)) {
            continue;
        }

        return mb_convert_encoding($title, 'UTF-8', $candidate);
    }

    return $title;
}
|
||||
|
||||
/**
 * Remove invalid UTF-8 characters from the given string.
 *
 * Invalid sequences are dropped by converting UTF-8 to UTF-8 with the mbstring substitute
 * character set to "none". That setting is global to the process, so the previous value is
 * restored once the conversion is done instead of leaking to unrelated mb_* calls.
 *
 * @param string $rawText
 *
 * @return string
 */
private function sanitizeUTF8Text($rawText)
{
    if (mb_check_encoding($rawText, 'UTF-8')) {
        return $rawText;
    }

    // called without argument, mb_substitute_character() returns the current setting
    $previousSubstitute = mb_substitute_character();
    mb_substitute_character('none');

    try {
        return mb_convert_encoding($rawText, 'UTF-8', 'UTF-8');
    } finally {
        mb_substitute_character($previousSubstitute);
    }
}
|
||||
|
||||
/**
 * Copy fetched or imported content into the entry: url, domain name, title, html, reading time,
 * http status, authors, headers, date, language, mime type and preview picture, then run the tagger.
 *
 * When there is no html, the fetching error message is stored instead (followed by the
 * description when one is available) and the entry is flagged as not parsed.
 *
 * @param Entry $entry   Entry to stock
 * @param array $content Array with at least title, url & html
 */
private function stockEntry(Entry $entry, array $content)
{
    $this->updateOriginUrl($entry, $content['url']);
    $this->setEntryDomainName($entry);

    if (!empty($content['title'])) {
        $entry->setTitle($content['title']);
    }

    if (empty($content['html'])) {
        $fallbackHtml = $this->fetchingErrorMessage;

        if (!empty($content['description'])) {
            $fallbackHtml .= '<p><i>But we found a short description: </i></p>' . $content['description'];
        }

        $content['html'] = $fallbackHtml;
        $entry->setNotParsed(true);
    }

    $html = $content['html'];
    $entry->setContent($html);
    $entry->setReadingTime(Utils::getReadingTime($html));

    if (!empty($content['status'])) {
        $entry->setHttpStatus($content['status']);
    }

    if (!empty($content['authors']) && \is_array($content['authors'])) {
        $entry->setPublishedBy($content['authors']);
    }

    if (!empty($content['headers'])) {
        $entry->setHeaders($content['headers']);
    }

    if (!empty($content['date'])) {
        $this->updatePublishedAt($entry, $content['date']);
    }

    if (!empty($content['language'])) {
        $this->updateLanguage($entry, $content['language']);
    }

    $contentType = $content['headers']['content-type'] ?? '';
    $previewPictureUrl = !empty($content['image']) ? $content['image'] : '';

    // when the content itself is an image, that image is the preview picture
    $contentIsImage = !empty($contentType)
        && \in_array(current($this->mimeTypes->getExtensions($contentType)), ['jpeg', 'jpg', 'gif', 'png'], true);

    if ($contentIsImage) {
        $previewPictureUrl = $content['url'];
    } elseif (empty($previewPictureUrl)) {
        // no picture provided: default to the first image found in the html
        $this->logger->debug('Extracting images from content to provide a default preview picture');
        $foundImages = DownloadImages::extractImagesUrlsFromHtml($html);
        $this->logger->debug(\count($foundImages) . ' pictures found');

        if (!empty($foundImages)) {
            $previewPictureUrl = $foundImages[0];
        }
    }

    if (!empty($contentType)) {
        $entry->setMimetype($contentType);
    }

    if (!empty($previewPictureUrl)) {
        $this->updatePreviewPicture($entry, $previewPictureUrl);
    }

    // a tagging failure must not prevent the entry from being stocked: log it and move on
    try {
        $this->tagger->tag($entry);
    } catch (\Exception $e) {
        $this->logger->error('Error while trying to automatically tag an entry.', [
            'entry_url' => $content['url'],
            'error_msg' => $e->getMessage(),
        ]);
    }
}
|
||||
|
||||
/**
 * Update the url of the entry (and possibly its origin_url) when the content url differs from it,
 * which typically happens when a redirection occurs.
 *
 * Depending on which parts of the two urls differ:
 * - only the path: the entry url is replaced only when the difference is a trailing slash
 *   or when the new url is the decoded version of the entry url
 * - only the scheme: the entry url is replaced
 * - only the fragment: nothing is done
 * - anything else: the current entry url is saved as origin_url (if empty) and the entry url is replaced.
 *
 * When the ignore origin processor matches the entry, the url is replaced and origin_url is never set.
 *
 * @param string $url
 *
 * @return false|null false when nothing had to be compared or the origin was ignored, nothing otherwise
 */
private function updateOriginUrl(Entry $entry, $url)
{
    if (empty($url) || $entry->getUrl() === $url) {
        return false;
    }

    // parse_url() returns false on seriously malformed urls: treat those as "no parts"
    // so the comparison below still works instead of failing on a non-array
    $parsedEntryUrl = parse_url($entry->getUrl()) ?: [];
    $parsedContentUrl = parse_url($url) ?: [];

    /**
     * array_diff_assoc only reports the changes needed to go from the left array to the
     * right one, so both directions are computed and merged. The sorted keys of the result
     * are the url parts that differ: scheme, host, path, query and/or fragment.
     */
    $diff = array_merge(
        array_diff_assoc($parsedEntryUrl, $parsedContentUrl),
        array_diff_assoc($parsedContentUrl, $parsedEntryUrl)
    );
    $diffKeys = array_keys($diff);
    sort($diffKeys);

    if ($this->ignoreOriginProcessor->process($entry)) {
        $entry->setUrl($url);

        return false;
    }

    /**
     * $diffKeys is an array, so cases are arrays too: ['path'] means "only the path differs".
     * Values in $diffKeys are sorted, multi-part cases must be written in sorted order.
     */
    switch ($diffKeys) {
        case ['path']:
            // one of the two urls may have no path at all (http://example.com vs http://example.com/)
            $entryPath = $parsedEntryUrl['path'] ?? '';
            $contentPath = $parsedContentUrl['path'] ?? '';

            if (($entryPath . '/' === $contentPath) // diff is trailing slash, we only replace the url of the entry
                || ($url === urldecode($entry->getUrl()))) { // we update entry url if new url is a decoded version of it, see EntryRepository#findByUrlAndUserId
                $entry->setUrl($url);
            }
            break;
        case ['scheme']:
            $entry->setUrl($url);
            break;
        case ['fragment']:
            // noop
            break;
        default:
            if (empty($entry->getOriginUrl())) {
                $entry->setOriginUrl($entry->getUrl());
            }
            $entry->setUrl($url);
            break;
    }
}
|
||||
|
||||
/**
 * Tell whether the given content carries a non-empty title, html and url.
 *
 * @return bool true if valid otherwise false
 */
private function validateContent(array $content)
{
    $somethingMissing = empty($content['title'])
        || empty($content['html'])
        || empty($content['url']);

    return !$somethingMissing;
}
|
||||
}
|
86
src/Helper/CryptoProxy.php
Normal file
86
src/Helper/CryptoProxy.php
Normal file
|
@ -0,0 +1,86 @@
|
|||
<?php
|
||||
|
||||
namespace Wallabag\CoreBundle\Helper;
|
||||
|
||||
use Defuse\Crypto\Crypto;
|
||||
use Defuse\Crypto\Exception\WrongKeyOrModifiedCiphertextException;
|
||||
use Defuse\Crypto\Key;
|
||||
use Psr\Log\LoggerInterface;
|
||||
|
||||
/**
 * This is a proxy to crypt and decrypt password used by SiteCredential entity.
 * BTW, It might be re-use for sth else.
 */
class CryptoProxy
{
    private $logger;
    private $encryptionKey;

    /**
     * Read the encryption key from the given file, generating the file first when it does not exist.
     *
     * @param string $encryptionKeyPath Path to the file holding the ascii-safe encryption key
     */
    public function __construct($encryptionKeyPath, LoggerInterface $logger)
    {
        $this->logger = $logger;

        if (!file_exists($encryptionKeyPath)) {
            $key = Key::createNewRandomKey();

            file_put_contents($encryptionKeyPath, $key->saveToAsciiSafeString());
            // the key file is restricted to its owner
            chmod($encryptionKeyPath, 0600);
        }

        $this->encryptionKey = file_get_contents($encryptionKeyPath);
    }

    /**
     * Ensure the given value will be crypted.
     *
     * @param string $secretValue Secret value to crypt
     *
     * @return string
     */
    public function crypt($secretValue)
    {
        $this->logger->debug('Crypto: crypting value: ' . $this->mask($secretValue));

        return Crypto::encrypt($secretValue, $this->loadKey());
    }

    /**
     * Ensure the given crypted value will be decrypted.
     *
     * @param string $cryptedValue The value to be decrypted
     *
     * @throws \RuntimeException when the key is wrong or the ciphertext was modified;
     *                           the original exception is available through getPrevious()
     *
     * @return string
     */
    public function decrypt($cryptedValue)
    {
        $this->logger->debug('Crypto: decrypting value: ' . $this->mask($cryptedValue));

        try {
            return Crypto::decrypt($cryptedValue, $this->loadKey());
        } catch (WrongKeyOrModifiedCiphertextException $e) {
            // chain the original exception so its stack trace is not lost
            throw new \RuntimeException('Decrypt fail: ' . $e->getMessage(), 0, $e);
        }
    }

    /**
     * Load the private key.
     *
     * @return Key
     */
    private function loadKey()
    {
        return Key::loadFromAsciiSafeString($this->encryptionKey);
    }

    /**
     * Keep first and last character and put some stars in between.
     *
     * Characters (not bytes) are kept, so a value starting or ending with a multibyte
     * character does not produce an invalid UTF-8 log message.
     *
     * @param string $value Value to mask
     *
     * @return string
     */
    private function mask($value)
    {
        $value = (string) $value;

        if ('' === $value) {
            return 'Empty value';
        }

        return mb_substr($value, 0, 1, 'UTF-8') . '*****' . mb_substr($value, -1, null, 'UTF-8');
    }
}
|
394
src/Helper/DownloadImages.php
Normal file
394
src/Helper/DownloadImages.php
Normal file
|
@ -0,0 +1,394 @@
|
|||
<?php
|
||||
|
||||
namespace Wallabag\CoreBundle\Helper;
|
||||
|
||||
use enshrined\svgSanitize\Sanitizer;
|
||||
use GuzzleHttp\Psr7\Uri;
|
||||
use GuzzleHttp\Psr7\UriResolver;
|
||||
use Psr\Log\LoggerInterface;
|
||||
use Symfony\Component\DomCrawler\Crawler;
|
||||
use Symfony\Component\Finder\Finder;
|
||||
use Symfony\Component\HttpFoundation\Request;
|
||||
use Symfony\Component\Mime\MimeTypes;
|
||||
use Symfony\Contracts\HttpClient\HttpClientInterface;
|
||||
use Symfony\Contracts\HttpClient\ResponseInterface;
|
||||
|
||||
class DownloadImages
|
||||
{
|
||||
public const REGENERATE_PICTURES_QUALITY = 80;
|
||||
|
||||
private $client;
|
||||
private $baseFolder;
|
||||
private $logger;
|
||||
private $mimeTypes;
|
||||
private $wallabagUrl;
|
||||
|
||||
/**
 * @param HttpClientInterface $downloadImagesClient Client used to retrieve the images
 * @param string              $baseFolder           Folder where all images are saved (created when missing)
 * @param string              $wallabagUrl          Url of the instance; trailing slashes are removed
 */
public function __construct(HttpClientInterface $downloadImagesClient, $baseFolder, $wallabagUrl, LoggerInterface $logger)
{
    $this->logger = $logger;
    $this->client = $downloadImagesClient;
    $this->mimeTypes = new MimeTypes();
    $this->wallabagUrl = rtrim($wallabagUrl, '/');
    $this->baseFolder = $baseFolder;

    // must run once the base folder is known
    $this->setFolder();
}
|
||||
|
||||
/**
 * Give the folder under which all images are saved, as received by the constructor.
 */
public function getBaseFolder()
{
    return $this->baseFolder;
}
|
||||
|
||||
/**
 * Collect the urls of every img tag found in the html: the src attributes first,
 * then the urls listed in srcset attributes. Duplicates are removed.
 *
 * @param string $html
 *
 * @return string[]
 */
public static function extractImagesUrlsFromHtml($html)
{
    $images = (new Crawler($html))->filterXpath('//img');

    $srcUrls = $images->extract(['src']);
    $srcsetUrls = self::getSrcsetUrls($images);

    return array_unique(array_merge($srcUrls, $srcsetUrls));
}
|
||||
|
||||
/**
 * Process the html and extract image from it, save them to local and return the updated html.
 *
 * Every image that could be saved locally has its url replaced in the html by the local one.
 * Images that could not be processed are left untouched.
 *
 * @param int    $entryId ID of the entry
 * @param string $html
 * @param string $url     Used as a base path for relative image and folder
 *
 * @return string
 */
public function processHtml($entryId, $html, $url)
{
    $imagesUrls = self::extractImagesUrlsFromHtml($html);

    // ensure images aren't overlapping: with a descending sort, an url is replaced
    // before any shorter url it starts with
    arsort($imagesUrls);

    $relativePath = $this->getRelativePath($entryId);

    // download and save the image to the folder
    foreach ($imagesUrls as $image) {
        $newImage = $this->processSingleImage($entryId, $image, $url, $relativePath);

        if (false === $newImage) {
            continue;
        }

        $html = str_replace($image, $newImage, $html);

        // the extracted url is entity-decoded: when it contains "&", the html may hold it
        // with the ampersand written as "&amp;" or as the numeric entity "&#038;"
        if (false !== stripos($image, '&') && false === stripos($html, $image)) {
            $imageAmp = str_replace('&', '&amp;', $image);
            $html = str_replace($imageAmp, $newImage, $html);

            $imageUnicode = str_replace('&', '&#038;', $image);
            $html = str_replace($imageUnicode, $newImage, $html);
        }
    }

    return $html;
}
|
||||
|
||||
/**
 * Process a single image:
 * - retrieve it
 * - re-saved it (for security reason)
 * - return the new local path.
 *
 * The image is skipped (false is returned) when its absolute url can not be determined,
 * when it can not be retrieved, when its extension is not an allowed one or when it can not
 * be regenerated.
 *
 * @param int    $entryId      ID of the entry
 * @param string $imagePath    Path to the image to retrieve
 * @param string $url          Url from where the image were found
 * @param string $relativePath Relative local path to saved the image
 *
 * @return string|false Relative url to access the image from the web
 */
public function processSingleImage($entryId, $imagePath, $url, $relativePath = null)
{
    if (null === $imagePath) {
        return false;
    }

    if (null === $relativePath) {
        $relativePath = $this->getRelativePath($entryId);
    }

    $this->logger->debug('DownloadImages: working on image: ' . $imagePath);

    $folderPath = $this->baseFolder . '/' . $relativePath;

    // build image path
    $absolutePath = $this->getAbsoluteLink($url, $imagePath);
    if (false === $absolutePath) {
        $this->logger->error('DownloadImages: Can not determine the absolute path for that image, skipping.');

        return false;
    }

    try {
        $res = $this->client->request(Request::METHOD_GET, $absolutePath);
        // the response is lazy: network errors only show up when it is first read,
        // which happens while checking the status code for the extension
        $ext = $this->getExtensionFromResponse($res, $imagePath);
    } catch (\Exception $e) {
        $this->logger->error('DownloadImages: Can not retrieve image, skipping.', ['exception' => $e]);

        return false;
    }

    // bad status code or not allowed extension: nothing must be written for that image
    if (false === $ext) {
        return false;
    }

    $hashImage = hash('crc32', $absolutePath);
    $localPath = $folderPath . '/' . $hashImage . '.' . $ext;
    $urlPath = $this->wallabagUrl . '/assets/images/' . $relativePath . '/' . $hashImage . '.' . $ext;

    // custom case for SVG (because GD doesn't support SVG)
    if ('svg' === $ext) {
        try {
            $sanitizer = new Sanitizer();
            $sanitizer->minify(true);
            $sanitizer->removeRemoteReferences(true);
            $cleanSVG = $sanitizer->sanitize($res->getContent());

            // add an extra validation by checking about `<svg `
            if (false === $cleanSVG || !str_contains($cleanSVG, '<svg ')) {
                $this->logger->error('DownloadImages: Bad SVG given', ['path' => $imagePath]);

                return false;
            }

            file_put_contents($localPath, $cleanSVG);

            return $urlPath;
        } catch (\Exception $e) {
            $this->logger->error('DownloadImages: Error while sanitize SVG', ['path' => $imagePath, 'message' => $e->getMessage()]);

            return false;
        }
    }

    try {
        $im = imagecreatefromstring($res->getContent());
    } catch (\Exception $e) {
        $im = false;
    }

    if (false === $im) {
        $this->logger->error('DownloadImages: Error while regenerating image', ['path' => $localPath]);

        return false;
    }

    switch ($ext) {
        case 'gif':
            // use Imagick if available to keep GIF animation
            if (class_exists(\Imagick::class)) {
                try {
                    $imagick = new \Imagick();
                    $imagick->readImageBlob($res->getContent());
                    $imagick->setImageFormat('gif');
                    $imagick->writeImages($localPath, true);
                } catch (\Exception $e) {
                    // if Imagick fail, fallback to the default solution
                    imagegif($im, $localPath);
                }
            } else {
                imagegif($im, $localPath);
            }

            $this->logger->debug('DownloadImages: Re-creating gif');
            break;
        case 'jpeg':
        case 'jpg':
            imagejpeg($im, $localPath, self::REGENERATE_PICTURES_QUALITY);
            $this->logger->debug('DownloadImages: Re-creating jpg');
            break;
        case 'png':
            imagealphablending($im, false);
            imagesavealpha($im, true);
            // png compression goes from 0 to 9: scale the 0-100 quality accordingly
            imagepng($im, $localPath, (int) ceil(self::REGENERATE_PICTURES_QUALITY / 100 * 9));
            $this->logger->debug('DownloadImages: Re-creating png');
            break;
        case 'webp':
            imagewebp($im, $localPath, self::REGENERATE_PICTURES_QUALITY);
            $this->logger->debug('DownloadImages: Re-creating webp');
            break;
    }

    imagedestroy($im);

    return $urlPath;
}
|
||||
|
||||
/**
 * Delete every (non dot) file saved for the given entry id, then its folder.
 * Failures while deleting are silenced on purpose.
 *
 * @param int $entryId ID of the entry
 */
public function removeImages($entryId)
{
    $folderPath = $this->baseFolder . '/' . $this->getRelativePath($entryId);

    $files = (new Finder())
        ->files()
        ->ignoreDotFiles(true)
        ->in($folderPath);

    foreach ($files as $file) {
        @unlink($file->getRealPath());
    }

    @rmdir($folderPath);
}
|
||||
|
||||
/**
 * Build the path (relative to the base folder) where images of an entry are saved.
 * The path is made of the two first characters of the crc32 hash of the entry id, as two
 * nested folders, followed by the whole hash.
 *
 * @param int  $entryId      ID of the entry
 * @param bool $createFolder Should we create the folder for the given id?
 *
 * @return string
 */
public function getRelativePath($entryId, $createFolder = true)
{
    $hash = hash('crc32', $entryId);
    $relativePath = implode('/', [$hash[0], $hash[1], $hash]);
    $folderPath = $this->baseFolder . '/' . $relativePath;

    if ($createFolder && !file_exists($folderPath)) {
        mkdir($folderPath, 0777, true);
    }

    $this->logger->debug('DownloadImages: Folder used for that Entry id', ['folder' => $folderPath, 'entryId' => $entryId]);

    return $relativePath;
}
|
||||
|
||||
/**
 * Collect the urls listed in the srcset attribute of the given image nodes.
 * Urls of a node are put before the urls of the nodes handled earlier.
 *
 * @return array An array of urls
 */
private static function getSrcsetUrls(Crawler $imagesCrawler)
{
    // A srcset candidate is:
    // - an url: one or more characters, none of them being " OR ' OR a white space
    // - zero or more white spaces
    // - one or more digits followed by w OR x
    $pattern = "/(?:[^\"'\s]+\s*(?:\d+[wx])+)/";
    $urls = [];

    foreach ($imagesCrawler as $node) {
        \assert($node instanceof \DOMElement);

        $srcset = $node->getAttribute('srcset');
        if ('' === $srcset) {
            continue;
        }

        preg_match_all($pattern, $srcset, $matches);
        $candidates = array_merge(...$matches);

        // only the url of a candidate is kept, not its width / density descriptor
        $candidateUrls = array_map(static function ($candidate) {
            return trim(explode(' ', $candidate, 2)[0]);
        }, $candidates);

        $urls = array_merge($candidateUrls, $urls);
    }

    return $urls;
}
|
||||
|
||||
/**
 * Make sure the base folder, where all images are going to be saved, exists.
 */
private function setFolder()
{
    if (file_exists($this->baseFolder)) {
        return;
    }

    // missing folder: attempt to create it, parents included
    mkdir($this->baseFolder, 0755, true);
}
|
||||
|
||||
/**
 * Resolve $url against $base to get an absolute url.
 * An url already starting with http:// or https:// is returned as is.
 *
 * @see Graby->makeAbsoluteStr
 *
 * @param string $base Base url
 * @param string $url  Url to make it absolute
 *
 * @return false|string false when the base has no scheme or no authority
 */
private function getAbsoluteLink($base, $url)
{
    $alreadyAbsolute = 1 === preg_match('!^https?://!i', $url);
    if ($alreadyAbsolute) {
        return $url;
    }

    $baseUri = new Uri($base);

    // nothing can be resolved against a base without scheme & host
    if ('' === $baseUri->getAuthority() || '' === $baseUri->getScheme()) {
        $this->logger->error('DownloadImages: Can not make an absolute link', ['base' => $baseUri, 'url' => $url]);

        return false;
    }

    $resolved = UriResolver::resolve($baseUri, new Uri($url));

    return (string) $resolved;
}
|
||||
|
||||
/**
 * Find out the extension of the image from the response, and validate it.
 *
 * The extension is first guessed from the content-type header. When that gives nothing,
 * the first bytes of the body are compared with known image signatures.
 *
 * @param ResponseInterface $res       Http Response
 * @param string            $imagePath Path from the src image from the content (used for log only)
 *
 * @return string|false Extension name or false if validation failed
 */
private function getExtensionFromResponse(ResponseInterface $res, $imagePath)
{
    if (200 !== $res->getStatusCode()) {
        return false;
    }

    $contentTypeHeader = $res->getHeaders()['content-type'] ?? [];
    $ext = current($this->mimeTypes->getExtensions(current($contentTypeHeader)));
    $this->logger->debug('DownloadImages: Checking extension', ['ext' => $ext, 'header' => $contentTypeHeader]);

    // ok header doesn't have the extension, try a different way
    if (empty($ext)) {
        $signatures = [
            'jpeg' => "\xFF\xD8\xFF",
            'gif' => 'GIF',
            'png' => "\x89\x50\x4e\x47\x0d\x0a",
            'webp' => "\x52\x49\x46\x46",
        ];
        $firstBytes = substr($res->getContent(), 0, 8);

        foreach ($signatures as $candidate => $signature) {
            if (!str_starts_with($firstBytes, $signature)) {
                continue;
            }

            $ext = $candidate;
            break;
        }

        $this->logger->debug('DownloadImages: Checking extension (alternative)', ['ext' => $ext]);
    }

    $allowed = ['jpeg', 'jpg', 'gif', 'png', 'webp', 'svg'];
    if (\in_array($ext, $allowed, true)) {
        return $ext;
    }

    $this->logger->error('DownloadImages: Processed image with not allowed extension. Skipping: ' . $imagePath);

    return false;
}
|
||||
}
|
24
src/Helper/EntityTimestampsTrait.php
Normal file
24
src/Helper/EntityTimestampsTrait.php
Normal file
|
@ -0,0 +1,24 @@
|
|||
<?php
|
||||
|
||||
namespace Wallabag\CoreBundle\Helper;
|
||||
|
||||
use Doctrine\ORM\Mapping as ORM;
|
||||
|
||||
/**
 * Trait keeping the creation & update dates of an Entity up to date.
 * The entity using it has to provide the createdAt and updatedAt properties.
 */
trait EntityTimestampsTrait
{
    /**
     * Set the creation date the first time only, and refresh the update date every time.
     *
     * @ORM\PrePersist
     * @ORM\PreUpdate
     */
    public function timestamps()
    {
        if (!isset($this->createdAt)) {
            $this->createdAt = new \DateTime();
        }

        $this->updatedAt = new \DateTime();
    }
}
|
485
src/Helper/EntriesExport.php
Normal file
485
src/Helper/EntriesExport.php
Normal file
|
@ -0,0 +1,485 @@
|
|||
<?php
|
||||
|
||||
namespace Wallabag\CoreBundle\Helper;
|
||||
|
||||
use Html2Text\Html2Text;
|
||||
use JMS\Serializer\SerializationContext;
|
||||
use JMS\Serializer\SerializerBuilder;
|
||||
use PHPePub\Core\EPub;
|
||||
use PHPePub\Core\Structure\OPF\DublinCore;
|
||||
use Symfony\Component\HttpFoundation\Response;
|
||||
use Symfony\Component\Security\Core\Authentication\Token\Storage\TokenStorageInterface;
|
||||
use Symfony\Contracts\Translation\TranslatorInterface;
|
||||
use Wallabag\CoreBundle\Entity\Entry;
|
||||
use Wallabag\CoreBundle\Entity\User;
|
||||
|
||||
/**
|
||||
* This class doesn't have unit test BUT it's fully covered by a functional test with ExportControllerTest.
|
||||
*/
|
||||
class EntriesExport
|
||||
{
|
||||
private $wallabagUrl;
|
||||
private $logoPath;
|
||||
private $translator;
|
||||
private $tokenStorage;
|
||||
private $title = '';
|
||||
private $entries = [];
|
||||
private $author = 'wallabag';
|
||||
private $language = '';
|
||||
|
||||
/**
 * @param TranslatorInterface   $translator   Translator service
 * @param string                $wallabagUrl  Wallabag instance url
 * @param string                $logoPath     Path to the logo FROM THE BUNDLE SCOPE
 * @param TokenStorageInterface $tokenStorage Needed to retrieve the current user
 */
public function __construct(TranslatorInterface $translator, $wallabagUrl, $logoPath, TokenStorageInterface $tokenStorage)
{
    // services
    $this->tokenStorage = $tokenStorage;
    $this->translator = $translator;

    // configuration
    $this->logoPath = $logoPath;
    $this->wallabagUrl = $wallabagUrl;
}
|
||||
|
||||
/**
 * Register what has to be exported.
 *
 * A single entry is wrapped into an array and its language becomes the
 * language of the export; an array is stored as given.
 *
 * @param array|Entry $entries An array of entries or one entry
 *
 * @return EntriesExport
 */
public function setEntries($entries)
{
    if (\is_array($entries)) {
        $this->entries = $entries;

        return $this;
    }

    // only one entry: remember its language too
    $this->language = $entries->getLanguage();
    $this->entries = [$entries];

    return $this;
}
|
||||
|
||||
/**
 * Compute the title of the export.
 *
 * For the 'entry' method the title of the first entry is used, otherwise
 * the title is the method name followed by ' articles'.
 *
 * @param string $method Method to get articles
 *
 * @return EntriesExport
 */
public function updateTitle($method)
{
    if ('entry' === $method) {
        $this->title = $this->entries[0]->getTitle();

        return $this;
    }

    $this->title = $method . ' articles';

    return $this;
}
|
||||
|
||||
/**
 * Compute the author of the export.
 *
 * Anything but a single entry is attributed to 'Various authors'. For a
 * single entry its publishers are joined, falling back on the domain name
 * when there is none.
 *
 * @param string $method Method to get articles
 *
 * @return EntriesExport
 */
public function updateAuthor($method)
{
    if ('entry' !== $method) {
        $this->author = 'Various authors';

        return $this;
    }

    $firstEntry = $this->entries[0];
    $domainName = $firstEntry->getDomainName();
    $publishers = $firstEntry->getPublishedBy();

    $this->author = empty($publishers) ? $domainName : implode(', ', $publishers);

    return $this;
}
|
||||
|
||||
/**
 * Export the entries in the requested format.
 *
 * The format is mapped to the matching `produce<Format>()` method of this class.
 *
 * @param string $format
 *
 * @throws \InvalidArgumentException When no producer exists for the format
 *
 * @return Response
 */
public function exportAs($format)
{
    $producer = 'produce' . ucfirst($format);

    if (!method_exists($this, $producer)) {
        throw new \InvalidArgumentException(sprintf('The format "%s" is not yet supported.', $format));
    }

    return $this->{$producer}();
}
|
||||
|
||||
/**
 * Serialize the entries to JSON and return the raw result (not wrapped in a Response).
 */
public function exportJsonData()
{
    $json = $this->prepareSerializingContent('json');

    return $json;
}
|
||||
|
||||
/**
 * Use PHPePub to build a .epub file out of the entries.
 *
 * Each entry yields two chapters: a cover page with its metadata followed by
 * the content itself. A last "Notices" chapter holds the export footer.
 *
 * @return Response
 */
private function produceEpub()
{
    $token = $this->tokenStorage->getToken();
    $user = $token ? $token->getUser() : null;
    \assert($user instanceof User);

    // XHTML wrapper put around every page of the book
    $pageStart =
        "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"
        . "<html xmlns=\"http://www.w3.org/1999/xhtml\" xmlns:epub=\"http://www.idpf.org/2007/ops\">\n"
        . '<head>'
        . "<meta http-equiv=\"Default-Style\" content=\"text/html; charset=utf-8\" />\n"
        . "<title>wallabag articles book</title>\n"
        . "</head>\n"
        . "<body>\n";
    $pageEnd = "</body>\n</html>\n";

    $book = new EPub(EPub::BOOK_VERSION_EPUB3);

    // Book metadata
    $book->setTitle($this->title);
    // the language must be BCP47-compliant for EPub, hence "_" becomes "-"
    $book->setLanguage(str_replace('_', '-', $this->language));
    $book->setDescription('Some articles saved on my wallabag');
    $book->setAuthor($this->author, $this->author);
    $book->setPublisher('wallabag', 'wallabag');
    // not strictly required: the book date defaults to time()
    $book->setDate(time());
    $book->setSourceURL($this->wallabagUrl);
    $book->addDublinCoreMetadata(DublinCore::CONTRIBUTOR, 'PHP');
    $book->addDublinCoreMetadata(DublinCore::CONTRIBUTOR, 'wallabag');

    $entryIds = [];
    $entryCount = \count($this->entries);
    $i = 0;

    // Chapters: one cover page and one content page per entry
    foreach ($this->entries as $entry) {
        ++$i;

        // a cover image is only set when the book holds exactly one entry
        if (1 === $entryCount) {
            $preview = $entry->getPreviewPicture();
            if (null !== $preview) {
                $book->setCoverImage($preview);
            }
        }

        // tags become subjects of the book
        foreach ($entry->getTags() as $tag) {
            $book->setSubject($tag->getLabel());
        }

        $filename = sha1(sprintf('%s:%s', $entry->getUrl(), $entry->getTitle()));

        $publishedBy = $entry->getPublishedBy();
        $authors = empty($publishedBy)
            ? $this->translator->trans('export.unknown')
            : implode(',', $publishedBy);

        $publishedAt = $entry->getPublishedAt();
        $publishedDate = empty($publishedAt)
            ? $this->translator->trans('export.unknown')
            : $publishedAt->format('Y-m-d');

        // the stored reading time is rescaled with the reading speed of the user
        $readingTime = round($entry->getReadingTime() / $user->getConfig()->getReadingSpeed() * 200);

        $titlepage = $pageStart;
        $titlepage .= '<h1>' . $entry->getTitle() . '</h1>';
        $titlepage .= '<dl>';
        $titlepage .= '<dt>' . $this->translator->trans('entry.view.published_by') . '</dt><dd>' . $authors . '</dd>';
        $titlepage .= '<dt>' . $this->translator->trans('entry.metadata.published_on') . '</dt><dd>' . $publishedDate . '</dd>';
        $titlepage .= '<dt>' . $this->translator->trans('entry.metadata.reading_time') . '</dt><dd>' . $this->translator->trans('entry.metadata.reading_time_minutes_short', ['%readingTime%' => $readingTime]) . '</dd>';
        $titlepage .= '<dt>' . $this->translator->trans('entry.metadata.added_on') . '</dt><dd>' . $entry->getCreatedAt()->format('Y-m-d') . '</dd>';
        $titlepage .= '<dt>' . $this->translator->trans('entry.metadata.address') . '</dt><dd><a href="' . $entry->getUrl() . '">' . $entry->getUrl() . '</a></dd>';
        $titlepage .= '</dl>';
        $titlepage .= $pageEnd;

        $book->addChapter("Entry {$i} of {$entryCount}", "{$filename}_cover.html", $titlepage, true, EPub::EXTERNAL_REF_ADD);

        $chapter = $pageStart . $entry->getContent() . $pageEnd;

        $entryIds[] = $entry->getId();
        $book->addChapter($entry->getTitle(), "{$filename}.html", $chapter, true, EPub::EXTERNAL_REF_ADD);
    }

    $book->addChapter('Notices', 'Cover2.html', $pageStart . $this->getExportInformation('PHPePub') . $pageEnd);

    // the identifier is derived from the instance url and the ids of the exported entries
    $hash = sha1(sprintf('%s:%s', $this->wallabagUrl, implode(',', $entryIds)));
    $book->setIdentifier(sprintf('urn:wallabag:%s', $hash), EPub::IDENTIFIER_URI);

    $headers = [
        'Content-Description' => 'File Transfer',
        'Content-type' => 'application/epub+zip',
        'Content-Disposition' => 'attachment; filename="' . $this->getSanitizedFilename() . '.epub"',
        'Content-Transfer-Encoding' => 'binary',
    ];

    return new Response($book->getBook(), 200, $headers);
}
|
||||
|
||||
/**
 * Use TCPDF to dump a .pdf file.
 *
 * Each entry takes two pages: one with its metadata, one with its content.
 * A last page holds the export footer.
 *
 * @return Response
 */
private function producePdf()
{
    $token = $this->tokenStorage->getToken();
    $user = $token ? $token->getUser() : null;
    \assert($user instanceof User);

    $pdf = new \TCPDF(PDF_PAGE_ORIENTATION, PDF_UNIT, PDF_PAGE_FORMAT, true, 'UTF-8', false);

    /*
     * Book metadata
     */
    $pdf->SetCreator(PDF_CREATOR);
    $pdf->SetAuthor($this->author);
    $pdf->SetTitle($this->title);
    $pdf->SetSubject('Articles via wallabag');

    // SetKeywords() replaces the previous value instead of appending to it, so
    // the labels are collected (without duplicates) and written once after the loop.
    $keywords = ['wallabag' => true];

    /*
     * Adding actual entries
     */
    foreach ($this->entries as $entry) {
        foreach ($entry->getTags() as $tag) {
            $keywords[$tag->getLabel()] = true;
        }

        $publishedBy = $entry->getPublishedBy();
        $authors = $this->translator->trans('export.unknown');
        if (!empty($publishedBy)) {
            $authors = implode(',', $publishedBy);
        }

        // rescaled with the reading speed of the user and rounded, like the EPUB export does
        $readingTime = round($entry->getReadingTime() / $user->getConfig()->getReadingSpeed() * 200);

        // first page: metadata of the entry
        $pdf->AddPage();
        $html = '<h1>' . $entry->getTitle() . '</h1>' .
            '<dl>' .
            '<dt>' . $this->translator->trans('entry.view.published_by') . '</dt><dd>' . $authors . '</dd>' .
            '<dt>' . $this->translator->trans('entry.metadata.reading_time') . '</dt><dd>' . $this->translator->trans('entry.metadata.reading_time_minutes_short', ['%readingTime%' => $readingTime]) . '</dd>' .
            '<dt>' . $this->translator->trans('entry.metadata.added_on') . '</dt><dd>' . $entry->getCreatedAt()->format('Y-m-d') . '</dd>' .
            '<dt>' . $this->translator->trans('entry.metadata.address') . '</dt><dd><a href="' . $entry->getUrl() . '">' . $entry->getUrl() . '</a></dd>' .
            '</dl>';
        $pdf->writeHTMLCell(0, 0, '', '', $html, 0, 1, 0, true, '', true);

        // second page: the content itself
        $pdf->AddPage();
        $html = '<h1>' . $entry->getTitle() . '</h1>';
        $html .= $entry->getContent();

        $pdf->writeHTMLCell(0, 0, '', '', $html, 0, 1, 0, true, '', true);
    }

    $pdf->SetKeywords(implode(', ', array_keys($keywords)));

    /*
     * Last page
     */
    $pdf->AddPage();
    $html = $this->getExportInformation('tcpdf');

    $pdf->writeHTMLCell(0, 0, '', '', $html, 0, 1, 0, true, '', true);

    // set image scale factor
    $pdf->setImageScale(PDF_IMAGE_SCALE_RATIO);

    return new Response(
        $pdf->Output('', 'S'),
        200,
        [
            'Content-Description' => 'File Transfer',
            'Content-type' => 'application/pdf',
            'Content-Disposition' => 'attachment; filename="' . $this->getSanitizedFilename() . '.pdf"',
            'Content-Transfer-Encoding' => 'binary',
        ]
    );
}
|
||||
|
||||
/**
 * Dump a CSV file (inspired from CsvFileDumper).
 *
 * Fields are separated by ";" and enclosed by double quotes; the first record
 * is a header line.
 *
 * @return Response
 */
private function produceCsv()
{
    $delimiter = ';';
    $enclosure = '"';
    $stream = fopen('php://memory', 'b+r');

    $columns = ['Title', 'URL', 'Content', 'Tags', 'MIME Type', 'Language', 'Creation date'];
    fputcsv($stream, $columns, $delimiter, $enclosure);

    foreach ($this->entries as $entry) {
        $record = [
            $entry->getTitle(),
            $entry->getURL(),
            // line breaks are stripped from the content
            str_replace(["\r\n", "\r", "\n"], '', $entry->getContent()),
            implode(', ', $entry->getTags()->toArray()),
            $entry->getMimetype(),
            $entry->getLanguage(),
            // NOTE(review): "h" is the 12-hour clock and no am/pm marker is written - confirm this is intended
            $entry->getCreatedAt()->format('d/m/Y h:i:s'),
        ];

        fputcsv($stream, $record, $delimiter, $enclosure);
    }

    // read back everything written to the in-memory stream
    rewind($stream);
    $csv = stream_get_contents($stream);
    fclose($stream);

    $headers = [
        'Content-type' => 'application/csv',
        'Content-Disposition' => 'attachment; filename="' . $this->getSanitizedFilename() . '.csv"',
        'Content-Transfer-Encoding' => 'UTF-8',
    ];

    return new Response($csv, 200, $headers);
}
|
||||
|
||||
/**
 * Dump the serialized entries as a downloadable JSON file.
 *
 * @return Response
 */
private function produceJson()
{
    $body = $this->prepareSerializingContent('json');

    $headers = [
        'Content-type' => 'application/json',
        'Content-Disposition' => 'attachment; filename="' . $this->getSanitizedFilename() . '.json"',
        'Content-Transfer-Encoding' => 'UTF-8',
    ];

    return new Response($body, 200, $headers);
}
|
||||
|
||||
/**
 * Dump the serialized entries as a downloadable XML file.
 *
 * @return Response
 */
private function produceXml()
{
    $body = $this->prepareSerializingContent('xml');

    $headers = [
        'Content-type' => 'application/xml',
        'Content-Disposition' => 'attachment; filename="' . $this->getSanitizedFilename() . '.xml"',
        'Content-Transfer-Encoding' => 'UTF-8',
    ];

    return new Response($body, 200, $headers);
}
|
||||
|
||||
/**
 * Dump a plain text file.
 *
 * Each entry is introduced by its title framed by two lines of 100 "=" and
 * followed by its content converted from HTML to text (links dropped, width 100).
 *
 * @return Response
 */
private function produceTxt()
{
    $separator = str_repeat('=', 100);
    $text = '';

    foreach ($this->entries as $entry) {
        $text .= "\n\n" . $separator . "\n\n" . $entry->getTitle() . "\n\n" . $separator . "\n\n";
        $text .= (new Html2Text($entry->getContent(), ['do_links' => 'none', 'width' => 100]))->getText();
    }

    $headers = [
        'Content-type' => 'text/plain',
        'Content-Disposition' => 'attachment; filename="' . $this->getSanitizedFilename() . '.txt"',
        'Content-Transfer-Encoding' => 'UTF-8',
    ];

    return new Response($text, 200, $headers);
}
|
||||
|
||||
/**
 * Serialize the entries with JMS Serializer, restricted to the
 * "entries_for_user" group (used by the JSON & XML producers).
 *
 * @param string $format
 *
 * @return string
 */
private function prepareSerializingContent($format)
{
    $context = SerializationContext::create()->setGroups(['entries_for_user']);

    return SerializerBuilder::create()
        ->build()
        ->serialize($this->entries, $format, $context);
}
|
||||
|
||||
/**
 * Return a kind of footer / information for the export.
 *
 * The translated footer template holds an %IMAGE% placeholder: it is replaced
 * by the logo for 'tcpdf' and removed for any other generator.
 *
 * @param string $type Generator of the export, can be: tcpdf, PHPePub, PHPMobi
 *
 * @return string
 */
private function getExportInformation($type)
{
    $footer = $this->translator->trans('export.footer_template', [
        '%method%' => $type,
    ]);

    $image = 'tcpdf' === $type ? '<img src="' . $this->logoPath . '" />' : '';

    return str_replace('%IMAGE%', $image, $footer);
}
|
||||
|
||||
/**
 * Return a sanitized version of the title: it is transliterated to ASCII,
 * then every character which is not alphanumeric, "-", a space or "'" is removed.
 *
 * @return string Sanitized filename
 */
private function getSanitizedFilename()
{
    $asciiTitle = \Transliterator::createFromRules(':: Any-Latin; :: Latin-ASCII; :: NFD; :: [:Nonspacing Mark:] Remove; :: NFC;', \Transliterator::FORWARD)
        ->transliterate($this->title);

    return preg_replace('/[^A-Za-z0-9\- \']/', '', $asciiTitle);
}
|
||||
}
|
67
src/Helper/FileCookieJar.php
Normal file
67
src/Helper/FileCookieJar.php
Normal file
|
@ -0,0 +1,67 @@
|
|||
<?php
|
||||
|
||||
namespace Wallabag\CoreBundle\Helper;
|
||||
|
||||
use GuzzleHttp\Cookie\FileCookieJar as BaseFileCookieJar;
|
||||
use GuzzleHttp\Cookie\SetCookie;
|
||||
use GuzzleHttp\Utils;
|
||||
use Psr\Log\LoggerInterface;
|
||||
|
||||
/**
 * Overridden cookie jar behavior to:
 *  - ignore errors when the cookie file is malformed (resulting in clearing it).
 */
class FileCookieJar extends BaseFileCookieJar
{
    private $logger;

    /**
     * @param LoggerInterface $logger     Only used to log info when something goes wrong
     * @param string          $cookieFile File to store the cookie data
     */
    public function __construct(LoggerInterface $logger, $cookieFile)
    {
        // The logger is assigned first so load() can already use it.
        // NOTE(review): Guzzle's FileCookieJar constructor appears to call load() when the file exists - confirm.
        $this->logger = $logger;

        parent::__construct($cookieFile);
    }

    /**
     * Load cookies from a JSON formatted file.
     *
     * Old cookies are kept unless overwritten by newly loaded ones.
     * A file holding broken JSON is logged and ignored instead of raising an error.
     *
     * @param string $filename cookie file to load
     *
     * @throws \RuntimeException if the file cannot be loaded, or if it holds valid JSON which is not a list of cookies
     */
    public function load($filename)
    {
        $json = file_get_contents($filename);
        if (false === $json) {
            // @codeCoverageIgnoreStart
            throw new \RuntimeException("Unable to load file {$filename}");
            // @codeCoverageIgnoreEnd
        }

        try {
            $data = Utils::jsonDecode($json, true);
        } catch (\InvalidArgumentException $e) {
            $this->logger->error('JSON inside the cookie is broken', [
                'json' => $json,
                'error_msg' => $e->getMessage(),
            ]);

            // cookie file is invalid, just ignore the exception and it'll reset the whole cookie file
            $data = '';
        }

        if (\is_array($data)) {
            // reuse the decoded payload instead of decoding the file a second time
            foreach ($data as $cookie) {
                $this->setCookie(new SetCookie($cookie));
            }

            return;
        }

        // null (JSON "null") and empty values are silently accepted, any other scalar is rejected
        if (null !== $data && '' !== (string) $data) {
            throw new \RuntimeException("Invalid cookie file: {$filename}");
        }
    }
}
|
74
src/Helper/HttpClientFactory.php
Normal file
74
src/Helper/HttpClientFactory.php
Normal file
|
@ -0,0 +1,74 @@
|
|||
<?php
|
||||
|
||||
namespace Wallabag\CoreBundle\Helper;
|
||||
|
||||
use GuzzleHttp\Client as GuzzleClient;
|
||||
use GuzzleHttp\Cookie\CookieJar;
|
||||
use GuzzleHttp\Event\SubscriberInterface;
|
||||
use Http\Adapter\Guzzle5\Client as GuzzleAdapter;
|
||||
use Http\Client\HttpClient;
|
||||
use Http\HttplugBundle\ClientFactory\ClientFactory;
|
||||
use Psr\Log\LoggerInterface;
|
||||
|
||||
/**
 * Factory in charge of building and configuring the HTTP client.
 */
class HttpClientFactory implements ClientFactory
{
    /** @var SubscriberInterface[] */
    private $subscribers = [];

    /** @var CookieJar */
    private $cookieJar;

    private $restrictedAccess;
    private $logger;

    /**
     * @param CookieJar       $cookieJar        Shared cookie jar given to the restricted access client
     * @param string          $restrictedAccess This param is a kind of boolean. Values: 0 or 1
     * @param LoggerInterface $logger           Receives a debug line each time a client is created
     */
    public function __construct(CookieJar $cookieJar, $restrictedAccess, LoggerInterface $logger)
    {
        $this->logger = $logger;
        $this->restrictedAccess = $restrictedAccess;
        $this->cookieJar = $cookieJar;
    }

    /**
     * Register a subscriber which will be attached to the clients built with restricted access enabled.
     */
    public function addSubscriber(SubscriberInterface $subscriber)
    {
        $this->subscribers[] = $subscriber;
    }

    /**
     * Create a HttpClient from an array of Guzzle configuration.
     *
     * Without restricted access a bare client is returned. With it, the shared
     * cookie jar is emptied and used as default (unless one is configured) and
     * every registered subscriber is attached.
     *
     * @return HttpClient
     */
    public function createClient(array $config = [])
    {
        $restricted = (int) $this->restrictedAccess;

        $this->logger->log('debug', 'Restricted access config enabled?', ['enabled' => $restricted]);

        if (0 !== $restricted) {
            return $this->createRestrictedClient($config);
        }

        return new GuzzleAdapter(new GuzzleClient($config));
    }

    /**
     * Build the client used when restricted access is enabled.
     *
     * @return HttpClient
     */
    private function createRestrictedClient(array $config)
    {
        // we clear the cookie to avoid websites who use cookies for analytics
        $this->cookieJar->clear();

        // fall back on the (shared) cookie jar when the config defines none
        if (!isset($config['defaults']['cookies'])) {
            $config['defaults']['cookies'] = $this->cookieJar;
        }

        $client = new GuzzleClient($config);
        foreach ($this->subscribers as $subscriber) {
            $client->getEmitter()->attach($subscriber);
        }

        return new GuzzleAdapter($client);
    }
}
|
40
src/Helper/PreparePagerForEntries.php
Normal file
40
src/Helper/PreparePagerForEntries.php
Normal file
|
@ -0,0 +1,40 @@
|
|||
<?php
|
||||
|
||||
namespace Wallabag\CoreBundle\Helper;
|
||||
|
||||
use Pagerfanta\Adapter\AdapterInterface;
|
||||
use Pagerfanta\Adapter\NullAdapter;
|
||||
use Pagerfanta\Pagerfanta;
|
||||
use Symfony\Component\Security\Core\Authentication\Token\Storage\TokenStorageInterface;
|
||||
use Wallabag\CoreBundle\Entity\User;
|
||||
|
||||
/**
 * Builds the pager of a list of entries, sized from the configuration of the user.
 */
class PreparePagerForEntries
{
    private $tokenStorage;

    public function __construct(TokenStorageInterface $tokenStorage)
    {
        $this->tokenStorage = $tokenStorage;
    }

    /**
     * Wrap the adapter into a pager using the "items per page" setting of the user.
     *
     * When no user can be resolved, a pager over a NullAdapter is returned.
     *
     * @param User $user If user isn't logged in, we can force it (like for feed)
     *
     * @return Pagerfanta
     */
    public function prepare(AdapterInterface $adapter, ?User $user = null)
    {
        if (null === $user) {
            // no user forced: fall back on the authenticated one, if any
            $token = $this->tokenStorage->getToken();
            $user = $token ? $token->getUser() : null;
        }

        if ($user instanceof User) {
            $pager = new Pagerfanta($adapter);
            $pager->setMaxPerPage($user->getConfig()->getItemsPerPage());

            return $pager;
        }

        return new Pagerfanta(new NullAdapter());
    }
}
|
62
src/Helper/Redirect.php
Normal file
62
src/Helper/Redirect.php
Normal file
|
@ -0,0 +1,62 @@
|
|||
<?php
|
||||
|
||||
namespace Wallabag\CoreBundle\Helper;
|
||||
|
||||
use GuzzleHttp\Psr7\Uri;
|
||||
use Symfony\Component\Routing\Generator\UrlGeneratorInterface;
|
||||
use Symfony\Component\Security\Core\Authentication\Token\Storage\TokenStorageInterface;
|
||||
use Wallabag\CoreBundle\Entity\Config;
|
||||
use Wallabag\CoreBundle\Entity\User;
|
||||
|
||||
/**
 * Manage redirections to avoid redirecting to empty routes.
 */
class Redirect
{
    private $router;
    private $tokenStorage;

    public function __construct(UrlGeneratorInterface $router, TokenStorageInterface $tokenStorage)
    {
        $this->router = $router;
        $this->tokenStorage = $tokenStorage;
    }

    /**
     * Return the url to redirect to.
     *
     * The homepage is returned when:
     *  - the logged in user configured "redirect to homepage" when marking as read (unless ignored),
     *  - no url is given,
     *  - the url is not an absolute path reference, or cannot be parsed at all.
     * Otherwise the given url is returned untouched.
     *
     * @param string $url                    URL to redirect
     * @param bool   $ignoreActionMarkAsRead Ignore configured action when mark as read
     *
     * @return string
     */
    public function to($url, $ignoreActionMarkAsRead = false)
    {
        $token = $this->tokenStorage->getToken();
        $user = $token ? $token->getUser() : null;

        // the "mark as read" preference only exists for a logged in user
        if ($user instanceof User
            && !$ignoreActionMarkAsRead
            && Config::REDIRECT_TO_HOMEPAGE === $user->getConfig()->getActionMarkAsRead()) {
            return $this->router->generate('homepage');
        }

        if (null === $url || !$this->isAbsolutePath($url)) {
            return $this->router->generate('homepage');
        }

        return $url;
    }

    /**
     * Tell whether the url is an absolute path reference (as defined by Uri::isAbsolutePathReference).
     *
     * @param string $url
     *
     * @return bool False as well when the url cannot be parsed
     */
    private function isAbsolutePath($url)
    {
        try {
            return Uri::isAbsolutePathReference(new Uri($url));
        } catch (\InvalidArgumentException $e) {
            // an url the Uri class refuses to parse is never a valid redirection target
            return false;
        }
    }
}
|
50
src/Helper/RuleBasedIgnoreOriginProcessor.php
Normal file
50
src/Helper/RuleBasedIgnoreOriginProcessor.php
Normal file
|
@ -0,0 +1,50 @@
|
|||
<?php
|
||||
|
||||
namespace Wallabag\CoreBundle\Helper;
|
||||
|
||||
use Psr\Log\LoggerInterface;
|
||||
use RulerZ\RulerZ;
|
||||
use Wallabag\CoreBundle\Entity\Entry;
|
||||
use Wallabag\CoreBundle\Repository\IgnoreOriginInstanceRuleRepository;
|
||||
|
||||
/**
 * Checks the url of an entry against the "ignore origin" rules of the instance and of its user.
 */
class RuleBasedIgnoreOriginProcessor
{
    protected $rulerz;
    protected $logger;
    protected $ignoreOriginInstanceRuleRepository;

    public function __construct(RulerZ $rulerz, LoggerInterface $logger, IgnoreOriginInstanceRuleRepository $ignoreOriginInstanceRuleRepository)
    {
        $this->rulerz = $rulerz;
        $this->logger = $logger;
        $this->ignoreOriginInstanceRuleRepository = $ignoreOriginInstanceRuleRepository;
    }

    /**
     * Tell whether the url of the entry matches at least one ignore rule.
     *
     * Instance rules are evaluated before the rules of the user; the first
     * matching rule is logged and stops the evaluation.
     *
     * @param Entry $entry Entry to process
     *
     * @return bool
     */
    public function process(Entry $entry)
    {
        $url = $entry->getUrl();
        $userRules = $entry->getUser()->getConfig()->getIgnoreOriginRules()->toArray();
        $rules = array_merge($this->ignoreOriginInstanceRuleRepository->findAll(), $userRules);

        // parse_url() returns false on a seriously malformed url: start from an
        // empty array rather than relying on the implicit false-to-array conversion
        $parsed_url = parse_url((string) $url);
        if (!\is_array($parsed_url)) {
            $parsed_url = [];
        }

        // We add the former url as a new key _all for pattern matching
        $parsed_url['_all'] = $url;

        foreach ($rules as $rule) {
            if ($this->rulerz->satisfies($parsed_url, $rule->getRule())) {
                $this->logger->info('Origin url matching ignore rule.', [
                    'rule' => $rule->getRule(),
                ]);

                return true;
            }
        }

        return false;
    }
}
|
141
src/Helper/RuleBasedTagger.php
Normal file
141
src/Helper/RuleBasedTagger.php
Normal file
|
@ -0,0 +1,141 @@
|
|||
<?php
|
||||
|
||||
namespace Wallabag\CoreBundle\Helper;
|
||||
|
||||
use Doctrine\Common\Collections\ArrayCollection;
|
||||
use Psr\Log\LoggerInterface;
|
||||
use RulerZ\RulerZ;
|
||||
use Wallabag\CoreBundle\Entity\Entry;
|
||||
use Wallabag\CoreBundle\Entity\Tag;
|
||||
use Wallabag\CoreBundle\Entity\TaggingRule;
|
||||
use Wallabag\CoreBundle\Entity\User;
|
||||
use Wallabag\CoreBundle\Repository\EntryRepository;
|
||||
use Wallabag\CoreBundle\Repository\TagRepository;
|
||||
|
||||
/**
 * Tags entries according to the tagging rules defined in the configuration of their user.
 */
class RuleBasedTagger
{
    private $rulerz;
    private $tagRepository;
    private $entryRepository;
    private $logger;

    public function __construct(RulerZ $rulerz, TagRepository $tagRepository, EntryRepository $entryRepository, LoggerInterface $logger)
    {
        $this->rulerz = $rulerz;
        $this->tagRepository = $tagRepository;
        $this->entryRepository = $entryRepository;
        $this->logger = $logger;
    }

    /**
     * Add tags from rules defined by the user.
     *
     * Rules are evaluated against a copy of the entry whose reading time has been
     * rescaled (see fixEntry()); the tags are added to the given entry.
     *
     * @param Entry $entry Entry to tag
     */
    public function tag(Entry $entry)
    {
        $rules = $this->getRulesForUser($entry->getUser());

        $clonedEntry = $this->fixEntry($entry);

        foreach ($rules as $rule) {
            if (!$this->rulerz->satisfies($clonedEntry, $rule->getRule())) {
                continue;
            }

            $this->logger->info('Matching rule.', [
                'rule' => $rule->getRule(),
                'tags' => $rule->getTags(),
            ]);

            foreach ($rule->getTags() as $label) {
                $entry->addTag($this->getTag($label));
            }
        }
    }

    /**
     * Apply all the tagging rules defined by a user on its entries.
     *
     * @return array<Entry> A list of modified entries, each entry being listed once
     */
    public function tagAllForUser(User $user)
    {
        $rules = $this->getRulesForUser($user);
        $entriesToUpdate = [];
        $tagsCache = [];

        $entries = $this->entryRepository
            ->getBuilderForAllByUser($user->getId())
            ->getQuery()
            ->getResult();

        foreach ($entries as $entry) {
            $clonedEntry = $this->fixEntry($entry);
            $tagged = false;

            foreach ($rules as $rule) {
                if (!$this->rulerz->satisfies($clonedEntry, $rule->getRule())) {
                    continue;
                }

                foreach ($rule->getTags() as $label) {
                    // avoid new tag duplicate by manually caching them
                    if (!isset($tagsCache[$label])) {
                        $tagsCache[$label] = $this->getTag($label);
                    }

                    $entry->addTag($tagsCache[$label]);
                    $tagged = true;
                }
            }

            // registered after all rules ran, so an entry matching several tags is not listed several times
            if ($tagged) {
                $entriesToUpdate[] = $entry;
            }
        }

        return $entriesToUpdate;
    }

    /**
     * Fetch a tag by its (lowercased) label, or build a new one when the repository has none.
     *
     * @param string $label The tag's label
     *
     * @return Tag
     */
    private function getTag($label)
    {
        $label = mb_convert_case($label, \MB_CASE_LOWER);
        $tag = $this->tagRepository->findOneByLabel($label);

        if (!$tag) {
            $tag = new Tag();
            $tag->setLabel($label);
        }

        return $tag;
    }

    /**
     * Retrieves the tagging rules for a given user.
     *
     * @return ArrayCollection<TaggingRule>
     */
    private function getRulesForUser(User $user)
    {
        return $user->getConfig()->getTaggingRules();
    }

    /**
     * Update reading time on the fly to match the proper words per minute from the user.
     *
     * The change is made on a clone: the given entry is left untouched.
     *
     * @return Entry
     */
    private function fixEntry(Entry $entry)
    {
        $clonedEntry = clone $entry;
        $clonedEntry->setReadingTime($entry->getReadingTime() / $entry->getUser()->getConfig()->getReadingSpeed() * 200);

        return $clonedEntry;
    }
}
|
74
src/Helper/TagsAssigner.php
Normal file
74
src/Helper/TagsAssigner.php
Normal file
|
@ -0,0 +1,74 @@
|
|||
<?php
|
||||
|
||||
namespace Wallabag\CoreBundle\Helper;
|
||||
|
||||
use Wallabag\CoreBundle\Entity\Entry;
|
||||
use Wallabag\CoreBundle\Entity\Tag;
|
||||
use Wallabag\CoreBundle\Repository\TagRepository;
|
||||
|
||||
/**
 * Assigns tags, given as labels, to an entry.
 */
class TagsAssigner
{
    /**
     * @var TagRepository
     */
    protected $tagRepository;

    public function __construct(TagRepository $tagRepository)
    {
        $this->tagRepository = $tagRepository;
    }

    /**
     * Assign some tags to an entry.
     *
     * Labels are lowercased and trimmed, empty ones are skipped. A label is
     * resolved from the not yet flushed entities first, then from the repository,
     * and a new Tag is created as a last resort. A label repeated in the same
     * call is handled once.
     *
     * @param array|string $tags          An array of tag or a string coma separated of tag
     * @param array        $entitiesReady Entities from the EntityManager which are persisted but not yet flushed
     *                                    It is mostly to fix duplicate tag on import @see http://stackoverflow.com/a/7879164/569101
     *
     * @return Tag[] The tags which have been added to the entry by this call
     */
    public function assignTagsToEntry(Entry $entry, $tags, array $entitiesReady = [])
    {
        $tagsEntities = [];

        if (!\is_array($tags)) {
            $tags = explode(',', $tags);
        }

        // keeps only Tag entity from the "not yet flushed entities"
        $tagsNotYetFlushed = [];
        foreach ($entitiesReady as $entity) {
            if ($entity instanceof Tag) {
                $tagsNotYetFlushed[$entity->getLabel()] = $entity;
            }
        }

        $handledLabels = [];

        foreach ($tags as $label) {
            $label = trim(mb_convert_case($label, \MB_CASE_LOWER));

            // avoid empty tag
            if ('' === $label) {
                continue;
            }

            // a label given twice would otherwise create two new Tag entities for the same label
            if (isset($handledLabels[$label])) {
                continue;
            }
            $handledLabels[$label] = true;

            if (isset($tagsNotYetFlushed[$label])) {
                $tagEntity = $tagsNotYetFlushed[$label];
            } else {
                $tagEntity = $this->tagRepository->findOneByLabel($label);

                if (null === $tagEntity) {
                    $tagEntity = new Tag();
                    $tagEntity->setLabel($label);
                }
            }

            // only add the tag on the entry if the relation doesn't exist
            if (false === $entry->getTags()->contains($tagEntity)) {
                $entry->addTag($tagEntity);
                $tagsEntities[] = $tagEntity;
            }
        }

        return $tagsEntities;
    }
}
|
22
src/Helper/UrlHasher.php
Normal file
22
src/Helper/UrlHasher.php
Normal file
|
@ -0,0 +1,22 @@
|
|||
<?php
|
||||
|
||||
namespace Wallabag\CoreBundle\Helper;
|
||||
|
||||
/**
 * Hash URLs for privacy and performance.
 */
class UrlHasher
{
    /**
     * Hash the given url, once url-decoded, using the given algorithm.
     * Hashed url are faster to be retrieved in the database than the real url.
     *
     * @param string $algorithm Any algorithm name accepted by hash()
     *
     * @return string
     */
    public static function hashUrl(string $url, $algorithm = 'sha1')
    {
        // the url is decoded before being hashed
        $decodedUrl = urldecode($url);

        return hash($algorithm, $decodedUrl);
    }
}
|
Loading…
Add table
Add a link
Reference in a new issue