1
0
Fork 0
mirror of https://github.com/wallabag/wallabag.git synced 2025-08-26 18:21:02 +00:00

Move source files directly under src/ directory

This commit is contained in:
Yassine Guedidi 2024-02-19 00:39:48 +01:00
parent 804261bc26
commit a37b385c23
190 changed files with 19 additions and 21 deletions

410
src/Helper/ContentProxy.php Normal file
View file

@ -0,0 +1,410 @@
<?php
namespace Wallabag\CoreBundle\Helper;
use Graby\Graby;
use Psr\Log\LoggerInterface;
use Symfony\Component\Mime\MimeTypes;
use Symfony\Component\Validator\Constraints\Locale as LocaleConstraint;
use Symfony\Component\Validator\Constraints\Url as UrlConstraint;
use Symfony\Component\Validator\Validator\ValidatorInterface;
use Wallabag\CoreBundle\Entity\Entry;
use Wallabag\CoreBundle\Tools\Utils;
/**
* This kind of proxy class takes care of getting the content from an url
* and updates the entry with what it found.
*/
class ContentProxy
{
protected $graby;
protected $tagger;
protected $ignoreOriginProcessor;
protected $validator;
protected $logger;
protected $mimeTypes;
protected $fetchingErrorMessage;
protected $eventDispatcher;
protected $storeArticleHeaders;
/**
 * Wire the collaborators used to fetch, validate and tag entries.
 *
 * @param Graby                          $graby                 Fetches and cleans up the remote content
 * @param RuleBasedTagger                $tagger                Applied to the entry once it has been filled
 * @param RuleBasedIgnoreOriginProcessor $ignoreOriginProcessor Consulted when the content url differs from the entry url
 * @param ValidatorInterface             $validator             Validates language and preview picture values
 * @param LoggerInterface                $logger                Receives validation and tagging problems
 * @param string                         $fetchingErrorMessage  Html used when no content could be retrieved
 * @param bool                           $storeArticleHeaders   Kept on the instance; not read by the code visible here
 */
public function __construct(
    Graby $graby,
    RuleBasedTagger $tagger,
    RuleBasedIgnoreOriginProcessor $ignoreOriginProcessor,
    ValidatorInterface $validator,
    LoggerInterface $logger,
    $fetchingErrorMessage,
    $storeArticleHeaders = false
) {
    // injected services
    $this->graby = $graby;
    $this->tagger = $tagger;
    $this->ignoreOriginProcessor = $ignoreOriginProcessor;
    $this->validator = $validator;
    $this->logger = $logger;

    // plain settings
    $this->fetchingErrorMessage = $fetchingErrorMessage;
    $this->storeArticleHeaders = $storeArticleHeaders;

    // built locally rather than injected
    $this->mimeTypes = new MimeTypes();
}
/**
 * Fill an entry from content that is either provided by the caller or fetched with Graby.
 *
 * @param Entry  $entry                Entry to update
 * @param string $url                  Url of the content
 * @param array  $content              Imported content; it needs AT LEAST non-empty title, html and url keys to skip fetching from the url
 * @param bool   $disableContentUpdate Whether to skip trying to fetch content using Graby
 */
public function updateEntry(Entry $entry, $url, array $content = [], $disableContentUpdate = false)
{
    $this->graby->toggleImgNoReferrer(true);

    if (!empty($content['html'])) {
        $content['html'] = $this->graby->cleanupHtml($content['html'], $url);
    }

    $isUsable = !empty($content) && false !== $this->validateContent($content);

    if (!$isUsable && false === $disableContentUpdate) {
        $fetched = $this->graby->fetchContent($url);
        $contentType = $fetched['headers']['content-type'] ?? '';
        $fetched['title'] = $this->sanitizeContentTitle($fetched['title'], $contentType);

        // Imported information lives in $content: when fetching went bad we keep it
        // instead of overriding it with the error page.
        if (empty($content) || $this->fetchingErrorMessage !== $fetched['html']) {
            $content = $fetched;
        }
    }

    // Always keep a url, even on error, so the entry can be refetched in the future.
    if (empty($content['url'])) {
        $content['url'] = $url;
    }

    // The entry url is empty in at least one case (tests); fall back to the url
    // given to this call. Other situations where it is empty are unknown.
    if (empty($entry->getUrl()) && !empty($url)) {
        $entry->setUrl($url);
    }

    $entry->setGivenUrl($url);

    $this->stockEntry($entry, $content);
}
/**
 * Save the language on the entry once the Symfony validator accepts it as a locale.
 * A rejected value is only logged as a warning; the entry is left untouched.
 *
 * @param string $value Language to validate and save
 */
public function updateLanguage(Entry $entry, $value)
{
    // Some languages are given as fr-FR or es-ES:
    // swapping "-" for "_" might increase language support.
    $locale = str_replace('-', '_', $value);

    $violations = $this->validator->validate($locale, new LocaleConstraint(['canonicalize' => true]));

    if (\count($violations) > 0) {
        $this->logger->warning('Language validation failed. ' . (string) $violations);

        return;
    }

    $entry->setLanguage($locale);
}
/**
 * Save the preview picture on the entry once the Symfony validator accepts it as a url.
 * A rejected value is only logged as a warning; the entry is left untouched.
 *
 * @param string $value URL to validate and save
 */
public function updatePreviewPicture(Entry $entry, $value)
{
    $violations = $this->validator->validate($value, new UrlConstraint());

    if (\count($violations) > 0) {
        $this->logger->warning('PreviewPicture validation failed. ' . (string) $violations);

        return;
    }

    $entry->setPreviewPicture($value);
}
/**
 * Set the publication date of the entry.
 *
 * Integer-like values are read as unix timestamps, \DateTime instances are used
 * as is and anything else is handed to the \DateTime constructor. A value that
 * cannot be turned into a date is logged as a warning and otherwise ignored.
 *
 * @param string $value Date to validate and save
 */
public function updatePublishedAt(Entry $entry, $value)
{
    // A timestamp needs the "@" prefix to be understood by \DateTime.
    $looksLikeTimestamp = false !== filter_var($value, \FILTER_VALIDATE_INT);
    $date = $looksLikeTimestamp ? '@' . $value : $value;

    try {
        // Building the \DateTime stays inside the try: an unparsable string throws.
        $publishedAt = $date instanceof \DateTime ? $date : new \DateTime($date);

        $entry->setPublishedAt($publishedAt);
    } catch (\Exception $e) {
        $this->logger->warning('Error while defining date', ['e' => $e, 'url' => $entry->getUrl(), 'date' => $value]);
    }
}
/**
 * Extract the host of the entry url and store it as the entry domain name.
 *
 * Only a `false` result from parse_url() is rejected; any other value it
 * returns is passed to setDomainName().
 */
public function setEntryDomainName(Entry $entry)
{
    $host = parse_url($entry->getUrl(), \PHP_URL_HOST);

    if (false === $host) {
        return;
    }

    $entry->setDomainName($host);
}
/**
 * Helper to set a default title using:
 *   - url basename, if applicable
 *   - hostname.
 *
 * Urls without a path (e.g. "http://example.com") and urls that parse_url()
 * cannot parse are handled without raising "undefined array key" warnings.
 * The resulting title is unchanged: the host when the basename is empty,
 * and null when neither is available.
 */
public function setDefaultEntryTitle(Entry $entry)
{
    $urlParts = parse_url((string) $entry->getUrl());

    // parse_url() returns false on seriously malformed urls
    if (!\is_array($urlParts)) {
        $urlParts = [];
    }

    // the 'path' key is only present when the url actually has a path
    $title = pathinfo($urlParts['path'] ?? '', \PATHINFO_BASENAME);

    if (empty($title)) {
        $title = $urlParts['host'] ?? null;
    }

    $entry->setTitle($title);
}
/**
 * Clean the title of the fetched content: fix the character encoding of PDF
 * titles, then drop any invalid UTF-8 character.
 *
 * @param string $title
 * @param string $contentType Only the exact value "application/pdf" triggers the encoding conversion
 *
 * @return string
 */
private function sanitizeContentTitle($title, $contentType)
{
    $isPdf = 'application/pdf' === $contentType;
    $decodedTitle = $isPdf ? $this->convertPdfEncodingToUTF8($title) : $title;

    return $this->sanitizeUTF8Text($decodedTitle);
}
/**
 * A title coming from a PDF is quite likely not encoded in UTF-8. This method
 * tries a few encodings in turn and converts the title from the first one that
 * validates; when none does, the title is returned as is.
 *
 * @return string (may still contain invalid UTF-8 characters)
 */
private function convertPdfEncodingToUTF8($title)
{
    // UTF-8 comes first because its presence/absence is the easiest to detect
    $candidates = ['UTF-8', 'UTF-16BE', 'WINDOWS-1252'];
    $converted = $title;

    foreach ($candidates as $candidate) {
        if (!mb_check_encoding($title, $candidate)) {
            continue;
        }

        $converted = mb_convert_encoding($title, 'UTF-8', $candidate);

        break;
    }

    return $converted;
}
/**
 * Remove invalid UTF-8 characters from the given string.
 *
 * Valid text is returned untouched. Otherwise the invalid sequences are
 * dropped by converting with the mbstring substitute character set to "none".
 * That setting is process-wide, so the previous value is restored afterwards
 * to avoid changing later conversions elsewhere.
 *
 * @param string $rawText
 *
 * @return string
 */
private function sanitizeUTF8Text($rawText)
{
    if (mb_check_encoding($rawText, 'UTF-8')) {
        return $rawText;
    }

    // called without argument, mb_substitute_character() returns the current setting
    $previousSubstitute = mb_substitute_character();
    mb_substitute_character('none');

    try {
        return mb_convert_encoding($rawText, 'UTF-8', 'UTF-8');
    } finally {
        mb_substitute_character($previousSubstitute);
    }
}
/**
* Stock entry with fetched or imported content.
*
* Copies what is available in $content onto the entry (title, html, http
* status, authors, headers, date, language, mime type), picks a preview
* picture and finally runs the rule based tagger.
* When no html is available the fetching error message is stored instead
* (followed by the description, if any) and the entry is flagged as not parsed.
*
* @param Entry $entry Entry to stock
* @param array $content Array with at least title, url & html
*/
private function stockEntry(Entry $entry, array $content)
{
// may change the entry url / origin url when the content url differs from the entry url
$this->updateOriginUrl($entry, $content['url']);
// run after updateOriginUrl() so the domain name is computed from the entry url it left in place
$this->setEntryDomainName($entry);
if (!empty($content['title'])) {
$entry->setTitle($content['title']);
}
if (empty($content['html'])) {
// nothing usable: store the error message and remember the entry could not be parsed
$content['html'] = $this->fetchingErrorMessage;
$entry->setNotParsed(true);
if (!empty($content['description'])) {
$content['html'] .= '<p><i>But we found a short description: </i></p>';
$content['html'] .= $content['description'];
}
}
$entry->setContent($content['html']);
// the reading time is computed from the html actually stored (error message included)
$entry->setReadingTime(Utils::getReadingTime($content['html']));
if (!empty($content['status'])) {
$entry->setHttpStatus($content['status']);
}
if (!empty($content['authors']) && \is_array($content['authors'])) {
$entry->setPublishedBy($content['authors']);
}
// NOTE(review): headers are stored whenever present; $this->storeArticleHeaders is not consulted here - confirm this is intended
if (!empty($content['headers'])) {
$entry->setHeaders($content['headers']);
}
if (!empty($content['date'])) {
$this->updatePublishedAt($entry, $content['date']);
}
if (!empty($content['language'])) {
$this->updateLanguage($entry, $content['language']);
}
// preview picture precedence: the content url itself when the content is an image,
// then $content['image'], then the first image found in the html
$previewPictureUrl = '';
if (!empty($content['image'])) {
$previewPictureUrl = $content['image'];
}
// if content is an image, define it as a preview too
if (!empty($content['headers']['content-type']) && \in_array(current($this->mimeTypes->getExtensions($content['headers']['content-type'])), ['jpeg', 'jpg', 'gif', 'png'], true)) {
$previewPictureUrl = $content['url'];
} elseif (empty($previewPictureUrl)) {
$this->logger->debug('Extracting images from content to provide a default preview picture');
$imagesUrls = DownloadImages::extractImagesUrlsFromHtml($content['html']);
$this->logger->debug(\count($imagesUrls) . ' pictures found');
if (!empty($imagesUrls)) {
$previewPictureUrl = $imagesUrls[0];
}
}
if (!empty($content['headers']['content-type'])) {
$entry->setMimetype($content['headers']['content-type']);
}
if (!empty($previewPictureUrl)) {
// the url is validated before being saved, see updatePreviewPicture()
$this->updatePreviewPicture($entry, $previewPictureUrl);
}
// a tagging failure is logged and must not prevent the entry from being stocked
try {
$this->tagger->tag($entry);
} catch (\Exception $e) {
$this->logger->error('Error while trying to automatically tag an entry.', [
'entry_url' => $content['url'],
'error_msg' => $e->getMessage(),
]);
}
}
/**
 * Update the entry url (and possibly its origin_url) when the content url
 * differs from the entry url, e.g. after a redirection.
 * The origin_url is only set when it is empty and the ignore-origin processor
 * did not match.
 *
 * @param string $url Url of the content
 *
 * @return false|null false when nothing had to be compared (empty or identical url)
 *                    or when the ignore-origin processor matched, null otherwise
 */
private function updateOriginUrl(Entry $entry, $url)
{
    if (empty($url) || $entry->getUrl() === $url) {
        return false;
    }

    $parsed_entry_url = parse_url($entry->getUrl());
    $parsed_content_url = parse_url($url);

    /*
     * The following part computes the list of part changes between two
     * parse_url arrays.
     *
     * As array_diff_assoc only computes changes to go from the left array
     * to the right one, we make two different arrays to have both
     * directions. We merge these two arrays and sort keys before passing
     * the result to the switch.
     *
     * The resulting array gives us all changing parts between the two
     * urls: scheme, host, path, query and/or fragment.
     */
    $diff_ec = array_diff_assoc($parsed_entry_url, $parsed_content_url);
    $diff_ce = array_diff_assoc($parsed_content_url, $parsed_entry_url);

    $diff = array_merge($diff_ec, $diff_ce);
    $diff_keys = array_keys($diff);
    sort($diff_keys);

    if ($this->ignoreOriginProcessor->process($entry)) {
        $entry->setUrl($url);

        return false;
    }

    /*
     * This switch case lets us apply different behaviors according to
     * changing parts of urls.
     *
     * As $diff_keys is an array, we provide arrays as cases. ['path'] means
     * 'only the path is different between the two urls' whereas
     * ['fragment', 'query'] means 'only fragment and query string parts are
     * different between the two urls'.
     *
     * Note that values in $diff_keys are sorted.
     */
    switch ($diff_keys) {
        case ['path']:
            // parse_url() leaves 'path' out for urls such as "http://example.com",
            // which is exactly the "trailing slash added" redirection handled here
            $entryPath = $parsed_entry_url['path'] ?? '';
            $contentPath = $parsed_content_url['path'] ?? '';

            if (($entryPath . '/' === $contentPath) // diff is trailing slash, we only replace the url of the entry
                || ($url === urldecode($entry->getUrl()))) { // we update entry url if new url is a decoded version of it, see EntryRepository#findByUrlAndUserId
                $entry->setUrl($url);
            }
            break;
        case ['scheme']:
            $entry->setUrl($url);
            break;
        case ['fragment']:
            // noop
            break;
        default:
            if (empty($entry->getOriginUrl())) {
                $entry->setOriginUrl($entry->getUrl());
            }
            $entry->setUrl($url);
            break;
    }
}
/**
 * Check that the given content carries a non-empty title, html and url.
 *
 * @return bool true if valid otherwise false
 */
private function validateContent(array $content)
{
    foreach (['title', 'html', 'url'] as $requiredKey) {
        if (empty($content[$requiredKey])) {
            return false;
        }
    }

    return true;
}
}

View file

@ -0,0 +1,86 @@
<?php
namespace Wallabag\CoreBundle\Helper;
use Defuse\Crypto\Crypto;
use Defuse\Crypto\Exception\WrongKeyOrModifiedCiphertextException;
use Defuse\Crypto\Key;
use Psr\Log\LoggerInterface;
/**
 * This is a proxy to crypt and decrypt password used by SiteCredential entity.
 * BTW, it might be reused for something else.
 */
class CryptoProxy
{
    private $logger;
    private $encryptionKey;

    /**
     * Read the encryption key from the given file; when the file does not
     * exist yet, a new random key is generated and written to it first
     * (file mode 0600).
     *
     * @param string $encryptionKeyPath Path of the file holding the ASCII-safe key
     */
    public function __construct($encryptionKeyPath, LoggerInterface $logger)
    {
        $this->logger = $logger;

        if (!file_exists($encryptionKeyPath)) {
            $key = Key::createNewRandomKey();

            file_put_contents($encryptionKeyPath, $key->saveToAsciiSafeString());

            chmod($encryptionKeyPath, 0600);
        }

        $this->encryptionKey = file_get_contents($encryptionKeyPath);
    }

    /**
     * Ensure the given value will be crypted.
     *
     * @param string $secretValue Secret value to crypt
     *
     * @return string
     */
    public function crypt($secretValue)
    {
        $this->logger->debug('Crypto: crypting value: ' . $this->mask($secretValue));

        return Crypto::encrypt($secretValue, $this->loadKey());
    }

    /**
     * Ensure the given crypted value will be decrypted.
     *
     * @param string $cryptedValue The value to be decrypted
     *
     * @throws \RuntimeException when the key is wrong or the ciphertext was modified;
     *                           the original exception is available through getPrevious()
     *
     * @return string
     */
    public function decrypt($cryptedValue)
    {
        $this->logger->debug('Crypto: decrypting value: ' . $this->mask($cryptedValue));

        try {
            return Crypto::decrypt($cryptedValue, $this->loadKey());
        } catch (WrongKeyOrModifiedCiphertextException $e) {
            // chain the original exception so its stack trace is not lost
            throw new \RuntimeException('Decrypt fail: ' . $e->getMessage(), 0, $e);
        }
    }

    /**
     * Load the private key.
     *
     * @return Key
     */
    private function loadKey()
    {
        return Key::loadFromAsciiSafeString($this->encryptionKey);
    }

    /**
     * Keep first and last character (byte-wise) and put some stars in between.
     * A one character value therefore appears twice in the result.
     *
     * NOTE(review): the first and last characters of the secret end up in the
     * debug log - confirm this is acceptable.
     *
     * @param string $value Value to mask
     *
     * @return string
     */
    private function mask($value)
    {
        return \strlen($value) > 0 ? $value[0] . '*****' . $value[\strlen($value) - 1] : 'Empty value';
    }
}

View file

@ -0,0 +1,394 @@
<?php
namespace Wallabag\CoreBundle\Helper;
use enshrined\svgSanitize\Sanitizer;
use GuzzleHttp\Psr7\Uri;
use GuzzleHttp\Psr7\UriResolver;
use Psr\Log\LoggerInterface;
use Symfony\Component\DomCrawler\Crawler;
use Symfony\Component\Finder\Finder;
use Symfony\Component\HttpFoundation\Request;
use Symfony\Component\Mime\MimeTypes;
use Symfony\Contracts\HttpClient\HttpClientInterface;
use Symfony\Contracts\HttpClient\ResponseInterface;
class DownloadImages
{
public const REGENERATE_PICTURES_QUALITY = 80;
private $client;
private $baseFolder;
private $logger;
private $mimeTypes;
private $wallabagUrl;
/**
 * Store the collaborators and make sure the base folder exists.
 *
 * @param HttpClientInterface $downloadImagesClient Client used to download the images
 * @param string              $baseFolder           Folder under which every image is saved
 * @param string              $wallabagUrl          Url of the instance; trailing slashes are removed
 */
public function __construct(
    HttpClientInterface $downloadImagesClient,
    $baseFolder,
    $wallabagUrl,
    LoggerInterface $logger
) {
    $this->logger = $logger;
    $this->client = $downloadImagesClient;
    $this->mimeTypes = new MimeTypes();
    $this->wallabagUrl = rtrim($wallabagUrl, '/');
    $this->baseFolder = $baseFolder;

    // needs $this->baseFolder, hence called last
    $this->setFolder();
}
/**
 * Give access to the base folder received by the constructor.
 *
 * @return string
 */
public function getBaseFolder()
{
    $baseFolder = $this->baseFolder;

    return $baseFolder;
}
/**
 * Collect the urls of every <img> of the given html: the `src` attributes
 * first, then the urls found in `srcset` attributes, without duplicates.
 *
 * @param string $html
 *
 * @return string[]
 */
public static function extractImagesUrlsFromHtml($html)
{
    $images = (new Crawler($html))->filterXpath('//img');

    $srcUrls = $images->extract(['src']);
    $srcsetUrls = self::getSrcsetUrls($images);

    return array_unique(array_merge($srcUrls, $srcsetUrls));
}
/**
 * Download every image referenced by the html, save it locally and point the
 * html to the local copy. Images that cannot be processed keep their original url.
 *
 * @param int    $entryId ID of the entry
 * @param string $html
 * @param string $url     Used as a base path for relative image and folder
 *
 * @return string The updated html
 */
public function processHtml($entryId, $html, $url)
{
    $imagesUrls = self::extractImagesUrlsFromHtml($html);

    // ensure images aren't overlapping
    arsort($imagesUrls);

    $relativePath = $this->getRelativePath($entryId);

    foreach ($imagesUrls as $image) {
        // download and save the image to the folder
        $newImage = $this->processSingleImage($entryId, $image, $url, $relativePath);

        if (false !== $newImage) {
            $html = str_replace($image, $newImage, $html);

            // an image url containing "&" that can't be found in the html
            // might be written there as &amp; or with its numeric entity
            if (str_contains($image, '&') && false === stripos($html, $image)) {
                $encodedVariants = [
                    str_replace('&', '&amp;', $image),
                    str_replace('&', '&#038;', $image),
                ];

                $html = str_replace($encodedVariants, $newImage, $html);
            }
        }
    }

    return $html;
}
/**
 * Process a single image:
 *   - retrieve it
 *   - re-save it (for security reason)
 *   - return the new local path.
 *
 * An image whose response is rejected by getExtensionFromResponse() (status
 * other than 200, extension that is not allowed) is skipped: nothing is
 * written and false is returned.
 *
 * @param int    $entryId      ID of the entry
 * @param string $imagePath    Path to the image to retrieve
 * @param string $url          Url from where the image was found
 * @param string $relativePath Relative local path where the image is saved
 *
 * @return string|false Url to access the image from the web, false when the image is skipped
 */
public function processSingleImage($entryId, $imagePath, $url, $relativePath = null)
{
    if (null === $imagePath) {
        return false;
    }

    if (null === $relativePath) {
        $relativePath = $this->getRelativePath($entryId);
    }

    $this->logger->debug('DownloadImages: working on image: ' . $imagePath);

    $folderPath = $this->baseFolder . '/' . $relativePath;

    // build image path
    $absolutePath = $this->getAbsoluteLink($url, $imagePath);
    if (false === $absolutePath) {
        $this->logger->error('DownloadImages: Can not determine the absolute path for that image, skipping.');

        return false;
    }

    try {
        $res = $this->client->request(Request::METHOD_GET, $absolutePath);
        // Reading the status, headers or body may be what actually triggers the
        // transfer, so the extension detection is guarded by the same try.
        $ext = $this->getExtensionFromResponse($res, $imagePath);
    } catch (\Exception $e) {
        $this->logger->error('DownloadImages: Can not retrieve image, skipping.', ['exception' => $e]);

        return false;
    }

    // the extension (not the response) tells whether the image was accepted
    if (false === $ext) {
        return false;
    }

    $hashImage = hash('crc32', $absolutePath);
    $localPath = $folderPath . '/' . $hashImage . '.' . $ext;
    $urlPath = $this->wallabagUrl . '/assets/images/' . $relativePath . '/' . $hashImage . '.' . $ext;

    // custom case for SVG (because GD doesn't support SVG)
    if ('svg' === $ext) {
        try {
            $sanitizer = new Sanitizer();
            $sanitizer->minify(true);
            $sanitizer->removeRemoteReferences(true);
            $cleanSVG = $sanitizer->sanitize($res->getContent());

            // add an extra validation by checking about `<svg `
            if (false === $cleanSVG || !str_contains($cleanSVG, '<svg ')) {
                $this->logger->error('DownloadImages: Bad SVG given', ['path' => $imagePath]);

                return false;
            }

            file_put_contents($localPath, $cleanSVG);

            return $urlPath;
        } catch (\Exception $e) {
            $this->logger->error('DownloadImages: Error while sanitize SVG', ['path' => $imagePath, 'message' => $e->getMessage()]);

            return false;
        }
    }

    try {
        $im = imagecreatefromstring($res->getContent());
    } catch (\Exception $e) {
        $im = false;
    }

    if (false === $im) {
        $this->logger->error('DownloadImages: Error while regenerating image', ['path' => $localPath]);

        return false;
    }

    switch ($ext) {
        case 'gif':
            // use Imagick if available to keep GIF animation
            if (class_exists(\Imagick::class)) {
                try {
                    $imagick = new \Imagick();
                    $imagick->readImageBlob($res->getContent());
                    $imagick->setImageFormat('gif');
                    $imagick->writeImages($localPath, true);
                } catch (\Exception $e) {
                    // if Imagick fail, fallback to the default solution
                    imagegif($im, $localPath);
                }
            } else {
                imagegif($im, $localPath);
            }

            $this->logger->debug('DownloadImages: Re-creating gif');
            break;
        case 'jpeg':
        case 'jpg':
            imagejpeg($im, $localPath, self::REGENERATE_PICTURES_QUALITY);
            $this->logger->debug('DownloadImages: Re-creating jpg');
            break;
        case 'png':
            imagealphablending($im, false);
            imagesavealpha($im, true);
            // the percentage is mapped onto the 0-9 scale used by imagepng(), which expects an int
            imagepng($im, $localPath, (int) ceil(self::REGENERATE_PICTURES_QUALITY / 100 * 9));
            $this->logger->debug('DownloadImages: Re-creating png');
            break;
        case 'webp':
            imagewebp($im, $localPath, self::REGENERATE_PICTURES_QUALITY);
            $this->logger->debug('DownloadImages: Re-creating webp');
            break;
    }

    imagedestroy($im);

    return $urlPath;
}
/**
 * Delete every (non dot) file saved for the given entry id, then its folder.
 * Deletion failures are silenced.
 *
 * @param int $entryId ID of the entry
 */
public function removeImages($entryId)
{
    $folderPath = $this->baseFolder . '/' . $this->getRelativePath($entryId);

    $files = new Finder();
    $files->files()->ignoreDotFiles(true)->in($folderPath);

    foreach ($files as $file) {
        @unlink($file->getRealPath());
    }

    @rmdir($folderPath);
}
/**
 * Build the folder (relative to the base folder) where the images of an entry
 * are saved. It is derived from the crc32 hash of the entry id:
 * "<1st char>/<2nd char>/<hash>".
 *
 * @param int  $entryId      ID of the entry
 * @param bool $createFolder Should we create the folder for the given id?
 *
 * @return string
 */
public function getRelativePath($entryId, $createFolder = true)
{
    $hashId = hash('crc32', $entryId);

    $relativePath = implode('/', [$hashId[0], $hashId[1], $hashId]);
    $folderPath = $this->baseFolder . '/' . $relativePath;

    $isMissing = !file_exists($folderPath);
    if ($isMissing && $createFolder) {
        mkdir($folderPath, 0777, true);
    }

    $this->logger->debug('DownloadImages: Folder used for that Entry id', ['folder' => $folderPath, 'entryId' => $entryId]);

    return $relativePath;
}
/**
 * Get images urls from the srcset image attribute.
 *
 * Each candidate of a srcset is a url followed by a width descriptor ("480w")
 * or a pixel density descriptor, which may be decimal ("2x", "1.5x").
 * Urls of images listed later in the document come first in the result.
 *
 * @return array An array of urls
 */
private static function getSrcsetUrls(Crawler $imagesCrawler)
{
    // Couldn't start with " OR ' OR a white space
    // Could be one or more white space
    // Must be digits, optionally with a decimal part, followed by w OR x
    $pattern = "/(?:[^\"'\s]+\s*(?:\d+(?:\.\d+)?[wx])+)/";

    $urls = [];

    foreach ($imagesCrawler as $node) {
        \assert($node instanceof \DOMElement);

        $srcsetAttribute = $node->getAttribute('srcset');
        if ('' === $srcsetAttribute) {
            continue;
        }

        preg_match_all($pattern, $srcsetAttribute, $matches);

        // the pattern has no capturing group: index 0 holds every full match
        $srcsetUrls = array_map(static function ($candidate) {
            // drop the descriptor, keep the url
            return trim(explode(' ', $candidate, 2)[0]);
        }, $matches[0]);

        $urls = array_merge($srcsetUrls, $urls);
    }

    return $urls;
}
/**
 * Make sure the base folder, where all images are going to be saved, exists.
 */
private function setFolder()
{
    if (file_exists($this->baseFolder)) {
        return;
    }

    // the folder doesn't exist yet: attempt to create it (with its parents)
    mkdir($this->baseFolder, 0755, true);
}
/**
 * Turn $url into an absolute url, resolving it against $base when needed.
 *
 * @see Graby->makeAbsoluteStr
 *
 * @param string $base Base url
 * @param string $url  Url to make absolute
 *
 * @return false|string false when the base has no scheme or no authority
 */
private function getAbsoluteLink($base, $url)
{
    $isAlreadyAbsolute = 1 === preg_match('!^https?://!i', $url);
    if ($isAlreadyAbsolute) {
        return $url;
    }

    $baseUri = new Uri($base);

    // a base without scheme & host can't be used to resolve anything
    $isUsableBase = '' !== $baseUri->getAuthority() && '' !== $baseUri->getScheme();
    if (!$isUsableBase) {
        $this->logger->error('DownloadImages: Can not make an absolute link', ['base' => $baseUri, 'url' => $url]);

        return false;
    }

    $resolved = UriResolver::resolve($baseUri, new Uri($url));

    return (string) $resolved;
}
/**
 * Find out and validate the extension of a downloaded image.
 *
 * The extension is first derived from the content-type header; when that gives
 * nothing, the first bytes of the body are compared with known signatures.
 * Only jpeg, jpg, gif, png, webp and svg are accepted.
 *
 * @param ResponseInterface $res       Http Response
 * @param string            $imagePath Path from the src image from the content (used for log only)
 *
 * @return string|false Extension name or false if validation failed
 */
private function getExtensionFromResponse(ResponseInterface $res, $imagePath)
{
    if (200 !== $res->getStatusCode()) {
        return false;
    }

    $contentType = $res->getHeaders()['content-type'] ?? [];
    $ext = current($this->mimeTypes->getExtensions(current($contentType)));

    $this->logger->debug('DownloadImages: Checking extension', ['ext' => $ext, 'header' => $contentType]);

    // ok header doesn't have the extension, try a different way
    if (empty($ext)) {
        $signatures = [
            'jpeg' => "\xFF\xD8\xFF",
            'gif' => 'GIF',
            'png' => "\x89\x50\x4e\x47\x0d\x0a",
            'webp' => "\x52\x49\x46\x46",
        ];
        $firstBytes = substr($res->getContent(), 0, 8);

        foreach ($signatures as $candidate => $signature) {
            if (!str_starts_with($firstBytes, $signature)) {
                continue;
            }

            $ext = $candidate;

            break;
        }

        $this->logger->debug('DownloadImages: Checking extension (alternative)', ['ext' => $ext]);
    }

    $allowedExtensions = ['jpeg', 'jpg', 'gif', 'png', 'webp', 'svg'];
    if (\in_array($ext, $allowedExtensions, true)) {
        return $ext;
    }

    $this->logger->error('DownloadImages: Processed image with not allowed extension. Skipping: ' . $imagePath);

    return false;
}
}

View file

@ -0,0 +1,24 @@
<?php
namespace Wallabag\CoreBundle\Helper;
use Doctrine\ORM\Mapping as ORM;
/**
 * Trait to handle created & updated date of an Entity.
 *
 * The entity using it is expected to expose `createdAt` and `updatedAt` properties.
 */
trait EntityTimestampsTrait
{
    /**
     * Set the creation date once (while it is still null) and refresh the
     * update date on every call.
     *
     * @ORM\PrePersist
     * @ORM\PreUpdate
     */
    public function timestamps()
    {
        $isFirstCall = null === $this->createdAt;

        if ($isFirstCall) {
            $this->createdAt = new \DateTime();
        }

        $this->updatedAt = new \DateTime();
    }
}

View file

@ -0,0 +1,485 @@
<?php
namespace Wallabag\CoreBundle\Helper;
use Html2Text\Html2Text;
use JMS\Serializer\SerializationContext;
use JMS\Serializer\SerializerBuilder;
use PHPePub\Core\EPub;
use PHPePub\Core\Structure\OPF\DublinCore;
use Symfony\Component\HttpFoundation\Response;
use Symfony\Component\Security\Core\Authentication\Token\Storage\TokenStorageInterface;
use Symfony\Contracts\Translation\TranslatorInterface;
use Wallabag\CoreBundle\Entity\Entry;
use Wallabag\CoreBundle\Entity\User;
/**
* This class doesn't have unit test BUT it's fully covered by a functional test with ExportControllerTest.
*/
class EntriesExport
{
private $wallabagUrl;
private $logoPath;
private $translator;
private $tokenStorage;
private $title = '';
private $entries = [];
private $author = 'wallabag';
private $language = '';
/**
 * @param TranslatorInterface   $translator   Translator service
 * @param string                $wallabagUrl  Wallabag instance url
 * @param string                $logoPath     Path to the logo FROM THE BUNDLE SCOPE
 * @param TokenStorageInterface $tokenStorage Needed to retrieve the current user
 */
public function __construct(
    TranslatorInterface $translator,
    $wallabagUrl,
    $logoPath,
    TokenStorageInterface $tokenStorage
) {
    // services
    $this->tokenStorage = $tokenStorage;
    $this->translator = $translator;

    // settings
    $this->logoPath = $logoPath;
    $this->wallabagUrl = $wallabagUrl;
}
/**
 * Define the entries to export.
 *
 * When a single entry is given (anything that is not an array), its language
 * becomes the language of the export and it is wrapped in an array.
 *
 * @param array|Entry $entries An array of entries or one entry
 *
 * @return EntriesExport
 */
public function setEntries($entries)
{
    if (\is_array($entries)) {
        $this->entries = $entries;

        return $this;
    }

    $this->language = $entries->getLanguage();
    $this->entries = [$entries];

    return $this;
}
/**
 * Set the title of the export: "<method> articles" for a category, or the
 * title of the first entry when exporting a single entry.
 *
 * @param string $method Method to get articles
 *
 * @return EntriesExport
 */
public function updateTitle($method)
{
    $this->title = sprintf('%s articles', $method);

    if ('entry' !== $method) {
        return $this;
    }

    $this->title = $this->entries[0]->getTitle();

    return $this;
}
/**
 * Set the author of the export.
 *
 * A category export is credited to "Various authors". A single entry export
 * uses the publishers of the first entry, or its domain name if there are none.
 *
 * @param string $method Method to get articles
 *
 * @return EntriesExport
 */
public function updateAuthor($method)
{
    if ('entry' !== $method) {
        $this->author = 'Various authors';

        return $this;
    }

    $firstEntry = $this->entries[0];
    $this->author = $firstEntry->getDomainName();

    $publishers = $firstEntry->getPublishedBy();
    if (empty($publishers)) {
        return $this;
    }

    $this->author = implode(', ', $publishers);

    return $this;
}
/**
 * Export the entries in the given format by calling the matching
 * "produce<Format>" method of this class.
 *
 * @param string $format
 *
 * @throws \InvalidArgumentException when no producer exists for the format
 *
 * @return Response
 */
public function exportAs($format)
{
    $producer = 'produce' . ucfirst($format);

    if (!method_exists($this, $producer)) {
        throw new \InvalidArgumentException(sprintf('The format "%s" is not yet supported.', $format));
    }

    return $this->{$producer}();
}
/**
 * Give the entries serialized as json, without wrapping them in a Response.
 */
public function exportJsonData()
{
    $json = $this->prepareSerializingContent('json');

    return $json;
}
/**
 * Use PHPePub to dump a .epub file.
 *
 * Every entry becomes two chapters: a cover page with its metadata followed
 * by its content. A last "Notices" chapter closes the book.
 *
 * NOTE(review): titles and urls are inserted in the XHTML without escaping -
 * confirm they are safe at this point.
 *
 * @return Response
 */
private function produceEpub()
{
    $user = $this->tokenStorage->getToken() ? $this->tokenStorage->getToken()->getUser() : null;
    \assert($user instanceof User);

    /*
     * Start and End of the book
     */
    $content_start =
        "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"
        . "<html xmlns=\"http://www.w3.org/1999/xhtml\" xmlns:epub=\"http://www.idpf.org/2007/ops\">\n"
        . '<head>'
        . "<meta http-equiv=\"Default-Style\" content=\"text/html; charset=utf-8\" />\n"
        . "<title>wallabag articles book</title>\n"
        . "</head>\n"
        . "<body>\n";

    $bookEnd = "</body>\n</html>\n";

    $book = new EPub(EPub::BOOK_VERSION_EPUB3);

    /*
     * Book metadata
     */
    $book->setTitle($this->title);
    // EPub specification requires BCP47-compliant languages, thus we replace _ with -
    $book->setLanguage(str_replace('_', '-', $this->language));
    $book->setDescription('Some articles saved on my wallabag');

    $book->setAuthor($this->author, $this->author);

    // I hope this is a non-existent address :)
    $book->setPublisher('wallabag', 'wallabag');
    // Strictly not needed as the book date defaults to time().
    $book->setDate(time());
    $book->setSourceURL($this->wallabagUrl);

    $book->addDublinCoreMetadata(DublinCore::CONTRIBUTOR, 'PHP');
    $book->addDublinCoreMetadata(DublinCore::CONTRIBUTOR, 'wallabag');

    $entryIds = [];
    $entryCount = \count($this->entries);
    $i = 0;

    /*
     * Adding actual entries
     */

    // set tags as subjects
    foreach ($this->entries as $entry) {
        ++$i;

        /*
         * Front page
         * Set if there's only one entry in the given set
         */
        if (1 === $entryCount && null !== $entry->getPreviewPicture()) {
            $book->setCoverImage($entry->getPreviewPicture());
        }

        foreach ($entry->getTags() as $tag) {
            $book->setSubject($tag->getLabel());
        }
        $filename = sha1(sprintf('%s:%s', $entry->getUrl(), $entry->getTitle()));

        $publishedBy = $entry->getPublishedBy();
        $authors = $this->translator->trans('export.unknown');
        if (!empty($publishedBy)) {
            $authors = implode(',', $publishedBy);
        }

        $publishedAt = $entry->getPublishedAt();
        $publishedDate = $this->translator->trans('export.unknown');
        if (!empty($publishedAt)) {
            $publishedDate = $entry->getPublishedAt()->format('Y-m-d');
        }

        // the stored reading time is rescaled with the reading speed of the user
        $readingTime = round($entry->getReadingTime() / $user->getConfig()->getReadingSpeed() * 200);

        $titlepage = $content_start .
            '<h1>' . $entry->getTitle() . '</h1>' .
            '<dl>' .
            '<dt>' . $this->translator->trans('entry.view.published_by') . '</dt><dd>' . $authors . '</dd>' .
            '<dt>' . $this->translator->trans('entry.metadata.published_on') . '</dt><dd>' . $publishedDate . '</dd>' .
            '<dt>' . $this->translator->trans('entry.metadata.reading_time') . '</dt><dd>' . $this->translator->trans('entry.metadata.reading_time_minutes_short', ['%readingTime%' => $readingTime]) . '</dd>' .
            '<dt>' . $this->translator->trans('entry.metadata.added_on') . '</dt><dd>' . $entry->getCreatedAt()->format('Y-m-d') . '</dd>' .
            '<dt>' . $this->translator->trans('entry.metadata.address') . '</dt><dd><a href="' . $entry->getUrl() . '">' . $entry->getUrl() . '</a></dd>' .
            '</dl>' .
            $bookEnd;
        $book->addChapter("Entry {$i} of {$entryCount}", "{$filename}_cover.html", $titlepage, true, EPub::EXTERNAL_REF_ADD);
        $chapter = $content_start . $entry->getContent() . $bookEnd;

        $entryIds[] = $entry->getId();
        $book->addChapter($entry->getTitle(), "{$filename}.html", $chapter, true, EPub::EXTERNAL_REF_ADD);
    }

    $book->addChapter('Notices', 'Cover2.html', $content_start . $this->getExportInformation('PHPePub') . $bookEnd);

    // Could also be the ISBN number, preferred for published books, or a UUID.
    $hash = sha1(sprintf('%s:%s', $this->wallabagUrl, implode(',', $entryIds)));
    $book->setIdentifier(sprintf('urn:wallabag:%s', $hash), EPub::IDENTIFIER_URI);

    // built with the constructor: the static Response::create() factory is deprecated
    return new Response(
        $book->getBook(),
        200,
        [
            'Content-Description' => 'File Transfer',
            'Content-type' => 'application/epub+zip',
            'Content-Disposition' => 'attachment; filename="' . $this->getSanitizedFilename() . '.epub"',
            'Content-Transfer-Encoding' => 'binary',
        ]
    );
}
/**
 * Use TCPDF to dump a .pdf file.
 *
 * Every entry takes two pages: one with its metadata followed by one with
 * its content. A last page holds the export information.
 *
 * NOTE(review): SetKeywords() is called once per tag; if it replaces rather
 * than appends, only the last tag is kept - confirm against TCPDF.
 *
 * @return Response
 */
private function producePdf()
{
    $user = $this->tokenStorage->getToken() ? $this->tokenStorage->getToken()->getUser() : null;
    \assert($user instanceof User);

    $pdf = new \TCPDF(PDF_PAGE_ORIENTATION, PDF_UNIT, PDF_PAGE_FORMAT, true, 'UTF-8', false);

    /*
     * Book metadata
     */
    $pdf->SetCreator(PDF_CREATOR);
    $pdf->SetAuthor($this->author);
    $pdf->SetTitle($this->title);
    $pdf->SetSubject('Articles via wallabag');
    $pdf->SetKeywords('wallabag');

    /*
     * Adding actual entries
     */
    foreach ($this->entries as $entry) {
        foreach ($entry->getTags() as $tag) {
            $pdf->SetKeywords($tag->getLabel());
        }

        $publishedBy = $entry->getPublishedBy();
        $authors = $this->translator->trans('export.unknown');
        if (!empty($publishedBy)) {
            $authors = implode(',', $publishedBy);
        }

        // rounded, like in the EPUB export, so that a whole number of minutes is displayed
        $readingTime = round($entry->getReadingTime() / $user->getConfig()->getReadingSpeed() * 200);

        $pdf->addPage();
        $html = '<h1>' . $entry->getTitle() . '</h1>' .
            '<dl>' .
            '<dt>' . $this->translator->trans('entry.view.published_by') . '</dt><dd>' . $authors . '</dd>' .
            '<dt>' . $this->translator->trans('entry.metadata.reading_time') . '</dt><dd>' . $this->translator->trans('entry.metadata.reading_time_minutes_short', ['%readingTime%' => $readingTime]) . '</dd>' .
            '<dt>' . $this->translator->trans('entry.metadata.added_on') . '</dt><dd>' . $entry->getCreatedAt()->format('Y-m-d') . '</dd>' .
            '<dt>' . $this->translator->trans('entry.metadata.address') . '</dt><dd><a href="' . $entry->getUrl() . '">' . $entry->getUrl() . '</a></dd>' .
            '</dl>';
        $pdf->writeHTMLCell(0, 0, '', '', $html, 0, 1, 0, true, '', true);

        $pdf->AddPage();
        $html = '<h1>' . $entry->getTitle() . '</h1>';
        $html .= $entry->getContent();

        $pdf->writeHTMLCell(0, 0, '', '', $html, 0, 1, 0, true, '', true);
    }

    /*
     * Last page
     */
    $pdf->AddPage();
    $html = $this->getExportInformation('tcpdf');

    $pdf->writeHTMLCell(0, 0, '', '', $html, 0, 1, 0, true, '', true);

    // set image scale factor
    $pdf->setImageScale(PDF_IMAGE_SCALE_RATIO);

    // built with the constructor: the static Response::create() factory is deprecated
    return new Response(
        $pdf->Output('', 'S'),
        200,
        [
            'Content-Description' => 'File Transfer',
            'Content-type' => 'application/pdf',
            'Content-Disposition' => 'attachment; filename="' . $this->getSanitizedFilename() . '.pdf"',
            'Content-Transfer-Encoding' => 'binary',
        ]
    );
}
/**
 * Dump a CSV file (inspired from CsvFileDumper).
 *
 * Rows are written to an in-memory stream with ";" as delimiter and '"' as
 * enclosure; the first row is the column header.
 *
 * @return Response CSV document sent as an attachment
 */
private function produceCsv()
{
    $delimiter = ';';
    $enclosure = '"';
    $handle = fopen('php://memory', 'b+r');

    fputcsv($handle, ['Title', 'URL', 'Content', 'Tags', 'MIME Type', 'Language', 'Creation date'], $delimiter, $enclosure);

    foreach ($this->entries as $entry) {
        fputcsv(
            $handle,
            [
                $entry->getTitle(),
                $entry->getURL(),
                // remove new line to avoid crazy results
                str_replace(["\r\n", "\r", "\n"], '', $entry->getContent()),
                implode(', ', $entry->getTags()->toArray()),
                $entry->getMimetype(),
                $entry->getLanguage(),
                // 24-hour clock ("H"): a 12-hour value without an AM/PM marker
                // cannot tell morning from afternoon
                $entry->getCreatedAt()->format('d/m/Y H:i:s'),
            ],
            $delimiter,
            $enclosure
        );
    }

    // read back everything that was written before releasing the stream
    rewind($handle);
    $output = stream_get_contents($handle);
    fclose($handle);

    return Response::create(
        $output,
        200,
        [
            'Content-type' => 'application/csv',
            'Content-Disposition' => 'attachment; filename="' . $this->getSanitizedFilename() . '.csv"',
            'Content-Transfer-Encoding' => 'UTF-8',
        ]
    );
}
/**
 * Build the download response for the JSON export.
 *
 * The body is the serialized list of entries; the file name is derived
 * from the sanitized export title.
 *
 * @return Response
 */
private function produceJson()
{
    $body = $this->prepareSerializingContent('json');

    $headers = [
        'Content-type' => 'application/json',
        'Content-Disposition' => 'attachment; filename="' . $this->getSanitizedFilename() . '.json"',
        'Content-Transfer-Encoding' => 'UTF-8',
    ];

    return Response::create($body, 200, $headers);
}
/**
 * Build the download response for the XML export.
 *
 * The body is the serialized list of entries; the file name is derived
 * from the sanitized export title.
 *
 * @return Response
 */
private function produceXml()
{
    $body = $this->prepareSerializingContent('xml');

    $headers = [
        'Content-type' => 'application/xml',
        'Content-Disposition' => 'attachment; filename="' . $this->getSanitizedFilename() . '.xml"',
        'Content-Transfer-Encoding' => 'UTF-8',
    ];

    return Response::create($body, 200, $headers);
}
/**
 * Build the download response for the plain-text export.
 *
 * Every entry is rendered as its title framed by two rules of 100 "="
 * characters, followed by the entry content converted from HTML to text.
 *
 * @return Response
 */
private function produceTxt()
{
    $rule = str_repeat('=', 100);
    $chunks = [];

    foreach ($this->entries as $entry) {
        $heading = "\n\n" . $rule . "\n\n" . $entry->getTitle() . "\n\n" . $rule . "\n\n";
        $converter = new Html2Text($entry->getContent(), ['do_links' => 'none', 'width' => 100]);

        $chunks[] = $heading . $converter->getText();
    }

    $headers = [
        'Content-type' => 'text/plain',
        'Content-Disposition' => 'attachment; filename="' . $this->getSanitizedFilename() . '.txt"',
        'Content-Transfer-Encoding' => 'UTF-8',
    ];

    return Response::create(implode('', $chunks), 200, $headers);
}
/**
 * Serialize the exported entries for the producers that need it (JSON & XML).
 *
 * Only the properties exposed in the "entries_for_user" serialization group
 * are included.
 *
 * @param string $format Target format handed to the serializer (e.g. "json" or "xml")
 *
 * @return string The serialized entries
 */
private function prepareSerializingContent($format)
{
    $serializer = SerializerBuilder::create()->build();
    $context = SerializationContext::create()->setGroups(['entries_for_user']);

    return $serializer->serialize($this->entries, $format, $context);
}
/**
 * Return a kind of footer / information block for an export.
 *
 * The translated footer template contains an %IMAGE% placeholder: it is
 * replaced by the logo image tag for the "tcpdf" generator and removed for
 * every other generator.
 *
 * @param string $type Generator of the export, can be: tcpdf, PHPePub, PHPMobi
 *
 * @return string
 */
private function getExportInformation($type)
{
    $footer = $this->translator->trans('export.footer_template', [
        '%method%' => $type,
    ]);

    $image = 'tcpdf' === $type
        ? '<img src="' . $this->logoPath . '" />'
        : '';

    return str_replace('%IMAGE%', $image, $footer);
}
/**
 * Return a file-name-safe version of the export title.
 *
 * The title is first transliterated to ASCII (Any-Latin, Latin-ASCII, then
 * removal of non-spacing marks); afterwards every character other than
 * ASCII letters, digits, "-", space and "'" is dropped.
 *
 * @return string Sanitized filename
 */
private function getSanitizedFilename()
{
    $rules = ':: Any-Latin; :: Latin-ASCII; :: NFD; :: [:Nonspacing Mark:] Remove; :: NFC;';
    $transliterator = \Transliterator::createFromRules($rules, \Transliterator::FORWARD);

    $asciiTitle = $transliterator->transliterate($this->title);

    return preg_replace('/[^A-Za-z0-9\- \']/', '', $asciiTitle);
}
}

View file

@ -0,0 +1,67 @@
<?php
namespace Wallabag\CoreBundle\Helper;
use GuzzleHttp\Cookie\FileCookieJar as BaseFileCookieJar;
use GuzzleHttp\Cookie\SetCookie;
use GuzzleHttp\Utils;
use Psr\Log\LoggerInterface;
/**
 * Overridden Cookie behavior to:
 *   - ignore error when the cookie file is malformatted (resulting in clearing it).
 */
class FileCookieJar extends BaseFileCookieJar
{
    private $logger;

    /**
     * @param LoggerInterface $logger     Only used to log info when something goes wrong
     * @param string          $cookieFile File to store the cookie data
     */
    public function __construct(LoggerInterface $logger, $cookieFile)
    {
        // The logger has to be assigned before the parent constructor runs:
        // Guzzle's FileCookieJar constructor loads an existing cookie file, and
        // load() below logs through $this->logger when that file is broken.
        $this->logger = $logger;

        parent::__construct($cookieFile);
    }

    /**
     * Load cookies from a JSON formatted file.
     *
     * Old cookies are kept unless overwritten by newly loaded ones.
     * A file holding broken JSON is logged and ignored instead of raising an error.
     *
     * @param string $filename cookie file to load
     *
     * @throws \RuntimeException if the file cannot be loaded, or if it holds
     *                           valid JSON that is a non-empty scalar instead of a list of cookies
     */
    public function load($filename)
    {
        $json = file_get_contents($filename);
        if (false === $json) {
            // @codeCoverageIgnoreStart
            throw new \RuntimeException("Unable to load file {$filename}");
            // @codeCoverageIgnoreEnd
        }

        try {
            $data = Utils::jsonDecode($json, true);
        } catch (\InvalidArgumentException $e) {
            $this->logger->error('JSON inside the cookie is broken', [
                'json' => $json,
                'error_msg' => $e->getMessage(),
            ]);

            // cookie file is invalid, just ignore the exception and it'll reset the whole cookie file
            $data = '';
        }

        if (\is_array($data)) {
            // reuse the payload decoded above instead of decoding the file a second time
            foreach ($data as $cookie) {
                $this->setCookie(new SetCookie($cookie));
            }
        } elseif (\is_scalar($data) && '' !== (string) $data) {
            // a decoded null is treated as an empty file; any non-empty scalar is rejected
            throw new \RuntimeException("Invalid cookie file: {$filename}");
        }
    }
}

View file

@ -0,0 +1,74 @@
<?php
namespace Wallabag\CoreBundle\Helper;
use GuzzleHttp\Client as GuzzleClient;
use GuzzleHttp\Cookie\CookieJar;
use GuzzleHttp\Event\SubscriberInterface;
use Http\Adapter\Guzzle5\Client as GuzzleAdapter;
use Http\Client\HttpClient;
use Http\HttplugBundle\ClientFactory\ClientFactory;
use Psr\Log\LoggerInterface;
/**
 * Factory producing the HTTP client, optionally wired for restricted access
 * (shared cookie jar and registered subscribers).
 */
class HttpClientFactory implements ClientFactory
{
    /** @var SubscriberInterface[] */
    private $subscribers = [];

    /** @var CookieJar */
    private $cookieJar;

    private $restrictedAccess;

    private $logger;

    /**
     * HttpClientFactory constructor.
     *
     * @param string $restrictedAccess This param is a kind of boolean. Values: 0 or 1
     */
    public function __construct(CookieJar $cookieJar, $restrictedAccess, LoggerInterface $logger)
    {
        $this->logger = $logger;
        $this->restrictedAccess = $restrictedAccess;
        $this->cookieJar = $cookieJar;
    }

    /**
     * Register a subscriber that is attached to clients created with restricted access enabled.
     */
    public function addSubscriber(SubscriberInterface $subscriber)
    {
        $this->subscribers[] = $subscriber;
    }

    /**
     * Create a HttpClient from an array of configuration.
     *
     * With restricted access disabled, the client is built from the configuration
     * as is. Otherwise the shared cookie jar is cleared and used as default
     * (unless the configuration already provides one) and every registered
     * subscriber is attached.
     *
     * @return HttpClient
     */
    public function createClient(array $config = [])
    {
        $restricted = (int) $this->restrictedAccess;

        $this->logger->log('debug', 'Restricted access config enabled?', ['enabled' => $restricted]);

        if (0 !== $restricted) {
            // we clear the cookie to avoid websites who use cookies for analytics
            $this->cookieJar->clear();

            $hasCookieJar = isset($config['defaults']['cookies']);
            if (!$hasCookieJar) {
                // need to set the (shared) cookie jar
                $config['defaults']['cookies'] = $this->cookieJar;
            }

            $client = new GuzzleClient($config);
            foreach ($this->subscribers as $registered) {
                $client->getEmitter()->attach($registered);
            }

            return new GuzzleAdapter($client);
        }

        return new GuzzleAdapter(new GuzzleClient($config));
    }
}

View file

@ -0,0 +1,40 @@
<?php
namespace Wallabag\CoreBundle\Helper;
use Pagerfanta\Adapter\AdapterInterface;
use Pagerfanta\Adapter\NullAdapter;
use Pagerfanta\Pagerfanta;
use Symfony\Component\Security\Core\Authentication\Token\Storage\TokenStorageInterface;
use Wallabag\CoreBundle\Entity\User;
/**
 * Builds the pager used to list entries, sized by the user's "items per page" setting.
 */
class PreparePagerForEntries
{
    private $tokenStorage;

    public function __construct(TokenStorageInterface $tokenStorage)
    {
        $this->tokenStorage = $tokenStorage;
    }

    /**
     * Wrap the adapter in a pager configured for the given (or current) user.
     *
     * When no usable user is available, a pager over a NullAdapter is returned.
     *
     * @param User $user If user isn't logged in, we can force it (like for feed)
     *
     * @return Pagerfanta
     */
    public function prepare(AdapterInterface $adapter, ?User $user = null)
    {
        if (null === $user) {
            // fall back to the user attached to the current security token, if any
            $token = $this->tokenStorage->getToken();
            $user = $token ? $token->getUser() : null;
        }

        if ($user instanceof User) {
            $pager = new Pagerfanta($adapter);
            $pager->setMaxPerPage($user->getConfig()->getItemsPerPage());

            return $pager;
        }

        return new Pagerfanta(new NullAdapter());
    }
}

62
src/Helper/Redirect.php Normal file
View file

@ -0,0 +1,62 @@
<?php
namespace Wallabag\CoreBundle\Helper;
use GuzzleHttp\Psr7\Uri;
use Symfony\Component\Routing\Generator\UrlGeneratorInterface;
use Symfony\Component\Security\Core\Authentication\Token\Storage\TokenStorageInterface;
use Wallabag\CoreBundle\Entity\Config;
use Wallabag\CoreBundle\Entity\User;
/**
 * Manage redirections to avoid redirecting to empty routes.
 */
class Redirect
{
    private $router;
    private $tokenStorage;

    public function __construct(UrlGeneratorInterface $router, TokenStorageInterface $tokenStorage)
    {
        $this->router = $router;
        $this->tokenStorage = $tokenStorage;
    }

    /**
     * Resolve where to redirect to.
     *
     * The homepage is returned when:
     *   - the logged in user configured "redirect to homepage" as mark-as-read
     *     action (unless $ignoreActionMarkAsRead is set),
     *   - no url is given,
     *   - the url is not an absolute-path reference or cannot be parsed.
     * Otherwise the given url is returned untouched.
     *
     * @param string|null $url                    URL to redirect
     * @param bool        $ignoreActionMarkAsRead Ignore configured action when mark as read
     *
     * @return string
     */
    public function to($url, $ignoreActionMarkAsRead = false)
    {
        $token = $this->tokenStorage->getToken();
        $user = $token ? $token->getUser() : null;

        if ($user instanceof User
            && !$ignoreActionMarkAsRead
            && Config::REDIRECT_TO_HOMEPAGE === $user->getConfig()->getActionMarkAsRead()) {
            return $this->router->generate('homepage');
        }

        if (null === $url || !$this->isAbsolutePath($url)) {
            return $this->router->generate('homepage');
        }

        return $url;
    }

    /**
     * Tell whether the url is an absolute-path reference.
     *
     * @param string $url
     *
     * @return bool False as well when the url is too malformed to be parsed
     */
    private function isAbsolutePath($url)
    {
        try {
            return Uri::isAbsolutePathReference(new Uri($url));
        } catch (\InvalidArgumentException $e) {
            // the Uri constructor rejects urls it cannot parse: treat them as unsafe targets
            return false;
        }
    }
}

View file

@ -0,0 +1,50 @@
<?php
namespace Wallabag\CoreBundle\Helper;
use Psr\Log\LoggerInterface;
use RulerZ\RulerZ;
use Wallabag\CoreBundle\Entity\Entry;
use Wallabag\CoreBundle\Repository\IgnoreOriginInstanceRuleRepository;
/**
 * Checks the url of an entry against the instance-wide and user-defined "ignore origin" rules.
 */
class RuleBasedIgnoreOriginProcessor
{
    protected $rulerz;
    protected $logger;
    protected $ignoreOriginInstanceRuleRepository;

    public function __construct(RulerZ $rulerz, LoggerInterface $logger, IgnoreOriginInstanceRuleRepository $ignoreOriginInstanceRuleRepository)
    {
        $this->rulerz = $rulerz;
        $this->logger = $logger;
        $this->ignoreOriginInstanceRuleRepository = $ignoreOriginInstanceRuleRepository;
    }

    /**
     * Tell whether the url of the entry matches at least one ignore rule.
     *
     * Instance rules are evaluated before the rules of the entry's user; the
     * first matching rule is logged and ends the evaluation.
     *
     * @param Entry $entry Entry to process
     *
     * @return bool True when a rule matches, false otherwise
     */
    public function process(Entry $entry)
    {
        $url = $entry->getUrl();
        $userRules = $entry->getUser()->getConfig()->getIgnoreOriginRules()->toArray();
        $rules = array_merge($this->ignoreOriginInstanceRuleRepository->findAll(), $userRules);

        $parsedUrl = parse_url($url);
        if (false === $parsedUrl) {
            // parse_url() returns false on seriously malformed urls; start from an
            // empty array instead of writing a key into false
            $parsedUrl = [];
        }

        // We add the former url as a new key _all for pattern matching
        $parsedUrl['_all'] = $url;

        foreach ($rules as $rule) {
            if ($this->rulerz->satisfies($parsedUrl, $rule->getRule())) {
                $this->logger->info('Origin url matching ignore rule.', [
                    'rule' => $rule->getRule(),
                ]);

                return true;
            }
        }

        return false;
    }
}

View file

@ -0,0 +1,141 @@
<?php
namespace Wallabag\CoreBundle\Helper;
use Doctrine\Common\Collections\ArrayCollection;
use Psr\Log\LoggerInterface;
use RulerZ\RulerZ;
use Wallabag\CoreBundle\Entity\Entry;
use Wallabag\CoreBundle\Entity\Tag;
use Wallabag\CoreBundle\Entity\TaggingRule;
use Wallabag\CoreBundle\Entity\User;
use Wallabag\CoreBundle\Repository\EntryRepository;
use Wallabag\CoreBundle\Repository\TagRepository;
/**
 * Adds tags to entries according to the tagging rules defined by their user.
 */
class RuleBasedTagger
{
    private $rulerz;
    private $tagRepository;
    private $entryRepository;
    private $logger;

    public function __construct(RulerZ $rulerz, TagRepository $tagRepository, EntryRepository $entryRepository, LoggerInterface $logger)
    {
        $this->rulerz = $rulerz;
        $this->tagRepository = $tagRepository;
        $this->entryRepository = $entryRepository;
        $this->logger = $logger;
    }

    /**
     * Add tags from rules defined by the user.
     *
     * @param Entry $entry Entry to tag
     */
    public function tag(Entry $entry)
    {
        $rules = $this->getRulesForUser($entry->getUser());

        // rules are evaluated against a clone carrying the adjusted reading time
        $clonedEntry = $this->fixEntry($entry);

        foreach ($rules as $rule) {
            if (!$this->rulerz->satisfies($clonedEntry, $rule->getRule())) {
                continue;
            }

            $this->logger->info('Matching rule.', [
                'rule' => $rule->getRule(),
                'tags' => $rule->getTags(),
            ]);

            foreach ($rule->getTags() as $label) {
                $tag = $this->getTag($label);

                $entry->addTag($tag);
            }
        }
    }

    /**
     * Apply all the tagging rules defined by a user on its entries.
     *
     * @return array<Entry> A list of modified entries, each entry being listed once
     */
    public function tagAllForUser(User $user)
    {
        $rules = $this->getRulesForUser($user);
        $entriesToUpdate = [];
        $tagsCache = [];

        $entries = $this->entryRepository
            ->getBuilderForAllByUser($user->getId())
            ->getQuery()
            ->getResult();

        foreach ($entries as $entry) {
            $clonedEntry = $this->fixEntry($entry);
            $tagged = false;

            foreach ($rules as $rule) {
                if (!$this->rulerz->satisfies($clonedEntry, $rule->getRule())) {
                    continue;
                }

                foreach ($rule->getTags() as $label) {
                    // avoid new tag duplicate by manually caching them; the cache key is
                    // lowercased like getTag() does, so "Foo" and "foo" share one Tag
                    $cacheKey = mb_convert_case($label, \MB_CASE_LOWER);
                    if (!isset($tagsCache[$cacheKey])) {
                        $tagsCache[$cacheKey] = $this->getTag($label);
                    }

                    $entry->addTag($tagsCache[$cacheKey]);
                    $tagged = true;
                }
            }

            if ($tagged) {
                // list the entry once, however many tags were added to it
                $entriesToUpdate[] = $entry;
            }
        }

        return $entriesToUpdate;
    }

    /**
     * Fetch a tag by its lowercased label, creating a new one when the
     * repository does not return any.
     *
     * @param string $label The tag's label
     *
     * @return Tag
     */
    private function getTag($label)
    {
        $label = mb_convert_case($label, \MB_CASE_LOWER);
        $tag = $this->tagRepository->findOneByLabel($label);

        if (!$tag) {
            $tag = new Tag();
            $tag->setLabel($label);
        }

        return $tag;
    }

    /**
     * Retrieves the tagging rules for a given user.
     *
     * @return ArrayCollection<TaggingRule>
     */
    private function getRulesForUser(User $user)
    {
        return $user->getConfig()->getTaggingRules();
    }

    /**
     * Update reading time on the fly to match the proper words per minute from the user.
     *
     * The original entry is left untouched.
     *
     * @return Entry A clone of the entry whose reading time is scaled by 200 / the user's reading speed
     */
    private function fixEntry(Entry $entry)
    {
        $clonedEntry = clone $entry;
        $clonedEntry->setReadingTime($entry->getReadingTime() / $entry->getUser()->getConfig()->getReadingSpeed() * 200);

        return $clonedEntry;
    }
}

View file

@ -0,0 +1,74 @@
<?php
namespace Wallabag\CoreBundle\Helper;
use Wallabag\CoreBundle\Entity\Entry;
use Wallabag\CoreBundle\Entity\Tag;
use Wallabag\CoreBundle\Repository\TagRepository;
/**
 * Assigns tags, given as labels, to an entry.
 */
class TagsAssigner
{
    /**
     * @var TagRepository
     */
    protected $tagRepository;

    public function __construct(TagRepository $tagRepository)
    {
        $this->tagRepository = $tagRepository;
    }

    /**
     * Assign some tags to an entry.
     *
     * Labels are lowercased and trimmed; empty labels are skipped. A label
     * listed several times resolves to one single Tag.
     *
     * @param array|string $tags          An array of tag or a string comma separated of tag
     * @param array        $entitiesReady Entities from the EntityManager which are persisted but not yet flushed
     *                                    It is mostly to fix duplicate tag on import @see http://stackoverflow.com/a/7879164/569101
     *
     * @return Tag[] The tags newly attached to the entry
     */
    public function assignTagsToEntry(Entry $entry, $tags, array $entitiesReady = [])
    {
        $tagsEntities = [];

        if (!\is_array($tags)) {
            $tags = explode(',', (string) $tags);
        }

        // keeps only Tag entity from the "not yet flushed entities"
        $knownTags = [];
        foreach ($entitiesReady as $entity) {
            if ($entity instanceof Tag) {
                $knownTags[$entity->getLabel()] = $entity;
            }
        }

        foreach ($tags as $label) {
            $label = trim(mb_convert_case($label, \MB_CASE_LOWER));

            // avoid empty tag
            if ('' === $label) {
                continue;
            }

            if (isset($knownTags[$label])) {
                $tagEntity = $knownTags[$label];
            } else {
                $tagEntity = $this->tagRepository->findOneByLabel($label);

                if (null === $tagEntity) {
                    $tagEntity = new Tag();
                    $tagEntity->setLabel($label);
                }

                // remember the resolved tag: the same label appearing again must reuse
                // this object instead of creating a second new Tag for it
                $knownTags[$label] = $tagEntity;
            }

            // only add the tag on the entry if the relation doesn't exist
            if (false === $entry->getTags()->contains($tagEntity)) {
                $entry->addTag($tagEntity);
                $tagsEntities[] = $tagEntity;
            }
        }

        return $tagsEntities;
    }
}

22
src/Helper/UrlHasher.php Normal file
View file

@ -0,0 +1,22 @@
<?php
namespace Wallabag\CoreBundle\Helper;
/**
 * Hash URLs for privacy and performance.
 */
class UrlHasher
{
    /**
     * Hash the url-decoded form of the given url using the given algorithm.
     * Hashed url are faster to be retrieved in the database than the real url.
     *
     * @param string $url       Url to hash; it is url-decoded before being hashed
     * @param string $algorithm Name of the hashing algorithm handed to hash()
     *
     * @return string
     */
    public static function hashUrl(string $url, $algorithm = 'sha1')
    {
        $decodedUrl = urldecode($url);

        return hash($algorithm, $decodedUrl);
    }
}