1
0
Fork 0
mirror of https://github.com/wallabag/wallabag.git synced 2025-08-26 18:21:02 +00:00

Move source files directly under src/ directory

This commit is contained in:
Yassine Guedidi 2024-02-19 00:39:48 +01:00
parent 804261bc26
commit a37b385c23
190 changed files with 19 additions and 21 deletions

View file

@ -0,0 +1,223 @@
<?php
namespace Wallabag\CoreBundle\Import;
use Doctrine\ORM\EntityManagerInterface;
use OldSound\RabbitMqBundle\RabbitMq\ProducerInterface;
use Psr\Log\LoggerInterface;
use Symfony\Component\EventDispatcher\EventDispatcherInterface;
use Wallabag\CoreBundle\Entity\Entry;
use Wallabag\CoreBundle\Entity\Tag;
use Wallabag\CoreBundle\Entity\User;
use Wallabag\CoreBundle\Event\EntrySavedEvent;
use Wallabag\CoreBundle\Helper\ContentProxy;
use Wallabag\CoreBundle\Helper\TagsAssigner;
/**
 * Common base for the importers.
 *
 * Holds the injected collaborators, the import options (user, mark-as-read,
 * content update switch) and the skipped / imported / queued counters, and
 * provides the two generic loops: direct import and queue publishing.
 */
abstract class AbstractImport implements ImportInterface
{
    protected $em;
    protected $logger;
    protected $contentProxy;
    protected $tagsAssigner;
    protected $eventDispatcher;
    protected $producer;
    protected $user;
    protected $markAsRead;
    protected $disableContentUpdate = false;
    protected $skippedEntries = 0;
    protected $importedEntries = 0;
    protected $queuedEntries = 0;

    public function __construct(EntityManagerInterface $em, ContentProxy $contentProxy, TagsAssigner $tagsAssigner, EventDispatcherInterface $eventDispatcher, LoggerInterface $logger)
    {
        $this->em = $em;
        $this->contentProxy = $contentProxy;
        $this->tagsAssigner = $tagsAssigner;
        $this->eventDispatcher = $eventDispatcher;
        $this->logger = $logger;
    }

    /**
     * Replace the logger used to report import errors.
     */
    public function setLogger(LoggerInterface $logger)
    {
        $this->logger = $logger;
    }

    /**
     * Set the RabbitMQ/Redis producer used to send each entry to a queue.
     * When a producer is set, the concrete importers publish entries instead
     * of importing them directly.
     */
    public function setProducer(ProducerInterface $producer)
    {
        $this->producer = $producer;
    }

    /**
     * Set the user the entries are imported for.
     * It can be the currently connected user or one retrieved by the consumer.
     */
    public function setUser(User $user)
    {
        $this->user = $user;
    }

    /**
     * Set whether all imported articles must be marked as read.
     *
     * @param bool $markAsRead
     *
     * @return $this
     */
    public function setMarkAsRead($markAsRead)
    {
        $this->markAsRead = $markAsRead;

        return $this;
    }

    /**
     * Tell whether all imported articles must be marked as read.
     */
    public function getMarkAsRead()
    {
        return $this->markAsRead;
    }

    /**
     * Set the flag forwarded to ContentProxy::updateEntry() as its
     * "disable content update" argument.
     *
     * @param bool $disableContentUpdate
     *
     * @return $this
     */
    public function setDisableContentUpdate($disableContentUpdate)
    {
        $this->disableContentUpdate = $disableContentUpdate;

        return $this;
    }

    /**
     * Return the counters of the current import.
     *
     * @return array with keys "skipped", "imported" and "queued"
     */
    public function getSummary()
    {
        $summary = [];
        $summary['skipped'] = $this->skippedEntries;
        $summary['imported'] = $this->importedEntries;
        $summary['queued'] = $this->queuedEntries;

        return $summary;
    }

    /**
     * Parse one entry.
     *
     * @return Entry|null
     */
    abstract public function parseEntry(array $importedEntry);

    /**
     * Check that an entry is valid (e.g. it has the required keys).
     *
     * @return bool
     */
    abstract public function validateEntry(array $importedEntry);

    /**
     * Fetch content through the ContentProxy.
     * A failure is only logged, so the given entry can still be saved by the
     * caller (this avoids losing the user's content).
     *
     * @param Entry  $entry   Entry to update
     * @param string $url     Url to grab content for
     * @param array  $content An array with AT LEAST keys title, html, url, language & content_type to skip the fetchContent from the url
     */
    protected function fetchContent(Entry $entry, $url, array $content = [])
    {
        try {
            $this->contentProxy->updateEntry($entry, $url, $content, $this->disableContentUpdate);
        } catch (\Exception $exception) {
            $context = [
                'entry_url' => $url,
                'error_msg' => $exception->getMessage(),
            ];
            $this->logger->error('Error trying to import an entry.', $context);
        }
    }

    /**
     * Parse and insert all given entries.
     *
     * Entries are flushed in batches of 20; the entry.saved event is only
     * dispatched after the flush because listeners need the persisted entry
     * (its id is required to generate images, at least).
     */
    protected function parseEntries(array $entries)
    {
        $keptCount = 0;
        $pendingEntries = [];

        foreach ($entries as $importedEntry) {
            if ($this->markAsRead) {
                $importedEntry = $this->setEntryAsRead($importedEntry);
            }

            if (false === $this->validateEntry($importedEntry)) {
                continue;
            }

            $entry = $this->parseEntry($importedEntry);
            if (null === $entry) {
                continue;
            }

            $pendingEntries[] = $entry;
            ++$keptCount;

            // only every 20th kept entry triggers a flush
            if (0 !== ($keptCount % 20)) {
                continue;
            }

            $this->em->flush();
            $this->dispatchSavedEvents($pendingEntries);
            $pendingEntries = [];

            // clear only affected entities
            $this->em->clear(Entry::class);
            $this->em->clear(Tag::class);
        }

        // flush whatever is left from the last, incomplete batch
        $this->em->flush();
        $this->dispatchSavedEvents($pendingEntries);
    }

    /**
     * Send every entry to the queue.
     *
     * This is meant to be a plain loop without database calls, to keep
     * queuing fast: no validation is done here, it is left to the consumer.
     */
    protected function parseEntriesForProducer(array $entries)
    {
        foreach ($entries as $payload) {
            // the consumer won't know which user is connected, so ship the id along
            $payload['userId'] = $this->user->getId();

            if ($this->markAsRead) {
                $payload = $this->setEntryAsRead($payload);
            }

            ++$this->queuedEntries;

            $this->producer->publish(json_encode($payload));
        }
    }

    /**
     * Set current imported entry to archived / read.
     * Implementation is different across all imports.
     *
     * @return array
     */
    abstract protected function setEntryAsRead(array $importedEntry);

    /**
     * Dispatch the entry.saved event for each given (already flushed) entry.
     */
    private function dispatchSavedEvents(array $savedEntries)
    {
        foreach ($savedEntries as $savedEntry) {
            $this->eventDispatcher->dispatch(new EntrySavedEvent($savedEntry), EntrySavedEvent::NAME);
        }
    }
}

View file

@ -0,0 +1,214 @@
<?php
namespace Wallabag\CoreBundle\Import;
use Wallabag\CoreBundle\Entity\Entry;
use Wallabag\CoreBundle\Event\EntrySavedEvent;
/**
 * Base importer for browser bookmark exports stored as a JSON file.
 * Bookmark folders are walked recursively; leaves are turned into entries.
 */
abstract class BrowserImport extends AbstractImport
{
    protected $filepath;

    abstract public function getName();

    abstract public function getUrl();

    abstract public function getDescription();

    /**
     * Read the JSON file and either queue or import its content.
     *
     * @return bool false when the user is missing, the file is unreadable or
     *              decodes to an empty value; true otherwise
     */
    public function import()
    {
        if (!$this->user) {
            $this->logger->error('Wallabag Browser Import: user is not defined');

            return false;
        }

        $readable = file_exists($this->filepath) && is_readable($this->filepath);
        if (!$readable) {
            $this->logger->error('Wallabag Browser Import: unable to read file', ['filepath' => $this->filepath]);

            return false;
        }

        $data = json_decode(file_get_contents($this->filepath), true);
        if (empty($data)) {
            $this->logger->error('Wallabag Browser: no entries in imported file');

            return false;
        }

        $this->importNested($data);

        return true;
    }

    /**
     * Set file path to the json file.
     *
     * @param string $filepath
     *
     * @return $this
     */
    public function setFilepath($filepath)
    {
        $this->filepath = $filepath;

        return $this;
    }

    /**
     * Turn one decoded node into an Entry.
     *
     * Nodes that are containers (a list of arrays, or a node with a
     * "children" key) are handed back to the batch methods and yield null.
     * Nodes without "uri"/"url", or whose url already exists for the user,
     * also yield null.
     *
     * @return Entry|null
     */
    public function parseEntry(array $importedEntry)
    {
        $lacksIdentifier = !\array_key_exists('guid', $importedEntry) || !\array_key_exists('id', $importedEntry);
        if ($lacksIdentifier && \is_array(reset($importedEntry))) {
            $this->importNested($importedEntry);

            return null;
        }

        if (\array_key_exists('children', $importedEntry)) {
            $this->importNested($importedEntry['children']);

            return null;
        }

        if (\array_key_exists('uri', $importedEntry)) {
            $url = $importedEntry['uri'];
        } elseif (\array_key_exists('url', $importedEntry)) {
            $url = $importedEntry['url'];
        } else {
            return null;
        }

        $existingEntry = $this->em
            ->getRepository(Entry::class)
            ->findByUrlAndUserId($url, $this->user->getId());

        if (false !== $existingEntry) {
            ++$this->skippedEntries;

            return null;
        }

        $data = $this->prepareEntry($importedEntry);

        $entry = new Entry($this->user);
        $entry->setUrl($data['url']);
        $entry->setTitle($data['title']);

        // fetchContent() only logs failures, so the entry is kept even when fetching fails
        $this->fetchContent($entry, $data['url'], $data);

        if (\array_key_exists('tags', $data)) {
            $this->tagsAssigner->assignTagsToEntry($entry, $data['tags']);
        }

        $entry->updateArchived($data['is_archived']);

        if (!empty($data['created_at'])) {
            $createdAt = new \DateTime();
            $createdAt->setTimestamp($data['created_at']);
            $entry->setCreatedAt($createdAt);
        }

        $this->em->persist($entry);
        ++$this->importedEntries;

        return $entry;
    }

    /**
     * Parse and insert all given entries.
     *
     * Non-array items are ignored. Entries are flushed in batches of 20 and
     * the entry.saved event is dispatched once they are flushed.
     */
    protected function parseEntries(array $entries)
    {
        $keptCount = 0;
        $pendingEntries = [];

        foreach ($entries as $importedEntry) {
            if (!\is_array($importedEntry)) {
                continue;
            }

            $entry = $this->parseEntry($importedEntry);
            if (null === $entry) {
                continue;
            }

            // @see AbstractImport
            $pendingEntries[] = $entry;
            ++$keptCount;

            // only every 20th kept entry triggers a flush
            if (0 !== ($keptCount % 20)) {
                continue;
            }

            $this->em->flush();

            foreach ($pendingEntries as $savedEntry) {
                $this->eventDispatcher->dispatch(new EntrySavedEvent($savedEntry), EntrySavedEvent::NAME);
            }

            $pendingEntries = [];
        }

        $this->em->flush();

        foreach ($pendingEntries as $savedEntry) {
            $this->eventDispatcher->dispatch(new EntrySavedEvent($savedEntry), EntrySavedEvent::NAME);
        }
    }

    /**
     * Send every array item to the queue.
     *
     * This is meant to be a plain loop without database calls, to keep
     * queuing fast: no validation is done here, it is left to the consumer.
     */
    protected function parseEntriesForProducer(array $entries)
    {
        foreach ($entries as $payload) {
            if (!\is_array($payload)) {
                continue;
            }

            // the consumer won't know which user is connected, so ship the id along
            $payload['userId'] = $this->user->getId();

            if ($this->markAsRead) {
                $payload = $this->setEntryAsRead($payload);
            }

            ++$this->queuedEntries;

            $this->producer->publish(json_encode($payload));
        }
    }

    /**
     * Flag the raw entry as archived.
     *
     * @return array
     */
    protected function setEntryAsRead(array $importedEntry)
    {
        $importedEntry['is_archived'] = 1;

        return $importedEntry;
    }

    /**
     * Map a raw bookmark to the array consumed by parseEntry()
     * (keys read there: url, title, tags, is_archived, created_at).
     */
    abstract protected function prepareEntry(array $entry = []);

    /**
     * Hand a list of nodes to the queue when a producer is set, otherwise
     * import them directly.
     */
    private function importNested(array $nodes)
    {
        if ($this->producer) {
            $this->parseEntriesForProducer($nodes);

            return;
        }

        $this->parseEntries($nodes);
    }
}

View file

@ -0,0 +1,51 @@
<?php
namespace Wallabag\CoreBundle\Import;
/**
 * Browser importer for Chrome bookmark exports.
 */
class ChromeImport extends BrowserImport
{
    protected $filepath;

    public function getName()
    {
        return 'Chrome';
    }

    public function getUrl()
    {
        return 'import_chrome';
    }

    public function getDescription()
    {
        return 'import.chrome.description';
    }

    /**
     * An entry is valid as soon as it carries a non-empty "url".
     *
     * @return bool
     */
    public function validateEntry(array $importedEntry)
    {
        return !empty($importedEntry['url']);
    }

    /**
     * Map a Chrome bookmark (name, url, date_added, optional tags) to the
     * generic entry array.
     *
     * NOTE(review): created_at keeps only the first 10 characters of
     * date_added; confirm the unit/epoch of that field matches a Unix
     * timestamp in seconds.
     */
    protected function prepareEntry(array $entry = [])
    {
        $hasTags = \array_key_exists('tags', $entry) && '' !== $entry['tags'];

        return [
            'title' => $entry['name'],
            'html' => false,
            'url' => $entry['url'],
            'is_archived' => (int) $this->markAsRead,
            'is_starred' => false,
            'tags' => $hasTags ? $entry['tags'] : '',
            'created_at' => substr($entry['date_added'], 0, 10),
        ];
    }
}

View file

@ -0,0 +1,130 @@
<?php
namespace Wallabag\CoreBundle\Import;
use Wallabag\CoreBundle\Entity\Entry;
/**
 * Importer for Delicious exports stored as a JSON file.
 */
class DeliciousImport extends AbstractImport
{
    private $filepath;

    public function getName()
    {
        return 'Delicious';
    }

    public function getUrl()
    {
        return 'import_delicious';
    }

    public function getDescription()
    {
        return 'import.delicious.description';
    }

    /**
     * Set file path to the json file.
     *
     * @param string $filepath
     *
     * @return $this
     */
    public function setFilepath($filepath)
    {
        $this->filepath = $filepath;

        return $this;
    }

    /**
     * Read the JSON file and either queue or import its content.
     *
     * @return bool false when the user is missing, the file is unreadable or
     *              decodes to an empty value; true otherwise
     */
    public function import()
    {
        if (!$this->user) {
            $this->logger->error('DeliciousImport: user is not defined');

            return false;
        }

        if (!file_exists($this->filepath) || !is_readable($this->filepath)) {
            $this->logger->error('DeliciousImport: unable to read file', ['filepath' => $this->filepath]);

            return false;
        }

        $data = json_decode(file_get_contents($this->filepath), true);

        if (empty($data)) {
            $this->logger->error('DeliciousImport: no entries in imported file');

            return false;
        }

        if ($this->producer) {
            $this->parseEntriesForProducer($data);

            return true;
        }

        $this->parseEntries($data);

        return true;
    }

    /**
     * An entry is valid as soon as it carries a non-empty "url".
     *
     * @return bool
     */
    public function validateEntry(array $importedEntry)
    {
        if (empty($importedEntry['url'])) {
            return false;
        }

        return true;
    }

    /**
     * Build and persist an Entry from one Delicious bookmark.
     *
     * @return Entry|null null when the url already exists for the user
     */
    public function parseEntry(array $importedEntry)
    {
        $existingEntry = $this->em
            ->getRepository(Entry::class)
            ->findByUrlAndUserId($importedEntry['url'], $this->user->getId());

        if (false !== $existingEntry) {
            ++$this->skippedEntries;

            return null;
        }

        $data = [
            'title' => $importedEntry['title'],
            'url' => $importedEntry['url'],
            'is_archived' => $this->markAsRead,
            'is_starred' => false,
            // validateEntry() only guarantees "url", so "created" may be absent
            'created_at' => $importedEntry['created'] ?? null,
            'tags' => $importedEntry['tags'],
        ];

        $entry = new Entry($this->user);
        $entry->setUrl($data['url']);
        $entry->setTitle($data['title']);

        // fetchContent() only logs failures, so the entry is kept even when fetching fails
        $this->fetchContent($entry, $data['url'], $data);

        if (!empty($data['tags'])) {
            $this->tagsAssigner->assignTagsToEntry(
                $entry,
                $data['tags'],
                $this->em->getUnitOfWork()->getScheduledEntityInsertions()
            );
        }

        $entry->updateArchived($data['is_archived']);
        $entry->setStarred($data['is_starred']);

        // createFromFormat() returns false for a missing or malformed timestamp;
        // in that case the entry keeps whatever creation date it already has
        // instead of receiving `false`.
        $createdAt = \DateTime::createFromFormat('U', (string) $data['created_at']);
        if (false !== $createdAt) {
            $entry->setCreatedAt($createdAt);
        }

        $this->em->persist($entry);
        ++$this->importedEntries;

        return $entry;
    }

    /**
     * Delicious entries carry no read flag: the raw entry is returned as is
     * (parseEntry() reads the mark-as-read option directly).
     *
     * @return array
     */
    protected function setEntryAsRead(array $importedEntry)
    {
        return $importedEntry;
    }
}

View file

@ -0,0 +1,39 @@
<?php
namespace Wallabag\CoreBundle\Import;
/**
 * Importer for elCurator exports, built on top of WallabagImport.
 */
class ElcuratorImport extends WallabagImport
{
    public function getName()
    {
        return 'elcurator';
    }

    public function getUrl()
    {
        return 'import_elcurator';
    }

    public function getDescription()
    {
        return 'import.elcurator.description';
    }

    /**
     * Normalise an elCurator entry: it is never archived and "is_saved"
     * becomes "is_starred". Every other key of the raw entry is kept.
     */
    protected function prepareEntry($entry = [])
    {
        $normalised = [
            'url' => $entry['url'],
            'title' => $entry['title'],
            'created_at' => $entry['created_at'],
            'is_archived' => 0,
            'is_starred' => $entry['is_saved'],
        ];

        // array union: keys already present on the left-hand side win
        return $normalised + $entry;
    }

    /**
     * Flag the raw entry as archived.
     *
     * @return array
     */
    protected function setEntryAsRead(array $importedEntry)
    {
        $importedEntry['is_archived'] = 1;

        return $importedEntry;
    }
}

View file

@ -0,0 +1,51 @@
<?php
namespace Wallabag\CoreBundle\Import;
/**
 * Browser importer for Firefox bookmark exports.
 */
class FirefoxImport extends BrowserImport
{
    protected $filepath;

    public function getName()
    {
        return 'Firefox';
    }

    public function getUrl()
    {
        return 'import_firefox';
    }

    public function getDescription()
    {
        return 'import.firefox.description';
    }

    /**
     * An entry is valid as soon as it carries a non-empty "uri".
     *
     * @return bool
     */
    public function validateEntry(array $importedEntry)
    {
        return !empty($importedEntry['uri']);
    }

    /**
     * Map a Firefox bookmark (title, uri, dateAdded, optional tags) to the
     * generic entry array.
     *
     * NOTE(review): created_at keeps only the first 10 characters of
     * dateAdded; presumably this reduces a longer timestamp to seconds,
     * to be confirmed.
     */
    protected function prepareEntry(array $entry = [])
    {
        $hasTags = \array_key_exists('tags', $entry) && '' !== $entry['tags'];

        return [
            'title' => $entry['title'],
            'html' => false,
            'url' => $entry['uri'],
            'is_archived' => (int) $this->markAsRead,
            'is_starred' => false,
            'tags' => $hasTags ? $entry['tags'] : '',
            'created_at' => substr($entry['dateAdded'], 0, 10),
        ];
    }
}

192
src/Import/HtmlImport.php Normal file
View file

@ -0,0 +1,192 @@
<?php
namespace Wallabag\CoreBundle\Import;
use Wallabag\CoreBundle\Entity\Entry;
use Wallabag\CoreBundle\Event\EntrySavedEvent;
/**
 * Base importer for bookmark exports stored as an HTML file: every <a>
 * element of the document is treated as one bookmark.
 */
abstract class HtmlImport extends AbstractImport
{
    protected $filepath;

    abstract public function getName();

    abstract public function getUrl();

    abstract public function getDescription();

    /**
     * Read the HTML file and either queue or import its anchors.
     *
     * @return bool false when the user is missing, the file is unreadable or
     *              contains no anchor; true otherwise
     */
    public function import()
    {
        if (!$this->user) {
            $this->logger->error('Wallabag HTML Import: user is not defined');

            return false;
        }

        if (!file_exists($this->filepath) || !is_readable($this->filepath)) {
            $this->logger->error('Wallabag HTML Import: unable to read file', ['filepath' => $this->filepath]);

            return false;
        }

        $html = new \DOMDocument();

        // Silence parser warnings for the load only, then restore the caller's
        // libxml setting and drop the buffered errors so they don't pile up.
        $previousLibxmlState = libxml_use_internal_errors(true);
        $html->loadHTMLFile($this->filepath);
        $hrefs = $html->getElementsByTagName('a');
        libxml_clear_errors();
        libxml_use_internal_errors($previousLibxmlState);

        if (0 === $hrefs->length) {
            $this->logger->error('Wallabag HTML: no entries in imported file');

            return false;
        }

        $entries = [];
        foreach ($hrefs as $href) {
            $entry = [];
            $entry['url'] = $href->getAttribute('href');
            $entry['tags'] = $href->getAttribute('tags');
            $entry['created_at'] = $href->getAttribute('add_date');
            $entries[] = $entry;
        }

        if ($this->producer) {
            $this->parseEntriesForProducer($entries);

            return true;
        }

        $this->parseEntries($entries);

        return true;
    }

    /**
     * Set file path to the html file.
     *
     * @param string $filepath
     *
     * @return $this
     */
    public function setFilepath($filepath)
    {
        $this->filepath = $filepath;

        return $this;
    }

    /**
     * Build and persist an Entry from one extracted anchor.
     *
     * @return Entry|null null when the url already exists for the user
     */
    public function parseEntry(array $importedEntry)
    {
        $url = $importedEntry['url'];

        $existingEntry = $this->em
            ->getRepository(Entry::class)
            ->findByUrlAndUserId($url, $this->user->getId());

        if (false !== $existingEntry) {
            ++$this->skippedEntries;

            return null;
        }

        $data = $this->prepareEntry($importedEntry);

        $entry = new Entry($this->user);
        $entry->setUrl($data['url']);
        $entry->updateArchived($data['is_archived']);

        // The date comes from an HTML attribute, so it is a string and is empty
        // when the attribute is absent: only apply it when it is a usable
        // timestamp (same guard idea as BrowserImport::parseEntry()).
        if (isset($data['created_at']) && is_numeric($data['created_at'])) {
            $createdAt = new \DateTime();
            $createdAt->setTimestamp((int) $data['created_at']);
            $entry->setCreatedAt($createdAt);
        }

        // fetchContent() only logs failures, so the entry is kept even when fetching fails
        $this->fetchContent($entry, $data['url'], $data);

        if (\array_key_exists('tags', $data)) {
            $this->tagsAssigner->assignTagsToEntry(
                $entry,
                $data['tags']
            );
        }

        $this->em->persist($entry);
        ++$this->importedEntries;

        return $entry;
    }

    /**
     * Parse and insert all given entries.
     *
     * Entries are flushed in batches of 20 and the entry.saved event is
     * dispatched once they are flushed.
     */
    protected function parseEntries(array $entries)
    {
        $i = 1;
        $entryToBeFlushed = [];

        foreach ($entries as $importedEntry) {
            $entry = $this->parseEntry($importedEntry);

            if (null === $entry) {
                continue;
            }

            // @see AbstractImport
            $entryToBeFlushed[] = $entry;

            // flush every 20 entries
            if (0 === ($i % 20)) {
                $this->em->flush();

                foreach ($entryToBeFlushed as $entry) {
                    $this->eventDispatcher->dispatch(new EntrySavedEvent($entry), EntrySavedEvent::NAME);
                }

                $entryToBeFlushed = [];
            }
            ++$i;
        }

        $this->em->flush();

        if (!empty($entryToBeFlushed)) {
            foreach ($entryToBeFlushed as $entry) {
                $this->eventDispatcher->dispatch(new EntrySavedEvent($entry), EntrySavedEvent::NAME);
            }
        }
    }

    /**
     * Send every array item to the queue.
     *
     * This is meant to be a plain loop without database calls, to keep
     * queuing fast: no validation is done here, it is left to the consumer.
     */
    protected function parseEntriesForProducer(array $entries)
    {
        foreach ($entries as $importedEntry) {
            if (!\is_array($importedEntry)) {
                continue;
            }

            // set userId for the producer (it won't know which user is connected)
            $importedEntry['userId'] = $this->user->getId();

            if ($this->markAsRead) {
                $importedEntry = $this->setEntryAsRead($importedEntry);
            }

            ++$this->queuedEntries;

            $this->producer->publish(json_encode($importedEntry));
        }
    }

    /**
     * Flag the raw entry as archived.
     *
     * @return array
     */
    protected function setEntryAsRead(array $importedEntry)
    {
        $importedEntry['is_archived'] = 1;

        return $importedEntry;
    }

    /**
     * Map an extracted anchor to the array consumed by parseEntry()
     * (keys read there: url, is_archived, created_at, tags).
     */
    abstract protected function prepareEntry(array $entry = []);
}

View file

@ -0,0 +1,33 @@
<?php
namespace Wallabag\CoreBundle\Import;
/**
 * Registry of the available imports, indexed by alias.
 */
class ImportChain
{
    private $imports;

    public function __construct()
    {
        $this->imports = [];
    }

    /**
     * Register an import under the given alias.
     * Registering the same alias again replaces the previous import.
     *
     * @param string $alias
     */
    public function addImport(ImportInterface $import, $alias)
    {
        $this->imports[$alias] = $import;
    }

    /**
     * Return every registered import, keyed by alias.
     *
     * @return array<ImportInterface>
     */
    public function getAll()
    {
        return $this->imports;
    }
}

View file

@ -0,0 +1,33 @@
<?php
namespace Wallabag\CoreBundle\Import;
use Symfony\Component\DependencyInjection\Compiler\CompilerPassInterface;
use Symfony\Component\DependencyInjection\ContainerBuilder;
use Symfony\Component\DependencyInjection\Reference;
/**
 * Compiler pass wiring every service tagged "wallabag_core.import" into the
 * ImportChain definition through an addImport() method call.
 */
class ImportCompilerPass implements CompilerPassInterface
{
    /**
     * Add one addImport(service, alias) call per tag found on each tagged
     * service. Does nothing when ImportChain has no definition.
     */
    public function process(ContainerBuilder $container)
    {
        if (!$container->hasDefinition(ImportChain::class)) {
            return;
        }

        $chainDefinition = $container->getDefinition(ImportChain::class);
        $importServices = $container->findTaggedServiceIds('wallabag_core.import');

        foreach ($importServices as $serviceId => $tags) {
            foreach ($tags as $tag) {
                $arguments = [new Reference($serviceId), $tag['alias']];
                $chainDefinition->addMethodCall('addImport', $arguments);
            }
        }
    }
}

View file

@ -0,0 +1,45 @@
<?php
namespace Wallabag\CoreBundle\Import;
use Psr\Log\LoggerAwareInterface;
/**
 * Contract implemented by the importers; it also makes them logger-aware
 * through LoggerAwareInterface.
 */
interface ImportInterface extends LoggerAwareInterface
{
/**
 * Name of the import.
 *
 * @return string
 */
public function getName();
/**
 * Url to start the import.
 *
 * @return string
 */
public function getUrl();
/**
 * Description of the import.
 *
 * @return string
 */
public function getDescription();
/**
 * Run the import.
 *
 * @return bool the visible implementations return false when the import
 *              could not be performed and true otherwise
 */
public function import();
/**
 * Return an array with summary info about the import, with at least the keys:
 * - skipped
 * - imported.
 *
 * AbstractImport::getSummary() additionally returns a "queued" key.
 *
 * @return array
 */
public function getSummary();
}

View file

@ -0,0 +1,150 @@
<?php
namespace Wallabag\CoreBundle\Import;
use Wallabag\CoreBundle\Entity\Entry;
/**
 * Importer for Instapaper exports stored as a CSV file.
 * Columns read: 0 = url, 1 = title, 3 = folder (which can also be a status).
 */
class InstapaperImport extends AbstractImport
{
    private $filepath;

    public function getName()
    {
        return 'Instapaper';
    }

    public function getUrl()
    {
        return 'import_instapaper';
    }

    public function getDescription()
    {
        return 'import.instapaper.description';
    }

    /**
     * Set file path to the csv file.
     *
     * @param string $filepath
     *
     * @return $this
     */
    public function setFilepath($filepath)
    {
        $this->filepath = $filepath;

        return $this;
    }

    /**
     * Read the CSV file and either queue or import its rows.
     *
     * @return bool false when the user is missing, the file cannot be read or
     *              holds no usable row; true otherwise
     */
    public function import()
    {
        if (!$this->user) {
            $this->logger->error('InstapaperImport: user is not defined');

            return false;
        }

        if (!file_exists($this->filepath) || !is_readable($this->filepath)) {
            $this->logger->error('InstapaperImport: unable to read file', ['filepath' => $this->filepath]);

            return false;
        }

        $handle = fopen($this->filepath, 'r');
        if (false === $handle) {
            $this->logger->error('InstapaperImport: unable to read file', ['filepath' => $this->filepath]);

            return false;
        }

        $entries = [];

        try {
            while (false !== ($data = fgetcsv($handle, 10240))) {
                $url = $data[0] ?? null;

                // fgetcsv() returns [null] for a blank line; rows without a url
                // would be rejected by validateEntry() anyway. 'URL' is the header row.
                if (null === $url || '' === $url || 'URL' === $url) {
                    continue;
                }

                // last element in the csv is the folder where the content belong
                // BUT it can also be the status (since status = folder in Instapaper)
                // and we don't want archive, unread & starred to become a tag.
                // Short rows simply have no folder.
                $folder = $data[3] ?? null;

                $tags = null;
                if (null !== $folder && '' !== $folder && false === \in_array($folder, ['Archive', 'Unread', 'Starred'], true)) {
                    $tags = [$folder];
                }

                $entries[] = [
                    'url' => $url,
                    'title' => $data[1] ?? null,
                    'is_archived' => 'Archive' === $folder || 'Starred' === $folder,
                    'is_starred' => 'Starred' === $folder,
                    'html' => false,
                    'tags' => $tags,
                ];
            }
        } finally {
            // release the handle even if reading a row throws
            fclose($handle);
        }

        if (empty($entries)) {
            $this->logger->error('InstapaperImport: no entries in imported file');

            return false;
        }

        // most recent articles are first, which means we should create them at the end so they will show up first
        // as Instapaper doesn't export the creation date of the article
        $entries = array_reverse($entries);

        if ($this->producer) {
            $this->parseEntriesForProducer($entries);

            return true;
        }

        $this->parseEntries($entries);

        return true;
    }

    /**
     * An entry is valid as soon as it carries a non-empty "url".
     *
     * @return bool
     */
    public function validateEntry(array $importedEntry)
    {
        if (empty($importedEntry['url'])) {
            return false;
        }

        return true;
    }

    /**
     * Build and persist an Entry from one row prepared by import().
     *
     * @return Entry|null null when the url already exists for the user
     */
    public function parseEntry(array $importedEntry)
    {
        $existingEntry = $this->em
            ->getRepository(Entry::class)
            ->findByUrlAndUserId($importedEntry['url'], $this->user->getId());

        if (false !== $existingEntry) {
            ++$this->skippedEntries;

            return null;
        }

        $entry = new Entry($this->user);
        $entry->setUrl($importedEntry['url']);
        $entry->setTitle($importedEntry['title']);

        // fetchContent() only logs failures, so the entry is kept even when fetching fails
        $this->fetchContent($entry, $importedEntry['url'], $importedEntry);

        if (!empty($importedEntry['tags'])) {
            $this->tagsAssigner->assignTagsToEntry(
                $entry,
                $importedEntry['tags'],
                $this->em->getUnitOfWork()->getScheduledEntityInsertions()
            );
        }

        $entry->updateArchived($importedEntry['is_archived']);
        $entry->setStarred($importedEntry['is_starred']);

        $this->em->persist($entry);
        ++$this->importedEntries;

        return $entry;
    }

    /**
     * Flag the prepared entry as archived.
     *
     * @return array
     */
    protected function setEntryAsRead(array $importedEntry)
    {
        $importedEntry['is_archived'] = 1;

        return $importedEntry;
    }
}

View file

@ -0,0 +1,132 @@
<?php
namespace Wallabag\CoreBundle\Import;
use Wallabag\CoreBundle\Entity\Entry;
/**
 * Importer for Pinboard exports stored as a JSON file.
 */
class PinboardImport extends AbstractImport
{
    private $filepath;

    public function getName()
    {
        return 'Pinboard';
    }

    public function getUrl()
    {
        return 'import_pinboard';
    }

    public function getDescription()
    {
        return 'import.pinboard.description';
    }

    /**
     * Set file path to the json file.
     *
     * @param string $filepath
     *
     * @return $this
     */
    public function setFilepath($filepath)
    {
        $this->filepath = $filepath;

        return $this;
    }

    /**
     * Read the JSON file and either queue or import its content.
     *
     * @return bool false when the user is missing, the file is unreadable or
     *              decodes to an empty value; true otherwise
     */
    public function import()
    {
        if (!$this->user) {
            $this->logger->error('PinboardImport: user is not defined');

            return false;
        }

        if (!file_exists($this->filepath) || !is_readable($this->filepath)) {
            $this->logger->error('PinboardImport: unable to read file', ['filepath' => $this->filepath]);

            return false;
        }

        $data = json_decode(file_get_contents($this->filepath), true);

        if (empty($data)) {
            $this->logger->error('PinboardImport: no entries in imported file');

            return false;
        }

        if ($this->producer) {
            $this->parseEntriesForProducer($data);

            return true;
        }

        $this->parseEntries($data);

        return true;
    }

    /**
     * An entry is valid as soon as it carries a non-empty "href".
     *
     * @return bool
     */
    public function validateEntry(array $importedEntry)
    {
        if (empty($importedEntry['href'])) {
            return false;
        }

        return true;
    }

    /**
     * Build and persist an Entry from one Pinboard bookmark.
     *
     * @return Entry|null null when the url already exists for the user
     */
    public function parseEntry(array $importedEntry)
    {
        $existingEntry = $this->em
            ->getRepository(Entry::class)
            ->findByUrlAndUserId($importedEntry['href'], $this->user->getId());

        if (false !== $existingEntry) {
            ++$this->skippedEntries;

            return null;
        }

        // Tags are space separated. explode() on an empty string (or around
        // doubled spaces) yields '' items, which would make the list look
        // non-empty and be assigned as blank tags: drop them.
        $tags = array_values(array_filter(
            explode(' ', (string) ($importedEntry['tags'] ?? '')),
            static function ($tag) {
                return '' !== $tag;
            }
        ));

        $data = [
            'title' => $importedEntry['description'],
            'url' => $importedEntry['href'],
            'is_archived' => ('no' === $importedEntry['toread']) || $this->markAsRead,
            'is_starred' => false,
            'created_at' => $importedEntry['time'],
            'tags' => $tags,
        ];

        $entry = new Entry($this->user);
        $entry->setUrl($data['url']);
        $entry->setTitle($data['title']);

        // fetchContent() only logs failures, so the entry is kept even when fetching fails
        $this->fetchContent($entry, $data['url'], $data);

        if (!empty($data['tags'])) {
            $this->tagsAssigner->assignTagsToEntry(
                $entry,
                $data['tags'],
                $this->em->getUnitOfWork()->getScheduledEntityInsertions()
            );
        }

        $entry->updateArchived($data['is_archived']);
        $entry->setStarred($data['is_starred']);
        $entry->setCreatedAt(new \DateTime($data['created_at']));

        $this->em->persist($entry);
        ++$this->importedEntries;

        return $entry;
    }

    /**
     * Flag the raw entry as read: parseEntry() archives entries whose
     * "toread" value is 'no'.
     *
     * @return array
     */
    protected function setEntryAsRead(array $importedEntry)
    {
        $importedEntry['toread'] = 'no';

        return $importedEntry;
    }
}

View file

@ -0,0 +1,98 @@
<?php
namespace Wallabag\CoreBundle\Import;
/**
 * HTML importer for Pocket exports; the creation date is read from the
 * "time_added" attribute of each anchor.
 */
class PocketHtmlImport extends HtmlImport
{
    protected $filepath;

    public function getName()
    {
        return 'Pocket HTML';
    }

    public function getUrl()
    {
        return 'import_pocket_html';
    }

    public function getDescription()
    {
        return 'import.pocket_html.description';
    }

    /**
     * An entry is valid as soon as it carries a non-empty "url".
     *
     * @return bool
     */
    public function validateEntry(array $importedEntry)
    {
        if (empty($importedEntry['url'])) {
            return false;
        }

        return true;
    }

    /**
     * Read the HTML file and either queue or import its anchors.
     *
     * @return bool false when the user is missing, the file is unreadable or
     *              contains no anchor; true otherwise
     */
    public function import()
    {
        if (!$this->user) {
            $this->logger->error('Pocket HTML Import: user is not defined');

            return false;
        }

        if (!file_exists($this->filepath) || !is_readable($this->filepath)) {
            $this->logger->error('Pocket HTML Import: unable to read file', ['filepath' => $this->filepath]);

            return false;
        }

        $html = new \DOMDocument();

        // Silence parser warnings for the load only, then restore the caller's
        // libxml setting and drop the buffered errors so they don't pile up.
        $previousLibxmlState = libxml_use_internal_errors(true);
        $html->loadHTMLFile($this->filepath);
        $hrefs = $html->getElementsByTagName('a');
        libxml_clear_errors();
        libxml_use_internal_errors($previousLibxmlState);

        if (0 === $hrefs->length) {
            $this->logger->error('Pocket HTML: no entries in imported file');

            return false;
        }

        $entries = [];
        foreach ($hrefs as $href) {
            $entry = [];
            $entry['url'] = $href->getAttribute('href');
            $entry['tags'] = $href->getAttribute('tags');
            $entry['created_at'] = $href->getAttribute('time_added');
            $entries[] = $entry;
        }

        if ($this->producer) {
            $this->parseEntriesForProducer($entries);

            return true;
        }

        $this->parseEntries($entries);

        return true;
    }

    /**
     * Map an extracted anchor to the generic entry array. The title is left
     * empty and "tags" is only filled when the anchor carried some.
     */
    protected function prepareEntry(array $entry = [])
    {
        $data = [
            'title' => '',
            'html' => false,
            'url' => $entry['url'],
            'is_archived' => (int) $this->markAsRead,
            'is_starred' => false,
            'tags' => '',
            'created_at' => $entry['created_at'],
        ];

        if (\array_key_exists('tags', $entry) && '' !== $entry['tags']) {
            $data['tags'] = $entry['tags'];
        }

        return $data;
    }
}

225
src/Import/PocketImport.php Normal file
View file

@ -0,0 +1,225 @@
<?php
namespace Wallabag\CoreBundle\Import;
use Symfony\Component\HttpFoundation\Request;
use Symfony\Contracts\HttpClient\Exception\ExceptionInterface;
use Symfony\Contracts\HttpClient\HttpClientInterface;
use Wallabag\CoreBundle\Entity\Entry;
class PocketImport extends AbstractImport
{
public const NB_ELEMENTS = 5000;
/**
* @var HttpClientInterface
*/
private $client;
private $accessToken;
/**
* Only used for test purpose.
*
* @return string
*/
public function getAccessToken()
{
return $this->accessToken;
}
public function getName()
{
return 'Pocket';
}
public function getUrl()
{
return 'import_pocket';
}
public function getDescription()
{
return 'import.pocket.description';
}
/**
* Return the oauth url to authenticate the client.
*
* @param string $redirectUri Redirect url in case of error
*
* @return string|false request_token for callback method
*/
public function getRequestToken($redirectUri)
{
try {
$response = $this->client->request(Request::METHOD_POST, 'https://getpocket.com/v3/oauth/request', [
'json' => [
'consumer_key' => $this->user->getConfig()->getPocketConsumerKey(),
'redirect_uri' => $redirectUri,
],
]);
return $response->toArray()['code'];
} catch (ExceptionInterface $e) {
$this->logger->error(sprintf('PocketImport: Failed to request token: %s', $e->getMessage()), ['exception' => $e]);
return false;
}
}
/**
* Usually called by the previous callback to authorize the client.
* Then it return a token that can be used for next requests.
*
* @param string $code request_token from getRequestToken
*
* @return bool
*/
public function authorize($code)
{
try {
$response = $this->client->request(Request::METHOD_POST, 'https://getpocket.com/v3/oauth/authorize', [
'json' => [
'consumer_key' => $this->user->getConfig()->getPocketConsumerKey(),
'code' => $code,
],
]);
$this->accessToken = $response->toArray()['access_token'];
return true;
} catch (ExceptionInterface $e) {
$this->logger->error(sprintf('PocketImport: Failed to authorize client: %s', $e->getMessage()), ['exception' => $e]);
return false;
}
}
public function import($offset = 0)
{
static $run = 0;
try {
$response = $this->client->request(Request::METHOD_POST, 'https://getpocket.com/v3/get', [
'json' => [
'consumer_key' => $this->user->getConfig()->getPocketConsumerKey(),
'access_token' => $this->accessToken,
'detailType' => 'complete',
'state' => 'all',
'sort' => 'newest',
'count' => self::NB_ELEMENTS,
'offset' => $offset,
],
]);
$entries = $response->toArray();
if ($this->producer) {
$this->parseEntriesForProducer($entries['list']);
} else {
$this->parseEntries($entries['list']);
}
// if we retrieve exactly the amount of items requested it means we can get more
// re-call import and offset item by the amount previous received:
// - first call get 5k offset 0
// - second call get 5k offset 5k
// - and so on
if (self::NB_ELEMENTS === \count($entries['list'])) {
++$run;
return $this->import(self::NB_ELEMENTS * $run);
}
return true;
} catch (ExceptionInterface $e) {
$this->logger->error(sprintf('PocketImport: Failed to import: %s', $e->getMessage()), ['exception' => $e]);
return false;
}
}
/**
* Set the Http client.
*/
public function setClient(HttpClientInterface $pocketClient)
{
$this->client = $pocketClient;
}
public function validateEntry(array $importedEntry)
{
if (empty($importedEntry['resolved_url']) && empty($importedEntry['given_url'])) {
return false;
}
return true;
}
/**
 * Turn one Pocket item into a new Entry owned by the current user.
 *
 * An item whose url is already stored for the user is only counted as skipped.
 *
 * @see https://getpocket.com/developer/docs/v3/retrieve
 *
 * @param array $importedEntry Item as received from the Pocket API
 *
 * @return Entry|null The persisted entry, or null when the url already exists for the user
 */
public function parseEntry(array $importedEntry)
{
    // prefer the resolved url and fall back on the one originally given
    if (isset($importedEntry['resolved_url']) && '' !== $importedEntry['resolved_url']) {
        $url = $importedEntry['resolved_url'];
    } else {
        $url = $importedEntry['given_url'];
    }

    $entryRepository = $this->em->getRepository(Entry::class);
    $duplicate = $entryRepository->findByUrlAndUserId($url, $this->user->getId());

    if (false !== $duplicate) {
        ++$this->skippedEntries;

        return null;
    }

    $entry = new Entry($this->user);
    $entry->setUrl($url);

    // fill the entry with fetched content (if fetching fails, the entry is kept as given)
    $this->fetchContent($entry, $url);

    // status is 0, 1 or 2: 1 means the item is archived, 2 means it should be deleted
    $isArchived = 1 === (int) $importedEntry['status'] || $this->markAsRead;
    $entry->updateArchived($isArchived);

    // favorite is 0 or 1: 1 means the item is starred
    $entry->setStarred(1 === (int) $importedEntry['favorite']);

    // the first non-empty title wins, the resolved one being tried first
    $title = 'Untitled';
    foreach (['resolved_title', 'given_title'] as $titleKey) {
        if (isset($importedEntry[$titleKey]) && '' !== $importedEntry[$titleKey]) {
            $title = $importedEntry[$titleKey];
            break;
        }
    }
    $entry->setTitle($title);

    // has_image is 0, 1 or 2: 1 means the item has images in it, 2 means the item is an image
    $hasImage = isset($importedEntry['has_image']) && $importedEntry['has_image'] > 0;
    if ($hasImage && isset($importedEntry['images'][1])) {
        $entry->setPreviewPicture($importedEntry['images'][1]['src']);
    }

    if (!empty($importedEntry['tags'])) {
        // the keys of the `tags` map are what gets assigned to the entry
        $tagNames = array_keys($importedEntry['tags']);
        $pendingInsertions = $this->em->getUnitOfWork()->getScheduledEntityInsertions();

        $this->tagsAssigner->assignTagsToEntry($entry, $tagNames, $pendingInsertions);
    }

    if (!empty($importedEntry['time_added'])) {
        $createdAt = new \DateTime();
        $createdAt->setTimestamp($importedEntry['time_added']);

        $entry->setCreatedAt($createdAt);
    }

    $this->em->persist($entry);
    ++$this->importedEntries;

    return $entry;
}
/**
 * Return a copy of the imported item with its `status` forced to '1' (archived).
 *
 * @param array $importedEntry Item as received from the Pocket API
 *
 * @return array
 */
protected function setEntryAsRead(array $importedEntry)
{
    return array_replace($importedEntry, ['status' => '1']);
}
}

View file

@ -0,0 +1,124 @@
<?php
namespace Wallabag\CoreBundle\Import;
use Wallabag\CoreBundle\Entity\Entry;
/**
 * Importer for Readability json exports.
 *
 * Reads the file given through setFilepath() and creates one Entry per item
 * found in its `bookmarks` list.
 */
class ReadabilityImport extends AbstractImport
{
    /**
     * Path to the json file to import.
     */
    private $filepath;

    /**
     * @return string
     */
    public function getName()
    {
        return 'Readability';
    }

    /**
     * @return string
     */
    public function getUrl()
    {
        return 'import_readability';
    }

    /**
     * @return string
     */
    public function getDescription()
    {
        return 'import.readability.description';
    }

    /**
     * Set file path to the json file.
     *
     * @param string $filepath
     *
     * @return $this
     */
    public function setFilepath($filepath)
    {
        $this->filepath = $filepath;

        return $this;
    }

    /**
     * Read the json file and parse (or queue, when a producer is set) each bookmark.
     *
     * @return bool false when the user is missing, the file can't be read or it holds
     *              no usable `bookmarks` list; true otherwise
     */
    public function import()
    {
        if (!$this->user) {
            $this->logger->error('ReadabilityImport: user is not defined');

            return false;
        }

        if (!file_exists($this->filepath) || !is_readable($this->filepath)) {
            $this->logger->error('ReadabilityImport: unable to read file', ['filepath' => $this->filepath]);

            return false;
        }

        $data = json_decode(file_get_contents($this->filepath), true);

        // json_decode() yields null on invalid json and may yield scalars: only a
        // non-empty `bookmarks` array can be handed over to the entry parsers
        if (!\is_array($data) || empty($data['bookmarks']) || !\is_array($data['bookmarks'])) {
            $this->logger->error('ReadabilityImport: no entries in imported file');

            return false;
        }

        if ($this->producer) {
            $this->parseEntriesForProducer($data['bookmarks']);
        } else {
            $this->parseEntries($data['bookmarks']);
        }

        return true;
    }

    /**
     * A bookmark is valid as soon as it has a non-empty `article__url`.
     *
     * @return bool
     */
    public function validateEntry(array $importedEntry)
    {
        return !empty($importedEntry['article__url']);
    }

    /**
     * Turn one bookmark into a new Entry owned by the current user.
     *
     * A bookmark whose url is already stored for the user is only counted as skipped.
     *
     * @param array $importedEntry One item of the `bookmarks` list
     *
     * @return Entry|null The persisted entry, or null when the url already exists for the user
     */
    public function parseEntry(array $importedEntry)
    {
        $existingEntry = $this->em
            ->getRepository(Entry::class)
            ->findByUrlAndUserId($importedEntry['article__url'], $this->user->getId());

        if (false !== $existingEntry) {
            ++$this->skippedEntries;

            return null;
        }

        $data = [
            'title' => $importedEntry['article__title'],
            'url' => $importedEntry['article__url'],
            'is_archived' => $importedEntry['archive'] || $this->markAsRead,
            'is_starred' => $importedEntry['favorite'],
            'created_at' => $importedEntry['date_added'],
            'html' => false,
        ];

        $entry = new Entry($this->user);
        $entry->setUrl($data['url']);
        $entry->setTitle($data['title']);

        // update entry with content (in case fetching failed, the given entry will be returned)
        $this->fetchContent($entry, $data['url'], $data);

        $entry->updateArchived($data['is_archived']);
        $entry->setStarred($data['is_starred']);
        $entry->setCreatedAt(new \DateTime($data['created_at']));

        $this->em->persist($entry);
        ++$this->importedEntries;

        return $entry;
    }

    /**
     * Return a copy of the bookmark with its `archive` flag set to 1.
     *
     * @return array
     */
    protected function setEntryAsRead(array $importedEntry)
    {
        $importedEntry['archive'] = 1;

        return $importedEntry;
    }
}

View file

@ -0,0 +1,51 @@
<?php
namespace Wallabag\CoreBundle\Import;
/**
 * Importer for Shaarli exports, built on top of HtmlImport.
 */
class ShaarliImport extends HtmlImport
{
    /**
     * Path to the file to import.
     */
    protected $filepath;

    /**
     * @return string
     */
    public function getName()
    {
        return 'Shaarli';
    }

    /**
     * @return string
     */
    public function getUrl()
    {
        return 'import_shaarli';
    }

    /**
     * @return string
     */
    public function getDescription()
    {
        return 'import.shaarli.description';
    }

    /**
     * An imported entry is valid as soon as it has a non-empty `url`.
     *
     * @return bool
     */
    public function validateEntry(array $importedEntry)
    {
        return !empty($importedEntry['url']);
    }

    /**
     * Build the normalized data array for one imported entry.
     *
     * Title is left empty, content is not provided (`html` is false) and tags
     * default to an empty string unless the entry carries a non-empty `tags` value.
     *
     * @param array $entry Data from the imported file
     *
     * @return array
     */
    protected function prepareEntry(array $entry = [])
    {
        return [
            'title' => '',
            'html' => false,
            'url' => $entry['url'],
            'is_archived' => (int) $this->markAsRead,
            'is_starred' => false,
            'tags' => (\array_key_exists('tags', $entry) && '' !== $entry['tags']) ? $entry['tags'] : '',
            'created_at' => $entry['created_at'],
        ];
    }
}

View file

@ -0,0 +1,144 @@
<?php
namespace Wallabag\CoreBundle\Import;
use Wallabag\CoreBundle\Entity\Entry;
/**
 * Shared logic for importers reading a wallabag json export.
 *
 * Concrete importers only describe themselves (name, url, description) and
 * normalize each raw entry through prepareEntry().
 */
abstract class WallabagImport extends AbstractImport
{
    /**
     * Path to the json file to import.
     */
    protected $filepath;

    // untitled in all languages from v1
    protected $untitled = [
        'Untitled',
        'Sans titre',
        'podle nadpisu',
        'Sin título',
        'با عنوان',
        'per titolo',
        'Sem título',
        'Без названия',
        'po naslovu',
        'Без назви',
        'No title found',
        '',
    ];

    abstract public function getName();

    abstract public function getUrl();

    abstract public function getDescription();

    /**
     * Read the json file and parse (or queue, when a producer is set) each entry.
     *
     * @return bool false when the user is missing, the file can't be read or it does
     *              not decode to a non-empty array; true otherwise
     */
    public function import()
    {
        if (!$this->user) {
            $this->logger->error('WallabagImport: user is not defined');

            return false;
        }

        if (!file_exists($this->filepath) || !is_readable($this->filepath)) {
            $this->logger->error('WallabagImport: unable to read file', ['filepath' => $this->filepath]);

            return false;
        }

        $data = json_decode(file_get_contents($this->filepath), true);

        // json_decode() yields null on invalid json and may yield scalars (e.g. for
        // `"foo"` or `42`): only a non-empty array can be handed over to the entry parsers
        if (!\is_array($data) || empty($data)) {
            $this->logger->error('WallabagImport: no entries in imported file');

            return false;
        }

        if ($this->producer) {
            $this->parseEntriesForProducer($data);
        } else {
            $this->parseEntries($data);
        }

        return true;
    }

    /**
     * Set file path to the json file.
     *
     * @param string $filepath
     *
     * @return $this
     */
    public function setFilepath($filepath)
    {
        $this->filepath = $filepath;

        return $this;
    }

    /**
     * An imported entry is valid as soon as it has a non-empty `url`.
     *
     * @return bool
     */
    public function validateEntry(array $importedEntry)
    {
        return !empty($importedEntry['url']);
    }

    /**
     * Turn one imported entry into a new Entry owned by the current user.
     *
     * An entry whose url is already stored for the user is only counted as skipped.
     *
     * @param array $importedEntry Data from the imported file
     *
     * @return Entry|null The persisted entry, or null when the url already exists for the user
     */
    public function parseEntry(array $importedEntry)
    {
        $existingEntry = $this->em
            ->getRepository(Entry::class)
            ->findByUrlAndUserId($importedEntry['url'], $this->user->getId());

        if (false !== $existingEntry) {
            ++$this->skippedEntries;

            return null;
        }

        $data = $this->prepareEntry($importedEntry);

        $entry = new Entry($this->user);
        $entry->setUrl($data['url']);
        $entry->setTitle($data['title']);

        // NOTE(review): the mere presence of the `is_parsed` key flags the entry as
        // not parsed, whatever its value - confirm this is the intended contract
        if (\array_key_exists('is_parsed', $data)) {
            $entry->setNotParsed(true);
        }

        // update entry with content (in case fetching failed, the given entry will be returned)
        $this->fetchContent($entry, $data['url'], $data);

        if (\array_key_exists('tags', $data)) {
            $this->tagsAssigner->assignTagsToEntry(
                $entry,
                $data['tags'],
                $this->em->getUnitOfWork()->getScheduledEntityInsertions()
            );
        }

        // the preview picture is read from the raw entry, not from the prepared data
        if (isset($importedEntry['preview_picture'])) {
            $entry->setPreviewPicture($importedEntry['preview_picture']);
        }

        $entry->updateArchived($data['is_archived']);
        $entry->setStarred($data['is_starred']);

        if (!empty($data['created_at'])) {
            $entry->setCreatedAt(new \DateTime($data['created_at']));
        }

        $this->em->persist($entry);
        ++$this->importedEntries;

        return $entry;
    }

    /**
     * This should return a cleaned array for a given entry to be given to `updateEntry`.
     *
     * parseEntry() reads the `url`, `title`, `is_archived` and `is_starred` keys of the
     * returned array, plus the optional `tags`, `created_at` and `is_parsed` keys.
     *
     * @param array $entry Data from the imported file
     *
     * @return array
     */
    abstract protected function prepareEntry($entry = []);
}

View file

@ -0,0 +1,72 @@
<?php
namespace Wallabag\CoreBundle\Import;
use Doctrine\ORM\EntityManagerInterface;
use Psr\Log\LoggerInterface;
use Symfony\Component\EventDispatcher\EventDispatcherInterface;
use Wallabag\CoreBundle\Helper\ContentProxy;
use Wallabag\CoreBundle\Helper\TagsAssigner;
/**
 * Importer for wallabag v1 json exports.
 */
class WallabagV1Import extends WallabagImport
{
    /**
     * Content used in place of the v1 content for entries that were badly fetched in v1.
     */
    protected $fetchingErrorMessage;

    /**
     * Title used in place of the v1 title for entries that were badly fetched in v1.
     */
    protected $fetchingErrorMessageTitle;

    /**
     * @param mixed $fetchingErrorMessageTitle Replacement title for badly fetched v1 entries
     * @param mixed $fetchingErrorMessage      Replacement content for badly fetched v1 entries
     */
    public function __construct(EntityManagerInterface $em, ContentProxy $contentProxy, TagsAssigner $tagsAssigner, EventDispatcherInterface $eventDispatcher, LoggerInterface $logger, $fetchingErrorMessageTitle, $fetchingErrorMessage)
    {
        $this->fetchingErrorMessageTitle = $fetchingErrorMessageTitle;
        $this->fetchingErrorMessage = $fetchingErrorMessage;

        parent::__construct($em, $contentProxy, $tagsAssigner, $eventDispatcher, $logger);
    }

    /**
     * @return string
     */
    public function getName()
    {
        return 'wallabag v1';
    }

    /**
     * @return string
     */
    public function getUrl()
    {
        return 'import_wallabag_v1';
    }

    /**
     * @return string
     */
    public function getDescription()
    {
        return 'import.wallabag_v1.description';
    }

    /**
     * Normalize a v1 entry into the array consumed by WallabagImport::parseEntry().
     *
     * @param array $entry Data from the imported file
     *
     * @return array
     */
    protected function prepareEntry($entry = [])
    {
        $data = [
            'title' => $entry['title'],
            'html' => $entry['content'],
            'url' => $entry['url'],
            'is_archived' => $entry['is_read'] || $this->markAsRead,
            'is_starred' => $entry['is_fav'],
            'tags' => '',
            'created_at' => '',
        ];

        // In case of a bad fetch in v1, replace title and content with v2 error strings
        // If fetching fails again, they will get this instead of the v1 strings
        if (\in_array($entry['title'], $this->untitled, true)) {
            $data['title'] = $this->fetchingErrorMessageTitle;
            $data['html'] = $this->fetchingErrorMessage;
            // The flag must be written to the returned $data: $entry is a local copy
            // that never leaves this method. WallabagImport::parseEntry() marks the
            // entry as not parsed when the prepared data has an `is_parsed` key.
            $data['is_parsed'] = false;
        }

        if (\array_key_exists('tags', $entry) && '' !== $entry['tags']) {
            $data['tags'] = $entry['tags'];
        }

        return $data;
    }

    /**
     * Return a copy of the imported entry with its `is_read` flag set to 1.
     *
     * @return array
     */
    protected function setEntryAsRead(array $importedEntry)
    {
        $importedEntry['is_read'] = 1;

        return $importedEntry;
    }
}

View file

@ -0,0 +1,40 @@
<?php
namespace Wallabag\CoreBundle\Import;
/**
 * Importer for wallabag v2 json exports.
 */
class WallabagV2Import extends WallabagImport
{
    /**
     * @return string
     */
    public function getName()
    {
        return 'wallabag v2';
    }

    /**
     * @return string
     */
    public function getUrl()
    {
        return 'import_wallabag_v2';
    }

    /**
     * @return string
     */
    public function getDescription()
    {
        return 'import.wallabag_v2.description';
    }

    /**
     * Normalize a v2 entry: a few keys are recomputed, every other key of the
     * imported entry is carried over untouched.
     *
     * @param array $entry Data from the imported file
     *
     * @return array
     */
    protected function prepareEntry($entry = [])
    {
        $overrides = [
            'html' => $entry['content'],
            'headers' => [
                'content-type' => $entry['mimetype'],
            ],
            'is_archived' => $entry['is_archived'] || $this->markAsRead,
            'is_starred' => (bool) $entry['is_starred'],
        ];

        // with the array union operator, keys of $overrides win over those of $entry
        return $overrides + $entry;
    }

    /**
     * Return a copy of the imported entry with its `is_archived` flag set to 1.
     *
     * @return array
     */
    protected function setEntryAsRead(array $importedEntry)
    {
        return array_replace($importedEntry, ['is_archived' => 1]);
    }
}