1
0
Fork 0
mirror of https://github.com/wallabag/wallabag.git synced 2025-09-15 18:57:05 +00:00

Add Shaarli and Pocket HTML imports

This commit is contained in:
Nicolas Lœuillet 2023-07-26 12:49:30 +02:00 committed by Nicolas Lœuillet
parent 40aeeafea2
commit 2af48b8174
26 changed files with 1730 additions and 10 deletions

View file

@ -13,10 +13,13 @@ use Symfony\Component\Security\Core\Authentication\Token\Storage\TokenStorageInt
use Symfony\Component\Security\Core\Authentication\Token\UsernamePasswordToken;
use Wallabag\ImportBundle\Import\ChromeImport;
use Wallabag\ImportBundle\Import\DeliciousImport;
use Wallabag\ImportBundle\Import\ElcuratorImport;
use Wallabag\ImportBundle\Import\FirefoxImport;
use Wallabag\ImportBundle\Import\InstapaperImport;
use Wallabag\ImportBundle\Import\PinboardImport;
use Wallabag\ImportBundle\Import\PocketHtmlImport;
use Wallabag\ImportBundle\Import\ReadabilityImport;
use Wallabag\ImportBundle\Import\ShaarliImport;
use Wallabag\ImportBundle\Import\WallabagV1Import;
use Wallabag\ImportBundle\Import\WallabagV2Import;
use Wallabag\UserBundle\Entity\User;
@ -35,9 +38,26 @@ class ImportCommand extends Command
private PinboardImport $pinboardImport;
private DeliciousImport $deliciousImport;
private WallabagV1Import $wallabagV1Import;
private ElcuratorImport $elcuratorImport;
private ShaarliImport $shaarliImport;
private PocketHtmlImport $pocketHtmlImport;
public function __construct(EntityManagerInterface $entityManager, TokenStorageInterface $tokenStorage, UserRepository $userRepository, WallabagV2Import $wallabagV2Import, FirefoxImport $firefoxImport, ChromeImport $chromeImport, ReadabilityImport $readabilityImport, InstapaperImport $instapaperImport, PinboardImport $pinboardImport, DeliciousImport $deliciousImport, WallabagV1Import $wallabagV1Import)
{
public function __construct(
EntityManagerInterface $entityManager,
TokenStorageInterface $tokenStorage,
UserRepository $userRepository,
WallabagV2Import $wallabagV2Import,
FirefoxImport $firefoxImport,
ChromeImport $chromeImport,
ReadabilityImport $readabilityImport,
InstapaperImport $instapaperImport,
PinboardImport $pinboardImport,
DeliciousImport $deliciousImport,
WallabagV1Import $wallabagV1Import,
ElcuratorImport $elcuratorImport,
ShaarliImport $shaarliImport,
PocketHtmlImport $pocketHtmlImport
) {
$this->entityManager = $entityManager;
$this->tokenStorage = $tokenStorage;
$this->userRepository = $userRepository;
@ -49,6 +69,9 @@ class ImportCommand extends Command
$this->pinboardImport = $pinboardImport;
$this->deliciousImport = $deliciousImport;
$this->wallabagV1Import = $wallabagV1Import;
$this->elcuratorImport = $elcuratorImport;
$this->shaarliImport = $shaarliImport;
$this->pocketHtmlImport = $pocketHtmlImport;
parent::__construct();
}
@ -60,7 +83,7 @@ class ImportCommand extends Command
->setDescription('Import entries from a JSON export')
->addArgument('username', InputArgument::REQUIRED, 'User to populate')
->addArgument('filepath', InputArgument::REQUIRED, 'Path to the JSON file')
->addOption('importer', null, InputOption::VALUE_OPTIONAL, 'The importer to use: v1, v2, instapaper, pinboard, delicious, readability, firefox or chrome', 'v1')
->addOption('importer', null, InputOption::VALUE_OPTIONAL, 'The importer to use: v1, v2, instapaper, pinboard, delicious, readability, firefox, chrome, elcurator, shaarli or pocket', 'v1')
->addOption('markAsRead', null, InputOption::VALUE_OPTIONAL, 'Mark all entries as read', false)
->addOption('useUserId', null, InputOption::VALUE_NONE, 'Use user id instead of username to find account')
->addOption('disableContentUpdate', null, InputOption::VALUE_NONE, 'Disable fetching updated content from URL')
@ -120,6 +143,15 @@ class ImportCommand extends Command
case 'delicious':
$import = $this->deliciousImport;
break;
case 'elcurator':
$import = $this->elcuratorImport;
break;
case 'shaarli':
$import = $this->shaarliImport;
break;
case 'pocket':
$import = $this->pocketHtmlImport;
break;
default:
$import = $this->wallabagV1Import;
}

View file

@ -20,9 +20,23 @@ class RabbitMQConsumerTotalProxy
private Consumer $pinboardConsumer;
private Consumer $deliciousConsumer;
private Consumer $elcuratorConsumer;
private Consumer $shaarliConsumer;
private Consumer $pocketHtmlConsumer;
public function __construct(Consumer $pocketConsumer, Consumer $readabilityConsumer, Consumer $wallabagV1Consumer, Consumer $wallabagV2Consumer, Consumer $firefoxConsumer, Consumer $chromeConsumer, Consumer $instapaperConsumer, Consumer $pinboardConsumer, Consumer $deliciousConsumer, Consumer $elcuratorConsumer)
{
public function __construct(
Consumer $pocketConsumer,
Consumer $readabilityConsumer,
Consumer $wallabagV1Consumer,
Consumer $wallabagV2Consumer,
Consumer $firefoxConsumer,
Consumer $chromeConsumer,
Consumer $instapaperConsumer,
Consumer $pinboardConsumer,
Consumer $deliciousConsumer,
Consumer $elcuratorConsumer,
Consumer $shaarliConsumer,
Consumer $pocketHtmlConsumer
) {
$this->pocketConsumer = $pocketConsumer;
$this->readabilityConsumer = $readabilityConsumer;
$this->wallabagV1Consumer = $wallabagV1Consumer;
@ -33,6 +47,8 @@ class RabbitMQConsumerTotalProxy
$this->pinboardConsumer = $pinboardConsumer;
$this->deliciousConsumer = $deliciousConsumer;
$this->elcuratorConsumer = $elcuratorConsumer;
$this->shaarliConsumer = $shaarliConsumer;
$this->pocketHtmlConsumer = $pocketHtmlConsumer;
}
/**
@ -77,6 +93,12 @@ class RabbitMQConsumerTotalProxy
case 'elcurator':
$consumer = $this->elcuratorConsumer;
break;
case 'shaarli':
$consumer = $this->shaarliConsumer;
break;
case 'pocket_html':
$consumer = $this->pocketHtmlConsumer;
break;
default:
return 0;
}

View file

@ -0,0 +1,83 @@
<?php
namespace Wallabag\ImportBundle\Controller;
use Symfony\Bundle\FrameworkBundle\Controller\AbstractController;
use Symfony\Component\HttpFoundation\Request;
use Symfony\Component\HttpFoundation\Response;
use Symfony\Component\Routing\Annotation\Route;
use Symfony\Contracts\Translation\TranslatorInterface;
use Wallabag\ImportBundle\Form\Type\UploadImportType;
use Wallabag\ImportBundle\Import\ImportInterface;
abstract class HtmlController extends AbstractController
{
/**
* @Route("/html", name="import_html")
*
* @return Response
*/
public function indexAction(Request $request, TranslatorInterface $translator)
{
$form = $this->createForm(UploadImportType::class);
$form->handleRequest($request);
$wallabag = $this->getImportService();
$wallabag->setUser($this->getUser());
if ($form->isSubmitted() && $form->isValid()) {
$file = $form->get('file')->getData();
$markAsRead = $form->get('mark_as_read')->getData();
$name = $this->getUser()->getId() . '.html';
if (null !== $file && \in_array($file->getClientMimeType(), $this->getParameter('wallabag_import.allow_mimetypes'), true) && $file->move($this->getParameter('wallabag_import.resource_dir'), $name)) {
$res = $wallabag
->setFilepath($this->getParameter('wallabag_import.resource_dir') . '/' . $name)
->setMarkAsRead($markAsRead)
->import();
$message = 'flashes.import.notice.failed';
if (true === $res) {
$summary = $wallabag->getSummary();
$message = $translator->trans('flashes.import.notice.summary', [
'%imported%' => $summary['imported'],
'%skipped%' => $summary['skipped'],
]);
if (0 < $summary['queued']) {
$message = $translator->trans('flashes.import.notice.summary_with_queue', [
'%queued%' => $summary['queued'],
]);
}
unlink($this->getParameter('wallabag_import.resource_dir') . '/' . $name);
}
$this->addFlash('notice', $message);
return $this->redirect($this->generateUrl('homepage'));
}
$this->addFlash('notice', 'flashes.import.notice.failed_on_file');
}
return $this->render($this->getImportTemplate(), [
'form' => $form->createView(),
'import' => $wallabag,
]);
}
/**
* Return the service to handle the import.
*
* @return ImportInterface
*/
abstract protected function getImportService();
/**
* Return the template used for the form.
*
* @return string
*/
abstract protected function getImportTemplate();
}

View file

@ -57,6 +57,8 @@ class ImportController extends AbstractController
+ $this->rabbitMQConsumerTotalProxy->getTotalMessage('pinboard')
+ $this->rabbitMQConsumerTotalProxy->getTotalMessage('delicious')
+ $this->rabbitMQConsumerTotalProxy->getTotalMessage('elcurator')
+ $this->rabbitMQConsumerTotalProxy->getTotalMessage('shaarli')
+ $this->rabbitMQConsumerTotalProxy->getTotalMessage('pocket_html')
;
} catch (\Exception $e) {
$rabbitNotInstalled = true;
@ -75,6 +77,8 @@ class ImportController extends AbstractController
+ $redis->llen('wallabag.import.pinboard')
+ $redis->llen('wallabag.import.delicious')
+ $redis->llen('wallabag.import.elcurator')
+ $redis->llen('wallabag.import.shaarli')
+ $redis->llen('wallabag.import.pocket_html')
;
} catch (\Exception $e) {
$redisNotInstalled = true;

View file

@ -0,0 +1,57 @@
<?php
namespace Wallabag\ImportBundle\Controller;
use Craue\ConfigBundle\Util\Config;
use OldSound\RabbitMqBundle\RabbitMq\Producer as RabbitMqProducer;
use Symfony\Component\HttpFoundation\Request;
use Symfony\Component\Routing\Annotation\Route;
use Symfony\Contracts\Translation\TranslatorInterface;
use Wallabag\ImportBundle\Import\PocketHtmlImport;
use Wallabag\ImportBundle\Redis\Producer as RedisProducer;
class PocketHtmlController extends HtmlController
{
private PocketHtmlImport $pocketHtmlImport;
private Config $craueConfig;
private RabbitMqProducer $rabbitMqProducer;
private RedisProducer $redisProducer;
public function __construct(PocketHtmlImport $pocketHtmlImport, Config $craueConfig, RabbitMqProducer $rabbitMqProducer, RedisProducer $redisProducer)
{
$this->pocketHtmlImport = $pocketHtmlImport;
$this->craueConfig = $craueConfig;
$this->rabbitMqProducer = $rabbitMqProducer;
$this->redisProducer = $redisProducer;
}
/**
* @Route("/pocket_html", name="import_pocket_html")
*/
public function indexAction(Request $request, TranslatorInterface $translator)
{
return parent::indexAction($request, $translator);
}
/**
* {@inheritdoc}
*/
protected function getImportService()
{
if ($this->craueConfig->get('import_with_rabbitmq')) {
$this->pocketHtmlImport->setProducer($this->rabbitMqProducer);
} elseif ($this->craueConfig->get('import_with_redis')) {
$this->pocketHtmlImport->setProducer($this->redisProducer);
}
return $this->pocketHtmlImport;
}
/**
* {@inheritdoc}
*/
protected function getImportTemplate()
{
return '@WallabagImport/PocketHtml/index.html.twig';
}
}

View file

@ -0,0 +1,57 @@
<?php
namespace Wallabag\ImportBundle\Controller;
use Craue\ConfigBundle\Util\Config;
use OldSound\RabbitMqBundle\RabbitMq\Producer as RabbitMqProducer;
use Symfony\Component\HttpFoundation\Request;
use Symfony\Component\Routing\Annotation\Route;
use Symfony\Contracts\Translation\TranslatorInterface;
use Wallabag\ImportBundle\Import\ShaarliImport;
use Wallabag\ImportBundle\Redis\Producer as RedisProducer;
class ShaarliController extends HtmlController
{
private ShaarliImport $shaarliImport;
private Config $craueConfig;
private RabbitMqProducer $rabbitMqProducer;
private RedisProducer $redisProducer;
public function __construct(ShaarliImport $shaarliImport, Config $craueConfig, RabbitMqProducer $rabbitMqProducer, RedisProducer $redisProducer)
{
$this->shaarliImport = $shaarliImport;
$this->craueConfig = $craueConfig;
$this->rabbitMqProducer = $rabbitMqProducer;
$this->redisProducer = $redisProducer;
}
/**
* @Route("/shaarli", name="import_shaarli")
*/
public function indexAction(Request $request, TranslatorInterface $translator)
{
return parent::indexAction($request, $translator);
}
/**
* {@inheritdoc}
*/
protected function getImportService()
{
if ($this->craueConfig->get('import_with_rabbitmq')) {
$this->shaarliImport->setProducer($this->rabbitMqProducer);
} elseif ($this->craueConfig->get('import_with_redis')) {
$this->shaarliImport->setProducer($this->redisProducer);
}
return $this->shaarliImport;
}
/**
* {@inheritdoc}
*/
protected function getImportTemplate()
{
return '@WallabagImport/Shaarli/index.html.twig';
}
}

View file

@ -0,0 +1,210 @@
<?php
namespace Wallabag\ImportBundle\Import;
use Wallabag\CoreBundle\Entity\Entry;
use Wallabag\CoreBundle\Event\EntrySavedEvent;
abstract class HtmlImport extends AbstractImport
{
protected $filepath;
/**
* {@inheritdoc}
*/
abstract public function getName();
/**
* {@inheritdoc}
*/
abstract public function getUrl();
/**
* {@inheritdoc}
*/
abstract public function getDescription();
/**
* {@inheritdoc}
*/
public function import()
{
if (!$this->user) {
$this->logger->error('Wallabag HTML Import: user is not defined');
return false;
}
if (!file_exists($this->filepath) || !is_readable($this->filepath)) {
$this->logger->error('Wallabag HTML Import: unable to read file', ['filepath' => $this->filepath]);
return false;
}
$html = new \DOMDocument();
libxml_use_internal_errors(true);
$html->loadHTMLFile($this->filepath);
$hrefs = $html->getElementsByTagName('a');
libxml_use_internal_errors(false);
if (0 === $hrefs->length) {
$this->logger->error('Wallabag HTML: no entries in imported file');
return false;
}
$entries = [];
foreach ($hrefs as $href) {
$entry = [];
$entry['url'] = $href->getAttribute('href');
$entry['tags'] = $href->getAttribute('tags');
$entry['created_at'] = $href->getAttribute('add_date');
$entries[] = $entry;
}
if ($this->producer) {
$this->parseEntriesForProducer($entries);
return true;
}
$this->parseEntries($entries);
return true;
}
/**
* Set file path to the html file.
*
* @param string $filepath
*/
public function setFilepath($filepath)
{
$this->filepath = $filepath;
return $this;
}
/**
* {@inheritdoc}
*/
public function parseEntry(array $importedEntry)
{
$url = $importedEntry['url'];
$existingEntry = $this->em
->getRepository(Entry::class)
->findByUrlAndUserId($url, $this->user->getId());
if (false !== $existingEntry) {
++$this->skippedEntries;
return null;
}
$data = $this->prepareEntry($importedEntry);
$entry = new Entry($this->user);
$entry->setUrl($data['url']);
$entry->updateArchived($data['is_archived']);
$createdAt = new \DateTime();
$createdAt->setTimestamp($data['created_at']);
$entry->setCreatedAt($createdAt);
// update entry with content (in case fetching failed, the given entry will be return)
$this->fetchContent($entry, $data['url'], $data);
if (\array_key_exists('tags', $data)) {
$this->tagsAssigner->assignTagsToEntry(
$entry,
$data['tags']
);
}
$this->em->persist($entry);
++$this->importedEntries;
return $entry;
}
/**
* Parse and insert all given entries.
*/
protected function parseEntries(array $entries)
{
$i = 1;
$entryToBeFlushed = [];
foreach ($entries as $importedEntry) {
$entry = $this->parseEntry($importedEntry);
if (null === $entry) {
continue;
}
// @see AbstractImport
$entryToBeFlushed[] = $entry;
// flush every 20 entries
if (0 === ($i % 20)) {
$this->em->flush();
foreach ($entryToBeFlushed as $entry) {
$this->eventDispatcher->dispatch(new EntrySavedEvent($entry), EntrySavedEvent::NAME);
}
$entryToBeFlushed = [];
}
++$i;
}
$this->em->flush();
if (!empty($entryToBeFlushed)) {
foreach ($entryToBeFlushed as $entry) {
$this->eventDispatcher->dispatch(new EntrySavedEvent($entry), EntrySavedEvent::NAME);
}
}
}
/**
* Parse entries and send them to the queue.
* It should just be a simple loop on all item, no call to the database should be done
* to speedup queuing.
*
* Faster parse entries for Producer.
* We don't care to make check at this time. They'll be done by the consumer.
*/
protected function parseEntriesForProducer(array $entries)
{
foreach ($entries as $importedEntry) {
if ((array) $importedEntry !== $importedEntry) {
continue;
}
// set userId for the producer (it won't know which user is connected)
$importedEntry['userId'] = $this->user->getId();
if ($this->markAsRead) {
$importedEntry = $this->setEntryAsRead($importedEntry);
}
++$this->queuedEntries;
$this->producer->publish(json_encode($importedEntry));
}
}
/**
* {@inheritdoc}
*/
protected function setEntryAsRead(array $importedEntry)
{
$importedEntry['is_archived'] = 1;
return $importedEntry;
}
abstract protected function prepareEntry(array $entry = []);
}

View file

@ -0,0 +1,113 @@
<?php
namespace Wallabag\ImportBundle\Import;
class PocketHtmlImport extends HtmlImport
{
protected $filepath;
/**
* {@inheritdoc}
*/
public function getName()
{
return 'Pocket HTML';
}
/**
* {@inheritdoc}
*/
public function getUrl()
{
return 'import_pocket_html';
}
/**
* {@inheritdoc}
*/
public function getDescription()
{
return 'import.pocket_html.description';
}
/**
* {@inheritdoc}
*/
public function validateEntry(array $importedEntry)
{
if (empty($importedEntry['url'])) {
return false;
}
return true;
}
public function import()
{
if (!$this->user) {
$this->logger->error('Pocket HTML Import: user is not defined');
return false;
}
if (!file_exists($this->filepath) || !is_readable($this->filepath)) {
$this->logger->error('Pocket HTML Import: unable to read file', ['filepath' => $this->filepath]);
return false;
}
$html = new \DOMDocument();
libxml_use_internal_errors(true);
$html->loadHTMLFile($this->filepath);
$hrefs = $html->getElementsByTagName('a');
libxml_use_internal_errors(false);
if (0 === $hrefs->length) {
$this->logger->error('Pocket HTML: no entries in imported file');
return false;
}
$entries = [];
foreach ($hrefs as $href) {
$entry = [];
$entry['url'] = $href->getAttribute('href');
$entry['tags'] = $href->getAttribute('tags');
$entry['created_at'] = $href->getAttribute('time_added');
$entries[] = $entry;
}
if ($this->producer) {
$this->parseEntriesForProducer($entries);
return true;
}
$this->parseEntries($entries);
return true;
}
/**
* {@inheritdoc}
*/
protected function prepareEntry(array $entry = [])
{
$data = [
'title' => '',
'html' => false,
'url' => $entry['url'],
'is_archived' => (int) $this->markAsRead,
'is_starred' => false,
'tags' => '',
'created_at' => $entry['created_at'],
];
if (\array_key_exists('tags', $entry) && '' !== $entry['tags']) {
$data['tags'] = $entry['tags'];
}
return $data;
}
}

View file

@ -0,0 +1,66 @@
<?php
namespace Wallabag\ImportBundle\Import;
class ShaarliImport extends HtmlImport
{
protected $filepath;
/**
* {@inheritdoc}
*/
public function getName()
{
return 'Shaarli';
}
/**
* {@inheritdoc}
*/
public function getUrl()
{
return 'import_shaarli';
}
/**
* {@inheritdoc}
*/
public function getDescription()
{
return 'import.shaarli.description';
}
/**
* {@inheritdoc}
*/
public function validateEntry(array $importedEntry)
{
if (empty($importedEntry['url'])) {
return false;
}
return true;
}
/**
* {@inheritdoc}
*/
protected function prepareEntry(array $entry = [])
{
$data = [
'title' => '',
'html' => false,
'url' => $entry['url'],
'is_archived' => (int) $this->markAsRead,
'is_starred' => false,
'tags' => '',
'created_at' => $entry['created_at'],
];
if (\array_key_exists('tags', $entry) && '' !== $entry['tags']) {
$data['tags'] = $entry['tags'];
}
return $data;
}
}

View file

@ -0,0 +1,45 @@
{% extends "@WallabagCore/layout.html.twig" %}
{% block title %}{{ 'import.pocket_html.page_title'|trans }}{% endblock %}
{% block content %}
<div class="row">
<div class="col s12">
<div class="card-panel settings">
{% include '@WallabagImport/Import/_information.html.twig' %}
<div class="row">
<blockquote>{{ import.description|trans|raw }}</blockquote>
<p>{{ 'import.pocket_html.how_to'|trans }}</p>
<div class="col s12">
{{ form_start(form, {'method': 'POST'}) }}
{{ form_errors(form) }}
<div class="row">
<div class="file-field input-field col s12">
{{ form_errors(form.file) }}
<div class="btn">
<span>{{ form.file.vars.label|trans }}</span>
{{ form_widget(form.file) }}
</div>
<div class="file-path-wrapper">
<input class="file-path validate" type="text">
</div>
</div>
<div class="input-field col s6 with-checkbox">
<h6>{{ 'import.form.mark_as_read_title'|trans }}</h6>
{{ form_widget(form.mark_as_read) }}
{{ form_label(form.mark_as_read) }}
</div>
</div>
{{ form_widget(form.save, {'attr': {'class': 'btn waves-effect waves-light'}}) }}
{{ form_rest(form) }}
</form>
</div>
</div>
</div>
</div>
</div>
{% endblock %}

View file

@ -0,0 +1,45 @@
{% extends "@WallabagCore/layout.html.twig" %}
{% block title %}{{ 'import.shaarli.page_title'|trans }}{% endblock %}
{% block content %}
<div class="row">
<div class="col s12">
<div class="card-panel settings">
{% include '@WallabagImport/Import/_information.html.twig' %}
<div class="row">
<blockquote>{{ import.description|trans|raw }}</blockquote>
<p>{{ 'import.shaarli.how_to'|trans }}</p>
<div class="col s12">
{{ form_start(form, {'method': 'POST'}) }}
{{ form_errors(form) }}
<div class="row">
<div class="file-field input-field col s12">
{{ form_errors(form.file) }}
<div class="btn">
<span>{{ form.file.vars.label|trans }}</span>
{{ form_widget(form.file) }}
</div>
<div class="file-path-wrapper">
<input class="file-path validate" type="text">
</div>
</div>
<div class="input-field col s6 with-checkbox">
<h6>{{ 'import.form.mark_as_read_title'|trans }}</h6>
{{ form_widget(form.mark_as_read) }}
{{ form_label(form.mark_as_read) }}
</div>
</div>
{{ form_widget(form.save, {'attr': {'class': 'btn waves-effect waves-light'}}) }}
{{ form_rest(form) }}
</form>
</div>
</div>
</div>
</div>
</div>
{% endblock %}