mirror of
https://github.com/wallabag/wallabag.git
synced 2025-07-17 17:08:37 +00:00
Use graby ContentExtractor to clean html
It might be better to reuse some of graby's existing functionality to clean HTML instead of building a new system.
This commit is contained in:
parent
fb436e8ca0
commit
74a75f7d43
4 changed files with 66 additions and 2 deletions
|
@@ -336,7 +336,6 @@ class EntryRestController extends WallabagRestController
|
|||
$entry->setUrl($url);
|
||||
}
|
||||
|
||||
|
||||
if (!empty($tags)) {
|
||||
$this->get('wallabag_core.tags_assigner')->assignTagsToEntry($entry, $tags);
|
||||
}
|
||||
|
|
|
@@ -47,6 +47,16 @@ class ContentProxy
|
|||
{
|
||||
// ensure content is a bit cleaned up
|
||||
if (!empty($content['html'])) {
|
||||
$extractor = $this->graby->getExtractor();
|
||||
$contentExtracted = $extractor->process($content['html'], $url);
|
||||
|
||||
if ($contentExtracted) {
|
||||
$contentBlock = $extractor->getContent();
|
||||
$contentBlock->normalize();
|
||||
|
||||
$content['html'] = trim($contentBlock->innerHTML);
|
||||
}
|
||||
|
||||
$content['html'] = htmLawed($content['html'], [
|
||||
'safe' => 1,
|
||||
// which means: do not remove iframe elements
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue