1
0
Fork 0
mirror of https://github.com/wallabag/wallabag.git synced 2025-08-01 17:38:38 +00:00

Use graby ContentExtractor to clean html

It might be better to reuse some of Graby's functionality to clean HTML instead of building a new system.
This commit is contained in:
Jeremy Benoist 2017-05-12 07:53:21 +02:00
parent fb436e8ca0
commit 74a75f7d43
No known key found for this signature in database
GPG key ID: BCA73962457ACC3C
4 changed files with 66 additions and 2 deletions

View file

@ -8,6 +8,7 @@ use Wallabag\CoreBundle\Entity\Entry;
use Wallabag\CoreBundle\Entity\Tag;
use Wallabag\UserBundle\Entity\User;
use Wallabag\CoreBundle\Helper\RuleBasedTagger;
use Graby\Graby;
class ContentProxyTest extends \PHPUnit_Framework_TestCase
{
@ -253,6 +254,60 @@ class ContentProxyTest extends \PHPUnit_Framework_TestCase
$this->assertCount(0, $entry->getTags());
}
/**
 * Data provider for testWithCrazyHtmlContent.
 *
 * Each dataset is a pair: the raw HTML handed to the proxy, followed by a
 * substring that the test asserts is absent from the resulting entry content.
 *
 * @return array
 */
public function dataForCrazyHtml()
{
    $datasets = [];

    // Script element wrapped inside an IE conditional comment.
    $datasets['script and comment'] = [
        '<strong>Script inside:</strong> <!--[if gte IE 4]><script>alert(\'lol\');</script><![endif]--><br />',
        'lol',
    ];

    // Bare script element placed right after the visible markup.
    $datasets['script'] = [
        '<strong>Script inside:</strong><script>alert(\'lol\');</script>',
        'script',
    ];

    return $datasets;
}
/**
 * Runs pre-fetched HTML containing script markup through ContentProxy::updateEntry
 * using a real Graby instance, then checks that the given string is absent from
 * the entry content and that the remaining entry fields match the supplied payload.
 *
 * @dataProvider dataForCrazyHtml
 *
 * @param string $html          HTML passed as the 'html' key of the content payload
 * @param string $escapedString Substring that must not appear in the entry content
 */
public function testWithCrazyHtmlContent($html, $escapedString)
{
    $url = 'http://1.1.1.1';

    // The tagger must be invoked exactly once for the updated entry.
    $tagger = $this->getTaggerMock();
    $tagger
        ->expects($this->once())
        ->method('tag');

    $proxy = new ContentProxy(
        new Graby(),
        $tagger,
        $this->getTagRepositoryMock(),
        $this->getLogger(),
        $this->fetchingErrorMessage
    );

    // Content payload handed to updateEntry together with the URL.
    $content = [
        'html' => $html,
        'title' => 'this is my title',
        'url' => $url,
        'content_type' => 'text/html',
        'language' => 'fr',
        'status' => '200',
        'open_graph' => [
            'og_title' => 'my OG title',
            'og_description' => 'OG desc',
            'og_image' => 'http://3.3.3.3/cover.jpg',
        ],
    ];

    $subject = new Entry(new User());
    $entry = $proxy->updateEntry($subject, $url, $content);

    $this->assertEquals($url, $entry->getUrl());
    $this->assertEquals('this is my title', $entry->getTitle());
    $this->assertNotContains($escapedString, $entry->getContent());
    $this->assertEquals('http://3.3.3.3/cover.jpg', $entry->getPreviewPicture());
    $this->assertEquals('text/html', $entry->getMimetype());
    $this->assertEquals('fr', $entry->getLanguage());
    $this->assertEquals('200', $entry->getHttpStatus());
    $this->assertEquals('1.1.1.1', $entry->getDomainName());
}
private function getTaggerMock()
{
return $this->getMockBuilder(RuleBasedTagger::class)