mirror of
https://github.com/wallabag/wallabag.git
synced 2025-08-01 17:38:38 +00:00
fix bug #209: titles containing a colon were badly parsed
This commit is contained in:
parent
084ec2a63d
commit
b9523a0ba0
3 changed files with 49 additions and 9 deletions
46
inc/poche/PocheReadability.php
Normal file
46
inc/poche/PocheReadability.php
Normal file
|
@ -0,0 +1,46 @@
|
|||
<?php
|
||||
|
||||
class PocheReadability extends Readability
{
    /**
     * Get the article title as an H1.
     *
     * Heuristic, in order:
     *  1. Read the text of the document's first <title> element.
     *  2. If that text contains " | " or " - ", keep the part before the
     *     last such separator; if that part has fewer than 3 space-separated
     *     pieces, keep the part after the first separator instead.
     *  3. Otherwise, if the title is longer than 150 or shorter than 15
     *     characters and the document has exactly one <h1>, use the text of
     *     that <h1>.
     *  4. If the trimmed result has 4 or fewer space-separated pieces, fall
     *     back to the untouched <title> text.
     *
     * Only "|" and "-" are treated as separators; a colon is never used to
     * split the title, so titles containing ":" are kept whole.
     *
     * @return DOMElement
     */
    protected function getArticleTitle()
    {
        $curTitle = '';
        $origTitle = '';

        try {
            $titleNode = $this->dom->getElementsByTagName('title')->item(0);
            $curTitle = $origTitle = $this->getInnerText($titleNode);
        } catch (Exception $e) {
            // Deliberate best effort: if the <title> cannot be read, continue
            // with empty strings and let the <h1> fallback below take over.
        }

        if (preg_match('/ [\|\-] /', $curTitle)) {
            // Greedy (.*) means everything before the LAST "| " or "- " is kept.
            $curTitle = preg_replace('/(.*)[\|\-] .*/i', '$1', $origTitle);

            // Too little left: keep what follows the FIRST separator instead.
            if (count(explode(' ', $curTitle)) < 3) {
                $curTitle = preg_replace('/[^\|\-]*[\|\-](.*)/i', '$1', $origTitle);
            }
        } else {
            // Measure characters rather than bytes: DOM text is UTF-8, so
            // strlen() would over-count non-ASCII titles and wrongly treat
            // them as too long. Fall back to strlen() without mbstring.
            $titleLength = function_exists('mb_strlen')
                ? mb_strlen($curTitle, 'UTF-8')
                : strlen($curTitle);

            if ($titleLength > 150 || $titleLength < 15) {
                $hOnes = $this->dom->getElementsByTagName('h1');

                // Only trust the <h1> when it is unambiguous.
                if ($hOnes->length === 1) {
                    $curTitle = $this->getInnerText($hOnes->item(0));
                }
            }
        }

        $curTitle = trim($curTitle);

        // A very short result is more likely a site name or fragment than a
        // real headline, so prefer the original <title> text.
        if (count(explode(' ', $curTitle)) <= 4) {
            $curTitle = $origTitle;
        }

        $articleTitle = $this->dom->createElement('h1');
        // NOTE(review): innerHTML is not a native DOMElement property;
        // presumably the document registers an element class that implements
        // it - confirm in Readability.
        $articleTitle->innerHTML = $curTitle;

        return $articleTitle;
    }
}
|
Loading…
Add table
Add a link
Reference in a new issue