1
0
Fork 0
mirror of https://github.com/wallabag/wallabag.git synced 2025-08-26 18:21:02 +00:00

merge refactor and dev

This commit is contained in:
tcit 2014-09-27 17:54:13 +02:00
commit 04a7674bdd
32 changed files with 1150 additions and 470 deletions

17
inc/3rdparty/libraries/readability/Readability.php vendored Normal file → Executable file
View file

@ -679,6 +679,7 @@ class Readability
} else {
$topCandidate->innerHTML = $page->documentElement->innerHTML;
$page->documentElement->innerHTML = '';
$this->reinitBody();
$page->documentElement->appendChild($topCandidate);
}
} else {
@ -794,8 +795,7 @@ class Readability
{
// TODO: find out why element disappears sometimes, e.g. for this URL http://www.businessinsider.com/6-hedge-fund-etfs-for-average-investors-2011-7
// in the meantime, we check and create an empty element if it's not there.
if (!isset($this->body->childNodes)) $this->body = $this->dom->createElement('body');
$this->body->innerHTML = $this->bodyCache;
$this->reinitBody();
if ($this->flagIsActive(self::FLAG_STRIP_UNLIKELYS)) {
$this->removeFlag(self::FLAG_STRIP_UNLIKELYS);
@ -1134,5 +1134,18 @@ class Readability
/**
 * Switch off the given flag bit(s) in the currently active flag set.
 *
 * @param int $flag Bitmask of the flag(s) to clear
 * @return void
 */
public function removeFlag($flag) {
    // AND with the complement: every bit set in $flag is cleared, all others are kept.
    $this->flags &= ~$flag;
}
/**
 * Recreate the body property when it is no longer usable.
 *
 * If $this->body still exposes childNodes nothing happens; otherwise a new
 * <body> element is created from $this->dom and filled from $this->bodyCache.
 *
 * @return void
 */
protected function reinitBody() {
    // Body is still in place: nothing to rebuild.
    if (isset($this->body->childNodes)) {
        return;
    }

    $this->body = $this->dom->createElement('body');
    $this->body->innerHTML = $this->bodyCache;
}
}
?>

View file

@ -0,0 +1,45 @@
# Author: zinnober
tidy: no
prune: no
# Set author
author: //a[@rel='author']
# Set date
date: //span[@class='Datum']
# Content is here
body: //div[@class='Artikel']
# Tidy up before article
strip: //div[@id='FAZHeaderNeu']
strip: //h2[@itemprop='headline']
strip: //span[@class='Datum']
strip: //span[@class='Autor']
strip_id_or_class: ArticlePagerTop
strip: //div[@class='FAZArtikelEinleitung']/h2
# General cleanup
strip: //div[@class='clear']
strip: //span[@class='Bildnachweis']
strip: //iframe
strip_id_or_class: Community
strip: ' · '
# Remove tracking and ads
strip_image_src: /l.gif?
strip: //img[@width='1']
strip_id_or_class: invisible
strip_id_or_class: Anzeige
strip_id_or_class: billboard
# Remove clutter after article
strip_id_or_class: Tagline
strip_id_or_class: ArtikelAbbinder
strip_id_or_class: FAZArtikelKommentare
strip_id_or_class: ArtikelKommentieren
strip_id_or_class: FAZContentRight
# Try it yourself
test_url: http://blogs.faz.net/wost/2014/08/17/viel-fuck-und-wenig-guter-sex-1239/

View file

@ -0,0 +1,14 @@
body: //div[@id='articlebody']
title: //h1
author: //p[@id='by']//a
next_page_link: //span[@class='next']/a
# Not the same as below!
prune: yes
tidy: no
# Annoying 'next' links plainly inside the article body
strip: //*[text()[contains(.,'Next: ')]]
test_url: http://psychology.about.com/od/theoriesofpersonality/ss/defensemech.htm

9
inc/3rdparty/site_config/standard/dn.pt.txt vendored Executable file
View file

@ -0,0 +1,9 @@
single_page_link: concat('http://www.dn.pt/Common/print.aspx?content_id=', //input[@type='hidden' and @name='link-comments']/@value)
#<input type="hidden" name="link-comments" class="link-comments" value="3972244">
title: //h1
author: //div[@class="Author"]
strip: //div[@class="Patrocinio"]
test_url: http://www.dn.pt/inicio/opiniao/interior.aspx?content_id=3972244&seccao=Alberto%20Gon%E7alves&tag=Opini%E3o%20-%20Em%20Foco&page=1

113
inc/3rdparty/site_config/standard/faz.net.txt vendored Executable file → Normal file
View file

@ -1,36 +1,101 @@
# Author: zinnober
# Complete rewrite of the faz.net template as the standard one is broken
# I tried to consider as many page variants as possible, which was some serious work
tidy: no
prune: no
# Title
title: //p[@class='Content HeadlineShort']
# Authors
# some are known and have a link, others don't
author: substring-after(//span[@class='Autor'], 'Von')
# Set author
author: substring-after(//span[@class='Autor'], 'von ')
author: //span[@class='caps last']/span[@class='caps last']
author: //a[@rel='author']
# Date
# Set date
date: //span[@class='Datum']
date: //span[@class='Datum'],/span
# Body
# Fetch full multipage articles
next_page_link: //a[@title='Nächste Seite']
# Content is here
body: //div[@class='Artikel']
# Removals before body text
strip: //div[@class='Breadcrumbs']
strip: //div[@class='QuickSearchBox']
strip: //div[@class='FAZArtikelEinleitung']
strip: //div[@class='FAZArtikelReiter']
# Tidy up before article
strip: //div[@id='FAZHeaderNeu']
strip: //h2[@itemprop='headline']
strip: //span[@class='Datum']
strip: //span[@class='Autor']
strip_id_or_class: ArticlePagerTop
# General cleanup
strip: //div[@class='clear']
strip: //a[@title='Zur Homepage FAZ.NET']
strip: //iframe
replace_string( · ):
# General removals
strip: //span[@class='Bildnachweis']
strip: //img[@class='MediaIcon']
strip: //div[@class='ArtikelMediaLink']
dissolve: //a[img]
# Remove tracking and ads
strip_image_src: /l.gif?
strip: //div[contains(@style, 'background-image')]
strip: //img[@width='1']
strip_id_or_class: invisible
strip_id_or_class: Anzeige
strip_id_or_class: billboard
# Removals after body text
strip: //div[@class='ArtikelAbbinder']
strip: //div[@class='ArtikelKommentieren Artikelfuss GETS;tk;boxen.top-lesermeinungen;tp;content']
strip: //div[@class='FAZArtikelKommentare FAZArtikelContent']
strip: //div[@class='FAZArtikelFunktionen']
strip: //div[@id='FAZContentRight']
# Remove various text boxes and social media foo
strip_id_or_class: WeitereBeitraege
strip_id_or_class: WBListe
strip_id_or_class: AutorenModul
strip_id_or_class: Community
strip_id_or_class: SocialMediaStatus
strip_id_or_class: RelatedLinkBox
strip_id_or_class: MultimediaNavigation
strip_id_or_class: IndexTitel
# Fix picture caps and pictures (use better resolution and remove clutter)
strip_id_or_class: LightBoxOverlay
strip_id_or_class: exitLarge
strip_id_or_class: PagerBox
strip_id_or_class: Bildnachweis
strip_id_or_class: Bildueberschrift
strip_id_or_class: Bildbeschreibung
strip_id_or_class: ArtikelBild610
strip_id_or_class: MediaLink
strip_id_or_class: FotoBoxInnerLeft
strip_id_or_class: BilderRelatedLinks
# Remove clutter after article
strip_id_or_class: ArticlePagerBottom
strip_id_or_class: backToHome
strip_id_or_class: ArtikelAbbinder
strip_id_or_class: lesermeinungscontainer
strip_id_or_class: ThemenLinks
strip_id_or_class: rechtehinweis
strip_id_or_class: FAZArtikelMap
strip_id_or_class: FAZArtikelKommentare
strip_id_or_class: ArtikelKommentieren
strip_id_or_class: FAZArtikelFunktionen
strip_id_or_class: mailLB
strip_id_or_class: FAZContentRight
strip_id_or_class: stageModule
strip_id_or_class: ContentFooter
strip_id_or_class: ServicesFooter
strip_id_or_class: FAZFooter
# Clean up stuff present just in some articles
strip_id_or_class: Teaser620
strip_id_or_class: TeaserMultimedia
strip_id_or_class: VideoBox
# Remove as soon as Wallabag might be able to embed flash video
strip_id_or_class: mmoObjectAsTeaserInArticle
strip_id_or_class: additionalStylesAudioVideo
strip_id_or_class: hideMMElements
# Try it yourself
test_url: http://www.faz.net/aktuell/feuilleton/zum-tod-von-margaret-thatcher-die-reizfigur-12141919.html#Drucken
test_url: http://www.faz.net/aktuell/politik/inland/allensbach-analyse-im-namen-des-volkes-13106492.html
test_url: http://www.faz.net/aktuell/feuilleton/kino/video-filmkritiken/video-filmkritik-when-animals-dream-zerrissene-jugend-13105772.html
# Fix picture captions
wrap_in(small): //span[@class='Bildunterschrift']/text()
test_url: http://www.faz.net/aktuell/feuilleton/zum-tod-von-margaret-thatcher-die-reizfigur-12141919.html#Drucken

View file

@ -0,0 +1,21 @@
title: //span[@class="post_title"]
author: //div[@class="author"]
date: //div[@class="published"]
body: //div[@class='content html_format'] | //div[@id='comments']
strip: //a[@class="link_to_comment"]
strip: //div[@class="show_tree"]
strip: //a[@class="to_parent"]
replace_string(class="reply_comments"): style="padding-left: 20px"
replace_string(class="voting "): style="float: right"
replace_string(src="//habrastorage.org/getpro/habr/avatars/): style="width:24px; height:24px;" class="123" src="//habrastorage.org/getpro/habr/avatars/
replace_string(class="info "): style="padding-top:5px;font-size:0.85em;line-height:24px;"
prune: no
tidy: no
test_url: http://habrahabr.ru/post/229883/

View file

@ -23,6 +23,10 @@ class Database {
{
switch (STORAGE) {
case 'sqlite':
// Check if /db is writeable
if ( !is_writable(STORAGE_SQLITE) || !is_writable(dirname(STORAGE_SQLITE))) {
die('An error occured: "db" directory must be writeable for your web server user!');
}
$db_path = 'sqlite:' . STORAGE_SQLITE;
$this->handle = new PDO($db_path);
break;

View file

@ -524,6 +524,14 @@ class Poche
$longlastingsession = isset($_POST['longlastingsession']);
$passwordTest = ($isauthenticated) ? $user['password'] : Tools::encodeString($password . $login);
Session::login($user['username'], $user['password'], $login, $passwordTest, $longlastingsession, array('poche_user' => new User($user)));
# reload l10n
$language = $user['config']['language'];
@putenv('LC_ALL=' . $language);
setlocale(LC_ALL, $language);
bindtextdomain($language, LOCALE);
textdomain($language);
$this->messages->add('s', _('welcome to your wallabag'));
Tools::logm('login successful');
Tools::redirect($referer);
@ -551,42 +559,39 @@ class Poche
* import datas into your wallabag
* @return boolean
*/
public function import()
{
if (isset($_FILES['file'])) {
Tools::logm('Import stated: parsing file');
// assume, that file is in json format
public function import() {
$str_data = file_get_contents($_FILES['file']['tmp_name']);
$data = json_decode($str_data, true);
if ($data === null) {
if ( isset($_FILES['file']) && $_FILES['file']['tmp_name'] ) {
Tools::logm('Import stated: parsing file');
// not json - assume html
// assume, that file is in json format
$str_data = file_get_contents($_FILES['file']['tmp_name']);
$data = json_decode($str_data, true);
$html = new simple_html_dom();
$html->load_file($_FILES['file']['tmp_name']);
$data = array();
$read = 0;
foreach(array('ol','ul') as $list) {
foreach($html->find($list) as $ul) {
foreach($ul->find('li') as $li) {
$tmpEntry = array();
$a = $li->find('a');
$tmpEntry['url'] = $a[0]->href;
$tmpEntry['tags'] = $a[0]->tags;
$tmpEntry['is_read'] = $read;
if ($tmpEntry['url']) {
$data[] = $tmpEntry;
}
}
// the second <ol/ul> is for read links
$read = ((sizeof($data) && $read) ? 0 : 1);
}
}
if ( $data === null ) {
//not json - assume html
$html = new simple_html_dom();
$html->load_file($_FILES['file']['tmp_name']);
$data = array();
$read = 0;
foreach (array('ol','ul') as $list) {
foreach ($html->find($list) as $ul) {
foreach ($ul->find('li') as $li) {
$tmpEntry = array();
$a = $li->find('a');
$tmpEntry['url'] = $a[0]->href;
$tmpEntry['tags'] = $a[0]->tags;
$tmpEntry['is_read'] = $read;
if ($tmpEntry['url']) {
$data[] = $tmpEntry;
}
}
# the second <ol/ul> is for read links
$read = ((sizeof($data) && $read)?0:1);
}
}
}
// for readability structure
@ -629,9 +634,11 @@ class Poche
$this->messages->add('s', _('Articles inserted: ') . $i . _('. Please note, that some may be marked as "read".'));
}
Tools::logm('Import of articles finished: ' . $i . ' articles added (w/o content if not provided).');
}
Tools::logm('Import of articles finished: '.$i.' articles added (w/o content if not provided).');
}
else {
$this->messages->add('s', _('Did you forget to select a file?'));
}
// file parsing finished here
// now download article contents if any
// check if we need to download any content
@ -750,8 +757,8 @@ class Poche
die(sprintf(_('User with this id (%d) does not exist.'), $user_id));
}
if (!in_array($type, $allowed_types) || $token != $config['token']) {
die(_('Uh, there is a problem while generating feeds.'));
if (!in_array($type, $allowed_types) || !isset($config['token']) || $token != $config['token']) {
die(_('Uh, there is a problem while generating feed. Wrong token used?'));
}
$feed = new FeedWriter(RSS2);

79
inc/poche/Routing.class.php Normal file → Executable file
View file

@ -98,49 +98,50 @@ class Routing
private function _launchAction()
{
if (isset($_GET['login'])) {
// hello you
$this->wallabag->login($this->referer);
} elseif (isset($_GET['logout'])) {
// see you soon !
$this->wallabag->logout();
} elseif (isset($_GET['config'])) {
// update password
$this->wallabag->updatePassword($_POST['password'], $_POST['password_repeat']);
} elseif (isset($_GET['newuser'])) {
$this->wallabag->createNewUser($_POST['newusername'], $_POST['password4newuser']);
} elseif (isset($_GET['deluser'])) {
$this->wallabag->deleteUser($_POST['password4deletinguser']);
} elseif (isset($_GET['epub'])) {
$epub = new WallabagEpub($this->wallabag, $_GET['method'], $_GET['value']);
$epub->run();
} elseif (isset($_GET['import'])) {
$import = $this->wallabag->import();
$tplVars = array_merge($this->vars, $import);
} elseif (isset($_GET['download'])) {
Tools::downloadDb();
} elseif (isset($_GET['empty-cache'])) {
Tools::emptyCache();
} elseif (isset($_GET['export'])) {
$this->wallabag->export();
} elseif (isset($_GET['updatetheme'])) {
$this->wallabag->tpl->updateTheme($_POST['theme']);
} elseif (isset($_GET['updatelanguage'])) {
$this->wallabag->language->updateLanguage($_POST['language']);
} elseif (isset($_GET['uploadfile'])) {
$this->wallabag->uploadFile();
} elseif (isset($_GET['feed'])) {
if (isset($_GET['action']) && $_GET['action'] == 'generate') {
// hello to you
$this->wallabag->login($this->referer);
} elseif (isset($_GET['feed']) && isset($_GET['user_id'])) {
$tag_id = (isset($_GET['tag_id']) ? intval($_GET['tag_id']) : 0);
$this->wallabag->generateFeeds($_GET['token'], filter_var($_GET['user_id'],FILTER_SANITIZE_NUMBER_INT), $tag_id, $_GET['type']);
}
//allowed ONLY to logged in user
if (\Session::isLogged() === true)
{
if (isset($_GET['logout'])) {
// see you soon !
$this->wallabag->logout();
} elseif (isset($_GET['config'])) {
// update password
$this->wallabag->updatePassword($_POST['password'], $_POST['password_repeat']);
} elseif (isset($_GET['newuser'])) {
$this->wallabag->createNewUser($_POST['newusername'], $_POST['password4newuser']);
} elseif (isset($_GET['deluser'])) {
$this->wallabag->deleteUser($_POST['password4deletinguser']);
} elseif (isset($_GET['epub'])) {
$epub = new WallabagEpub($this->wallabag, $_GET['method'], $_GET['id'], $_GET['value']);
$epub->run();
} elseif (isset($_GET['import'])) {
$import = $this->wallabag->import();
$tplVars = array_merge($this->vars, $import);
} elseif (isset($_GET['empty-cache'])) {
Tools::emptyCache();
} elseif (isset($_GET['export'])) {
$this->wallabag->export();
} elseif (isset($_GET['updatetheme'])) {
$this->wallabag->tpl->updateTheme($_POST['theme']);
} elseif (isset($_GET['updatelanguage'])) {
$this->wallabag->language->updateLanguage($_POST['language']);
} elseif (isset($_GET['uploadfile'])) {
$this->wallabag->uploadFile();
} elseif (isset($_GET['feed']) && isset($_GET['action']) && $_GET['action'] == 'generate') {
$this->wallabag->updateToken();
}
else {
$tag_id = (isset($_GET['tag_id']) ? intval($_GET['tag_id']) : 0);
$this->wallabag->generateFeeds($_GET['token'], filter_var($_GET['user_id'],FILTER_SANITIZE_NUMBER_INT), $tag_id, $_GET['type']);
elseif (isset($_GET['plainurl']) && !empty($_GET['plainurl'])) {
$plainUrl = new Url(base64_encode($_GET['plainurl']));
$this->wallabag->action('add', $plainUrl);
}
}
elseif (isset($_GET['plainurl']) && !empty($_GET['plainurl'])) {
$plainUrl = new Url(base64_encode($_GET['plainurl']));
$this->wallabag->action('add', $plainUrl);
}
}
public function _render($file, $vars)

View file

@ -54,6 +54,10 @@ final class Tools
|| ($https && $_SERVER["SERVER_PORT"] == '443')
|| ($https && $_SERVER["SERVER_PORT"]==SSL_PORT) //Custom HTTPS port detection
? '' : ':' . $_SERVER["SERVER_PORT"]);
if (isset($_SERVER["HTTP_X_FORWARDED_PORT"])) {
$serverport = ':' . $_SERVER["HTTP_X_FORWARDED_PORT"];
}
$scriptname = str_replace('/index.php', '/', $_SERVER["SCRIPT_NAME"]);
@ -294,21 +298,6 @@ final class Tools
}
}
/**
 * Send the sqlite database file to the client as a gzip-compressed
 * attachment named "poche.sqlite.gz", then stop script execution.
 */
public static function downloadDb()
{
    header('Content-Disposition: attachment; filename="poche.sqlite.gz"');
    self::_status(200);
    header('Content-Transfer-Encoding: binary');
    header('Content-Type: application/octet-stream');

    // Headers go out first; only afterwards is the file read and compressed.
    $rawDatabase = file_get_contents(STORAGE_SQLITE);
    $compressedDatabase = gzencode($rawDatabase);
    echo $compressedDatabase;

    exit;
}
/**
* Get the content for a given URL (by a call to FullTextFeed)
*

View file

@ -59,7 +59,7 @@
@define ('LOCALE', ROOT . '/locale');
@define ('CACHE', ROOT . '/cache');
@define ('PAGINATION', '10');
@define ('PAGINATION', '12');
//limit for download of articles during import
@define ('IMPORT_LIMIT', 5);

View file

@ -33,7 +33,7 @@ final class Picture
}
if (self::_downloadPictures($absolute_path, $fullpath) === true) {
$content = str_replace($matches[$i][2], $fullpath, $content);
$content = str_replace($matches[$i][2], Tools::getPocheUrl() . $fullpath, $content);
}
$processing_pictures[] = $absolute_path;