mirror of
https://github.com/wallabag/wallabag.git
synced 2025-07-27 17:28:39 +00:00
update config from @fivefilters
This commit is contained in:
parent
7efe6a03d5
commit
3bb6a8ed2a
112 changed files with 734 additions and 121 deletions
24
inc/3rdparty/site_config/standard/20min.ch.txt
vendored
Executable file
24
inc/3rdparty/site_config/standard/20min.ch.txt
vendored
Executable file
|
@ -0,0 +1,24 @@
|
|||
# Author: cirnod@gmail.com
|
||||
|
||||
tidy: no
|
||||
prune: no
|
||||
|
||||
title: //h1
|
||||
date: /html/body/div[3]/div[1]/div[6]/div/div[1]/div[2]/div[1]/div/p
|
||||
body: //div[@class='published clearfix'] | //div[@class='story_titles']/h3 | //div[@class='story_text']
|
||||
|
||||
# General Cleanup
|
||||
strip_id_or_class: info_panel
|
||||
strip_id_or_class: info_poll
|
||||
strip_id_or_class: teaser
|
||||
strip_id_or_class: panelbox
|
||||
strip_id_or_class: polls
|
||||
strip_id_or_class: warning
|
||||
strip_id_or_class: vplaceholder
|
||||
|
||||
# visual removal only -> complete removal doesn't work
|
||||
replace_string(Print</a>): </a>
|
||||
|
||||
# Try yourself
|
||||
test_url: http://www.20min.ch/wissen/news/story/31588952
|
||||
test_url: http://www.20min.ch/digital/dossier/apple/story/So-einfach-laesst-sich-das-iPhone-6-Plus-verbiegen-24651169
|
8
inc/3rdparty/site_config/standard/24.ae.txt
vendored
Executable file
8
inc/3rdparty/site_config/standard/24.ae.txt
vendored
Executable file
|
@ -0,0 +1,8 @@
|
|||
title: //div[@id='DivTitle']
|
||||
body: //div[@id='divImages' or @id='Divkhabarcontent']
|
||||
author: //div[@id='DivAuthor']
|
||||
|
||||
prune: no
|
||||
|
||||
test_url: http://24.ae/article.aspx?ArticleId=123304
|
||||
test_url: http://24.ae/rss.aspx?pageId=30
|
6
inc/3rdparty/site_config/standard/9gag.com.txt
vendored
Executable file
6
inc/3rdparty/site_config/standard/9gag.com.txt
vendored
Executable file
|
@ -0,0 +1,6 @@
|
|||
# Generated by FiveFilters.org's web-based selection tool
|
||||
# Place this file inside your site_config/custom/ folder
|
||||
# Source: http://siteconfig.fivefilters.org/grab.php?url=http%3A%2F%2F9gag.com%2Fgag%2FaDwQnO7
|
||||
|
||||
body: //div[contains(concat(' ',normalize-space(@class),' '),' badge-post-container ')]
|
||||
test_url: http://9gag.com/gag/aDwQnO7
|
7
inc/3rdparty/site_config/standard/ad.nl.txt
vendored
Executable file
7
inc/3rdparty/site_config/standard/ad.nl.txt
vendored
Executable file
|
@ -0,0 +1,7 @@
|
|||
#bypass cookie check
|
||||
single_page_link: //a[contains(@href, '/acceptCookieCheck.do?url=')]
|
||||
|
||||
test_url: http://www.ad.nl/ad/nl/10444/Offside/article/detail/4043834/2015/05/31/Dani-Alves-voetbalt-met-drol-op-zijn-hoofd.dhtml
|
||||
test_contains: De nieuwe coupe van Alves
|
||||
|
||||
test_url: http://www.ad.nl/digitaal/rss.xml
|
|
@ -1,5 +1,7 @@
|
|||
body: //div[@id='main-column']//div[@class='content']
|
||||
|
||||
strip_id_or_class: social-buttons
|
||||
|
||||
prune: no
|
||||
|
||||
test_url: http://www.albayan.ae/across-the-uae/education/2013-08-29-1.1949645
|
||||
|
|
|
@ -1,5 +1,6 @@
|
|||
body: //div[@class='post_content']
|
||||
date: //div[@class='date_day'] | //div[@class='date_month']
|
||||
strip_id_or_class: author-box
|
||||
author: //h2[@class='author-box-heading']/a
|
||||
|
||||
test_url: http://www.androidpolice.com/2014/03/30/music-boss-for-pebble-can-now-control-playback-and-volume-on-chromecast-content-from-your-smartwatch/
|
||||
|
||||
|
|
6
inc/3rdparty/site_config/standard/artofmanliness.com.txt
vendored
Executable file
6
inc/3rdparty/site_config/standard/artofmanliness.com.txt
vendored
Executable file
|
@ -0,0 +1,6 @@
|
|||
parser: html5php
|
||||
date: //article/p[contains(@class, 'single-date')]
|
||||
author: //article/p[contains(@class, 'byline')]
|
||||
|
||||
test_url: http://www.artofmanliness.com/2013/01/31/relationship-red-flags/
|
||||
test_contains: It seems that once we get close to a person
|
12
inc/3rdparty/site_config/standard/au.businessinsider.com.txt
vendored
Executable file
12
inc/3rdparty/site_config/standard/au.businessinsider.com.txt
vendored
Executable file
|
@ -0,0 +1,12 @@
|
|||
title://div[@class="sl-layout-post"]/h1
|
||||
body: //div[@id='content_post']
|
||||
strip: //div[contains(@class, "post-sidebar")]
|
||||
strip: //div[@id='related-links']
|
||||
strip: //img[@class='size_xlarge']
|
||||
author://div[@class="byline"]/a
|
||||
date://div[@class="byline"]/span[@class="date"]
|
||||
prune: no
|
||||
tidy: no
|
||||
|
||||
|
||||
test_url: http://www.businessinsider.com/microsoft-just-put-one-of-its-hardcore-technical-geniuses-on-xbox-2012-1
|
4
inc/3rdparty/site_config/standard/au.news.yahoo.com.txt
vendored
Executable file
4
inc/3rdparty/site_config/standard/au.news.yahoo.com.txt
vendored
Executable file
|
@ -0,0 +1,4 @@
|
|||
strip: //a[contains(text(), "RELATED:")]
|
||||
author: //div[@class="info"]//span[@class="association printer-source"]
|
||||
date: //div[@class="info"]//span[@class="stamp printer-date"]
|
||||
|
|
@ -30,6 +30,12 @@ strip: //div[contains(@class, 'comment-introduction')]
|
|||
strip: //div[contains(@class, 'share-tools')]
|
||||
strip: //div[@id='also-related-links']
|
||||
|
||||
find_string: http://ichef.bbci.co.uk/news/200/
|
||||
replace_string: http://ichef.bbci.co.uk/news/624/
|
||||
|
||||
find_string: http://ichef.bbci.co.uk/news/304/
|
||||
replace_string: http://ichef.bbci.co.uk/news/624/
|
||||
|
||||
strip_id_or_class: share-help
|
||||
strip_id_or_class: comments_module
|
||||
|
||||
|
|
|
@ -33,6 +33,12 @@ strip: //div[@id='also-related-links']
|
|||
strip_id_or_class: share-help
|
||||
strip_id_or_class: comments_module
|
||||
|
||||
find_string: http://ichef.bbci.co.uk/news/200/
|
||||
replace_string: http://ichef.bbci.co.uk/news/624/
|
||||
|
||||
find_string: http://ichef.bbci.co.uk/news/304/
|
||||
replace_string: http://ichef.bbci.co.uk/news/624/
|
||||
|
||||
replace_string(<noscript>): <div>
|
||||
replace_string(</noscript>): </div>
|
||||
|
||||
|
|
|
@ -3,11 +3,7 @@ title: substring-before(//title, '-')
|
|||
|
||||
author: //a[ contains(@href, '/people') ]
|
||||
|
||||
body: //article[contains(concat(' ',normalize-space(@class),' '),' post ')]
|
||||
|
||||
strip_id_or_class: section learn-more
|
||||
strip_id_or_class: section comments
|
||||
strip_id_or_class: disqus_thread
|
||||
body: //div[ @class='post' ]
|
||||
|
||||
# Date is impossible to retrieve since they use those stupid "fuzzy" dates, inserted through javascript, at posterous.
|
||||
test_url: http://blog.cloudflare.com/understanding-analytics-when-is-a-page-view-n
|
||||
test_url: http://blog.cloudflare.com/understanding-analytics-when-is-a-page-view-n
|
|
@ -1,6 +1,6 @@
|
|||
title: //h3[@class="post-name"]
|
||||
author: //span[@class="user-name"]
|
||||
date: //div[@class="post-date"]
|
||||
date: //div[@class="post-date"]/span[@class="value"]
|
||||
body: //div[@class="post-content user-defined-markup"]
|
||||
footnotes: no
|
||||
test_url: http://blogs.msdn.com/b/b8/archive/2011/10/04/designing-the-start-screen.aspx
|
||||
test_url: http://blogs.msdn.com/b/b8/archive/2011/10/04/designing-the-start-screen.aspx
|
||||
|
|
|
@ -1,7 +1,9 @@
|
|||
# set body
|
||||
body: //div[@id='theContent']
|
||||
|
||||
# set title
|
||||
title: //div[@id='theContent']/h3
|
||||
strip: //div[@id='theContent']/h3
|
||||
test_url: http://www.brandeins.de/archiv/magazin/gegessen-wird-immer/artikel/hunger.html
|
||||
body: //div[@class="articleTeaser"] | //section[@class="contentSection"]
|
||||
|
||||
strip: //section[@class="greenBox italic"]
|
||||
|
||||
author: //div[@class="articleAuthor"]
|
||||
# no publish date on page (the articles are from a monthly periodical)
|
||||
|
||||
test_url: http://www.brandeins.de/archiv/2015/fuehrung/ministry-group-mach-doch-mal-ne-ansage/
|
||||
|
|
2
inc/3rdparty/site_config/standard/brokernews.com.au.txt
vendored
Executable file
2
inc/3rdparty/site_config/standard/brokernews.com.au.txt
vendored
Executable file
|
@ -0,0 +1,2 @@
|
|||
author: //span[@itemprop="author"]
|
||||
date: //span[@itemprop="datePublished"]
|
17
inc/3rdparty/site_config/standard/business.time.com.txt
vendored
Executable file
17
inc/3rdparty/site_config/standard/business.time.com.txt
vendored
Executable file
|
@ -0,0 +1,17 @@
|
|||
# 2011-10-25 - carlo@... - Initial setup.
|
||||
|
||||
single_page_link: //li[@class='print']/a/@href
|
||||
|
||||
title: //h1
|
||||
author: //meta[@name="byline"]/@content
|
||||
date: //meta[@name="date"]/@content
|
||||
|
||||
strip: //span[@class="see"]
|
||||
strip: //div[@class="byline"]
|
||||
strip: //div[@id="date2"]
|
||||
strip: //h1
|
||||
strip: //div[@class='post-rail-ad']
|
||||
strip: //div[@class='post-rail-content']
|
||||
strip: //aside[@class='post-rail']
|
||||
|
||||
test_url: http://www.time.com/time/specials/packages/article/0,28804,2094921_2094923_2094924,00.html
|
4
inc/3rdparty/site_config/standard/choice.com.au.txt
vendored
Executable file
4
inc/3rdparty/site_config/standard/choice.com.au.txt
vendored
Executable file
|
@ -0,0 +1,4 @@
|
|||
|
||||
body: //div[@id='content']//div[@id='mainBlogContentWrapper']//*[self::p or self::img or self::ul] | //div[@class='mainArticleIntro']
|
||||
|
||||
date: //span[@class='date']
|
17
inc/3rdparty/site_config/standard/cnet.com.au.txt
vendored
Executable file
17
inc/3rdparty/site_config/standard/cnet.com.au.txt
vendored
Executable file
|
@ -0,0 +1,17 @@
|
|||
title: //meta[@property="og:title"]/@content
|
||||
body: //div[contains(@class, 'postBody')]
|
||||
date: //div[@id='nameAndTime']/time
|
||||
author: //div[@id='nameAndTime']/span[@class='author']
|
||||
|
||||
strip_id_or_class: image-credit
|
||||
strip_id_or_class: noAutolink
|
||||
strip_id_or_class: related
|
||||
strip_id_or_class: cite
|
||||
|
||||
prune: no
|
||||
tidy: no
|
||||
|
||||
# early end
|
||||
replace_string(Download today's podcast</a>): Download today's podcast</a></div></body></html>
|
||||
|
||||
test_url: http://www.cnet.com/8301-13952_1-57367607-81/the-404-981-where-the-world-is-a-vampire-podcast/
|
|
@ -2,7 +2,7 @@ title://h1
|
|||
|
||||
author://div[@id="news-meta"]/a
|
||||
|
||||
body://*[@id="main"]/div[1]
|
||||
body: //div[contains(@class, 'text-content')]
|
||||
|
||||
strip://*[@id="main"]/div[2]
|
||||
strip://*[@id="main"]/div[3]
|
||||
|
@ -15,4 +15,4 @@ strip://img
|
|||
|
||||
#figures are not displayed in instapaper...
|
||||
strip://figure | //figcaption
|
||||
test_url: http://www.computerbase.de/news/2012-06/verbraucherzentrale-mahnt-blizzard-fuer-diablo-3-ab/
|
||||
test_url: http://www.computerbase.de/news/2012-06/verbraucherzentrale-mahnt-blizzard-fuer-diablo-3-ab/
|
||||
|
|
|
@ -1,5 +1,8 @@
|
|||
title: //meta[@name='headline']/@content
|
||||
body://div[@id="drr-container"]
|
||||
date: //meta[@name='date']/@content
|
||||
author: //meta[@name='author']/@content
|
||||
body: //div[contains(@class, 'article')]
|
||||
body://div[@id="article_body"]
|
||||
|
||||
strip_id_or_class: banner
|
||||
strip: //noscript
|
||||
|
@ -16,4 +19,4 @@ next_page_link://div[@id="next_page"]/a
|
|||
single_page_link: concat('http://www.computerworld.com/s/article/print/', substring-after(//link[@rel='canonical']/@href, '/s/article/'))
|
||||
|
||||
test_url: http://www.computerworld.com/s/article/9224348/Apple_s_new_OS_X_tightens_screws_on_some_malware
|
||||
test_url: http://www.computerworld.com/s/article/9227679/Windows_8_Release_Preview_Updated_but_still_uneasy
|
||||
test_url: http://www.computerworld.com/s/article/9227679/Windows_8_Release_Preview_Updated_but_still_uneasy
|
21
inc/3rdparty/site_config/standard/contrepoints.org.txt
vendored
Executable file
21
inc/3rdparty/site_config/standard/contrepoints.org.txt
vendored
Executable file
|
@ -0,0 +1,21 @@
|
|||
# Contrepoints.org
|
||||
# As of 2015-04, it's a WordPress-powered website.
|
||||
|
||||
title: //h1[contains(concat(' ',normalize-space(@class),' '),' page-title ')]//span[contains(concat(' ',normalize-space(@class),' '),' inner-text ')]
|
||||
date: //time[contains(concat(' ',normalize-space(@class),' '),' art-date ')]
|
||||
author: //h1[contains(concat(' ',normalize-space(@class),' '),' author-name ')]
|
||||
body: //article[contains(concat(' ',normalize-space(@class),' '),' plain-art ')]
|
||||
|
||||
# no toolbar, meta, etc, but misses excerpt
|
||||
# body: //div[contains(concat(' ',normalize-space(@class),' '),' entry ')]
|
||||
|
||||
# Thus, we need to strip useless elements from the "plain-art"
|
||||
strip: //div[contains(concat(' ',normalize-space(@class),' '),' plain-post-topbar ')]
|
||||
strip: //div[contains(concat(' ',normalize-space(@class),' '),' single-type-block ')]
|
||||
strip: //header[contains(concat(' ',normalize-space(@class),' '),' entry-header ')]
|
||||
|
||||
# And no pruning is needed because we stripped unwanted elements.
|
||||
prune: no
|
||||
|
||||
test_url: http://www.contrepoints.org/2015/04/25/205709-leconomie-selon-ray-dalio
|
||||
test_url: http://www.contrepoints.org/2015/04/25/205734-huile-et-gaz-de-schiste-revolution-durable
|
|
@ -1,4 +1,4 @@
|
|||
body: //*[contains(@class,'body')]
|
||||
body: //div[contains(@class,'post-body')]
|
||||
date: //abbr[@class='published']
|
||||
|
||||
test_url: http://www.cooper.com/journal/2012/08/2-weeks-left-to-win-your-way-to-the-woodstock-of-ux-coopers-ux-boot-camp.html/
|
||||
test_url: http://www.cooper.com/journal/2015/6/creating-personas
|
||||
|
|
14
inc/3rdparty/site_config/standard/cwnp.com.txt
vendored
Executable file
14
inc/3rdparty/site_config/standard/cwnp.com.txt
vendored
Executable file
|
@ -0,0 +1,14 @@
|
|||
title: //div[@class='entry-pad']//h2
|
||||
body: //div[contains(concat(' ',normalize-space(@class),' '),' entry-pad ')]
|
||||
strip: //h1
|
||||
strip: //p
|
||||
strip: //h2
|
||||
strip: //div[@class='clear']
|
||||
|
||||
prune: no
|
||||
tidy: no
|
||||
|
||||
autodetect_on_failure: no
|
||||
|
||||
test_url: https://www.cwnp.com/wotd.php
|
||||
test_url: https://www.cwnp.com/qotd.php
|
|
@ -7,6 +7,15 @@ strip_id_or_class: googleAds
|
|||
strip_id_or_class: digg-button
|
||||
strip_id_or_class: article-icon-links-container
|
||||
strip_id_or_class: clickToEnlarge
|
||||
strip_id_or_class: articleIconLinksContainer
|
||||
strip_id_or_class: related-carousel
|
||||
strip_id_or_class: reader-comments
|
||||
strip_id_or_class: most-watched
|
||||
strip_id_or_class: most-read
|
||||
|
||||
find_string:blkBorder img-share
|
||||
replace_string: nothing
|
||||
|
||||
tidy: no
|
||||
|
||||
test_url: http://www.dailymail.co.uk/news/article-1375423/Royal-wedding-Texan-billionaire-Joe-Albritton-invited-Prince-Charles.html
|
5
inc/3rdparty/site_config/standard/dailytelegraph.com.au.txt
vendored
Executable file
5
inc/3rdparty/site_config/standard/dailytelegraph.com.au.txt
vendored
Executable file
|
@ -0,0 +1,5 @@
|
|||
title: //h1[@class="heading"]
|
||||
author: //cite[@class='author']
|
||||
date: //li[contains(@class, 'date-and-time')]
|
||||
|
||||
|
1
inc/3rdparty/site_config/standard/deadspin.com.txt
vendored
Executable file
1
inc/3rdparty/site_config/standard/deadspin.com.txt
vendored
Executable file
|
@ -0,0 +1 @@
|
|||
http_header(user-agent): PHP/5.3
|
13
inc/3rdparty/site_config/standard/derbund.ch.txt
vendored
Executable file
13
inc/3rdparty/site_config/standard/derbund.ch.txt
vendored
Executable file
|
@ -0,0 +1,13 @@
|
|||
# Author: cirnod@gmail.com
|
||||
|
||||
tidy: no
|
||||
prune: no
|
||||
|
||||
body: //div[@id="article"]/h3 | //*[@id="mainContent"]
|
||||
|
||||
# General Cleanup
|
||||
#strip_id_or_class: info_panel
|
||||
|
||||
|
||||
# Try yourself
|
||||
test_url: http://www.derbund.ch/bern/nachrichten/Fossilienforscher-stehen-auf-Heavy-Metal/story/20919522
|
2
inc/3rdparty/site_config/standard/designbuildsource.com.au.txt
vendored
Executable file
2
inc/3rdparty/site_config/standard/designbuildsource.com.au.txt
vendored
Executable file
|
@ -0,0 +1,2 @@
|
|||
date: substring-after(//p[@class='post_date'], 'on')
|
||||
|
|
@ -1,11 +1,9 @@
|
|||
#title: substring(substring-after(//title, ':'), 1, string-length(substring-after(//title, ':')) - 10)
|
||||
title: //div[contains(@class, 'SB_Title')]//a
|
||||
body: //div[contains(@class, 'STR_Image')]
|
||||
body: //*[contains(@class, 'SB_Content')]
|
||||
title: //a[@class="post-title"]/text()
|
||||
title: //meta[@name="twitter:title"]/@content
|
||||
body: //img[@class="img-responsive img-comic"]
|
||||
author: string('Scott Adams')
|
||||
date: //*[contains(@class, 'SB_Detail')]/text()[1]
|
||||
|
||||
date: //meta[@property="article:publish_date"]/@content
|
||||
|
||||
test_url: http://dilbert.com/blog/entry/death_by_hypnosis_or_not/
|
||||
test_url: http://dilbert.com/strips/comic/2013-10-22
|
||||
test_url: http://feed.dilbert.com/dilbert/daily_strip
|
||||
test_url: http://feed.dilbert.com/dilbert/daily_strip
|
||||
|
|
6
inc/3rdparty/site_config/standard/dn.se.txt
vendored
6
inc/3rdparty/site_config/standard/dn.se.txt
vendored
|
@ -15,6 +15,9 @@ strip_id_or_class: hook
|
|||
strip_id_or_class: right
|
||||
strip_id_or_class: footer
|
||||
|
||||
strip_id_or_class: ad-head
|
||||
strip_id_or_class: atc-share-title
|
||||
|
||||
# Other news
|
||||
strip: //div[@id="mirrors"]
|
||||
|
||||
|
@ -25,4 +28,5 @@ author: //div[@id="byline"]/div/p/strong
|
|||
date: substring(substring-after(//p[@class="published"], 'Publicerad '), 0, 11)
|
||||
|
||||
test_url: http://www.dn.se/nyheter/varlden/landade-flygplan-mitt-i-villaomrade
|
||||
test_url: http://www.dn.se/m/rss/senaste-nytt
|
||||
test_contains: Ett tekniskt haveri tvingade
|
||||
test_url: http://www.dn.se/rss/senaste-nytt
|
||||
|
|
4
inc/3rdparty/site_config/standard/economie.gouv.fr.txt
vendored
Executable file
4
inc/3rdparty/site_config/standard/economie.gouv.fr.txt
vendored
Executable file
|
@ -0,0 +1,4 @@
|
|||
body: //div[contains(@class, 'txtVisu')]
|
||||
prune: no
|
||||
|
||||
test_url: http://www.economie.gouv.fr/dgccrf/Publications/Vie-pratique/Fiches-pratiques/Assurance
|
7
inc/3rdparty/site_config/standard/entwickler.de.txt
vendored
Executable file
7
inc/3rdparty/site_config/standard/entwickler.de.txt
vendored
Executable file
|
@ -0,0 +1,7 @@
|
|||
title: //h1[@class="post-title"]
|
||||
body: //section[@class="article-content"]
|
||||
author: //div[@class="post-bottom-meta"]/span[@class="post-author"]
|
||||
date: //div[@class="post-date"]/time/@datetime
|
||||
|
||||
test_url: https://entwickler.de/online/mobile-welt-offline-welt-was-der-offline-first-ansatz-fuer-app-entwickler-heisst-140602.html
|
||||
test_url: https://entwickler.de/online/development/plex-docker-joomla-165345.html
|
4
inc/3rdparty/site_config/standard/explosm.net.txt
vendored
Executable file
4
inc/3rdparty/site_config/standard/explosm.net.txt
vendored
Executable file
|
@ -0,0 +1,4 @@
|
|||
body: //img[@id='main-comic']
|
||||
author: substring(//small[@class="author-credit-name"], 4)
|
||||
|
||||
test_url: http://explosm.net/comics/3954/
|
|
@ -1,12 +1,14 @@
|
|||
body: //div[@id='imagestage']
|
||||
body: //div[contains(@class, 'userContentWrapper')]
|
||||
|
||||
body: //div[@id='m_story_permalink_view' or contains(@data-sigil, 'm-story-view')]
|
||||
strip_id_or_class: commentable
|
||||
strip: //div[contains(@data-sigil, 'm-mentions-expand')]
|
||||
|
||||
prune: no
|
||||
tidy: no
|
||||
|
||||
# single_page_link: replace(substring-after(//noscript//meta[@http-equiv="refresh"]/@content, 'URL='), "&amp;", "&")
|
||||
single_page_link: concat("https://m.", substring-after(//link[@rel="alternate" and @media="handheld"]/@href, "//www."))
|
||||
if_page_contains: //link[@rel="alternate" and @media="handheld"]
|
||||
|
||||
test_url: https://www.facebook.com/permalink.php?story_fbid=10154584776550183&id=294468630182
|
||||
test_contains: holding an extraordinary session in Brussels this month
|
||||
|
|
|
@ -1,16 +1,20 @@
|
|||
title: //h1
|
||||
author: //h5[@class='byline']//a
|
||||
date: //h5[@class='date']
|
||||
body: //figure[@class='node-poster'] | //div[contains(@class, "node-content")]
|
||||
strip_id_or_class: article-top-wrapper
|
||||
strip_id_or_class: footer-message
|
||||
strip_id_or_class: print-logo
|
||||
strip: //cite
|
||||
strip://*[@class='timestamp']
|
||||
strip://div[@id='page_right']
|
||||
strip://section[@id='header_region']
|
||||
strip://h1[@class='node-title']
|
||||
strip://div[@class='node-submitted']
|
||||
strip_id_or_class: skipnav
|
||||
author: //div[@class='byline']//a
|
||||
date: //meta[@property='article:published_time']/@content
|
||||
body: //figure[@class='jumbotron'] | //div[@itemprop='body']
|
||||
|
||||
prune: no
|
||||
|
||||
#strip_id_or_class: article-top-wrapper
|
||||
#strip_id_or_class: footer-message
|
||||
#strip_id_or_class: print-logo
|
||||
#strip: //cite
|
||||
#strip://*[@class='timestamp']
|
||||
#strip://div[@id='page_right']
|
||||
#strip://section[@id='header_region']
|
||||
#strip://h1[@class='node-title']
|
||||
#strip://div[@class='node-submitted']
|
||||
#strip_id_or_class: skipnav
|
||||
|
||||
test_url: http://www.fastcompany.com/3000226/link-between-quietness-and-productivity
|
||||
test_url: http://www.fastcompany.com/3003586/6-simple-rituals-reach-your-potential-every-day
|
||||
test_contains: Some of you may have tried to reach me this morning
|
||||
test_url: http://www.fastcompany.com/3003586/6-simple-rituals-reach-your-potential-every-day
|
||||
|
|
4
inc/3rdparty/site_config/standard/fok.nl.txt
vendored
Executable file
4
inc/3rdparty/site_config/standard/fok.nl.txt
vendored
Executable file
|
@ -0,0 +1,4 @@
|
|||
# skip cookie warning
|
||||
single_page_link: concat(//form/@action, '?allowcookies=yes')
|
||||
|
||||
test_url: http://fok.nl/687116
|
|
@ -6,4 +6,5 @@ strip: //div[contains(@class,"aside")]
|
|||
# remove some SharePoint webpart label junk
|
||||
strip: //div[@id="ctl00_PlaceHolderMain_ArticleLeadField_label"]
|
||||
strip: //div[@id="ctl00_PlaceHolderMain_PublishingPageContentField_label"]
|
||||
test_url: http://forsvaret.no/aktuelt/publisert/nyheter/Sider/F5-fly-til-Skedsmo.aspx
|
||||
test_url: https://forsvaret.no/aktuelt/historisk-medaljeutdeling
|
||||
test_contains: Samarbeidet med Marinen har vært en sann glede
|
||||
|
|
14
inc/3rdparty/site_config/standard/france24.com.txt
vendored
Executable file
14
inc/3rdparty/site_config/standard/france24.com.txt
vendored
Executable file
|
@ -0,0 +1,14 @@
|
|||
# Generated by FiveFilters.org's web-based selection tool
|
||||
# Place this file inside your site_config/custom/ folder
|
||||
# Source: http://siteconfig.fivefilters.org/grab.php?url=http%3A%2F%2Fwww.france24.com%2Fen%2F20150427-togo-gnassingbe-poised-extend-power-election%2F
|
||||
|
||||
body: //article[contains(concat(' ',normalize-space(@class),' '),' article-long ')]//div[contains(concat(' ',normalize-space(@class),' '),' bd ')]
|
||||
title: //h1[@class="title"]
|
||||
author://p[@class="author"]
|
||||
date://p[@class="modification"]
|
||||
|
||||
find_string: <p class="modification">Latest update :
|
||||
replace_string: <p class="modification">
|
||||
|
||||
|
||||
test_url: http://www.france24.com/en/20150427-togo-gnassingbe-poised-extend-power-election/
|
3
inc/3rdparty/site_config/standard/galwayindependent.com.txt
vendored
Executable file
3
inc/3rdparty/site_config/standard/galwayindependent.com.txt
vendored
Executable file
|
@ -0,0 +1,3 @@
|
|||
title: //div[@class='leftCol']/h1
|
||||
|
||||
prune: no
|
|
@ -1,5 +1,7 @@
|
|||
title: //meta[@property="og:title"]/@content
|
||||
body: //div[@id='GBTVPlayer'] | //div[contains(@class, 'col490')]
|
||||
author: //span[contains(concat(' ',normalize-space(@class),' '),' author ')]
|
||||
date: //header[@id='gbArticleHeader']//div//time/@datetime
|
||||
|
||||
prune: no
|
||||
|
||||
|
@ -7,4 +9,4 @@ strip_id_or_class: noprint
|
|||
strip: //div[@id='gbNewsTextContent']/following-sibling::*
|
||||
|
||||
test_url: http://www.gameblog.fr/news/26330-les-sims-3-showtime-s-annonce-en-video
|
||||
test_url: http://www.gameblog.fr/news/26306-mise-a-jour-du-dashboard-de-la-xbox-360-disponible
|
||||
test_url: http://www.gameblog.fr/news/26306-mise-a-jour-du-dashboard-de-la-xbox-360-disponible
|
||||
|
|
|
@ -3,4 +3,6 @@ body: //div[@class="post-body"]
|
|||
# Remove 'content is restricted'
|
||||
strip: //div[@id='agegate_IDHERE']
|
||||
|
||||
http_header(user-agent): PHP/5.3
|
||||
|
||||
test_url: http://gawker.com/#!5782070/russian-bomb-squad-successfully-defuses-sex-toy
|
1
inc/3rdparty/site_config/standard/getpocket.com.txt
vendored
Executable file
1
inc/3rdparty/site_config/standard/getpocket.com.txt
vendored
Executable file
|
@ -0,0 +1 @@
|
|||
http_header(user-agent): PHP/5.3
|
|
@ -1,4 +1,6 @@
|
|||
body: //div[@class="highlight"]/pre
|
||||
|
||||
title: //div[contains(@class,'gist-description')]
|
||||
body: //div[contains(@class,'blob-wrapper')]
|
||||
test_url: https://gist.github.com/staltz/868e7e9bc2a7b8c1f754
|
||||
prune: no
|
||||
tidy: no
|
||||
|
||||
test_url: https://gist.github.com/1258908
|
8
inc/3rdparty/site_config/standard/gizmodo.com.au.txt
vendored
Executable file
8
inc/3rdparty/site_config/standard/gizmodo.com.au.txt
vendored
Executable file
|
@ -0,0 +1,8 @@
|
|||
body: //div[@id='content_post' or @class="post-body" or contains(@class, 'illustration top')]
|
||||
author: (//cite//span[@class="plus-icon"])[1]
|
||||
date: //span[@class="date"]
|
||||
date: //time
|
||||
|
||||
prune: no
|
||||
|
||||
test_url: http://gizmodo.com/5880147/kuhn-rikon-improves-their-spice-grinder-with-grade-school-science
|
|
@ -6,6 +6,8 @@ date: //span[@class="date"]
|
|||
|
||||
prune: no
|
||||
|
||||
http_header(user-agent): PHP/5.3
|
||||
|
||||
test_url: http://gizmodo.com/5880147/kuhn-rikon-improves-their-spice-grinder-with-grade-school-science
|
||||
test_url: http://gizmodo.com/what-van-goghs-paintings-would-look-like-if-they-came-874035680
|
||||
test_url: http://gizmodo.com/vip.xml
|
6
inc/3rdparty/site_config/standard/globalgrind.com.txt
vendored
Executable file
6
inc/3rdparty/site_config/standard/globalgrind.com.txt
vendored
Executable file
|
@ -0,0 +1,6 @@
|
|||
body: //div[contains(@class, 'content-body')]
|
||||
|
||||
prune: no
|
||||
|
||||
test_url: http://globalgrind.com/2015/04/26/listen-jeremih-featuring-chance-the-rapper-the-social-experiment-planes-remix-new-music/
|
||||
test_contains: The Chicago rapper has made a name for himself
|
5
inc/3rdparty/site_config/standard/gocomics.com.txt
vendored
Executable file
5
inc/3rdparty/site_config/standard/gocomics.com.txt
vendored
Executable file
|
@ -0,0 +1,5 @@
|
|||
body: //a[@class="photo"]/img[@class="strip"]
|
||||
author: //meta[@name="author"]/@content
|
||||
date: //meta[@property="gocomics:publish_date"]/@content
|
||||
|
||||
test_url: http://www.gocomics.com/garfield/2015/06/13
|
2
inc/3rdparty/site_config/standard/help.fivefilters.org.txt
vendored
Executable file
2
inc/3rdparty/site_config/standard/help.fivefilters.org.txt
vendored
Executable file
|
@ -0,0 +1,2 @@
|
|||
title: //div[@class="title"]/h3
|
||||
date: substring-after(//div[@class="meta"], ": ")
|
12
inc/3rdparty/site_config/standard/heraldsun.com.au.txt
vendored
Executable file
12
inc/3rdparty/site_config/standard/heraldsun.com.au.txt
vendored
Executable file
|
@ -0,0 +1,12 @@
|
|||
#body: //div[@class='story-body']
|
||||
body: //div[contains(@class, 'story-body')]
|
||||
title: //div[@class='story-headline']//h1
|
||||
author: //cite[contains(@class, 'author')]
|
||||
date: //span[@class='datestamp']
|
||||
|
||||
strip_id_or_class: story-info
|
||||
strip: //div[contains(@class, 'story-promo')]
|
||||
strip: //div[contains(@class, 'story-related')]
|
||||
|
||||
prune: no
|
||||
tidy: no
|
10
inc/3rdparty/site_config/standard/hiiraan.com.txt
vendored
Executable file
10
inc/3rdparty/site_config/standard/hiiraan.com.txt
vendored
Executable file
|
@ -0,0 +1,10 @@
|
|||
# Generated by FiveFilters.org's web-based selection tool
|
||||
# Place this file inside your site_config/custom/ folder
|
||||
# Source: http://siteconfig.fivefilters.org/grab.php?url=http%3A%2F%2Fwww.hiiraan.com%2Fnews%2F2014%2FDec%2Fwararka_maanta20-89428.htm
|
||||
|
||||
body: //div[contains(concat(' ',normalize-space(@class),' '),' single ')]//div[contains(concat(' ',normalize-space(@class),' '),' description ')]
|
||||
|
||||
prune: no
|
||||
|
||||
test_url: http://www.hiiraan.com/news/2014/Dec/wararka_maanta20-89428.htm
|
||||
test_url: http://rss.hiiraan.com/wararka_maanta_rss.xml
|
|
@ -1,9 +1,16 @@
|
|||
title: //meta[@property='og:title']/@content
|
||||
body: //div[contains(@class, 'articleContent')]
|
||||
body: //img[contains(@class, 'FirstImage')] | //div[contains(@class, 'articleContent')]
|
||||
date: //meta[@property='article:published_time']/@content
|
||||
author: //div[@id='main']//div[@class='byline']//span[@class='authorName']
|
||||
|
||||
strip_id_or_class: RelatedArtTag
|
||||
|
||||
strip: //h5[contains(., 'READ MORE:')]
|
||||
strip: //h5[contains(., 'Read more:')]
|
||||
|
||||
tidy: no
|
||||
test_url: http://www.independent.co.uk/news/world/middle-east/syria-could-face-human-rights-probe-2274326.html
|
||||
test_url: http://www.independent.co.uk/news/world/middle-east/syria-could-face-human-rights-probe-2274326.html
|
||||
test_url: http://www.independent.co.uk/voices/comment/robert-fisk-on-the-cia-torture-report-once-again-language-is-distorted-in-order-to-hide-us-state-wrongdoing-9924501.html
|
||||
test_contains: Thank God for Noam Chomsky.
|
||||
|
||||
test_url: http://www.independent.co.uk/news/uk/rss
|
1
inc/3rdparty/site_config/standard/io9.com.txt
vendored
Executable file
1
inc/3rdparty/site_config/standard/io9.com.txt
vendored
Executable file
|
@ -0,0 +1 @@
|
|||
http_header(user-agent): PHP/5.3
|
4
inc/3rdparty/site_config/standard/ippmedia.com.txt
vendored
Executable file
4
inc/3rdparty/site_config/standard/ippmedia.com.txt
vendored
Executable file
|
@ -0,0 +1,4 @@
|
|||
title: //div[@class="content_title"]//h2
|
||||
author: substring-after(//div[@class="byline"], "By ")
|
||||
date: //div[@class="publish_date"]
|
||||
strip: //div[@class="read_image_box"]
|
5
inc/3rdparty/site_config/standard/itnews.com.au.txt
vendored
Executable file
5
inc/3rdparty/site_config/standard/itnews.com.au.txt
vendored
Executable file
|
@ -0,0 +1,5 @@
|
|||
title: //h1[@class='article-header']
|
||||
body: //div[@class='body-content']
|
||||
author: //span[@class='author-byline']/a[contains(@id, 'Author')]
|
||||
|
||||
strip: //span[contains(@id, 'Article_SourceLabel')]
|
|
@ -1,2 +1,5 @@
|
|||
author: //span[@class='plus-icon']
|
||||
|
||||
http_header(user-agent): PHP/5.3
|
||||
|
||||
test_url: http://jalopnik.com/5892124/1955-porsche-550-spyder-sells-for-record-3685-million/
|
1
inc/3rdparty/site_config/standard/jezebel.com.txt
vendored
Executable file
1
inc/3rdparty/site_config/standard/jezebel.com.txt
vendored
Executable file
|
@ -0,0 +1 @@
|
|||
http_header(user-agent): PHP/5.3
|
|
@ -1,2 +1,5 @@
|
|||
author: //span[@class="plus-icon"]
|
||||
|
||||
http_header(user-agent): PHP/5.3
|
||||
|
||||
test_url: http://kotaku.com/5920211/save-the-furries-on-your-wii-in-this-weeks-nintendo-download
|
|
@ -2,7 +2,8 @@ title: //meta[@name='title']/@content
|
|||
author: //span[@class='sign']//a[@class='journaliste']
|
||||
author: //meta[@name='author']/@content
|
||||
body: //*[@id='article']/div[@class='photo'] | //*[@id='article']/h2 | //*[@id='article']/div[@class='texte']
|
||||
date: //time[@pubdate]/@datetime
|
||||
date: //li[contains(concat(' ',normalize-space(@class),' '),' fig-date-pub ')]//time
|
||||
prune: no
|
||||
test_url: http://www.lefigaro.fr/environnement/2011/11/10/01029-20111110ARTFIG00801-la-chine-confrontee-a-un-immense-defi-ecologique.php
|
||||
test_url: http://www.lefigaro.fr/conjoncture/2012/11/20/20002-20121120ARTFIG00609-l-usager-devrait-payer-plus-pour-financer-les-transports.php
|
||||
test_url: http://www.lefigaro.fr/conjoncture/2012/11/20/20002-20121120ARTFIG00609-l-usager-devrait-payer-plus-pour-financer-les-transports.php
|
||||
test_url: http://www.lefigaro.fr/social/2015/03/10/09010-20150310ARTFIG00312-encore-une-annee-noire-pour-l-emploi-salarie.php
|
||||
|
|
|
@ -42,6 +42,12 @@ strip: //p[@class="arrow"]
|
|||
|
||||
# Remove "track" image from article body
|
||||
strip: //img[@alt="track"]
|
||||
|
||||
# Remove hidden URLs
|
||||
strip: //a[@x-inset="hidden"]
|
||||
|
||||
http_header(user-agent): PHP/5.3
|
||||
|
||||
test_url: http://lifehacker.com/5925801/how-can-i-turn-vague-goals-into-actionable-to+dos
|
||||
test_url: http://lifehacker.com/5941600/hack-an-old-computer-mouse-into-a-wireless-bluetooth-mouse
|
||||
test_url: http://lifehacker.com/what-happens-to-the-brain-when-you-meditate-and-how-it-1202533314
|
||||
test_url: http://lifehacker.com/what-happens-to-the-brain-when-you-meditate-and-how-it-1202533314
|
||||
|
|
6
inc/3rdparty/site_config/standard/linuxjournal.com.txt
vendored
Executable file
6
inc/3rdparty/site_config/standard/linuxjournal.com.txt
vendored
Executable file
|
@ -0,0 +1,6 @@
|
|||
body: //div[@class='content-area']
|
||||
next_page_link: //a[@title='Go to next page']
|
||||
author: //a[@title='View user profile.']
|
||||
strip_id_or_class: comments
|
||||
|
||||
test_url: http://www.linuxjournal.com/content/be-mechanicwith-android-and-linux
|
|
@ -3,6 +3,12 @@ body: //div[@class="story-body"]
|
|||
date: //p[@class='date']/strong
|
||||
author: substring-after(//div[@class="story-inner"]/div[@class="byline"]//span[@class='name'], 'By')
|
||||
|
||||
find_string: http://ichef.bbci.co.uk/news/200/
|
||||
replace_string: http://ichef.bbci.co.uk/news/624/
|
||||
|
||||
find_string: http://ichef.bbci.co.uk/news/304/
|
||||
replace_string: http://ichef.bbci.co.uk/news/624/
|
||||
|
||||
strip: //div[@class="story-inner"]/div[@class="byline"]
|
||||
|
||||
test_url: http://m.bbc.co.uk/news/science-environment-19144464
|
12
inc/3rdparty/site_config/standard/m.facebook.com.txt
vendored
Executable file
12
inc/3rdparty/site_config/standard/m.facebook.com.txt
vendored
Executable file
|
@ -0,0 +1,12 @@
|
|||
body: //div[@id='m_story_permalink_view' or contains(@data-sigil, 'm-story-view')]
|
||||
|
||||
title: //div[@id='m_story_permalink_view' or contains(@data-sigil, 'm-story-view')]//h3
|
||||
|
||||
strip_id_or_class: commentable
|
||||
strip: //*[contains(@data-sigil, 'm-mentions-expand') or contains(@data-sigil, 'story-popup-context') or contains(@data-sigil, 'share') or contains(@data-sigil, 'translate')]
|
||||
|
||||
prune: no
|
||||
tidy: no
|
||||
|
||||
test_url: https://www.facebook.com/permalink.php?story_fbid=10154584776550183&id=294468630182
|
||||
test_contains: holding an extraordinary session in Brussels this month
|
4
inc/3rdparty/site_config/standard/m.theregister.co.uk.txt
vendored
Executable file
4
inc/3rdparty/site_config/standard/m.theregister.co.uk.txt
vendored
Executable file
|
@ -0,0 +1,4 @@
|
|||
strip: //div[@class='wptl btm']
|
||||
body: //div[@id='article']//h2 | //div[@id='body']
|
||||
|
||||
test_url: http://m.theregister.co.uk/2015/07/06/geeks_guide_spaceguard_center/
|
1
inc/3rdparty/site_config/standard/marketingmag.com.au.txt
vendored
Executable file
1
inc/3rdparty/site_config/standard/marketingmag.com.au.txt
vendored
Executable file
|
@ -0,0 +1 @@
|
|||
strip: //h3[@class="related-posts"]
|
|
@ -1,4 +1,4 @@
|
|||
body: //div[contains(@class, 'postContent-inner')]
|
||||
body: //div[contains(@class, 'postArticle-content')]
|
||||
strip_id_or_class: supplementalPostContent
|
||||
|
||||
prune: no
|
||||
|
|
|
@ -3,10 +3,5 @@ body: //div[@style="float:left;width:740px;"]
|
|||
|
||||
tidy: no
|
||||
|
||||
test_url: http://www.menshealth.com.sg/fitness/mh-picks-under-armour-clutchfit-nitro-mid-cleats
|
||||
test_contains: These cleats are made for one thing
|
||||
|
||||
test_url: http://www.menshealth.com.sg/fitness/top-10-fat-burning-bodyweight-moves-you-can-do-10-minutes
|
||||
test_contains: let this workout fool you
|
||||
|
||||
test_url: http://www.menshealth.com.sg/fitness/feed
|
||||
# broken feed?
|
||||
test_url: http://www.menshealth.com.sg/fitness/feed
|
||||
|
|
3
inc/3rdparty/site_config/standard/mitchellrepublic.com.txt
vendored
Executable file
3
inc/3rdparty/site_config/standard/mitchellrepublic.com.txt
vendored
Executable file
|
@ -0,0 +1,3 @@
|
|||
body: //div[@class='section']
|
||||
strip_id_or_class: mediumtxt
|
||||
strip: //strong[contains
|
|
@ -1,4 +1,7 @@
|
|||
title: //h1[contains(@class, 'headline')]
|
||||
body: //article[contains(@class, 'full-art')]
|
||||
date: //meta[@name="pdate"]/@content
|
||||
author: //meta[@name="byl"]/@content
|
||||
|
||||
strip_id_or_class: image-credit
|
||||
test_url: http://mobile.nytimes.com/2014/06/19/opinion/gail-collins-romney-and-the-2016-contenders-huddle.html
|
2
inc/3rdparty/site_config/standard/moneymanagement.com.au.txt
vendored
Executable file
2
inc/3rdparty/site_config/standard/moneymanagement.com.au.txt
vendored
Executable file
|
@ -0,0 +1,2 @@
|
|||
date: //span[@class="publishdate"]//time
|
||||
author: //span[@class="byline"]
|
3
inc/3rdparty/site_config/standard/nbnnews.com.au.txt
vendored
Executable file
3
inc/3rdparty/site_config/standard/nbnnews.com.au.txt
vendored
Executable file
|
@ -0,0 +1,3 @@
|
|||
body: //div[contains(concat(' ',normalize-space(@class),' '),' entry ') or contains(@class, 'single-post-thumb')]
|
||||
test_url: http://www.nbnnews.com.au/2015/03/24/lismore-man-will-attempt-to-run-around-australia/
|
||||
test_url: http://www.nbnnews.com.au/category/nthn-rivers-sport/feed/
|
3
inc/3rdparty/site_config/standard/news.com.au.txt
vendored
Executable file
3
inc/3rdparty/site_config/standard/news.com.au.txt
vendored
Executable file
|
@ -0,0 +1,3 @@
|
|||
body: //div[@class='story-body']
|
||||
prune: no
|
||||
tidy: no
|
1
inc/3rdparty/site_config/standard/news.menshealth.com.txt
vendored
Executable file
1
inc/3rdparty/site_config/standard/news.menshealth.com.txt
vendored
Executable file
|
@ -0,0 +1 @@
|
|||
strip: //span[@style="color: #cf1206;"]
|
3
inc/3rdparty/site_config/standard/news.ninemsn.com.au.txt
vendored
Executable file
3
inc/3rdparty/site_config/standard/news.ninemsn.com.au.txt
vendored
Executable file
|
@ -0,0 +1,3 @@
|
|||
strip: //a[@class="contact"]
|
||||
strip: //div[@class="article-media video-item"]
|
||||
date: //div[@class='display-date']
|
7
inc/3rdparty/site_config/standard/parool.nl.txt
vendored
Executable file
7
inc/3rdparty/site_config/standard/parool.nl.txt
vendored
Executable file
|
@ -0,0 +1,7 @@
|
|||
#bypass cookie check
|
||||
single_page_link: //a[contains(@href, '/acceptCookieCheck.do?url=')]
|
||||
|
||||
test_url: http://www.parool.nl/parool/nl/4/AMSTERDAM/article/detail/4042734/2015/05/29/MRSA-bacterie-niet-verder-verspreid-in-Bijlmerbajes.dhtml
|
||||
test_contains: De twee gevangenen die
|
||||
|
||||
test_url: http://www.parool.nl/amsterdam/rss.xml
|
12
inc/3rdparty/site_config/standard/perthnow.com.au.txt
vendored
Executable file
12
inc/3rdparty/site_config/standard/perthnow.com.au.txt
vendored
Executable file
|
@ -0,0 +1,12 @@
|
|||
#body: //div[@class='story-body']
|
||||
body: //div[contains(@class, 'story-body')]
|
||||
title: //div[@class='story-headline']//h1
|
||||
author: //cite[contains(@class, 'author')]
|
||||
date: //span[@class='datestamp']
|
||||
|
||||
strip_id_or_class: story-info
|
||||
strip: //div[contains(@class, 'story-promo')]
|
||||
strip: //div[contains(@class, 'story-related')]
|
||||
|
||||
prune: no
|
||||
tidy: no
|
1
inc/3rdparty/site_config/standard/planetsave.com.txt
vendored
Executable file
1
inc/3rdparty/site_config/standard/planetsave.com.txt
vendored
Executable file
|
@ -0,0 +1 @@
|
|||
strip_id_or_class: author-bio-box
|
11
inc/3rdparty/site_config/standard/presseportal.de.txt
vendored
Executable file
11
inc/3rdparty/site_config/standard/presseportal.de.txt
vendored
Executable file
|
@ -0,0 +1,11 @@
|
|||
body: //div[contains(concat(' ',normalize-space(@class),' '),' story-text ')]
|
||||
|
||||
strip_id_or_class: news-bodycopy
|
||||
|
||||
parser: html5php
|
||||
tidy: no
|
||||
|
||||
test_url: http://www.presseportal.de/pm/103258/2930232/felix-neureuther-vor-der-ski-wm-ich-denke-von-rennen-zu-rennen
|
||||
test_url: http://www.presseportal.de/pm/66749/2933779/koelner-stadt-anzeiger-bahnmitarbeiter-werden-in-nrw-immer-haeufiger-angegriffen-zahl-der/rss
|
||||
test_contains: kleineren Bahnhöfen installieren und erhofft
|
||||
test_url: http://www.presseportal.de/rss/presseportal.rss2
|
18
inc/3rdparty/site_config/standard/quora.com.txt
vendored
18
inc/3rdparty/site_config/standard/quora.com.txt
vendored
|
@ -1,8 +1,10 @@
|
|||
tidy: no
|
||||
prune: no
|
||||
body: //div[contains(@class, 'main_col')]
|
||||
title: //h1
|
||||
body: //div[contains(concat(' ',normalize-space(@class),' '),' Answer ')] | //div[contains(concat(' ',normalize-space(@class),' '),' header ')] | //div[contains(concat(' ',normalize-space(@class),' '),' AnswerWikiArea ')] | //hr
|
||||
#body: //div[contains(@class, 'main_col')]
|
||||
|
||||
strip_id_or_class: AnswerFooter
|
||||
strip_id_or_class: ActionBar
|
||||
strip_id_or_class: hidden
|
||||
strip_id_or_class: item_action_bar
|
||||
strip_id_or_class: answer_voters
|
||||
|
@ -13,5 +15,15 @@ strip_id_or_class: view_tag
|
|||
strip_id_or_class: include_details
|
||||
strip_id_or_class: sig_edit
|
||||
strip_id_or_class: profile_photo_img
|
||||
strip_id_or_class: question_text_icons
|
||||
|
||||
test_url: http://www.quora.com/What-everyday-habit-do-you-wish-you-had-developed-earlier-in-life
|
||||
# insert hr between answers
|
||||
find_string: <div class="Answer"
|
||||
replace_string: <hr /><div class="Answer"
|
||||
|
||||
test_url: http://www.quora.com/What-everyday-habit-do-you-wish-you-had-developed-earlier-in-life
|
||||
test_contains: Please provide a specific practical/measurable action-based everyday
|
||||
test_contains: Exercise every day
|
||||
|
||||
test_url: http://www.quora.com/What-is-the-greatest-illusion-in-life
|
||||
test_contains: What is the greatest illusion in life?
|
||||
|
|
|
@ -7,9 +7,7 @@ author: //p[@class="tagline"]/a
|
|||
# this doesn't work for some reason...?
|
||||
date: //p[@class="tagline"]//@datetime
|
||||
|
||||
#body: (//div[contains(@class, 'noncollapsed')]//div[contains(@class, 'usertext-body')])[1]
|
||||
|
||||
body: //div[contains(concat(' ',normalize-space(@class),' '),' usertext-body ') and (contains(concat(' ',normalize-space(@class),' '),' may-blank-within ')) and (contains(concat(' ',normalize-space(@class),' '),' md-container '))]//div[contains(concat(' ',normalize-space(@class),' '),' md ')]
|
||||
body: (//div[contains(@class, 'noncollapsed')]//div[contains(@class, 'usertext-body')])[1]
|
||||
|
||||
strip_id_or_class: tagline
|
||||
strip_id_or_class: unvotable-message
|
||||
|
@ -20,4 +18,4 @@ single_page_link: //p[@class="title"]/a[contains(@href, 'http://')]
|
|||
|
||||
test_url: http://www.reddit.com/r/truegaming/comments/wfe7r/i_wrote_about_the_problems_i_honestly_feel_that/
|
||||
test_url: http://www.reddit.com/r/worldnews/comments/1as37r/twelve_north_korean_soldiers_attempting_to_defect/
|
||||
test_url: http://www.reddit.com/r/WritingPrompts/comments/2786lw/wp_in_a_world_where_puns_are_illegal_one_man/chybk8e
|
||||
test_url: http://www.reddit.com/r/WritingPrompts/comments/2786lw/wp_in_a_world_where_puns_are_illegal_one_man/chybk8e
|
|
@ -1,8 +1,10 @@
|
|||
title: //h2
|
||||
|
||||
strip: //div[ contains(@class, 'respond') ] | //h2 | //h1
|
||||
strip_id_or_class: social
|
||||
strip_id_or_class: dd_post_share
|
||||
|
||||
date: substring-after(//p[@class='info'], ' on ')
|
||||
|
||||
author: //p[@class='info']//a
|
||||
test_url: http://www.rockpapershotgun.com/2010/07/29/rps-half-verdict-starcraft-2/
|
||||
test_url: http://www.rockpapershotgun.com/2010/07/29/rps-half-verdict-starcraft-2/
|
||||
|
|
11
inc/3rdparty/site_config/standard/saadaalnews.net.txt
vendored
Executable file
11
inc/3rdparty/site_config/standard/saadaalnews.net.txt
vendored
Executable file
|
@ -0,0 +1,11 @@
|
|||
body: //div[contains(@class, 'section-content-left')]
|
||||
|
||||
strip_id_or_class: related
|
||||
strip_id_or_class: nocontent
|
||||
strip_id_or_class: comment
|
||||
strip_id_or_class: widget
|
||||
strip_id_or_class: respond
|
||||
strip: //h3[.='Comments']
|
||||
strip: //p[.='comments']
|
||||
|
||||
test_url: http://saadaalnews.net/?p=42624
|
14
inc/3rdparty/site_config/standard/smh.com.au.txt
vendored
Executable file
14
inc/3rdparty/site_config/standard/smh.com.au.txt
vendored
Executable file
|
@ -0,0 +1,14 @@
|
|||
body: //div[@id='content']
|
||||
title: //h1[@class='cN-headingPage']
|
||||
author: //h3[@class='authorName']
|
||||
date: //dd[@class='updated dtstamp']
|
||||
|
||||
strip: //ul[@class='social sponsored cfix']
|
||||
strip: //div[contains(@class, 'hiddenVisually')]
|
||||
strip: //dd[@class='updated dtstamp']
|
||||
strip: //h3[@class='authorName']
|
||||
strip: //ul[@class='social cfix']
|
||||
strip: //div[contains(@id, 'adspot')]
|
||||
|
||||
strip: //div[contains(@class, 'overlayPlayCountdown')]
|
||||
strip: //div[@class='fdVideoWof']//span[@class='gone']
|
13
inc/3rdparty/site_config/standard/smh.drive.com.au.txt
vendored
Executable file
13
inc/3rdparty/site_config/standard/smh.drive.com.au.txt
vendored
Executable file
|
@ -0,0 +1,13 @@
|
|||
body: //div[@id='content']
|
||||
title: //h1[@class='cN-headingPage']
|
||||
author: //h3[@class='authorName']
|
||||
date: //dd[@class='updated dtstamp']
|
||||
|
||||
strip: //ul[@class='social sponsored cfix']
|
||||
strip: //div[contains(@class, 'hiddenVisually')]
|
||||
strip: //dd[@class='updated dtstamp']
|
||||
strip: //h3[@class='authorName']
|
||||
strip: //ul[@class='social cfix']
|
||||
strip: //div[contains(@id, 'adspot')]
|
||||
|
||||
test_url: http://smh.drive.com.au/roads-and-traffic/driver-distraction-responsible-for-more-car-crashes-than-alcohol-20130503-2iyg0.html
|
|
@ -7,6 +7,9 @@ body://div[@id = 'article-body']
|
|||
# full content
|
||||
single_page_link://td/li[@class = 'article-singlepage']/a
|
||||
|
||||
# continue link
|
||||
single_page_link: //a[@id='continue-btn']
|
||||
|
||||
# caption clean up
|
||||
wrap_in(i)://span[@class='articleImageCaptionwide']
|
||||
move_into (//span[@class='articleImageCaptionwide'])://div[@id = 'articleImage']/p
|
||||
|
@ -17,4 +20,4 @@ strip://p[@id = 'articlePaginationWrapper']
|
|||
strip://ul[contains(@class, 'cat-breadcrumb')]
|
||||
strip://div [@class= 'viewMorePhotos']
|
||||
|
||||
test_url: http://www.smithsonianmag.com/history-archaeology/The-Goddess-Goes-Home.html
|
||||
test_url: http://www.smithsonianmag.com/history-archaeology/The-Goddess-Goes-Home.html
|
||||
|
|
3
inc/3rdparty/site_config/standard/snip.ly.txt
vendored
Executable file
3
inc/3rdparty/site_config/standard/snip.ly.txt
vendored
Executable file
|
@ -0,0 +1,3 @@
|
|||
single_page_link: //meta[@property="og:url"]/@content
|
||||
|
||||
test_url: http://snip.ly/qa1R
|
3
inc/3rdparty/site_config/standard/soundcity.tv.txt
vendored
Executable file
3
inc/3rdparty/site_config/standard/soundcity.tv.txt
vendored
Executable file
|
@ -0,0 +1,3 @@
|
|||
strip_id_or_class: sharing
|
||||
|
||||
test_url: http://soundcity.tv/feed/
|
25
inc/3rdparty/site_config/standard/spiegel.de.txt
vendored
25
inc/3rdparty/site_config/standard/spiegel.de.txt
vendored
|
@ -8,6 +8,9 @@
|
|||
# - Fixed single_page_link
|
||||
# - Included intro text in single page view
|
||||
# - Added body in default view
|
||||
# stesie@
|
||||
# - removed copyright box
|
||||
# - removed "print more" box
|
||||
|
||||
# set body
|
||||
tidy: no
|
||||
|
@ -15,6 +18,7 @@ tidy: no
|
|||
body: //div[@id="spArticleContent"]
|
||||
# body in default view
|
||||
body: //div[@id="spArticleSection"]
|
||||
body: //div[contains(@class, 'article-section')] | //div[@id='js-article-top-wide-asset'] | //p[contains(@class, 'article-intro')] | //div[contains(@class, 'js-module-box-image')]
|
||||
# body in "Fotostrecke"
|
||||
body: //div[@id="spBigaContent"]
|
||||
|
||||
|
@ -25,6 +29,8 @@ strip: //div[@id="spArticleContent"]/h3
|
|||
# set date in "Fotostrecke"
|
||||
date: //div[@id="spBigaDatum"]
|
||||
|
||||
# title in default view
|
||||
title: //h2[contains(@class, 'article-title')]
|
||||
#set title in single page view
|
||||
title: //div[@id='spArticleContent']/h2
|
||||
# strip title
|
||||
|
@ -49,7 +55,7 @@ strip: //*/div[@class='spCredit']/following-sibling::p
|
|||
strip: //div[@class='spMInline']
|
||||
|
||||
# remove photogalleries and extras
|
||||
strip: //div[@class='spPhotoGallery']
|
||||
strip: //div[contains(@class, 'spPhotoGallery')]
|
||||
strip: //div[@class='spPhotoGallery']/following-sibling::br
|
||||
strip: //div[@class='spAssetAlignleft']
|
||||
strip: //div[contains(@class,'spAsset')]
|
||||
|
@ -67,9 +73,24 @@ strip: //div[@id='spBigaLatestEntries']
|
|||
strip: //div[contains(@class, 'spBigaNavi')]
|
||||
strip: //div[@class='spDottedLine']
|
||||
|
||||
strip: //div[@class='asset-box article-print-more']
|
||||
strip: //div[@class='article-copyright']
|
||||
strip: //span[@class='image-buttons']
|
||||
|
||||
# Use link to print article for single page view
|
||||
single_page_link: //a[contains(@href, '-druck')]
|
||||
if_page_contains: //div[contains(@class, 'multi-pager-control')]
|
||||
|
||||
# Clean up title in print view
|
||||
find_string: <title>Druckversion -
|
||||
replace_string: <title>
|
||||
|
||||
# use next link in "Fotostrecke"
|
||||
next_page_link: //a[@class='spBigaControlForw']
|
||||
test_url: http://www.spiegel.de/politik/deutschland/0,1518,787602,00.html
|
||||
test_url: http://www.spiegel.de/politik/deutschland/0,1518,787602,00.html
|
||||
|
||||
# regular article
|
||||
test_url: http://www.spiegel.de/wirtschaft/soziales/griechenland-was-den-griechischen-buergern-nun-droht-a-1042682.html
|
||||
|
||||
# multipage article
|
||||
test_url: http://www.spiegel.de/spiegel/a-710880.html
|
24
inc/3rdparty/site_config/standard/srf.ch.txt
vendored
Executable file
24
inc/3rdparty/site_config/standard/srf.ch.txt
vendored
Executable file
|
@ -0,0 +1,24 @@
|
|||
# Author: cirnod@gmail.com
|
||||
|
||||
tidy: no
|
||||
prune: no
|
||||
|
||||
body: //div[@id="article-content"]/p | //div[@class="main-article-content clearfix"]
|
||||
|
||||
# General Cleanup
|
||||
strip_id_or_class: offscreen
|
||||
strip_id_or_class: video-description
|
||||
strip_id_or_class: v2 big-video
|
||||
strip_id_or_class: module smb freetext
|
||||
strip_id_or_class: asset span3
|
||||
strip_id_or_class: module smb related-links
|
||||
|
||||
# fix image-galleries
|
||||
strip_id_or_class: module lightbox-gallery image hide
|
||||
replace_string(width="624"): width="100%"
|
||||
replace_string(height="468"): height="%"
|
||||
|
||||
# Try yourself
|
||||
test_url: http://www.srf.ch/news/wirtschaft/weltbank-korrigiert-konjunktur-erwartungen-nach-unten
|
||||
test_url: http://www.srf.ch/news/wirtschaft/ural-statt-alpen-russische-touristen-bleiben-zuhause
|
||||
test_url: http://www.srf.ch/news/international/zwei-schweizer-bei-blutigem-attentat-in-mali-verletzt
|
|
@ -1,6 +1,6 @@
|
|||
# 2012-12-04: complete rewrite after Süddeutsche.de relaunch - carlo@...
|
||||
|
||||
single_page_link: //a[ contains( @href, "/2.220/" ) ]
|
||||
single_page_link: //li[@id="article-sidebar-action-print"]/@data-clickurl
|
||||
|
||||
body: //article[@id="sitecontent"]/section[@class="body"]
|
||||
author: //address[@class="author"]
|
||||
|
|
10
inc/3rdparty/site_config/standard/sunshinecoastdaily.com.au.txt
vendored
Executable file
10
inc/3rdparty/site_config/standard/sunshinecoastdaily.com.au.txt
vendored
Executable file
|
@ -0,0 +1,10 @@
|
|||
body: //section//article//p
|
||||
|
||||
strip: //aside
|
||||
strip: //div[@class='margin-top-15']
|
||||
strip: //p[@class='tags']
|
||||
|
||||
author: //span[@class='byline']//ul[@class='piped']//li[1]
|
||||
date: //span[@class='byline']//ul[@class='piped']//li[2]
|
||||
|
||||
parser: html5lib
|
2
inc/3rdparty/site_config/standard/sz.de.txt
vendored
2
inc/3rdparty/site_config/standard/sz.de.txt
vendored
|
@ -1,6 +1,6 @@
|
|||
# 2012-12-04: complete rewrite after Süddeutsche.de relaunch - carlo@...
|
||||
|
||||
single_page_link: //a[ contains( @href, "/2.220/" ) ]
|
||||
single_page_link: //li[@id="article-sidebar-action-print"]/@data-clickurl
|
||||
|
||||
body: //article[@id="sitecontent"]/section[@class="body"]
|
||||
author: //address[@class="author"]
|
||||
|
|
14
inc/3rdparty/site_config/standard/tagesanzeiger.ch.txt
vendored
Executable file
14
inc/3rdparty/site_config/standard/tagesanzeiger.ch.txt
vendored
Executable file
|
@ -0,0 +1,14 @@
|
|||
# Author: cirnod@gmail.com
|
||||
|
||||
tidy: no
|
||||
prune: no
|
||||
|
||||
body: //div[@id="article"]/h3 | //*[@id="mainContent"]
|
||||
|
||||
# General Cleanup
|
||||
#strip_id_or_class: info_panel
|
||||
|
||||
|
||||
# Try yourself
|
||||
test_url: http://www.tagesanzeiger.ch/zuerich/stadt/Nach-spektakulaerer-Abseilaktion-verhaftet/story/18039895
|
||||
test_url: http://www.tagesanzeiger.ch/ausland/naher-osten-und-afrika/IS-zerstoert-auch-das-antike-Hatra/story/19865699
|
|
@ -1,23 +1,14 @@
|
|||
title://h1[1]
|
||||
body: //div[contains(@class, 'sectionArticle')]//div[contains(concat(' ',normalize-space(@class),' '),' box ')]
|
||||
|
||||
author: substring-after(//em, 'Von ')
|
||||
author:string('tagesschau.de')
|
||||
strip_id_or_class: infokasten
|
||||
strip_id_or_class: teaserImTeaser
|
||||
strip_id_or_class: Comments
|
||||
strip_id_or_class: mediaInfo
|
||||
strip: //div[contains(@class, 'mediaCon')]//iframe
|
||||
|
||||
date:substring-after(//div[@class='standDatum'], 'Stand: ')
|
||||
prune: no
|
||||
|
||||
body://div[contains(@class, 'article')] | //div[contains(@class, 'centerCol')]
|
||||
test_url: http://www.tagesschau.de/ausland/snowden-dateien-entschluesselung-101.html
|
||||
test_contains: Snowden hatte zunächst für
|
||||
|
||||
strip://h1[1]
|
||||
strip: //div[contains(@class, 'directLinks')]
|
||||
strip: //div[contains(@class, 'zitatBox')]
|
||||
strip: //div[contains(@class, 'teaserBox metaBlock')]
|
||||
strip: //*[contains(@class, 'inv')]
|
||||
strip: //span[@class='imgSubline']
|
||||
strip: //*[contains(@class, 'topline')][1]
|
||||
strip: //div[@id='rightCol'][1]
|
||||
strip: //div[@id="footer"][1]
|
||||
strip: //div[@class="fPlayer"]
|
||||
strip: //div[@id='seitenanfang']
|
||||
strip: //div[@class='standDatum']
|
||||
strip: //em
|
||||
test_url: http://www.tagesschau.de/ausland/wahlkampffrankreich102.html
|
||||
test_url: http://www.tagesschau.de/xml/rss2
|
||||
|
|
3
inc/3rdparty/site_config/standard/taz.de.txt
vendored
3
inc/3rdparty/site_config/standard/taz.de.txt
vendored
|
@ -1,8 +1,9 @@
|
|||
date: //div[@class='secthead']
|
||||
body: //div[@class='sectbody']
|
||||
body: (//div[@class='sectbody'])[1]
|
||||
title: concat(//div[@class='sectbody']/h4,': ',//div[@class='sectbody']/h1)
|
||||
author: //span[@class='author']
|
||||
strip: //p[@class='caption']
|
||||
strip_id_or_class: ad_bin
|
||||
strip_id_or_class: rack
|
||||
|
||||
test_url: http://www.taz.de/Protestbewegung-Occupy/!80188/
|
|
@ -1,5 +1,6 @@
|
|||
title: //div[contains(@class, 'articleHead')]//h1
|
||||
|
||||
body: //div[@itemprop='articleBody']
|
||||
body: //div[@class='articleText']
|
||||
body: //div[@class='articleContent']
|
||||
body: //div[@id='article']
|
||||
|
@ -13,10 +14,14 @@ strip: //p[contains(., 'This article available online at:')]
|
|||
strip: //p[contains(., 'This article available online at:')]/following::*
|
||||
strip: //div[@class='earthbox']
|
||||
|
||||
single_page_link: //article//a[contains(@class, 'print')]
|
||||
single_page_link: //div[contains(@class, 'article-tools')]//a[contains(@class, 'print')]
|
||||
|
||||
native_ad_clue: //meta[@property="og:url" and contains(@content, '/sponsored/')]
|
||||
|
||||
#multi-page article
|
||||
test_url: http://www.theatlantic.com/magazine/archive/2014/12/the-real-roots-of-midlife-crisis/382235/
|
||||
test_contains: The curve tends to evince itself
|
||||
|
||||
test_url: http://www.theatlantic.com/technology/archive/2011/04/want-to-see-how-crazy-a-bot-run-market-can-be/237773/
|
||||
test_url: http://www.theatlantic.com/magazine/archive/2007/11/the-autumn-of-the-multitaskers/6342/
|
||||
test_url: http://www.theatlantic.com/entertainment/archive/2012/04/30-rock-live-a-funny-reminder-of-why-sitcoms-arent-shot-live-anymore/256447/
|
||||
test_url: http://www.theatlantic.com/entertainment/archive/2012/04/30-rock-live-a-funny-reminder-of-why-sitcoms-arent-shot-live-anymore/256447/
|
||||
|
|
6
inc/3rdparty/site_config/standard/theaustralian.com.au.txt
vendored
Executable file
6
inc/3rdparty/site_config/standard/theaustralian.com.au.txt
vendored
Executable file
|
@ -0,0 +1,6 @@
|
|||
body: //div[contains(@class, 'story-body')]
|
||||
author: //cite[contains(@class, 'author')]
|
||||
date: //span[@class='datestamp']
|
||||
|
||||
strip: //div[@class='story-info']
|
||||
|
|
@ -1,7 +1,7 @@
|
|||
title: //meta[@name='og:title']/@content
|
||||
date: //meta[@name='created']/@content
|
||||
body: //div[@class="StoryBody" or @class="storyTeaser"]
|
||||
body: //div[contains(@class, "article-body")]
|
||||
|
||||
replace_string(<p></p>): <br /><br />
|
||||
|
||||
test_url: http://www.thebostonchannel.com/slideshow/news/28210648/detail.html
|
||||
test_url: http://www.wcvb.com/news/2-teens-arrested-in-fatal-dorchester-shooting-of-16yearold-boy/33564886
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
title: //div[@id='main-article-info']//h1
|
||||
body: //div[@id='article-wrapper']
|
||||
body: //figure[contains(@itemprop, "associatedMedia")] | //div[contains(@itemprop, "articleBody")]
|
||||
date: //li[@class='publication']//time[@pubdate] | //li[@class='publication']//data[@pubdate]
|
||||
strip: //div[contains(@class, 'email-subscription')]
|
||||
strip: //div[contains(@class, 'kindleWidget')]
|
||||
|
@ -11,6 +11,8 @@ native_ad_clue: //meta[@property="video:tag" and contains(@content, "Partner zon
|
|||
prune: no
|
||||
tidy: no
|
||||
|
||||
strip_id_or_class: -expand-
|
||||
|
||||
test_url: http://www.theguardian.com/world/2013/oct/04/nsa-gchq-attack-tor-network-encryption
|
||||
test_contains: The National Security Agency has made repeated attempts to develop
|
||||
test_contains: The agency did not directly address those questions, instead providing a statement.
|
||||
|
|
|
@ -1,8 +1,9 @@
|
|||
# Updated 25-Jan-2014
|
||||
single_page_link: //a[contains(@href, '/Print/')]
|
||||
single_page_link: //link[contains(@href, 'm.theregister')]
|
||||
if_page_contains: //div[@id='nextpage']
|
||||
strip: //div[@class='wptl btm']
|
||||
body: //div[contains(@class,'article_head')]//h2 | //div[@id='body']
|
||||
|
||||
title: //div[@id="article"]/h2
|
||||
author: //p[@class="byline"]/a
|
||||
date: //p[@class="dateline"]/a[last()]
|
||||
|
||||
test_url: http://www.theregister.co.uk/2014/01/24/thirty_years_of_the_apple_macintosh_part_2/
|
||||
#multipage
|
||||
test_url: http://www.theregister.co.uk/2015/07/06/geeks_guide_spaceguard_center/
|
||||
#singlepage
|
||||
test_url: http://www.theregister.co.uk/2015/07/06/us_japan_massive_robots_in_the_ring/
|
||||
|
|
|
@ -33,6 +33,8 @@ strip_id_or_class: m-ad
|
|||
strip_id_or_class: social-sharing
|
||||
strip_id_or_class: m-video-entry__excerpt
|
||||
strip_id_or_class: hidden
|
||||
strip_id_or_class: m-article__follow-bar
|
||||
strip_id_or_class: m-article__share-buttons
|
||||
|
||||
replace_string(<noscript>): <div>
|
||||
replace_string(</noscript>): </div>
|
||||
|
|
Some files were not shown because too many files have changed in this diff Show more
Loading…
Add table
Add a link
Reference in a new issue