diff options
author | Manuel Graf <postmaster@grafoo.at> | 2021-01-10 23:06:29 +0100 |
---|---|---|
committer | Manuel Graf <postmaster@grafoo.at> | 2021-01-10 23:39:52 +0100 |
commit | 7cc673178f3c44d2509d823ea19171e2288308f9 (patch) | |
tree | 90d32b1c48f36c4955617615896eeff533bf947b | |
download | transfeed-7cc673178f3c44d2509d823ea19171e2288308f9.tar.gz |
Initial commit
-rw-r--r-- | .gitignore | 2 | ||||
-rw-r--r-- | LICENSE | 5 | ||||
-rw-r--r-- | README.md | 37 | ||||
-rw-r--r-- | derstandard.at/international/asien/hongkong/stylesheet.xml | 22 | ||||
-rwxr-xr-x | generate | 15 |
5 files changed, 81 insertions, 0 deletions
diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..fbd9934 --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +cookies +feed.xml diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..29c6d39 --- /dev/null +++ b/LICENSE @@ -0,0 +1,5 @@ +Copyright (C) 2021 by Manuel Graf <postmaster@grafoo.at> + +Permission to use, copy, modify, and/or distribute this software for any purpose with or without fee is hereby granted. + +THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. diff --git a/README.md b/README.md new file mode 100644 index 0000000..81fea49 --- /dev/null +++ b/README.md @@ -0,0 +1,37 @@ +# About + +Generate Atom feeds for websites and other stuff without syndication support. + +# Extract Firefox cookies + +Create a `cookies` file which can later be used with `curl` to get past things like privacy walls. + +```sh +sqlite3 -separator $'\t' $(find ~/.mozilla/firefox/*.default-release -name cookies.sqlite) <<EOF >cookies +.mode tabs +.header off +select host, +case substr(host,1,1)='.' 
when 0 then 'FALSE' else 'TRUE' end, +path, +case isSecure when 0 then 'FALSE' else 'TRUE' end, +expiry, +name, +value +from moz_cookies; +EOF +``` + +# Usage + +`./generate derstandard.at/international/asien/hongkong/stylesheet.xml` + +# References + +- https://www.w3schools.com/xml/xsl_intro.asp +- https://validator.w3.org/feed/docs/atom.html + +# Todos + +## Add entry content + +`curl -sL -b cookies https://www.derstandard.at/story/2000122974234/hongkonger-demokratieaktivisten-wong-droht-weitere-strafe|xmllint --html --xpath '//main//article[@class="story-article"]//div[@class="article-body"]' /dev/stdin` diff --git a/derstandard.at/international/asien/hongkong/stylesheet.xml b/derstandard.at/international/asien/hongkong/stylesheet.xml new file mode 100644 index 0000000..4f4ac01 --- /dev/null +++ b/derstandard.at/international/asien/hongkong/stylesheet.xml @@ -0,0 +1,22 @@ +<?xml version="1.0" encoding="UTF-8"?> +<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform"> + <xsl:template match="/main"> + <feed xmlns="http://www.w3.org/2005/Atom"> + <id>https://grafoo.tilde.institute/transfeed/derstandard.at/international/asien/hongkong/feed.xml</id> + <title>derStandard.at - <xsl:value-of select="normalize-space(//header/h1)"/></title> + <!-- <title><xsl:value-of select="normalize-space(//head//meta[@name='title']/@content)"/></title> --> + <updated><xsl:value-of select="normalize-space(//div[@class='lastmodified']//time/@datetime)"/></updated> + <icon>https://at.staticfiles.at/img/appicons/dst-60@2x-14f7371c73.png</icon> + <logo>https://at.staticfiles.at/img/meta/meta_image_1200x630-4d0796cf00.png</logo> + <xsl:for-each select="//section"> + <entry> + <id><xsl:value-of select="generate-id(article)"/></id> + <title><xsl:value-of select="normalize-space(article//header/h1[@class='teaser-title'])"/></title> + <updated><xsl:value-of select="normalize-space(article//header/time/@datetime)"/></updated> + <link rel="alternate"><xsl:attribute 
name="href">https://www.derstandard.at<xsl:value-of select="article//a/@href"/></xsl:attribute></link> + <summary><xsl:value-of select="normalize-space(article//header/p[@class='teaser-subtitle'])"/></summary> + </entry> + </xsl:for-each> + </feed> + </xsl:template> +</xsl:stylesheet> diff --git a/generate b/generate new file mode 100755 index 0000000..0d5705d --- /dev/null +++ b/generate @@ -0,0 +1,15 @@ +#!/bin/sh +test $# -ne 1 && + { + echo "Usage: ${0} <xslt-stylesheet-file>" + exit 2 + } + +echo "$1" | grep 'derstandard.at/international/asien/hongkong' -q && url='https://www.derstandard.at/international/asien/hongkong' +# TODO: Add option to aggregate older articles using e.g. https://www.derstandard.at/international/asien/hongkong/2020/1/1 + +curl -sL -b cookies "$url" | + xmllint --html --xpath '//main' /dev/stdin 2>/dev/null | + xsltproc "$1" /dev/stdin | + xmllint --format /dev/stdin \ + >"$(dirname "$1")/feed.xml" |