about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Manuel Graf <postmaster@grafoo.at> 2021-01-10 23:06:29 +0100
committer Manuel Graf <postmaster@grafoo.at> 2021-01-10 23:39:52 +0100
commit 7cc673178f3c44d2509d823ea19171e2288308f9 (patch)
tree 90d32b1c48f36c4955617615896eeff533bf947b
download transfeed-7cc673178f3c44d2509d823ea19171e2288308f9.tar.gz
Initial commit
-rw-r--r-- .gitignore 2
-rw-r--r-- LICENSE 5
-rw-r--r-- README.md 37
-rw-r--r-- derstandard.at/international/asien/hongkong/stylesheet.xml 22
-rwxr-xr-x generate 15
5 files changed, 81 insertions, 0 deletions
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..fbd9934
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,2 @@
+cookies
+feed.xml
diff --git a/LICENSE b/LICENSE
new file mode 100644
index 0000000..29c6d39
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,5 @@
+Copyright (C) 2021 by Manuel Graf <postmaster@grafoo.at>
+
+Permission to use, copy, modify, and/or distribute this software for any purpose with or without fee is hereby granted.
+
+THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..81fea49
--- /dev/null
+++ b/README.md
@@ -0,0 +1,37 @@
+# About
+
+Generate Atom feeds for websites and other sources that lack syndication support.
+
+# Extract Firefox cookies
+
+Create a `cookies` file that can later be passed to `curl` (`-b cookies`) to get past things like privacy walls.
+
+```sh
+sqlite3 -separator $'\t' $(find ~/.mozilla/firefox/*.default-release -name cookies.sqlite) <<EOF >cookies
+.mode tabs
+.header off
+select host,
+case substr(host,1,1)='.' when 0 then 'FALSE' else 'TRUE' end,
+path,
+case isSecure when 0 then 'FALSE' else 'TRUE' end,
+expiry,
+name,
+value
+from moz_cookies;
+EOF
+```
+
+# Usage
+
+`./generate derstandard.at/international/asien/hongkong/stylesheet.xml`
+
+# References
+
+- https://www.w3schools.com/xml/xsl_intro.asp
+- https://validator.w3.org/feed/docs/atom.html
+
+# Todos
+
+## Add entry content 
+
+`curl -sL -b cookies https://www.derstandard.at/story/2000122974234/hongkonger-demokratieaktivisten-wong-droht-weitere-strafe|xmllint --html --xpath '//main//article[@class="story-article"]//div[@class="article-body"]' /dev/stdin`
diff --git a/derstandard.at/international/asien/hongkong/stylesheet.xml b/derstandard.at/international/asien/hongkong/stylesheet.xml
new file mode 100644
index 0000000..4f4ac01
--- /dev/null
+++ b/derstandard.at/international/asien/hongkong/stylesheet.xml
@@ -0,0 +1,22 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
+  <xsl:template match="/main"><!-- input document: the page's <main> element, as extracted by ./generate -->
+    <feed xmlns="http://www.w3.org/2005/Atom">
+      <id>https://grafoo.tilde.institute/transfeed/derstandard.at/international/asien/hongkong/feed.xml</id>
+      <title>derStandard.at - <xsl:value-of select="normalize-space(//header/h1)"/></title>
+      <!-- <title><xsl:value-of select="normalize-space(//head//meta[@name='title']/@content)"/></title> -->
+      <updated><xsl:value-of select="normalize-space(//div[@class='lastmodified']//time/@datetime)"/></updated>
+      <icon>https://at.staticfiles.at/img/appicons/dst-60@2x-14f7371c73.png</icon>
+      <logo>https://at.staticfiles.at/img/meta/meta_image_1200x630-4d0796cf00.png</logo>
+      <xsl:for-each select="//section"><!-- one Atom entry per <section>; paths below are relative to it -->
+        <entry>
+          <id>https://www.derstandard.at<xsl:value-of select="article//a/@href"/></id><!-- article URL: generate-id() is neither an IRI nor stable across runs -->
+          <title><xsl:value-of select="normalize-space(article//header/h1[@class='teaser-title'])"/></title>
+          <updated><xsl:value-of select="normalize-space(article//header/time/@datetime)"/></updated>
+          <link rel="alternate" href="https://www.derstandard.at{article//a/@href}"/><!-- href of the first <a> inside the article, prefixed with the site origin -->
+          <summary><xsl:value-of select="normalize-space(article//header/p[@class='teaser-subtitle'])"/></summary>
+        </entry>
+      </xsl:for-each>
+    </feed>
+  </xsl:template>
+</xsl:stylesheet>
diff --git a/generate b/generate
new file mode 100755
index 0000000..0d5705d
--- /dev/null
+++ b/generate
@@ -0,0 +1,15 @@
+#!/bin/sh
+test $# -ne 1 &&
+	{
+		echo "Usage: ${0} <xslt-stylesheet-file>"
+		exit 2
+	}
+
+echo "$1" | grep 'derstandard.at/international/asien/hongkong' -q && url='https://www.derstandard.at/international/asien/hongkong'
+# TODO: Add option to aggregate older articles using e.g. https://www.derstandard.at/international/asien/hongkong/2020/1/1
+
+curl -sL -b cookies "$url" |
+	xmllint --html --xpath '//main' /dev/stdin 2>/dev/null |
+	xsltproc "$1" /dev/stdin |
+	xmllint --format /dev/stdin \
+		>"$(dirname "$1")/feed.xml"