From 7cc673178f3c44d2509d823ea19171e2288308f9 Mon Sep 17 00:00:00 2001 From: Manuel Graf Date: Sun, 10 Jan 2021 23:06:29 +0100 Subject: Initial commit --- .gitignore | 2 ++ LICENSE | 5 +++ README.md | 37 ++++++++++++++++++++++ .../international/asien/hongkong/stylesheet.xml | 22 +++++++++++++ generate | 15 +++++++++ 5 files changed, 81 insertions(+) create mode 100644 .gitignore create mode 100644 LICENSE create mode 100644 README.md create mode 100644 derstandard.at/international/asien/hongkong/stylesheet.xml create mode 100755 generate diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..fbd9934 --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +cookies +feed.xml diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..29c6d39 --- /dev/null +++ b/LICENSE @@ -0,0 +1,5 @@ +Copyright (C) 2021 by Manuel Graf + +Permission to use, copy, modify, and/or distribute this software for any purpose with or without fee is hereby granted. + +THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. diff --git a/README.md b/README.md new file mode 100644 index 0000000..81fea49 --- /dev/null +++ b/README.md @@ -0,0 +1,37 @@ +# About + +Generate atom feeds for websites and other stuff without syndication support. + +# Extract firefox cookies + +Create a `cookies` file which can later be used with `curl` to get past things such as privacy walls. + +```sh +sqlite3 -separator $'\t' $(find ~/.mozilla/firefox/*.default-release -name cookies.sqlite) <<EOF >cookies +.mode tabs +.header off +select host, +case substr(host,1,1)='.' 
when 0 then 'FALSE' else 'TRUE' end, +path, +case isSecure when 0 then 'FALSE' else 'TRUE' end, +expiry, +name, +value +from moz_cookies; +EOF +``` + +# Usage + +`./generate derstandard.at/international/asien/hongkong/stylesheet.xml` + +# References + +- https://www.w3schools.com/xml/xsl_intro.asp +- https://validator.w3.org/feed/docs/atom.html + +# Todos + +## Add entry content + +`curl -sL -b cookies https://www.derstandard.at/story/2000122974234/hongkonger-demokratieaktivisten-wong-droht-weitere-strafe|xmllint --html --xpath '//main//article[@class="story-article"]//div[@class="article-body"]' /dev/stdin` diff --git a/derstandard.at/international/asien/hongkong/stylesheet.xml b/derstandard.at/international/asien/hongkong/stylesheet.xml new file mode 100644 index 0000000..4f4ac01 --- /dev/null +++ b/derstandard.at/international/asien/hongkong/stylesheet.xml @@ -0,0 +1,22 @@ + + + + + https://grafoo.tilde.institute/transfeed/derstandard.at/international/asien/hongkong/feed.xml + derStandard.at - <xsl:value-of select="normalize-space(//header/h1)"/> + + + https://at.staticfiles.at/img/appicons/dst-60@2x-14f7371c73.png + https://at.staticfiles.at/img/meta/meta_image_1200x630-4d0796cf00.png + + + + <xsl:value-of select="normalize-space(article//header/h1[@class='teaser-title'])"/> + + https://www.derstandard.at + + + + + + diff --git a/generate b/generate new file mode 100755 index 0000000..0d5705d --- /dev/null +++ b/generate @@ -0,0 +1,15 @@ +#!/bin/sh +test $# -ne 1 && + { + echo "Usage: ${0} " + exit 2 + } + +echo "$1" | grep 'derstandard.at/international/asien/hongkong' -q && url='https://www.derstandard.at/international/asien/hongkong' +# TODO: Add option to aggregate older articles using e.g. https://www.derstandard.at/international/asien/hongkong/2020/1/1 + +curl -sL -b cookies "$url" | + xmllint --html --xpath '//main' /dev/stdin 2>/dev/null | + xsltproc "$1" /dev/stdin | + xmllint --format /dev/stdin \ + >"$(dirname "$1")/feed.xml" -- cgit 1.4.1-2-gfad0