From 0b2ca86fa716dfc2fb3bc3425019f049dd65eda2 Mon Sep 17 00:00:00 2001 From: Sanel Zukan Date: Thu, 10 Mar 2022 22:01:12 +0100 Subject: [PATCH] Added txtify.it. Removed outline.com. User-agent fixes outline.com is shut down, so I'm adding https://txtify.it as an alternative. It will render an html page to a text file. Fixing user-agent string so eww and txtify backends do not append the "User-Agent" string, because url/eww will append it. --- README.md | 26 ++++++----------------- declutter.el | 59 ++++++++++++++++++++++++++-------------------------- 2 files changed, 35 insertions(+), 50 deletions(-) diff --git a/README.md b/README.md index 31e2fed..12e2f84 100644 --- a/README.md +++ b/README.md @@ -6,16 +6,15 @@ readable html, straight inside your Emacs. declutter can render content using: - * [outline.com API](https://outline.com/) * [lynx](https://en.wikipedia.org/wiki/Lynx_(web_browser)) * [rdrview](https://github.com/eafer/rdrview) * [EWW](https://www.gnu.org/software/emacs/manual/html_mono/eww.html) - Emacs builtin browser + * [txtify.it](https://txtify.it/) ## Installation declutter depends on [json.el](https://github.com/thorstadt/json.el) -(if you are using `outline.com` backend) and -[shr.el](http://bzr.savannah.gnu.org/lh/emacs/trunk/annotate/head:/lisp/net/shr.el). `shr.el` +and [shr.el](http://bzr.savannah.gnu.org/lh/emacs/trunk/annotate/head:/lisp/net/shr.el). `shr.el` is part of Emacs since 24.4 version. 
To install declutter, just copy `declutter.el` to `$HOME/.emacs.d` @@ -58,7 +57,7 @@ To change rendering engine (default is outline.com API), use this: ; or (setq declutter-engine 'eww) ; eww will get and render html ; or -(setq declutter-engine 'outline) ; outline.com will get and render html +(setq declutter-engine 'txtify) ; txtify.it will get and render html ``` For `lynx` and `rdrview` engines, you can set a custom path to the @@ -88,22 +87,9 @@ For example, to use default Emacs fonts and add margins, set this: ## Note -When declutter is using [outline.com](https://outline.com) to render -the content and sometimes can fail with internal error (received from -`outline.com`). In that case, try url multiple times. - -Also regardingy privacy, be aware that `outline.com` **can see** what -you browse. I'm not affiliated with `outline.com` in any way. - -## Restriction Hacking - -As from May 2019 (at least the period I was able to track), -`outline.com` will report for some sites this: -`We're sorry, but this URL is not supported by Outline`. - -To bypass it, use some url shortener (like https://bitly.com) and -short destination url. Pass that shortened url to declutter and it -will be able to render the content again. +When declutter is using [txtify.it](https://txtify.it) to render content, be +aware that `txtify.it` **can see** what you browse. I'm not affiliated +with `txtify.it` in any way. ## Bug reports & patches diff --git a/declutter.el b/declutter.el index c81dd71..cbdf8d7 100644 --- a/declutter.el +++ b/declutter.el @@ -1,7 +1,7 @@ ;;; declutter.el --- Read html content and (some) paywall sites without clutter ;;; -*- indent-tabs-mode: nil -*- -;; Copyright (c) 2019-2021 Sanel Zukan +;; Copyright (c) 2019-2022 Sanel Zukan ;; ;; Author: Sanel Zukan ;; URL: http://www.github.com/sanel/declutter @@ -26,7 +26,7 @@ ;;; Commentary: -;; Allows reading sites without clutter. Uses outline.com service or lynx for actual work. +;; Allows reading sites without clutter. 
Uses eww, rdrview, lynx or txtify.it for actual work. ;;; Installation: @@ -48,8 +48,8 @@ :prefix "declutter-" :group 'applications) -(defcustom declutter-outline-api-url "https://api.outline.com/v3/parse_article?source_url=" - "Outline service, used to get cleaned content." +(defcustom declutter-txtify-url "https://txtify.it" + "txtify.it service, used to get cleaned content." :type 'string :group 'declutter) @@ -58,9 +58,9 @@ :type 'string :group 'declutter) -(defcustom declutter-engine 'outline +(defcustom declutter-engine 'eww "Engine used to visit and render URL. -Values are 'outline for using outline.com service, 'lynx for using local lynx +Values are 'eww (default) for using built-in eww browser, 'lynx for using local lynx installation or 'rdrview for https://github.com/eafer/rdrview." :type 'symbol :group 'declutter) @@ -74,12 +74,12 @@ rdrview, it will call them as is, assuming they are in PATH." :group 'declutter) (defun declutter-fetch-url (url referer jsonp) - "Try to get content from given URL. -If JSONP is true, parse it to json list. REFERER is necessary for outline.com." + "Try to get content from given URL. If JSONP is true, parse it to json list." (with-temp-buffer - (let ((url-request-extra-headers (if referer - (list (cons "Referer" referer)))) - (url-user-agent (concat "User-Agent: " declutter-user-agent "\r\n"))) + (let* ((url-request-extra-headers (if referer + (list (cons "Referer" referer)))) + (old-user-agent url-user-agent) + (url-user-agent (or declutter-user-agent old-user-agent))) (url-insert-file-contents url) (if jsonp ;; TODO: support for native JSON (https://emacs.stackexchange.com/a/38482) @@ -88,13 +88,11 @@ If JSONP is true, parse it to json list. REFERER is necessary for outline.com." ;; extract actual content (buffer-substring-no-properties (point-min) (point-max)))))) -(defun declutter-get-html-from-outline (url) - "Construct properl URL and call outline.com service. -Expects json response and retrieve html part from it." 
- (let* ((full-url (concat declutter-outline-api-url (url-hexify-string url))) - (response (declutter-fetch-url full-url "https://outline.com/" t))) - (cdr - (assoc 'html (assoc 'data response))))) +(defun declutter-get-txt-from-txtify (url) + "Construct proper URL and call txtify.it service. Expects a text reply." + (let* ((full-url (concat declutter-txtify-url "/" url)) + (response (declutter-fetch-url full-url nil nil))) + response)) (defun declutter-render-content (content htmlp) "Render in *declutter* buffer. @@ -108,12 +106,12 @@ or just display it, depending if HTMLP was set to true." (when htmlp (shr-render-region (point-min) (point-max)))))) -(defun declutter-url-outline (url) - "Use Outline API to declutter URL." - (let ((content (declutter-get-html-from-outline url))) - (if (not content) - (message "Zero reply from outline.com. This usually means it wasn't able to render the article.") - (declutter-render-content content t)))) +(defun declutter-url-txtify (url) + "Use txtify.it to declutter URL." + (let ((content (declutter-get-txt-from-txtify url))) + (if (not content) + (message "No content from txtify.it") + (declutter-render-content content nil)))) (defun declutter-url-lynx (url) "Use lynx to declutter URL." @@ -148,8 +146,9 @@ or just display it, depending if HTMLP was set to true." ;; Sadly, eww doesn't have easy way to render html with eww-readable due many stateful ;; operations, so calling declutter-fetch-url and passing content to eww/eww-readable ;; does not work. - (let (;; declutter-fetch-url is not used here, so we must set user agent for eww. - (url-user-agent (concat "User-Agent: " declutter-user-agent "\r\n"))) + (let* (;; declutter-fetch-url is not used here, so we must set user agent for eww. 
+ (old-user-agent url-user-agent) + (url-user-agent (or declutter-user-agent old-user-agent))) (pop-to-buffer "*declutter*") ;; switch to eww-mode or eww will pop out own buffer (eww-mode) @@ -159,11 +158,11 @@ or just display it, depending if HTMLP was set to true." (defun declutter-url (url) "Depending on declutter-engine variable, call appropriate functions." (cond - ((eq 'outline declutter-engine) (declutter-url-outline url)) + ((eq 'eww declutter-engine) (declutter-url-eww url)) ((eq 'lynx declutter-engine) (declutter-url-lynx url)) ((eq 'rdrview declutter-engine) (declutter-url-rdrview url)) - ((eq 'eww declutter-engine) (declutter-url-eww url)) - (t (message "Unknown decluttering engine. Use 'outline, 'lynx or 'rdrview.")))) + ((eq 'txtify declutter-engine) (declutter-url-txtify url)) + (t (message "Unknown decluttering engine. Use 'eww, 'lynx, 'txtify or 'rdrview.")))) (defun declutter-get-url-under-point () "Try to figure out is there any URL under point. @@ -183,7 +182,7 @@ Returns nil if none." ;;;###autoload (defun declutter (url) - "Read URL and declutter it, using outline.com service." + "Read URL and declutter it." (interactive (let* ((url (declutter-get-url-under-point)) (prompt (if url