From d47f1298b8c26dbf7b93e97ea5620b40eb9c7426 Mon Sep 17 00:00:00 2001 From: Not Zed Date: Sun, 7 Jul 2019 12:18:47 +0930 Subject: [PATCH] Redirect 301 old /blog/?post=xx to /blog/xx Add sitemap.xml generator for posts and articles. Add site.make makefile hook. --- Makefile | 3 ++ README | 46 ++++++++++++++++-- blog.c | 12 +++++ makemap.pl | 136 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 194 insertions(+), 3 deletions(-) create mode 100755 makemap.pl diff --git a/Makefile b/Makefile index 839e8c9..0846bff 100644 --- a/Makefile +++ b/Makefile @@ -132,3 +132,6 @@ config.make: config.h Makefile ifeq (,$(filter clean install-db dist,$(MAKECMDGOALS))) -include $(patsubst %.c,.deps/%.d,$(built_SRCS)) endif + +# optionally put per-site hooks here +-include site.make diff --git a/README b/README index 6fe1c9f..9af5c1b 100644 --- a/README +++ b/README @@ -35,6 +35,8 @@ Copy config.h.in to config.h and edit the settings. Copy all the templates in template/*.html.in to template/*.html and edit them for your site. +See also the notes on sitemap.xml. + first build - - - - - - @@ -75,10 +77,11 @@ $ PATH_INFO=/ ./blog CUSTOMISE --------- -Some templates for the page header and footer are in template/. +Apart from config.h the blog header and footer templates are in +template/. These are compiled into the binary. -There are some hardcoded url paths in the source, these are mostly -absolute so you need to configure apache appropriately. +If available 'site.make' will be included and allows for +custom hooks. INTSTALL -------- @@ -130,6 +133,33 @@ matches the post filename but ends in .meta. There are some historic bits to this due to the blogger import, look at newpost.c and makeindex.pl. +sitemap.xml +- - - - - - + +A perl tool makemap.pl is included which will create a sitemap.xml +from the blog posts and standard html files. 
+ +For zedzone, I included the following in site.make: + +install: sitemap + +sitemap: + ./makemap.pl \ + --html https://www.zedzone.space/ /var/zedzone/html \ + --db https://www.zedzone.space/post/ /var/zedzone/var/post \ + --blog https://www.zedzone.space/blog/ > sitemap.xml~ + install -DC sitemap.xml~ /var/zedzone/html/sitemap.xml + rm sitemap.xml~ + +.PHONY: sitemap + +This creates both the long and short form of the post urls (see URL Paths), +a root /blog/ with the date of the latest post, and all of the standard +html pages. It assumes /index.html is equivalent to / for the html pages +and includes both in the sitemap. + +The lastmod tag is based on the file date. + Tags - - - @@ -183,6 +213,16 @@ post-date-title This isn't actually used much in the code and is a bit of a pain to write in the posts but it works. +Obsolete URL Paths +- - - - - - - - - - + +These predate the public release but of course leaked out onto the +internet. Some code is included to handle these. + +/blog/?post={postid} + + This is redirected using a 301 response code to /blog/{postid} + robots.txt - - - - - diff --git a/blog.c b/blog.c index 4bea800..e6f724c 100644 --- a/blog.c +++ b/blog.c @@ -315,6 +315,18 @@ static void doblog(char *path, char *query) { postid_t latest; struct lex lex = { .q = path + 1 }; + // verison handling, redirect old /blog?post=xx urls + if (query && query[0]) { + struct cgi_param params[1] = { 0 }; + + if (cgi_decode(query, params, 1) == 1 + && strcmp(params[0].name, "post") == 0) { + send("Status: 301\n"); + sendf("Location: %s/%s\n\n", POST_SCRIPT, params[0].value); + return; + } + } + // Parameter processing if (path == NULL || strlen(path) < 2) { id = ULONG_MAX; diff --git a/makemap.pl b/makemap.pl new file mode 100755 index 0000000..f72cc75 --- /dev/null +++ b/makemap.pl @@ -0,0 +1,136 @@ +#!/usr/bin/perl + +# makemap.pl create sitemap + +# Copyright (C) 2019 Michael Zucchi + +# This program is free software: you can redistribute it and/or modify +# 
# it under the terms of the GNU Affero General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.

# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Affero General Public License for more details.

# You should have received a copy of the GNU Affero General Public
# License along with this program. If not, see
# <https://www.gnu.org/licenses/>.

use strict;
use warnings;

use POSIX qw(strftime);

# usage:
#  --db post-url post-db
#  --html html-url html-directory
#  --blog blog-url
#
# Each option may be given more than once.  The sitemap is written to
# stdout: first the posts of every --db, then every --blog url, then the
# pages of every --html directory.
#
# example:
#  ./makemap.pl \
#   --html https://www.zedzone.space/ /var/zedzone/html \
#   --db https://www.zedzone.space/post/ /var/zedzone/post \
#   --blog https://www.zedzone.space/blog/ > sitemap.xml

my $usage = "usage: $0 [--db post-url post-db]"
    . " [--html html-url html-directory] [--blog blog-url]\n";

# Format an epoch time as the YYYY-MM-DD (UTC) date used for <lastmod>.
sub lastmod {
    my ($stamp) = @_;
    return strftime("%Y-%m-%d", gmtime($stamp));
}

# Entity-escape the characters that may not appear raw inside XML text.
sub xml_escape {
    my ($text) = @_;
    $text =~ s/&/&amp;/g;
    $text =~ s/</&lt;/g;
    $text =~ s/>/&gt;/g;
    $text =~ s/"/&quot;/g;
    $text =~ s/'/&apos;/g;
    return $text;
}

# Render one <url> element.  <lastmod> is optional and is left out when
# no date is known for the location.
sub url_entry {
    my ($loc, $date) = @_;
    my $entry = "<url><loc>" . xml_escape($loc) . "</loc>";
    $entry .= "<lastmod>$date</lastmod>" if defined $date;
    return "$entry</url>\n";
}

# --- command line --------------------------------------------------------

my @dbs;      # [ post-url, post-db directory ] pairs
my @htmls;    # [ html-url, html directory ] pairs
my @blogs;    # blog root urls

while (@ARGV) {
    my $cmd = shift @ARGV;

    if ($cmd eq "--db") {
        die "--db needs a post-url and a post-db\n$usage" if @ARGV < 2;
        push @dbs, [ splice(@ARGV, 0, 2) ];
    } elsif ($cmd eq "--html") {
        die "--html needs a html-url and a html-directory\n$usage" if @ARGV < 2;
        push @htmls, [ splice(@ARGV, 0, 2) ];
    } elsif ($cmd eq "--blog") {
        die "--blog needs a blog-url\n$usage" unless @ARGV;
        push @blogs, shift @ARGV;
    } else {
        # A mistyped option would otherwise silently drop part of the map.
        die "Unknown option '$cmd'\n$usage";
    }
}

# Every entry is a hash with optional 'long' and 'short' urls and an
# optional 'date'; each url that is present becomes one <url> element.
my @allposts;

# Newest post mtime seen, used to date the --blog root urls.
my $latest = 0;

# --- posts ---------------------------------------------------------------

for my $pair (@dbs) {
    my ($url, $db) = @$pair;

    # Read the directory directly rather than through a shell pipeline,
    # so odd characters in $db cannot be interpreted by the shell.  The
    # filter matches the former "ls | grep -v meta | grep -v '~'":
    # hidden names, anything containing "meta" and anything containing
    # "~" are not posts.
    opendir(my $dh, $db) or die "Unable to find posts in '$db': $!\n";
    my @names = grep { !/^\./ && !/meta/ && !/~/ } readdir($dh);
    closedir($dh);

    for my $id (sort @names) {
        my $stamp = (stat "$db/$id")[9];
        next unless defined $stamp;    # entry vanished or is unreadable

        my %post = (
            id    => $id,
            short => "$url$id",
            date  => lastmod($stamp),
        );

        # The long url is only known when the post's .meta file carries an
        # "original=" line; without one only the short url is emitted
        # (previously the values of the preceding post leaked through).
        if (open(my $meta, '<', "$db/$id.meta")) {
            while (my $line = <$meta>) {
                chomp $line;
                if ($line =~ m@^original=http.*\.com/(\d{4}/\d{2})/(.*)@) {
                    $post{long} = "$url$1/$2";
                }
            }
            close $meta;
        }

        push @allposts, \%post;

        $latest = $stamp if $stamp > $latest;
    }
}

# --- blog roots ----------------------------------------------------------

for my $blog (@blogs) {
    my %post = (short => $blog);

    # Dated by the newest post; with no posts there is no meaningful date.
    $post{date} = lastmod($latest) if $latest > 0;

    push @allposts, \%post;
}

# --- static html pages ---------------------------------------------------

for my $pair (@htmls) {
    my ($url, $dir) = @$pair;

    # List-form pipe open: find(1) is run without a shell.  -printf '%P'
    # prints each match relative to $dir.
    open(my $find, '-|', 'find', $dir, '-name', '*.html', '-printf', "%P\n")
        or die "Unable to find pages in '$dir': $!\n";
    while (my $file = <$find>) {
        chomp $file;

        my $stamp = (stat "$dir/$file")[9];
        next unless defined $stamp;

        my %post = (
            long => "$url$file",
            date => lastmod($stamp),
        );

        # An index.html is also reachable through its directory url.
        if ($file eq "index.html") {
            $post{short} = $url;
        } elsif ($file =~ m@(.*/)index\.html$@) {
            $post{short} = "$url$1";
        }

        push @allposts, \%post;
    }
    close($find) or warn "find reported errors under '$dir'\n";
}

# --- output --------------------------------------------------------------

print qq{<?xml version="1.0" encoding="UTF-8"?>\n};
print qq{<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">\n};

for my $post (@allposts) {
    print url_entry($post->{long}, $post->{date})
        if defined $post->{long};
    print url_entry($post->{short}, $post->{date})
        if defined $post->{short};
}
print "</urlset>\n";