Initial commit

This commit is contained in:
Pranshu Sharma 2025-05-14 22:42:33 +10:00
commit 717e545e34
2 changed files with 826 additions and 0 deletions

55
README.md Normal file
View file

@ -0,0 +1,55 @@
# Dependencies
Gnome libxml2
# Usage
## Templates
This uses Template::Toolkit for template syntax.
You must now have any value in `[no_name_dir]` in blog_dir.
# Concepts
Originally, genorg was built around the idea that moving files
was natural and should not affect the system.
## Static file tree
In contrast to the original genorg, this is designed for the blog org
tree to remain static. The original reason I wanted this was so that
the blog directory tree would be easy to try. But then I realised
it's supposed to be write-only: for a linear blog, you don't really
modify an article once it's published, and you should be able to
find new articles easily.
## Caching
This uses a 2-step caching process.
```
Input Cache Output
+-----------+ +---------------+ +-----------+
| Org files | ----> | direct org | ----> | Modified |
| | ----> | html export | ----> | html |
+-----------+ +---------------+ +-----------+
| |
+---------------+ +-----------+
| Necessary info| ----> | navigation|
| eg title, date| ----> | files |
+---------------+ +-----------+
```
The intermediate transformation of the direct org-to-html export is
stored for caching. The necessary info, like the path, is stored in a
separate XML document so that the whole file does not have to be
parsed again to link.
## Speed for space
`genorg` copies the whole file tree across to the cache in the
intermediate stage; this allows for the `-m` option.

771
genorg Executable file
View file

@ -0,0 +1,771 @@
#!/usr/bin/perl
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
# You should have received a copy of the GNU General Public License
# along with this program. If not, see
# <https://www.gnu.org/licenses/>.
# Copyright (C) 2025 Pranshu Sharma
=head1 NAME
genorg - generate a blog from org files
Author: Pranshu Sharma <pranshu@bauherren.ovh>
=head1 SYNOPSIS
--verbose,-v Be verbose. False by default
--config,-c Specify config file. genorg.toml by default
--mid-cache,-m Reuse the intermediate cache
--disable-cache,-d Do not use the cache
--help,-h Print this help
=head1 VERSION
0.01
=cut
# Add caching, fun
use v5.40.1;
use warnings;
use strict;
use feature qw(defer class);
no warnings qw( experimental::class experimental::defer );
use Symbol 'gensym';
use Getopt::Long qw(:config auto_help);
use Path::Tiny qw(path cwd);
use Time::Piece;
use List::Util qw(any pairmap first);
use IPC::Open3;
use Template;
# We need this for writing the file to communicate with the server
use JSON::XS;
# Org source files queued for export this run (filled by process_file).
my @files;
our $Conf;  # the Config instance, constructed right below
our $tt;    # Template::Toolkit processor, built inside main()
GetOptions(
    'config|c=s' => \ (my $config_file = "genorg.toml"),
    'disable-cache|d' => \ our $use_cache,
    'mid-cache|m' => \(our $mid_cache),
    'verbose|v' => \ our $verbose
);
$Conf = Config->new(file => $config_file);
# Both switches arrive in the "disable/reuse" sense, so they are
# inverted here; from this point on a true $use_cache means the cache
# is active.  NOTE(review): this leaves $mid_cache *false* when -m was
# given -- confirm against the $mid_cache branches in main().
$mid_cache = !$mid_cache;
$use_cache = !$use_cache;
# The intermediate cache is meaningless without the main cache.
if (!$use_cache) {
    $mid_cache = 0;
}
# Path::Tiny ->visit callback over blog_dir: mirror directories into
# out_dir and queue org sources for export, skipping files that are
# already covered by the cache.
sub process_file ($path, $state) {
    # Directories are recreated verbatim so the published tree mirrors
    # the blog tree exactly.
    if (-d $path) {
        mkdir ($Conf->out_dir . "/" . $path->relative($Conf->blog_dir));
        return;
    }
    # Only org sources matter at this stage; exporting them may
    # generate further files later.
    return unless $path =~ /\.org$/;
    if ($use_cache) {
        # An article already restored from the cache needs no re-export.
        my $cached = first { path($_->file_path) eq $path->realpath } @cached_arts;
        return if $cached;
    }
    push @files, $path;
}
# TODO fill this function in; for now it only detects a missing cache.
sub read_cache {
    # Without a cache directory there is nothing to reuse.
    $use_cache = 0 unless -d $Conf->cache_dir;
}
# Return $n cache filenames (small integers) that are not already used
# as the mid_file of a cached article.  When there is no usable cache,
# 1..$n are free by definition.
sub get_cache_new_filenames ($n) {
    return (1..$n) if !@cached_arts || !$use_cache;
    my $bcwd = cwd->stringify;
    chdir $Conf->cache_dir
        or die "Couldn't enter cache dir\n";
    # Names already taken, relative to the cache directory.
    my @occupied = map
        { path($_->mid_file)->relative($Conf->cache_dir) }
        @cached_arts;
    my @free;
    my $cn = 1;
    # Walk the integers upward, collecting the first $n unoccupied ones.
    while ((0+@free) != $n) {
        unless (any { $_ eq "$cn" } @occupied) {
            push @free, $cn;
        }
        ++$cn;
    }
    chdir $bcwd;
    # The original flipped $| (STDOUT autoflush) here; that was an
    # unrelated global side effect, presumably leftover debugging, and
    # has been removed.
    return @free;
}
# Render $file through the global Template::Toolkit processor with
# $data and hand back the resulting text.
sub process_template ($file, $data) {
    my $rendered;
    $tt->process($file, $data, \$rendered);
    return $rendered;
}
# Export @org_files to HTML via a single batch emacs run.  Returns the
# list of cache paths the exports were written to.  Dies if emacs
# cannot be started or never reports completion.
sub run_emacs (@org_files) {
    my @output_files =
        map { $Conf->cache_dir . "/$_" }
        get_cache_new_filenames(0+@files);
    # Render a perl list as an elisp list literal of strings.
    my sub listify (@list) {
        '(' . (join "", map "\"$_\" ", @list) . ')'
    }
    my $ox = $Conf->ox;
    my $in = listify @org_files;
    my $out = listify @output_files;
    mkdir $Conf->cache_dir;
    # TODO package_dir should eventually become a list (see Config).
    my ($emacs, $package_dir, $init_file) =
        ($Conf->emacs, $Conf->package_dir, $Conf->init_file);
    # Newlines are stripped so the whole program fits in one --eval
    # argument; the leading indentation survives the strip and keeps
    # elisp tokens on consecutive lines separated.  The postamble is
    # kept because the date/author info in it is parsed later.
    my $init_code = <<"ELISP" =~ s/\n//gr;
(require 'org)
(org-link-set-parameters "g"
 :follow nil
 :export
 #'(lambda (link desc _ _)
    (let ((q (char-to-string 34)))
      (concat "<a href=" q link q ">" desc "</a>")))
 :store nil)
(setq org-html-head ""
      org-html-head-extra ""
      org-html-head-include-default-style nil
      org-html-head-include-scripts nil
      org-html-preamble nil
      org-html-use-infojs nil)
(setq make-backup-files nil)
(setq in-files '$in)
(setq out-files '$out)
(setq package-dir "$package_dir")
(setq dd default-directory)
(dolist (dir (directory-files package-dir t))
  (add-to-list 'load-path dir))
(dolist (in in-files)
  (find-file dd)
  (setq out (pop out-files))
  (find-file in)
  (setq string (org-export-as '$ox))
  (find-file out)
  (delete-region (point-min) (point-max))
  (insert string)
  (save-buffer))
(princ "DONE" #'external-debugging-output)
ELISP
    # gensym() hands open3 a fresh glob for capturing stderr; the
    # original passed the *string* 'gensym', which is not a filehandle.
    # open3 also cannot autovivify literal undef arguments, so real
    # lexicals carry the child's stdin/stdout, and since open3 signals
    # failure by dying (never by returning false) it is wrapped in eval.
    my $emacs_err = gensym;
    my $pid = eval {
        open3(my $emacs_in, my $emacs_out, $emacs_err,
              $emacs,
              ("--load", $init_file) x !!$init_file,
              qw(-Q --batch --eval), "(progn $init_code)")
    } or die "Couldn't open emacs: $@\n";
    my $done;
    # Relay emacs' stderr, watching for the final DONE marker.
    while (<$emacs_err>) {
        do { $done = true ; last} if $_ eq "DONE";
        print "E-> ", $_;
    }
    # Reap the child so no zombie process is left behind.
    waitpid $pid, 0;
    if (!$done) {
        die "Couldn't complete emacs task\n";
    }
    return @output_files;
}
# %article: category name => { article id => Article object }.
our %article;
# Parsed DOM of cache.xml, set in main() when a usable cache exists.
our $cache_dom;
# Top-level driver: read the cache, export changed org files through
# emacs, render every template, write the RSS feeds and the genserve
# info file, then rewrite cache.xml for the next run.
sub main {
    # Autoflush so verbose progress messages appear immediately.
    $|++ if $verbose;
    # read the config -----------------------------
    my $conf_dir = path($config_file)->dirname;
    chdir $conf_dir
        or die "Not valid config path\n";
    $Conf->saturate;
    mkdir $Conf->out_dir;
    my $cache_file = $Conf->cache_dir . "/cache.xml";
    $use_cache = 0 unless -f $cache_file;
    if ($use_cache) {
        # NOTE(review): redundant -- the line above already cleared
        # $use_cache when the cache file is missing.
        -f $cache_file or $use_cache = 0;
        $cache_dom = XML::LibXML->load_xml(location => $cache_file)
            or die "Couldn't parse cache file";
        # We need to read the cache over here, and see which entries
        # are still usable.
        for my $cat_node ($cache_dom->findnodes('/data/cat')) {
            my $cat = $cat_node->{name};
            for my ($xart) ($cat_node->getChildrenByTagName("art")) {
                my $id = $xart->{id};
                # Text content of the named child element of this <art>.
                local *value = sub ($n) {
                    $xart->getChildrenByTagName($n)->[0]->to_literal
                };
                my $mid_file = $Conf->cache_dir . "/" . value("mid_file");
                my $file_path = $Conf->blog_dir . "/" . value("file_path");
                # A deleted source, or a source at least as new as its
                # cached export, invalidates this entry.
                if ((!-f $file_path)
                    || ((stat($file_path))[9] >= (stat($mid_file))[9])
                ) {
                    say "Changes made to $cat->$id" if $verbose;
                    # This means we pretend like this is a normal file
                } else {
                    # Cache hit: rebuild the Article from cached metadata.
                    my $art = Article->new(file => $mid_file, path => $file_path);
                    $art->pinit(
                        id => $id,
                        cat => $cat,
                        date => value("date"),
                        title => value("title")
                    );
                    $article{$cat}{$id} = $art;
                    push @cached_arts, $art;
                }
            }
        }
    }
    # Read blog dir and get relevant files ---------
    # saturate @files with org files to export
    path($Conf->blog_dir)->visit(\&process_file, {recurse => 1});
    die "No changed made\n" unless !$mid_cache || @files;
    ($use_cache && !$mid_cache) && @files && die("New files to convert, canot use -m\n");
    # Run emacs and convert @files to @mid_files ----
    say "Runing emacs" if $verbose;
    @files and my @mid_files = run_emacs(@files);
    if (!$mid_cache) {
        push @files, (map { $_->file_path } @cached_arts);
    }
    # Now we parse and get information about these files.  Cached
    # entries appended above get an undef html slot; the first() lookup
    # below recovers their existing Article object instead.
    my %export_files;
    @export_files{@files} = @mid_files;
    for my ($path, $html) (%export_files) {
        # This means this file is already cached.
        my $art =
            (first { $_->file_path eq $path } @cached_arts)
            //
            Article->new(file => $html, path => $path);
        try {
            $verbose &&
                say("Parsing file " . path($path)->relative($Conf->blog_dir));
            $art->parse;
        } catch ($e) {
            print "Error when parsing: $e"
        }
        $article{$art->cat}{$art->id} = $art;
    }
    # Now we copy the files from tree to out dir
    chdir $Conf->blog_dir;
    mkdir $Conf->out_dir;
    # Collect the non-org files to mirror into the output tree later:
    # directories map to undef, plain files map realpath => relative path.
    my %files_to_move;
    path('.')->visit(
        sub ($path, $state) {
            return if /\.org$/;
            if (-d $path) {
                $files_to_move{$path} = undef;
            }
            else {
                $files_to_move{$path->realpath} = $path->stringify;
            }
        }
        , {recurse => 1});
    chdir $Conf->out_dir;
    # Setting up Template::Toolkit stuff
    $tt = Template->new({
        INCLUDE_PATH => $Conf->template_dir,
        INTERPOLATE => 1,
    }) or die "$Template::ERROR\n";
    my @temp;
    # %data is the shared stash handed to every template render.
    my %data = (
        cats => {map { $_ => {name => $_,
                              link => "/" . $Conf->oblog_dir . "/$_",
                              rss => "/" . $Conf->orss_dir . "/$_"}}
                 (keys %article)},
        # cat => { category => [info hashes, newest first] }.  As a
        # side effect @temp collects every info hash for all_arts.
        cat => {pairmap
                {
                    $a =>
                        [sort { $b->{numdate} <=> $a->{numdate} }
                         (pairmap {
                             unshift @temp, $b->info_hash;
                             $temp[0]
                         } %{$b})]
                }
                %article},
        all_arts => [sort {$b->{numdate} <=> $a->{numdate}} @temp],
        main_dir => "/" . $Conf->oblog_dir,
        main_rss => "/" . $Conf->orss_dir
    );
    chdir $conf_dir;
    # Ok, now we need to copy the mid html files
    for my $art (map {values %{$_}} (values %article)) {
        # if $mid_cache is off, we redo files that are already done
        if ($mid_cache) {
            next if any {$_ == $art} @cached_arts;
        }
        $tt->process(
            $Conf->template_art,
            {%data,
             current_cat => $art->cat,
             current_id => $art->id,
             body => $art->text(),
             $art->info_hash->%*},
            \(my $content));
        path($Conf->out_dir . "/" .
             path($art->new_path)->relative($Conf->blog_dir))->spew_utf8($content);
    }
    # I need to make 3 special files:
    # - article: body
    # - cat: Current category
    # - layout: content
    say "Processing main blog template" if $verbose;
    my $blog = process_template ($Conf->template_blog, \%data);
    chdir $Conf->out_dir;
    # Create and enter the output template directory.
    for ($Conf->otemplate_dir) {
        mkdir $_; chdir $_;
    }
    path($Conf->template_blog)->spew($blog);
    my $rss_fh = path("main.xml")->openw;
    defer { close $rss_fh }
    gen_rss($rss_fh, [map {values %{$_}} (values %article)]);
    # Let's go back to main outdir
    # Now we need to make an INFO json file for genserve
    my $land_cont =
        process_template($Conf->template_land, {%data});
    path($Conf->template_land)->spew($land_cont);
    my %info = (
        blog_file => $Conf->otemplate_dir . '/' . $Conf->template_blog,
        landing => $Conf->otemplate_dir . '/' . $Conf->template_land,
        files => [],
        cpath => [],
        blog_dir => "/" . $Conf->oblog_dir . "/",
        rss_dir => "/" . $Conf->orss_dir . "/:cat" ,
        main_rss => $Conf->otemplate_dir . '/main.xml',
        main_rss_url => "/" . $Conf->orss_dir
    );
    my $cfile = 1;
    for my ($cat, $arts) (%article) {
        for my ($id, $art) (%{$arts}) {
            push (@{$info{files}} ,
                  {cat => $cat,
                   art => $art->id,
                   file => path($art->new_path)->relative($Conf->blog_dir)->stringify});
        }
        # We also need to make the category template; category pages
        # get sequential numeric filenames.
        my $cont =
            process_template($Conf->template_cat, {%data, current_cat => $cat});
        my $p = $cfile++ . ".html";
        path($p)->spew($cont);
        my $art_path = path($p)->realpath->relative($Conf->out_dir)->stringify;
        push (@{$info{cpath}},
              {cat => $cat,
               file => $art_path,
               rss => "$art_path.xml"});
        my $fh = path("$p.xml")->openw;
        defer { close $fh }
        gen_rss($fh, [values %{$arts}], $cat);
    }
    chdir $Conf->out_dir;
    path($Conf->info_file)->spew(encode_json(\%info));
    # Remember we rounded up %files_to_move?
    # Now we move them
    for my ($from, $to) (%files_to_move) {
        if ($to) {
            if ($from =~ /\.html\.tt$/) {
                # File is a template: render it through TT first.
                $verbose &&
                    say("Processing template file ",
                        path($from)->relative($Conf->blog_dir))
                    ;
                my $cont = path($from)->slurp;
                $tt->process(\$cont, \%data, \(my $stuff));
                path($to =~ s/\.tt$//r)->spew( $stuff);
            } else {
                path($from)->copy($to);
            }
        } else {
            # A directory: just make sure it exists in the output.
            mkdir $from;
            next;
        }
    }
    # Now we saturate the cache: serialise every article's metadata
    # back out to cache.xml for the next run.
    my $dom = XML::LibXML::Document->new('1.0', 'UTF-8');
    my $xmain = $dom->createElement('data');
    for my ($cat, $arts) (%article) {
        my $xcat = $dom->createElement('cat');
        $xcat->{name} = $cat;
        for my ($id, $art) ($arts->%*) {
            my $xart = $dom->createElement('art');
            $xart->{id} = $id;
            # Append a named child element holding $value as text.
            my $xadd = sub ($name, $value) {
                my $xel = $dom->createElement($name);
                $xel->appendText($value);
                $xart->appendChild($xel);
            };
            # ----
            &$xadd('date', $art->date->epoch);
            &$xadd('title', $art->title);
            &$xadd('mid_file', path($art->mid_file)->relative($Conf->cache_dir));
            &$xadd('file_path', path($art->file_path)->relative($Conf->blog_dir));
            # ----
            $xcat->appendChild($xart);
        }
        $xmain->appendChild($xcat);
    }
    $dom->setDocumentElement($xmain);
    path($Conf->cache_dir . "/cache.xml" )->spew($dom->toString(1));
    say "All done" if $verbose;
}
main;
# Write an RSS 2.0 feed for the given articles to $fh.  When $cat is
# given, the channel title and description mention that category.
# Heredocs are passed through s/\n//gr so the XML is emitted without
# the source-layout newlines.
sub gen_rss ($fh, $arts, $cat //= 0) {
    my $title = $Conf->title;
    my $desc = $Conf->desc;
    my $domain = $Conf->domain;
    print $fh <<"EOX" =~ s/\n//gr;
<?xml version="1.0" encoding="UTF-8" ?>
<rss version="2.0">
<channel>
EOX
    # Channel header: category-specific or site-wide.
    my $mid_cont;
    if ($cat) {
        $mid_cont = <<"EOX"
<title>$title: $cat</title>
<link>$domain</link>
<description>$desc. Category: $cat</description>
EOX
    } else {
        $mid_cont = <<"EOX"
<title>$title</title>
<link>$domain</link>
<description>$desc</description>
EOX
    }
    print $fh ($mid_cont =~ s/\n//gr);
    # One <item> per article; ordering is the caller's concern.  The
    # substitution strips newline-plus-indent pairs only.
    for my $art (@{$arts}) {
        my ($cat, $id) = ($art->cat, $art->id);
        my $link = "/" . $Conf->oblog_dir . "/$cat/$id";
        my $art_title = $art->title;
        my $time = $art->date->strftime($Conf->time_format);
        print $fh <<"EOX" =~ s/\n[ \t]+//gr;
<item>
<title>$art_title</title>
<link>$domain$link</link>
<pubDate>$time</pubDate>
</item>
EOX
    }
    print $fh "</channel></rss>";
}
# One blog article: the org source file, its exported (cached) HTML,
# and the metadata (category, id, title, date) recovered from that HTML.
class Article {
    use List::Util qw(first);
    use Path::Tiny;
    use Time::Piece;
    use XML::LibXML;
    # Path of the cached HTML export (constructor key: file).
    field $mid_file :param(file) :reader(mid_file);
    # Path of the original org source (constructor key: path).
    field $file_path :reader :param(path);
    # Where the rendered article goes; derived in ADJUST below.
    field $new_path :reader;
    # Short names: the full words are way too easy to misspell.
    field $cat :reader;
    field $id :reader;
    field $title :reader;
    field $date :reader;
    # Lazily built DOM of $mid_file, shared by text() and parse().
    field $dom;
    # Related-article mapping from the description line; populated by
    # parse() but not otherwise used yet (see the TODO in info_hash).
    field %related;
    ADJUST {
        $new_path = "$file_path.html";
    }
    # Return the article body: the <body> element of the exported HTML,
    # parsing $mid_file on first use.
    method text {
        $dom or $dom = XML::LibXML->load_html(location => $mid_file,
                                              recover => 1,
                                              suppress_errors => 1);
        my ($body) = $dom->findnodes('/html/body');
        $body->toString
    }
    method info_hash {
        # Get all the relevant info in a hash, for templating use.
        return {
            # TODO related articles
            # TODO custom date format
            date => $date->strftime($Conf->time_format),
            numdate => $date->epoch,
            title => $title,
            link => "/" . $Conf->oblog_dir . "/$cat/$id",
            id => $id,
            cat => $cat
        }
    }
    # Initialise metadata directly, bypassing parse(); used when
    # restoring an article from the cache.  %i keys: id cat date title;
    # date is an epoch value.
    method pinit (%i) {
        $date = localtime($i{date});
        $cat = $i{cat};
        $title = $i{title};
        $id = $i{id};
    }
    # Split a description string of the form "a -> b -> c ..." into its
    # arrow-separated tokens; the caller unpacks the result as
    # ($cat, $id, %related).
    sub parse_desc ($str) {
        $str || die "No description content provided\n";
        my @return = ();
        while ($str =~ s/(^.*?\s*->\s*\w+)\s*//) {
            @return = (@return, split(/->\s*/, $1));
        }
        @return;
    }
    # Parse the exported HTML: extract the title, the date from the
    # postamble, and category/id from the description meta tag; drop
    # the postamble; and rewrite image srcs to site-absolute paths.
    # Dies when any required metadata is missing.
    method parse {
        $dom = XML::LibXML->load_html(location => $mid_file,
                                      recover => 1,
                                      suppress_errors => 1)
            or die "Couldn't parse\n";
        my ($title_node) = $dom->findnodes("/html/head/title") or die "No title\n";
        $title = $title_node->to_literal;
        my $date_node =
            first {$_->to_literal =~ /^Date: /}
            ($dom->findnodes('/html/body/*[@id="postamble"]/p[@class="date"]'));
        $date_node or die "No date\n";
        $date = Time::Piece->strptime($date_node->to_literal
                                      =~ s/^Date: ([^ ]+).+$/$1/r,
                                      "%F") or die "Couldn't parse date\n";
        # We don't need the postamble at all.
        $_->parentNode->removeChild($_) for ($date_node->parentNode);
        my ($desc_node) = $dom->findnodes('/html/head/meta[@name="description"]')
            or die "Description doesn't exist\n";
        ($cat, $id, %related) = parse_desc $desc_node->{content};
        # Make every local image src a site-absolute path.
        for my $node ($dom->findnodes("//img")) {
            my $src = $node->{src};
            next if $src =~ /^http/;
            $src =~ s|file://||;
            if ($src =~ m[^/]) {
                $src = "/" . path($src)->relative($Conf->blog_dir)
            } else {
                # Relative src: resolve against the article's directory.
                $src = "/" .
                    path(path($file_path)->dirname
                         . ("/$src" =~ s#\./##r))->relative($Conf->blog_dir);
            }
            $node->{src} = $src;
        }
    }
}
# Configuration loaded from the TOML config file.  Each section is
# described by a spec hash mapping directive name =>
# [\$field, default, type]; a default of 0 marks the directive as
# required.  Types ending in _bn ("basename") are not resolved to
# absolute paths by change_if_fs.
class Config {
    use TOML qw(from_toml);
    use List::Util qw(any pairmap pairgrep);
    use Path::Tiny qw(cwd path);
    field $file :param :reader;
    field $time_format :reader = "%Y-%m-%d";
    # [no_names] section: output directory names used in URLs.
    field $oblog_dir :reader;
    field $otemplate_dir :reader;
    field $orss_dir :reader;
    field %no_names = (
        blog => [\$oblog_dir, 'blog', 'dir_bn'],
        templates => [\$otemplate_dir, 'templates', 'dir_bn'],
        rss => [\$orss_dir, 'rss', 'dir_bn']
    );
    # [paths] section.
    field $out_dir :reader;
    field $blog_dir :reader;
    field $cache_dir :reader;
    field $info_file :reader;
    field %paths = (
        out_dir => [\$out_dir, 0,'dir'],
        blog_dir => [\$blog_dir, 0,'existing_dir'],
        cache_dir => [\$cache_dir, '.genrog-cache', 'dir'],
        info_file => [\$info_file, '.genorg.json', 'file_bn']
    );
    # [emacs] section.
    field $emacs :reader;
    field $init_file :reader;
    # TODO make this an array
    field $package_dir :reader;
    field $ox :reader;
    field %emacs_conf = (
        emacs => [\$emacs, qw(emacs program)],
        init_file => [\$init_file, 0, 'existing_file'],
        package_dir => [\$package_dir, $ENV{HOME} . '/.emacs.d/elpa', 'existing_dir'],
        ox => [\$ox, "html", "symbol"]
    );
    # [templates] section.
    field $template_dir :reader;
    # These 2 are special templates
    field $template_art :reader;
    field $template_cat :reader;
    field $template_main :reader;
    field $template_blog :reader;
    field $template_land :reader;
    field %template_files = (
        template_dir => [\$template_dir, 'templates', 'existing_dir'],
        template_main => [\$template_main, 'layout.html.tt', 'existing_file_bn'],
        template_blog => [\$template_blog, 'blog.html.tt', 'existing_file_bn'],
        template_art => [\$template_art, 'art.html.tt', 'existing_file_bn'],
        template_cat => [\$template_cat, 'cat.html.tt', 'existing_file_bn'],
        template_land => [\$template_land, 'index.html.tt', 'existing_file_bn'],
    );
    # [info] section: site/feed metadata.
    field $title :reader;
    field $domain :reader;
    field $desc :reader;
    field %info = (
        title => [\$title, 0, 'symbol'],
        domain => [\$domain, 0, 'symbol'],
        desc => [\$desc, 0, 'symbol']
    );
    # Saturate the path variables
    # TODO add tilda support
    use Data::Dumper;
    # Fill the fields described by spec hash $info from one TOML
    # section ($data), applying defaults and dying on unknown or
    # missing required directives.
    sub get_paths ($data, $info, $section) {
        my %vars = (map {($_, 0)} keys %$info);
        for my ($var, $val) (%$data) {
            exists $info->{$var}
                or die " Unknown directive, '$var'\n";
            my ($ref, $default, $type) = $info->{$var}->@*;
            validate_type($val, $type);
            $val = change_if_fs($val, $type);
            $$ref = $val;
            $vars{$var}++;
        }
        # Directives absent from the config fall back to defaults.
        my %not_done = pairgrep { !$b } %vars;
        for (keys %not_done) {
            my ($ref, $default, $type) = @{$info->{$_}};
            validate_type($default, $type);
            # NOTE(review): change_if_fs realpaths fs-typed defaults even
            # when the default is 0 (required) -- confirm Path::Tiny does
            # not die here before the intended error below is reached.
            $default = change_if_fs($default, $type);
            $$ref = $default || die "$_ expected in [$section]\n";
        }
    }
    # Resolve filesystem-typed values to absolute real paths.  Only
    # types literally ending in "dir"/"file" match, which excludes the
    # *_bn basename variants.
    sub change_if_fs ($v, $t) {
        if ($t =~ /dir$/ or $t =~ /file$/) {
            return path($v)->realpath->stringify
        }
        return $v
    }
    # Die with a friendly message when a value fails its type check.
    # NOTE(review): appears unused in this file -- get_paths ignores
    # validate_type's return value instead of calling this.
    sub validate($v, $t) {
        return if validate_type($v, $t);
        (die "$v must be " . {
            'existing_file' => "an existing file",
            'existing_dir' => "an existing directory"
        }->{$t} . "\n")
    }
    # Type predicate: true when $value satisfies $type.  Types with no
    # listed check (symbol, file_bn, ...) always pass.
    sub validate_type ($value, $type) {
        for ($type) {
            /^existing_dir$/ && do { return -d $value };
            /^dir(_bn)?$/ && do { return true };
            /^program$/ && do { return true };
            /^existing_file(_bn)?$/ && do { return -f $value };
            # Symbol
            return 1;
        }
    }
    # Read $file (TOML) and dispatch each required section through
    # get_paths; dies with context on any config error.
    method saturate {
        say "Reading config" if $verbose;
        try {
            my $data = from_toml(path($file)->slurp);
            # TODO refactor
            get_paths(($data->{paths} // die "[paths] needed\n") ,\%paths, "paths");
            get_paths(($data->{emacs} // die "[emacs] needed\n") ,\%emacs_conf, "emacs");
            get_paths(($data->{templates} // die "[templates] needed\n") ,\%template_files, "templates");
            get_paths(($data->{info} // die "[info] needed\n") ,\%info, "info");
            get_paths(($data->{no_names} // die "[no_names] needed\n") ,\%no_names, "no_names")
        } catch ($e) {
            die "Error when reading config: $e";
        }
        say "Done reading config" if $verbose;
    }
}