#!/bin/sh
# Generate an RSS 2.0 feed directly from your existing web site
# Author - Pádraig Brady
# Licence - LGPLV2
# Releases -
# 1.0 - Jun 19 2006 - Initial release
# 1.1 - Jun 26 2006 - Exclude files with "Exclude from bashfeed"
# HTML comment within the first 10 lines.
# 1.2 - May 01 2007 - Add author elements (from html if present)
# 1.4 - May 04 2011
# http://github.com/pixelb/scripts/commits/master/scripts/bashfeed
# Just run this script from the root directory of your web site and
# it will generate feed items for the newest files. Generally I do this
# just before I sync my local web site copy to my public server.
# One can generate a feed for a subset of the site by still running
# from the root directory, and passing a subdirectory to start at.
# To change the feed TITLE and DESCRIPTION, set those environment
# variables before running the script.
# Which files are selected and excluded can be configured below.
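# Note for html files it will extract the following elements if present
#   <head>
#     <title>Item title</title>
#     <meta name="keywords" content="Item tags separated by spaces">
#     <meta name="author" content="email@address (Item author)">
#   </head>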
# Note this script will keep the same item guid for an updated file.
# Just updating the pubDate will not cause liferea 1.0.11 at least
# to mark the item as updated (or update the timestamp even).
# One must change the description or title also, and so
# I set the (hidden) description to the file timestamp.
#
# Testing with thunderbird 1.0.8 shows that it indexes on link
# and so won't ever show updates to other fields. Therefore I append #seconds
# to the link to force it to create a new entry for an updated item.
#
# Note you may find the http://www.pixelbeat.org/scripts/fix script
# useful for doing edits to files that you don't want to show up
# as updated content in the feed, or generally edit a file without
# changing the modification date.
# Maximum number of items to put in the feed.
num_files=10
# Host name that every generated URL is rooted at.
site="www.pixelbeat.org"
# Feed-level author; a page whose own author equals this gets no per-item author.
author="P@draigBrady.com (Pádraig Brady)"
suggested_update_freq=1440 #mins
# TITLE and DESCRIPTION can be preset in the environment; these defaults only
# apply when the variable is unset. The expansions are quoted so the value
# handed to ':' is not word-split or glob-expanded.
: "${TITLE=$site}"
: "${DESCRIPTION=latest from $site}"
#files starting with . | files without a . | files ending in .c .cpp ...
include_re='(^|/)[.].+|(^|/)[^.]+$|[.](c|cpp|py|sh|rc|tips|fortune|html)$' #only show these files
# The dot of .htaccess is escaped so it matches a literal dot only; unescaped
# it also excluded unrelated names such as "my-htaccess-notes.html".
exclude_re='(\.git/|priv/|tmp/|\.htaccess|xvpics|timeline\.html|modified\.html|head\.html|header\.html|footer\.html|footer-home\.html|adds\.html|last\.html|fslint/(NEWS\.html|md5sum)|README)' #don't show these paths
# Default document names (space separated) that are stripped from the end of
# item paths so the URL points at the directory instead.
default_files="index.html index.shtml index.php"
############# No user serviceable parts below ###################
# Print a sed script that strips a trailing default document name from a
# path, so "docs/index.html" becomes "docs/" and "index.html" becomes "".
# Arguments: the default document names, one per argument.
# Outputs:   the sed script on stdout, two commands per name.
# The name must be a whole path component: "reindex.html" is left alone.
default_files_sed_script() {
  for default_name in "$@"; do
    # Escape BRE metacharacters and the s/// delimiter so the name is
    # matched literally (the "." in "index.html" must not match any char).
    default_pattern=$(printf '%s\n' "$default_name" | sed 's|[][\.*^$/]|\\&|g')
    # Each "t" jumps to the end of the script once a name has been stripped.
    printf 's/^%s$//;t\n' "$default_pattern"
    printf 's/\\(.*\\/\\)%s$/\\1/;t\n' "$default_pattern"
  done
}
# $default_files is deliberately unquoted: it is a space separated list.
# The variable is assigned afresh, so a value inherited from the environment
# cannot leak into the sed script.
replace_default_files=$(default_files_sed_script $default_files)
# Print the XML declaration and the opening of the RSS 2.0 channel.
# Arguments: $1 - optional subdirectory the feed is generated for; it is
#                 appended to the channel link.
# Globals:   TITLE, DESCRIPTION, site, author, suggested_update_freq, LANG (read)
# Outputs:   the feed header on stdout.
# NOTE(review): the element names were missing from this block and have been
# restored from the RSS 2.0 channel element set, matched to the order in
# which the values appeared (title, ttl, link, generator, description,
# managingEditor, pubDate, language). The utf-8 encoding is an assumption.
# Confirm both against the original script.
print_feed_header() {
  feed_build_time=$(date --rfc-2822)
  # Reduce a locale such as "en_IE.UTF-8" to its language code "en".
  feed_language=$(printf '%s\n' "$LANG" | sed 's/\(..\)_.*/\1/')
  cat <<EOF
<?xml version="1.0" encoding="utf-8"?>
<rss version="2.0">

<channel>
    <title>$TITLE</title><ttl>$suggested_update_freq</ttl>
    <link>http://$site/$1</link>
    <generator>http://www.pixelbeat.org/scripts/bashfeed</generator><description>$DESCRIPTION</description><managingEditor>$author</managingEditor><pubDate>$feed_build_time</pubDate><language>$feed_language</language>

EOF
}
print_feed_header "$1"
# Succeed (exit 0) when the text on stdin still contains a raw "&", "<" or
# ">" once every well-formed XML escape has been removed; fail otherwise.
# Recognised escapes are the five predefined XML entities and numeric
# character references. Anything else (e.g. the HTML-only "&nbsp;") is
# reported as unescaped because it is not valid in an XML document.
xml_unescaped() {
  sed 's/&amp;//g; s/&[lg]t;//g; s/&quot;//g; s/&apos;//g
       s/&#[0-9][0-9]*;//g; s/&#x[0-9A-Fa-f][0-9A-Fa-f]*;//g' |
    grep -q '[&<>]'
}
# ---------------------------------------------------------------------------
# Feed body: one item per selected file, newest first, then the closing tags.
# NOTE(review): every piece of markup in this section (the exclusion marker,
# the <title>/<meta> patterns, the item elements and the closing tags) was
# missing and has been restored from the comments at the top of this file
# and the RSS 2.0 item element set. Confirm against the original script.
# ---------------------------------------------------------------------------

# A file carrying this HTML comment within its first 10 lines is left out.
exclude_marker='<!-- Exclude from bashfeed -->'

# Read candidate paths (one per line) on stdin and print those that do not
# carry the exclusion marker, stopping once $num_files have been printed.
select_newest_files() {
  selected=0
  while IFS= read -r candidate; do
    # head prints the first 10 lines; -F/-i match the marker literally,
    # ignoring case.
    if ! head -- "$candidate" | grep -Fiq -- "$exclude_marker"; then
      printf '%s\n' "$candidate"
      selected=$((selected + 1))
      if [ "$selected" -eq "$num_files" ]; then
        break
      fi
    fi
  done
}

# Print the RSS <item> for one file.
# Arguments: $1 - path of the file, relative to the site root.
# Globals:   site, author, replace_default_files (read); xml_unescaped (called)
# Returns:   non-zero, with a message on stderr, when the title, keywords or
#            author taken from an HTML page contain unescaped XML characters.
# The body runs in a subshell, so none of its variables leak to the caller.
emit_feed_item() (
  file=$1
  pubDate=$(date --reference="$file" --rfc-2822)
  force_update=$(date --reference="$file" "+%s")

  # page_author is reset with the others so that an author found in one page
  # is never carried over to the following items.
  title=""; keywords=""; description=""; page_author=""
  case $file in
    *.html | *.shtml | *.php)
      # Each sed prints the first match only: T skips lines without a
      # match, q stops reading after the first line that has one.
      title=$(sed -n 's/.*<title>\(.*\)<\/title>.*/\1/ip;T;q' < "$file")
      keywords=$(sed -n 's/.*<meta name="keywords" content="\([^"]*\)".*/\1/ip;T;q' < "$file")
      page_author=$(sed -n 's/.*<meta name="author" content="\([^"]*\)".*/\1/ip;T;q' < "$file")
      if printf '%s\n' "$title$keywords$page_author" | xml_unescaped; then
        printf '%s\n' "Error: unescaped XML characters in title, keywords or author of $file" >&2
        exit 1
      fi
      ;;
    *)
      if [ -x "$file" ]; then # I always have a 1 line description on line 3 of my scripts
        description=$(sed -n '3s/# \(.*\)/\1/p' < "$file")
      fi
      ;;
  esac

  # Point at the directory rather than at its default document.
  url_path=$(printf '%s\n' "$file" | sed "$replace_default_files")
  [ -n "$title" ] || title=$url_path

  # One <category> per whitespace separated keyword. Globbing is switched
  # off so that a keyword such as "*" is not expanded to file names.
  tags=""
  set -f
  for keyword in $keywords; do
    tags="$tags<category>$keyword</category>
        "
  done
  set +f

  # Only name the author when it differs from the feed-level one.
  if [ -n "$page_author" ]; then
    if [ "$page_author" = "$author" ]; then
      page_author=""
    else
      page_author="<author>$page_author</author>
        "
    fi
  fi

  # The guid stays the same when a file is updated, while the link gets
  # "#seconds" appended so that readers indexing on the link show the update.
  # The file timestamp is also placed in the description as a hidden HTML
  # comment, because some readers only refresh an item when its title or
  # description changes.
  cat <<EOF

    <item>
        <title>$title</title><guid>http://$site/$url_path</guid><pubDate>$pubDate</pubDate>
        <link>http://$site/$url_path#$force_update</link>
        $page_author$tags<description><![CDATA[$description<!-- $force_update -->]]></description>
    </item>
EOF
)

# find prints "path<TAB>mtime". The explicit tab separator keeps the sort
# key correct for paths that contain spaces.
tab=$(printf '\t')
# ${1:+"$1"} passes the optional start directory as one word, or nothing at
# all so that find starts in the current directory.
find ${1:+"$1"} -type f -printf "%p\t%T@\n" |
  sed 's/^\.\///' | # strip leading ./ when "$1" is empty
  sort -t "$tab" -k2,2nr |
  cut -f1 |
  grep -E "$include_re" |
  grep -Ev "$exclude_re" |
  select_newest_files |
  while IFS= read -r file; do
    # A bad page aborts the whole feed: the closing tags below are skipped.
    emit_feed_item "$file" || exit 1
  done &&
echo '</channel></rss>'