Initial commit

This commit is contained in:
Darren 'Tadgy' Austin 2018-04-12 19:57:19 +01:00
commit b9f45b1a83
3 changed files with 510 additions and 0 deletions

11
README Normal file
View file

@ -0,0 +1,11 @@
What is it?
-----------
Usage as a pipe logger with httpd VirtualHosts
----------------------------------------------
Usage as a raw pipe logger
-------------------------
Usage with a fifo
-----------------

8
TODO Normal file
View file

@ -0,0 +1,8 @@
* Write a man page.
* Add a regex filter (read from a file) to decide what to log and what to drop.
* Instead of requiring a fifo already exist, if the file doesn't already exist create the fifo.
Would need to modify the trap for SIGTERM in order to clean up the file.
* Figure out a way to check if the program is respawning too offten - this would indicate an error in the calling process and
we don't want to just keep looping forever.
* Have an option to change UID and/or GID when running. Alternatively, use setpriv to drop capabilities.
* Make it so the symlink filename can contain the same escapes and {} as the template file.

491
lumberjack Executable file
View file

@ -0,0 +1,491 @@
#!/bin/bash
# Copyright (c) 2018:
# Darren 'Tadgy' Austin <darren (at) afterdark.org.uk>
# Licensed under the terms of the GNU General Public License version 3.
#
#........1.........2.........3.........4.........5.........6.........7.........8.........9.........0.........1.........2.........3.....:...4
die() {
# $1 The text of the error message to display on stderr.
printf "%s: %s\n" "$LJ_NAME" "$1" >&2
exit 1
}
display_help() {
# |--------1---------2---------3---------4---------5---------6---------7---------8
cat <<-EOF
Usage: $LJ_NAME [options] <basedir> <template>
Process input (possibly including an httpd VirtualHost site identifier) from
stdin or a FIFO and write a log line to a log file based upon the <basedir> and
<template>.
Options (which are optional):
-ca "<args>" Set the compression command arguments. Default: ${LJ_COMPRESSOR_ARGS[@]}.
The quotes are required if more than 1 argument is required.
-cc <util> Set the compression command to use. Default: $LJ_COMPRESSOR.
-f Request flushing of the log file to disk after every write.
This may significantly reduce performance and result in a lot of
disk writes. Best to let the kernel do appropriate buffering.
-h Display this help.
-i <fifo> Read input from the FIFO at <fifo>, rather than stdin.
-j <jobs> Maximum number of compression jobs to have active at once.
Default: $LJ_MAXJOBS. Don't set this too high.
-l <link> Create a symlink named <link> to the currently active log file.
The symlink is created in the same directory as the log file.
-r Raw logging mode. In this mode, no processing of the log line
for an httpd VirtualHost site identifier is performed - log
lines are written verbatim to the log filename constructed from
<basedir> and <template>.
-ud <umask> Set the umask used when creating directories. Default: $LJ_DIR_UMASK.
Useful umasks are: 077, 066, 026 and 022.
-uf <umask> Set the umask used when creating files. Default: $LJ_FILE_UMASK.
Useful umasks are: 077 and 022.
-v Display version and copyright information.
-z Enable compression of the old log files.
-- Cease option processing and begin argument parsing.
Option processing ceases with the first non-option argument or --.
Arguments (which are required):
<basedir> The base directory of where to write the log files.
<template> The filename template. When normal mode, the template must
include at least one occurance of '{}', which is replaced with
the site name from the VirtualHost identifier. In raw mode
(-r), '{}' should not be included in the template. The template
may also include any %-escaped format strings recognised by the
strftime(3) function. See below for examples.
Examples:
When used with the httpd CustomLog directive, using %v as the first log format
string:
"|$LJ_NAME '/path/to/logsdir' '{}/logs/access-log-%Y-%m'"
Where the httpd VirtualHost identifier is 'example.com', would write logs
(with the site identifier steipped) to the filename:
/path/to/logsdir/example.com/logs/access-log-<year>-<month>
"|$LJ_NAME '/path/to/logsdir' '{}/logs/()-access-log-%Y-%m'"
Where the httpd VirtualHost identifier is 'example.com', would write logs
(with the site identifier steipped) to the filename:
/path/to/logsdir/example.com/logs/example.com-access-log-<year>-<month>
When used with the httpd ErrorLog directive (both examples are equilivant):
"|$LJ_NAME -r '/path/to/logsdir' 'logs/error-log-%Y-%m'"
Would write raw log lines to the filename:
/path/to/logsdir/logs/error-log-<year>-<month>
"|$LJ_NAME -r '/path/to/logsdir/logs' 'error-log-%Y-%m'"
Equilivant to the above; would write raw log lines to the filename:
/path/to/logsdir/logs/error-log-<year>-<month>
EOF
}
display_version() {
# |--------1---------2---------3---------4---------5---------6---------7---------8
cat <<-EOF
$LJ_NAME v$LJ_VERSION.
Copyright (c) 2018 Darren 'Tadgy' Austin <darren (at) afterdark.org.uk>.
Licensed under the terms of the GNU GPL v3 <http://gnu.org/licenses/gpl.html>.
This program is free software; you can modify or redistribute it in accordence
with the GNU GPL. However, it comes with ABSOLUTELY NO WARRANTY; not even the
implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
EOF
}
is_dir() {
# $1 The path to verify is a directory.
[[ ! "$1" ]] && return 1
[[ ! -d "$1" ]] && {
syslog "error" "not a directory: $1"
return 1
}
return 0
}
make_dir() {
# $1 The directory to create.
# $2 The permissions to assign to the new directory.
[[ ! "$1" ]] && return 1
[[ ! -e "$1" ]] && {
umask "$LJ_DIR_UMASK"
mkdir "$1" 2>/dev/null || {
syslog "error" "failed to create directory: $1"
return 1
}
}
return 0
}
open_fd() {
# $1 The site identifier in the array.
# $2 The log file path to open.
[[ ! "$1" || ! "$2" ]] && return 1
echo "Opening FD for file $2"
umask "$LJ_FILE_UMASK"
exec {LJ_FDS[$1]}>>"$2" || {
syslog "error" "failed to open log file for writing: $2"
return 1
}
return 0
}
sigchld_handler() {
echo "SIGCHLD handler"
local LJ_JOB
for LJ_JOB in "${!LJ_JOBS[@]}"; do
[[ "${LJ_JOBS[$LJ_JOB]}" ]] && {
! kill -0 "${LJ_JOBS[$LJ_JOB]}" >/dev/null 2>&1 && {
echo "Waiting for ${LJ_JOBS[$LJ_JOB]}"
wait "${LJ_JOBS[$LJ_JOB]}"
unset "LJ_JOBS[$LJ_JOB]"
(( LJ_RUNNING-- ))
}
}
done
(( LJ_RUNNING == 0 )) && set +bm
}
syslog() {
# $1 The syslog level at which to log the message.
# $2 The text of the message to log.
[[ ! "$1" || ! "$2" ]] && return 1
# FIXME:
# logger --id="$$" -p "user.$1" -t "$LJ_NAME" "$1: $2" 2>/dev/null
printf "%s: %s\n" "$1" "$2"
}
# Some detaults.
LJ_COMPRESSOR_ARGS=( "-9" )
LJ_COMPRESSOR="gzip" # Use gzip by default as log processing utils can often natively read gzipped files.
LJ_FLUSH=0
LJ_INPUT="/dev/stdin"
LJ_MAXJOBS="4"
LJ_LINKFILE=""
LJ_RAW=0
LJ_DIR_UMASK="022"
LJ_FILE_UMASK="022"
LJ_COMPRESS=0
# Script details.
LJ_NAME="${0##*/}"
LJ_VERSION="0.1.0"
# trap signals.
# FIXME: Need to trap SIGCHLD and wait -n for the process.
# FIXME: Can the SIGCHLD trap be set *only* when a compression process is running? Wou;d save a lot of erronious calls...
trap 'sigchld_handler' SIGCHLD
trap '' SIGHUP
trap 'syslog "info" "received SIGUSR1 ping request"' SIGUSR1
# FIXME: flush all logs, close fd's, exit.
trap 'exit 0' SIGTERM
# Parse command line options.
while :; do
case "$1" in
-ca)
# Set the compression command arguments.
[[ ! "$2" ]] && die "missing argument to -ca"
LJ_COMPRESSOR_ARGS=( $2 )
shift 2
continue
;;
-cc)
# Set the compression command to use.
[[ ! "$2" ]] && die "missing argument to -cc"
"$2" --help >/dev/null 2>&1 || die "$2: invalid compressor command"
LJ_COMPRESSOR="$2"
shift 2
continue
;;
-f)
# Flush files after every write.
LJ_FLUSH=1
shift
continue
;;
-h)
# Show the help screen and exit.
display_help
exit 0
;;
-i)
# Use a FIFO instead of stdin - the FIFO must already exist (use 'mkfifo' first).
[[ ! "$2" ]] && die "missing argument to -f"
[[ ! -e "$2" ]] && die "$2: no such file"
[[ ! -p "$2" ]] && due "$2: not a FIFO"
LJ_INPUT="$2"
shift 2
continue
;;
-j)
# Set the maximum number of concurrent compression jobs to have active at once.
[[ ! "$2" =~ [0-9]+ ]] && die "$2: invalid number of jobs"
(( $2 == 0 )) && die "$2: invalid number of jobs"
LJ_MAXJOBS="$2"
shift 2
continue
;;
-l)
# Set the link name to use.
[[ ! "$2" ]] && die "missing argument to -l"
[[ "$2" =~ .*/ ]] && die "$2: link name cannot include '/'"
LJ_LINKFILE="$2"
shift 2
continue
;;
-r)
# Set raw mode.
LJ_RAW=1
shift
continue
;;
-ud)
# Set the directory umask.
[[ ! "$2" ]] && die "missing argument to -ud"
[[ ! "$2" =~ [0-7]{3} ]] && die "$2: invalid umask"
LJ_DIR_UMASK="$2"
shift 2
continue
;;
-uf)
# Set the file umask.
[[ ! "$2" ]] && die "missing argument to -uf"
[[ ! "$2" =~ [0-7]{3} ]] && die "$2: invalid umask"
LJ_FILE_UMASK="$2"
shift 2
continue
;;
-v)
# Show the version and exit.
display_version
exit 0
;;
-z)
# Compress logs once they are rotated.
LJ_COMPRESS=1
shift
continue
;;
--)
# Stop processing options. Everything further is taken as an argument.
shift
break
;;
*)
break
;;
esac
done
# If there isn't 2 arguments left, exit.
(( $# != 2 )) && {
printf "%s\n" "$LJ_NAME: incorrect number of non-option arguments" >&2
printf "%s\n" "Try: $LJ_NAME -h" >&2
exit 1
}
# The remaining arguments should be the base directory and the template.
LJ_BASEDIR="$1"
[[ "${LJ_BASEDIR:0:1}" != "/" ]] && die "$LJ_BASEDIR: must be an absolute path"
[[ ! -e "$LJ_BASEDIR" ]] && die "$LJ_BASEDIR: base directory does not exist"
[[ ! -d "$LJ_BASEDIR" ]] && die "$LJ_BASEDIR: not a directory"
LJ_TEMPLATE="$2"
[[ "${LJ_TEMPLATE: -1:1}" = "/" ]] && die "$LJ_TEMPLATE: template cannot end with '/'"
(( LJ_RAW == 0 )) && [[ ! "$LJ_TEMPLATE" =~ .*\{\} ]] && die "$LJ_TEMPLATE: template must include at least one '{}'"
(( LJ_RAW != 0 )) && [[ "$LJ_TEMPLATE" =~ .*\{\} ]] && die "$LJ_TEMPLATE: template cannot include '{}'"
# FIXME: This may need to be set for the exact sections of code where it's required as it causes a SIGCHLD for *every* completed
# non-builtin command.
#set -bm
# Open the input file for reading.
exec {LJ_INPUT_FD}<"$LJ_INPUT" || die "$LJ_INPUT: failed to open for reading"
# The array of file descriptors corresponding to each path.
declare -A LJ_FDS
# The array of jobs needing to be compressed.
declare -A LJ_JOBS
# The number of compression jobs currently active.
LJ_RUNNING=0
while :; do
# Reset used variables.
unset LJ_LOG_VHOST LJ_LOG_DATA
LJ_TIMED_OUT=0
# Start compression jobs if there's any in the queue.
while (( LJ_RUNNING < LJ_MAXJOBS )); do
for LJ_JOB in "${!LJ_JOBS[@]}"; do
[[ ! "${LJ_JOBS[$LJ_JOB]}" ]] && {
echo "Starting job: $LJ_COMPRESSOR ${LJ_COMPRESSOR_ARGS[@]} $LJ_JOB"
set -bm
"$LJ_COMPRESSOR" "${LJ_COMPRESSOR_ARGS[@]}" "$LJ_JOB" &
# >/dev/null 2>&1 &
LJ_JOBS[$LJ_JOB]="$!"
(( LJ_RUNNING++ ))
continue 2
}
done
break
done
# The time until the top of the next minute - this is used for the 'read' timeout so that
# closing log files and compression can still occur even if no log lines are written.
# Note: This does mean we can't have per second log files, but I can't see that being a requirement.
LJ_TTNM="$(( 60 - 10#$(printf "%(%S)T") ))"
echo
date
echo "Waiting for input, timeout: $LJ_TTNM"
# Read the log line.
# Note: The $(...) expansion should *not* be quoted in this instance.
#echo read -r -t "$LJ_TTNM" -u "$LJ_INPUT_FD" $((( LJ_RAW == 0 )) && printf "%s" "LJ_LOG_VHOST") LJ_LOG_DATA
read -r -t "$LJ_TTNM" -u "$LJ_INPUT_FD" $((( LJ_RAW == 0 )) && printf "%s" "LJ_LOG_VHOST") LJ_LOG_DATA
LJ_ERR=$?
if (( LJ_ERR > 128 )); then
# If 'read' timed out, set a marker.
echo "read timed out"
LJ_TIMED_OUT=1
elif (( LJ_ERR == 1 )); then
# An error occured (the pipe was likely closed by the server).
# The sleep will prevent the script eating CPU if the fifo is closed by the server.
echo "read error 1"
sleep 1
continue
elif (( LJ_ERR != 0 )); then
# Unhandled error - sleep for a second and try again.
# The sleep will prevent the script eating CPU if the fifo is closed by the server.
syslog "error" "unhandled return code from 'read': $ERR"
sleep 1
continue
fi
# Expand the strftime-encoded strings in the template.
LJ_EXPANDED_TEMPLATE="$(printf "%($LJ_TEMPLATE)T")"
echo "Exp template: $LJ_EXPANDED_TEMPLATE"
# The old expanded template needs to be seeded if it's not already set from a previous loop.
# Set it to the same as the current expanded template so that no rotation is done the first time around.
[[ -z "$LJ_OLD_TEMPLATE" ]] && LJ_OLD_TEMPLATE="$LJ_EXPANDED_TEMPLATE"
echo "Old template: $LJ_OLD_TEMPLATE"
# If the 'read' timed out and the exapnded template is the same as the old expanded template, there is no need to do anything.
(( LJ_TIMED_OUT == 1 )) && [[ "$LJ_EXPANDED_TEMPLATE" == "$LJ_OLD_TEMPLATE" ]] && continue
echo "Checking basedir"
# Make sure the base directory still exists - it could have disappeared while we were blocked in 'read'.
# Note: We won't make this directory ourselves - as it's the base directory it should exist on the system to start with.
[[ ! -e "$LJ_BASEDIR" ]] && {
syslog "error" "directory no longer exists: $LJ_BASEDIR"
continue
}
is_dir "$LJ_BASEDIR" || continue
echo "Old: $LJ_OLD_TEMPLATE"
echo "New: $LJ_EXPANDED_TEMPLATE"
# If the new expanded template is different from the old, close and reopen all the logs and queue for compression (if required).
[[ "$LJ_EXPANDED_TEMPLATE" != "$LJ_OLD_TEMPLATE" ]] && {
echo "Closing FDs..."
# Loop through all the open FDs.
for LJ_SITE in "${!LJ_FDS[@]}"; do
# Generate the fully expanded filename from the strftime-expanded template and the site name from the array.
LJ_FILENAME="$LJ_BASEDIR/${LJ_EXPANDED_TEMPLATE//\{\}/$LJ_SITE}"
echo "Filename: $LJ_FILENAME"
# Close the file descriptor for the old log file path.
echo "Closing FD ${LJ_FDS[$LJ_SITE]}"
{ exec {LJ_FDS[$LJ_SITE]}>&-; } 2>/dev/null || {
syslog "warn" "failed to close FD ${LJ_FDS[$LJ_SITE]} for $LJ_SITE"
# Don't 'continue' here as we should still be able to open the new log file. But, it'll leave an FD open indefinitely...
}
unset "LJ_FDS[$LJ_SITE]"
# Create (if necessary) and verify new log file dir.
echo "Make/check new dir: ${LJ_FILENAME%/*}"
make_dir "${LJ_FILENAME%/*}" || continue
is_dir "${LJ_FILENAME%/*}" || continue
# Open the new log file.
open_fd "$LJ_SITE" "$LJ_FILENAME" || continue
# Fix the now broken symlink - point it to the currently active log file.
[[ "$LJ_LINKFILE" ]] && {
echo "Fix link"
# Note: This will clobber anything that already exists with the link name.
rm -rf "${LJ_FILENAME%/*}/$LJ_LINKFILE"
ln -sf "${LJ_FILENAME##*/}" "${LJ_FILENAME%/*}/$LJ_LINKFILE" 2>/dev/null || {
syslog "error" "failed to fix link: ${LJ_FILENAME%/*}/$LJ_LINKFILE"
}
}
# Add the old log file to the compression jobs task list.
(( LJ_COMPRESS != 0 )) && {
echo "Adding to jobs list: $LJ_BASEDIR/${LJ_OLD_TEMPLATE//\{\}/$LJ_SITE}"
LJ_JOBS+=([$LJ_BASEDIR/${LJ_OLD_TEMPLATE//\{\}/$LJ_SITE}]="")
}
done
}
# If the 'read' did not time out, there must be a log line to write.
(( LJ_TIMED_OUT == 0 )) && {
# If not in raw mode, an unset LJ_LOG_VHOST is an error.
# If in raw mode, we need a placeholder for the LJ_FDS array element as LJ_LOG_VHOST would normally be unset.
if (( LJ_RAW == 0 )); then
[[ ! "$LJ_LOG_VHOST" ]] && {
syslog "error" "empty VirtualHost site identifier"
continue
}
else
LJ_LOG_VHOST="*raw*"
fi
# Generate the fully expanded filename from the strftime-expanded template.
LJ_FILENAME="$LJ_BASEDIR/${LJ_EXPANDED_TEMPLATE//\{\}/$LJ_LOG_VHOST}"
echo "Filename: $LJ_FILENAME"
echo "Make/check new dir: ${LJ_FILENAME%/*}"
make_dir "${LJ_FILENAME%/*}" || continue
is_dir "${LJ_FILENAME%/*}" || continue
# If no FD is open for the VHOST, open it.
[[ ! "${LJ_FDS[$LJ_LOG_VHOST]}" ]] && {
open_fd "$LJ_LOG_VHOST" "$LJ_FILENAME" || continue
}
echo "VHOST: $LJ_LOG_VHOST"
echo "DATA: $LJ_LOG_DATA"
echo "FD: ${LJ_FDS[$LJ_LOG_VHOST]}"
# Write the log entry.
printf "%s\n" "$LJ_LOG_DATA" >&"${LJ_FDS[$LJ_LOG_VHOST]}"
echo "Written log"
# Flush data to disk if requested.
(( LJ_FLUSH == 1 )) && {
echo "Flush log"
sync "$LJ_FILENAME" 2>/dev/null || syslog "warn" "failed to sync: $LJ_FILENAME"
}
# Create symlink to the currently active log file.
[[ "$LJ_LINKFILE" ]] && {
[[ "$(readlink -n "${LJ_FILENAME%/*}/$LJ_LINKFILE")" != "${LJ_FILENAME##*/}" ]] && {
echo "Update link"
# Note: This will clobber anything that already exists with the link name.
rm -rf "${LJ_FILENAME%/*}/$LJ_LINKFILE"
ln -sf "${LJ_FILENAME##*/}" "${LJ_FILENAME%/*}/$LJ_LINKFILE" 2>/dev/null || {
syslog "error" "failed to create link: ${LJ_FILENAME%/*}/$LJ_LINKFILE"
}
}
}
}
# Store the last used filename.
LJ_OLD_TEMPLATE="$LJ_EXPANDED_TEMPLATE"
echo "End cycle"
done