#!/bin/bash
# lumberjack/lumberjack
# (Bash executable file; listed as 465 lines, 16 KiB.)
# Copyright (c) 2018:
# Darren 'Tadgy' Austin <darren (at) afterdark.org.uk>
# Licensed under the terms of the GNU General Public License version 3.
#
#........1.........2.........3.........4.........5.........6.........7.........8.........9.........0.........1.........2.........3.....:...4
# Script details.
# LJ_NAME is the last path component of the name the script was invoked as; it prefixes the error messages
# printed by die(), appears in the help/version text and is used as the syslog tag.
LJ_NAME="${0##*/}"
# LJ_VERSION is the version number printed by display_version().
LJ_VERSION="0.1.5"
die() {
  # Report a fatal error on stderr, prefixed with the script name, then terminate with exit status 1.
  #   $1  The text of the error message to display on stderr.
  local LJ_MESSAGE="$1"
  {
    printf '%s: %s\n' "$LJ_NAME" "$LJ_MESSAGE"
  } >&2
  exit 1
}
display_help() {
  # Print the usage screen on stdout.
  # The text reports the current values of LJ_COMPRESSOR, LJ_COMPRESSOR_ARGS, LJ_MAXJOBS, LJ_DIR_UMASK and
  # LJ_FILE_UMASK as the defaults, so those variables must be set before this function is called.
  # The here-document is unquoted so that those variables (and LJ_NAME) are expanded into the text.
  # |........1.........2.........3.........4.........5.........6.........7.........8
  cat <<-EOF
Usage: $LJ_NAME [options] <basedir> <template>
Process input (possibly including an httpd VirtualHost site identifier) from
stdin or a FIFO and write a log line to a log file based upon the <basedir> and
<template>.
Options (all of which are optional):
-ca "<arg>" Set the compression command arguments. Default: ${LJ_COMPRESSOR_ARGS[@]}.
The quotes are required if more than one <arg> is supplied.
-cc <util> Set the compression command to use. Default: $LJ_COMPRESSOR.
-f Request flushing of the log file to disk after every write.
This may significantly reduce performance and result in a lot of
disk writes. Best to let the kernel do appropriate buffering.
-h Display this help.
-i <fifo> Read input from the FIFO at <fifo>, rather than stdin.
-j <jobs> Maximum number of compression jobs to have active at once.
Default: $LJ_MAXJOBS. Don't set this too high.
-l <link> Create a symlink named <link> to the currently active log file.
The <link> is created relative to <basedir>. In normal mode,
the link name may include the same '{}' sequence and %-escaped
formatting as the <template> (see below). In raw mode (-r), the
'{}' is not allowed, but % escape sequences can still be used.
WARNING: The (expanded) location of this link will be WIPED OUT!
-r Raw logging mode. In this mode, no processing of the log line
for an httpd VirtualHost site identifier is performed - log
lines are written verbatim to the log filename constructed from
<basedir> and <template>.
-ud <umask> Set the umask used when creating directories. Default: $LJ_DIR_UMASK.
Useful umasks are: 077, 066, 026 and 022.
-uf <umask> Set the umask used when creating files. Default: $LJ_FILE_UMASK.
Useful umasks are: 077 and 022.
-v Display version and copyright information.
-z Enable compression of the old log files.
-- Cease option processing and begin argument parsing.
Option processing ceases with the first non-option argument or --.
Arguments (all of which are mandatory):
<basedir> The base directory of where to write the log files.
<template> The filename template. When in normal mode, the template must
include at least one occurrence of '{}', which is replaced with
the site name from the VirtualHost identifier. In raw mode
(-r), the '{}' should not be included in the template. The
template may also include any %-escaped format strings
recognised by the strftime(3) function. See below for examples.
Examples:
When used with the httpd CustomLog directive, using %v as the first log format
string:
"|$LJ_NAME '/path/to/logsdir' '{}/logs/access-log-%Y-%m'"
Where the httpd VirtualHost identifier is 'example.com', would write logs
(with the site identifier stripped) to the filename:
/path/to/logsdir/example.com/logs/access-log-<year>-<month>
"|$LJ_NAME '/path/to/logsdir' '{}/logs/{}-access-log-%Y-%m'"
Where the httpd VirtualHost identifier is 'example.com', would write logs
(with the site identifier stripped) to the filename:
/path/to/logsdir/example.com/logs/example.com-access-log-<year>-<month>
When used with the httpd ErrorLog directive (both examples are equivalent):
"|$LJ_NAME -r '/path/to/logsdir' 'logs/error-log-%Y-%m'"
Would write raw log lines to the filename:
/path/to/logsdir/logs/error-log-<year>-<month>
"|$LJ_NAME -r '/path/to/logsdir/logs' 'error-log-%Y-%m'"
Equivalent to the above; would write raw log lines to the filename:
/path/to/logsdir/logs/error-log-<year>-<month>
EOF
}
display_version() {
  # Print the program name, version number (LJ_NAME / LJ_VERSION), copyright and licence notice on stdout.
  # |........1.........2.........3.........4.........5.........6.........7.........8
  cat <<-EOF
$LJ_NAME v$LJ_VERSION.
Copyright (c) 2018 Darren 'Tadgy' Austin <darren (at) afterdark.org.uk>.
Licensed under the terms of the GNU GPL v3 <http://gnu.org/licenses/gpl.html>.
This program is free software; you can modify or redistribute it in accordance
with the GNU GPL. However, it comes with ABSOLUTELY NO WARRANTY; not even the
implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
EOF
}
is_dir() {
  # Check that a path names a directory.
  #   $1  The path to verify is a directory.
  # Returns 0 when it is a directory.  Returns 1 when the argument is empty (silently) or when the path is
  # not a directory (after reporting the problem through syslog).
  local LJ_CANDIDATE="$1"
  if [[ -z "$LJ_CANDIDATE" ]]; then
    return 1
  fi
  if [[ -d "$LJ_CANDIDATE" ]]; then
    return 0
  fi
  syslog "error" "not a directory: $LJ_CANDIDATE"
  return 1
}
make_dir() {
  # Create a directory (and any missing parents) unless something already exists at that path.
  #   $1  The directory to create.
  # Returns 1 for an empty argument or when the directory cannot be created (the latter is reported through
  # syslog); returns 0 otherwise.  Note that an existing non-directory also yields 0 - callers follow up
  # with is_dir.
  local LJ_TARGET="$1"
  if [[ -z "$LJ_TARGET" ]]; then
    return 1
  fi
  if [[ -e "$LJ_TARGET" ]]; then
    return 0
  fi
  # The directory umask is applied to the whole shell, not just to this mkdir.
  umask "$LJ_DIR_UMASK"
  if ! mkdir -p "$LJ_TARGET" 2>/dev/null; then
    syslog "error" "failed to create directory: $LJ_TARGET"
    return 1
  fi
  return 0
}
open_fd() {
  # Open a log file for appending and remember the new file descriptor in the LJ_FDS array.
  #   $1  The site identifier in the array.
  #   $2  The log file path to open.
  # Returns 1 if either argument is empty or the file cannot be opened (reported through syslog), else 0.
  local LJ_KEY="$1" LJ_LOGPATH="$2"
  if [[ -z "$LJ_KEY" || -z "$LJ_LOGPATH" ]]; then
    return 1
  fi
  # The file umask is applied to the whole shell, not just to this open.
  umask "$LJ_FILE_UMASK"
  # The {name} redirection form has bash pick a free descriptor and store its number in LJ_FDS[key].
  if exec {LJ_FDS[$LJ_KEY]}>>"$LJ_LOGPATH"; then
    return 0
  fi
  syslog "error" "failed to open log file for writing: $LJ_LOGPATH"
  return 1
}
sigchld_handler() {
  # SIGCHLD trap: drop compression jobs whose process has gone away from LJ_JOBS, adjust the LJ_RUNNING
  # count, then try to start more queued jobs.  Job control is switched off again once nothing is running.
  local LJ_ENTRY LJ_PID
  for LJ_ENTRY in "${!LJ_JOBS[@]}"; do
    LJ_PID="${LJ_JOBS[$LJ_ENTRY]}"
    # An empty value means the job has not been started yet, so there is nothing to collect.
    [[ -n "$LJ_PID" ]] || continue
    # 'kill -0' succeeding means the process can still be signalled - leave it alone.
    if kill -0 "$LJ_PID" >/dev/null 2>&1; then
      continue
    fi
    wait "$LJ_PID"
    unset "LJ_JOBS[$LJ_ENTRY]"
    (( LJ_RUNNING-- ))
  done
  start_compression_jobs
  (( LJ_RUNNING == 0 )) && set +bm
}
sigterm_handler() {
  # SIGTERM trap: close every file descriptor recorded in LJ_FDS, then exit with status 0.
  local LJ_KEY
  for LJ_KEY in "${!LJ_FDS[@]}"; do
    # Any error output from the close is discarded.
    {
      exec {LJ_FDS[$LJ_KEY]}>&-
    } 2>/dev/null
  done
  exit 0
}
start_compression_jobs() {
  # Start queued compression jobs until LJ_MAXJOBS are running or no queued entry remains.
  # LJ_JOBS is keyed by the path of the file to compress; an empty value marks an entry that has not been
  # started, otherwise the value is the PID of its compressor process.
  local LJ_QUEUED
  while (( LJ_RUNNING < LJ_MAXJOBS )); do
    for LJ_QUEUED in "${!LJ_JOBS[@]}"; do
      # Entries that already carry a PID have been started - skip them.
      if [[ -n "${LJ_JOBS[$LJ_QUEUED]}" ]]; then
        continue
      fi
      # Enable monitor mode and immediate job notification before launching the background job.
      set -bm
      "$LJ_COMPRESSOR" "${LJ_COMPRESSOR_ARGS[@]}" "$LJ_QUEUED" >/dev/null 2>&1 &
      LJ_JOBS[$LJ_QUEUED]="$!"
      (( LJ_RUNNING++ ))
      # Go back and re-check the job limit before starting anything else.
      continue 2
    done
    # The scan found nothing left to start.
    break
  done
}
syslog() {
  # Send a message to the system log via logger(1), using the 'user' facility and LJ_NAME as the tag.
  #   $1  The syslog level at which to log the message.
  #   $2  The text of the message to log.
  # Returns 1 if either argument is empty, otherwise the exit status of logger (whose stderr is discarded).
  local LJ_LEVEL="$1" LJ_TEXT="$2"
  if [[ -z "$LJ_LEVEL" || -z "$LJ_TEXT" ]]; then
    return 1
  fi
  logger --id="$$" -p "user.$LJ_LEVEL" -t "$LJ_NAME" "$LJ_LEVEL: $LJ_TEXT" 2>/dev/null
}
# Some defaults.  Each can be changed with the command line option noted beside it.
# Compression of rotated logs.
LJ_COMPRESS=0                # -z
LJ_COMPRESSOR="gzip"         # -cc  Use gzip by default as log processing utils can often natively read gzipped files.
LJ_COMPRESSOR_ARGS=( "-9" )  # -ca
LJ_MAXJOBS="4"               # -j
# Input and output behaviour.
LJ_INPUT="/dev/stdin"        # -i
LJ_RAW=0                     # -r
LJ_FLUSH=0                   # -f
LJ_LINKFILE=""               # -l
# Permissions for newly created directories and files.
LJ_DIR_UMASK="022"           # -ud
LJ_FILE_UMASK="022"          # -uf

# Trap signals.
# SIGHUP is ignored, SIGUSR1 just logs a "ping" message, SIGTERM and SIGCHLD run their handler functions.
trap '' SIGHUP
trap 'syslog "info" "received SIGUSR1 ping request"' SIGUSR1
trap 'sigterm_handler' SIGTERM
trap 'sigchld_handler' SIGCHLD
# Parse command line options.
# Every arm validates its argument (where it takes one), records the setting and shifts the consumed words
# off the positional parameters.  Parsing stops at '--' or at the first word that does not start with '-',
# leaving the <basedir> and <template> arguments in place for the checks that follow.
while :; do
  case "$1" in
    -ca)
      # Set the compression command arguments.
      [[ ! "$2" ]] && die "missing argument to -ca"
      # Split the argument into words on whitespace.  Using 'read' rather than an unquoted expansion keeps
      # the words from also being subjected to pathname expansion.  With -d '' the whole argument is read
      # and 'read' reports end-of-input with a non-zero status, which is expected here.
      LJ_COMPRESSOR_ARGS=()
      read -r -d '' -a LJ_COMPRESSOR_ARGS <<<"$2" || :
      shift 2
      continue
      ;;
    -cc)
      # Set the compression command to use.
      [[ ! "$2" ]] && die "missing argument to -cc"
      # The command is accepted if running it with --help succeeds.
      "$2" --help >/dev/null 2>&1 || die "$2: invalid compressor command"
      LJ_COMPRESSOR="$2"
      shift 2
      continue
      ;;
    -f)
      # Flush files after every write.
      LJ_FLUSH=1
      shift
      continue
      ;;
    -h|-help|--help)
      # Show the help screen and exit.
      display_help
      exit 0
      ;;
    -i)
      # Use a FIFO instead of stdin - the FIFO must already exist (use 'mkfifo' first).
      [[ ! "$2" ]] && die "missing argument to -i"
      [[ "${2:0:1}" != "/" ]] && die "$2: must be an absolute path"
      [[ ! -e "$2" ]] && die "$2: no such file"
      [[ ! -p "$2" ]] && die "$2: not a FIFO"
      LJ_INPUT="$2"
      shift 2
      continue
      ;;
    -j)
      # Set the maximum number of concurrent compression jobs to have active at once.
      [[ ! "$2" ]] && die "missing argument to -j"
      # The pattern is anchored so that only a plain decimal number reaches the arithmetic below.
      [[ ! "$2" =~ ^[0-9]+$ ]] && die "$2: invalid number of jobs"
      # Force base 10 so that values with leading zeros (such as 08) are not taken as invalid octal.
      (( 10#$2 == 0 )) && die "$2: invalid number of jobs"
      LJ_MAXJOBS="$(( 10#$2 ))"
      shift 2
      continue
      ;;
    -l)
      # Set the link name to use.
      [[ ! "$2" ]] && die "missing argument to -l"
      [[ "${2:0:1}" == "/" ]] && die "$2: link name cannot begin with '/'"
      [[ "${2: -1:1}" == "/" ]] && die "$2: link name cannot end with '/'"
      LJ_LINKFILE="$2"
      shift 2
      continue
      ;;
    -r)
      # Set raw mode.
      LJ_RAW=1
      shift
      continue
      ;;
    -ud)
      # Set the directory umask.
      [[ ! "$2" ]] && die "missing argument to -ud"
      # Three octal digits, optionally preceded by a zero; anchored so nothing else can tag along.
      [[ ! "$2" =~ ^0?[0-7]{3}$ ]] && die "$2: invalid umask"
      LJ_DIR_UMASK="$2"
      shift 2
      continue
      ;;
    -uf)
      # Set the file umask.
      [[ ! "$2" ]] && die "missing argument to -uf"
      # Three octal digits, optionally preceded by a zero; anchored so nothing else can tag along.
      [[ ! "$2" =~ ^0?[0-7]{3}$ ]] && die "$2: invalid umask"
      LJ_FILE_UMASK="$2"
      shift 2
      continue
      ;;
    -v)
      # Show the version and exit.
      display_version
      exit 0
      ;;
    -z)
      # Compress logs once they are rotated.
      LJ_COMPRESS=1
      shift
      continue
      ;;
    --)
      # Stop processing options. Everything further is taken as an argument.
      shift
      break
      ;;
    -*)
      # Anything else that looks like an option is a mistake; say so rather than treating it as <basedir>.
      die "$1: invalid option"
      ;;
    *)
      # First non-option argument (or no arguments left).
      break
      ;;
  esac
done
# Exactly two non-option arguments must remain; anything else is a usage error.
if (( $# != 2 )); then
  printf "%s\n" "$LJ_NAME: incorrect number of non-option arguments" >&2
  printf "%s\n" "Try: $LJ_NAME -h" >&2
  exit 1
fi

# The remaining arguments are the base directory and the template, in that order.
LJ_BASEDIR="$1"
LJ_TEMPLATE="$2"

# Sanity checking.
# The base directory must be an absolute path to an existing directory.
if [[ "${LJ_BASEDIR:0:1}" != "/" ]]; then
  die "$LJ_BASEDIR: must be an absolute path"
fi
if [[ ! -e "$LJ_BASEDIR" ]]; then
  die "$LJ_BASEDIR: base directory does not exist"
fi
if [[ ! -d "$LJ_BASEDIR" ]]; then
  die "$LJ_BASEDIR: not a directory"
fi
# The template has to name a file, so it cannot end in a slash.
if [[ "${LJ_TEMPLATE: -1:1}" == "/" ]]; then
  die "$LJ_TEMPLATE: template cannot end with '/'"
fi
# The '{}' placeholder is mandatory in normal mode and forbidden (in template and link name) in raw mode.
if (( LJ_RAW == 0 )); then
  if [[ ! "$LJ_TEMPLATE" =~ .*\{\} ]]; then
    die "$LJ_TEMPLATE: template must include at least one '{}'"
  fi
else
  if [[ "$LJ_TEMPLATE" =~ .*\{\} ]]; then
    die "$LJ_TEMPLATE: template cannot include '{}'"
  fi
  if [[ "$LJ_LINKFILE" =~ .*\{\} ]]; then
    die "$LJ_LINKFILE: link name cannot include '{}'"
  fi
fi

# Open log file descriptors, keyed by site identifier.
declare -A LJ_FDS
# Files waiting for (or undergoing) compression, keyed by file path.
declare -A LJ_JOBS
# How many compression jobs are active right now.
LJ_RUNNING=0
# Main loop.  Each pass: start any queued compression jobs, wait for a log line (or for the top of the next
# minute), rotate the open log files if the strftime-expanded template has changed, then write the line that
# was read (if any) to the log file for its site and maintain the "current log" symlink.
# Pattern matching a '..' path component anywhere in a site identifier.
LJ_DOTDOT_RE='(^|/)\.\.(/|$)'
while :; do
  # Reset used variables.
  unset LJ_LOG_VHOST LJ_LOG_DATA
  LJ_TIMED_OUT=0

  # Start compression jobs if there's any in the queue.
  start_compression_jobs

  # The time until the top of the next minute - this is used for the 'read' timeout so that
  # closing log files and compression can still occur even if no log lines are written.
  # Note: This does mean we can't have per second log files, but I can't see that being a requirement.
  # The explicit -1 argument means "now"; bash only defaults a missing %(...)T argument to the current time
  # from version 4.3 onwards.  The 10# prefix stops seconds such as "08" being read as invalid octal.
  printf -v LJ_SECOND '%(%S)T' -1
  LJ_TTNM="$(( 60 - 10#$LJ_SECOND ))"
  # A leap second (:60) would produce a timeout of 0, with which 'read' returns without reading any data.
  (( LJ_TTNM < 1 )) && LJ_TTNM=1

  # Read the log line.
  if (( LJ_RAW == 0 )); then
    # Normal mode: the first word is the VirtualHost site identifier, the remainder is the log entry.
    read -r -t "$LJ_TTNM" LJ_LOG_VHOST LJ_LOG_DATA <"$LJ_INPUT"
  else
    # Raw mode: an empty IFS stops 'read' trimming leading and trailing whitespace, so the line really is
    # written verbatim.
    IFS= read -r -t "$LJ_TTNM" LJ_LOG_DATA <"$LJ_INPUT"
  fi
  LJ_ERR="$?"
  if (( LJ_ERR > 128 )); then
    # If 'read' timed out, set a marker.
    LJ_TIMED_OUT=1
  elif (( LJ_ERR == 1 )); then
    if [[ "$LJ_INPUT" == "/dev/stdin" ]]; then
      # stdin has been closed by the parent, quit gracefully by raising a SIGTERM.
      kill -TERM "$$"
    elif [[ -z "$LJ_LOG_VHOST$LJ_LOG_DATA" ]]; then
      # End of input on the FIFO with nothing read: there is no log line to write, so treat this pass
      # like a timeout (rotation is still carried out below if it is due).
      LJ_TIMED_OUT=1
    fi
  elif (( LJ_ERR != 0 )); then
    # Unhandled error - sleep for a second and try again.
    syslog "error" "unhandled return code from 'read': $LJ_ERR"
    sleep 1
    continue
  fi

  # Expand the strftime-encoded strings in the template ('printf -v' avoids a subshell for every line).
  printf -v LJ_EXPANDED_TEMPLATE "%($LJ_TEMPLATE)T" -1

  # The old expanded template needs to be seeded if it's not already set from a previous loop.
  # Set it to the same as the current expanded template so that no rotation is done the first time around.
  [[ -z "$LJ_OLD_TEMPLATE" ]] && LJ_OLD_TEMPLATE="$LJ_EXPANDED_TEMPLATE"

  # If the 'read' timed out and the expanded template is the same as the old expanded template, there is no need to do anything.
  (( LJ_TIMED_OUT == 1 )) && [[ "$LJ_EXPANDED_TEMPLATE" == "$LJ_OLD_TEMPLATE" ]] && continue

  # Make sure the base directory still exists - it could have disappeared while we were blocked in 'read'.
  # Note: We won't make this directory ourselves - as it's the base directory it should exist on the system to start with.
  # LJ_OLD_TEMPLATE is deliberately left untouched on these paths so a pending rotation is retried next time.
  [[ ! -e "$LJ_BASEDIR" ]] && {
    syslog "error" "directory no longer exists: $LJ_BASEDIR"
    continue
  }
  is_dir "$LJ_BASEDIR" || continue

  # If the new expanded template is different from the old, close and reopen all the logs and queue for compression (if required).
  if [[ "$LJ_EXPANDED_TEMPLATE" != "$LJ_OLD_TEMPLATE" ]]; then
    # Loop through all the open FDs.
    for LJ_SITE in "${!LJ_FDS[@]}"; do
      # Generate the fully expanded filename from the strftime-expanded template and the site name from the array.
      LJ_FILENAME="$LJ_BASEDIR/${LJ_EXPANDED_TEMPLATE//\{\}/$LJ_SITE}"

      # Close the file descriptor for the old log file path.
      { exec {LJ_FDS[$LJ_SITE]}>&-; } 2>/dev/null || {
        syslog "warn" "failed to close FD ${LJ_FDS[$LJ_SITE]} for $LJ_SITE"
        # Don't 'continue' here as we should still be able to open the new log file. But, it'll leave an FD open indefinitely...
      }
      unset "LJ_FDS[$LJ_SITE]"

      # Add the old log file to the compression jobs task list.  This is done before the new log file is
      # opened so that a failure below cannot leave the old file permanently uncompressed.
      (( LJ_COMPRESS != 0 )) && LJ_JOBS["$LJ_BASEDIR/${LJ_OLD_TEMPLATE//\{\}/$LJ_SITE}"]=""

      # Create (if necessary) and verify new log file dir.
      make_dir "${LJ_FILENAME%/*}" || continue
      is_dir "${LJ_FILENAME%/*}" || continue

      # Open the new log file.
      open_fd "$LJ_SITE" "$LJ_FILENAME" || continue

      # Fix the now broken symlink - point it to the currently active log file.
      if [[ -n "$LJ_LINKFILE" ]]; then
        printf -v LJ_LINKFILE_EXPANDED "%($LJ_LINKFILE)T" -1
        LJ_LINKPATH="$LJ_BASEDIR/${LJ_LINKFILE_EXPANDED//\{\}/$LJ_SITE}"
        # Note: This will clobber anything that already exists with the link name.
        rm -rf "$LJ_LINKPATH"
        ln -sfr "$LJ_FILENAME" "$LJ_LINKPATH" 2>/dev/null || {
          syslog "error" "failed to fix link: $LJ_LINKPATH"
        }
      fi
    done

    # The rotation has now been carried out.  Record that here rather than at the bottom of the loop: the
    # write section below can 'continue', and skipping the update would repeat the whole rotation (and
    # re-queue files that are already being compressed) on the next pass.
    LJ_OLD_TEMPLATE="$LJ_EXPANDED_TEMPLATE"
  fi

  # If the 'read' did not time out, there must be a log line to write.
  if (( LJ_TIMED_OUT == 0 )); then
    # If not in raw mode, an unset LJ_LOG_VHOST is an error.
    # If in raw mode, we need a placeholder for the LJ_FDS array element as LJ_LOG_VHOST would normally be unset.
    if (( LJ_RAW == 0 )); then
      if [[ -z "$LJ_LOG_VHOST" ]]; then
        syslog "error" "empty VirtualHost site identifier"
        continue
      fi
      # The identifier comes from the input stream and becomes part of the log file and link paths (the
      # latter is removed with 'rm -rf'), so a '..' component must not be allowed to climb out of <basedir>.
      if [[ "$LJ_LOG_VHOST" =~ $LJ_DOTDOT_RE ]]; then
        syslog "error" "invalid VirtualHost site identifier: $LJ_LOG_VHOST"
        continue
      fi
    else
      LJ_LOG_VHOST="*raw*"
    fi

    # Generate the fully expanded filename from the strftime-expanded template.
    LJ_FILENAME="$LJ_BASEDIR/${LJ_EXPANDED_TEMPLATE//\{\}/$LJ_LOG_VHOST}"

    # Create/check the log file directory.
    make_dir "${LJ_FILENAME%/*}" || continue
    is_dir "${LJ_FILENAME%/*}" || continue

    # If no FD is open for the VHOST, open it.
    if [[ -z "${LJ_FDS[$LJ_LOG_VHOST]}" ]]; then
      open_fd "$LJ_LOG_VHOST" "$LJ_FILENAME" || continue
    fi

    # Write the log entry.
    printf "%s\n" "$LJ_LOG_DATA" >&"${LJ_FDS[$LJ_LOG_VHOST]}"

    # Flush data to disk if requested.
    if (( LJ_FLUSH == 1 )); then
      sync "$LJ_FILENAME" 2>/dev/null || syslog "warn" "failed to sync: $LJ_FILENAME"
    fi

    # Create symlink to the currently active log file.
    if [[ -n "$LJ_LINKFILE" ]]; then
      printf -v LJ_LINKFILE_EXPANDED "%($LJ_LINKFILE)T" -1
      LJ_LINKPATH="$LJ_BASEDIR/${LJ_LINKFILE_EXPANDED//\{\}/$LJ_LOG_VHOST}"
      # Only touch the link when it does not already resolve to the active log file (same device:inode).
      if [[ "$(stat -L --printf="%d:%i" "$LJ_LINKPATH" 2>/dev/null)" != \
            "$(stat --printf="%d:%i" "$LJ_FILENAME" 2>/dev/null)" ]]; then
        # Note: This will clobber anything that already exists with the link name.
        rm -rf "$LJ_LINKPATH"
        ln -sfr "$LJ_FILENAME" "$LJ_LINKPATH" 2>/dev/null || {
          syslog "error" "failed to create link: $LJ_LINKPATH"
        }
      fi
    fi
  fi
done