#!/usr/bin/perl

eval 'exec /usr/bin/perl  -S $0 ${1+"$@"}'
    if 0; # not running under some shell

use warnings;
use strict;
use XMLTV::ProgressBar;
use XMLTV::Memoize; XMLTV::Memoize::check_argv('get_octets');
use XMLTV::DST;
use XMLTV::Supplement qw/GetSupplement SetSupplementRoot/;
use XMLTV::Options qw/ParseOptions/;
use XMLTV::Configure::Writer;
use XMLTV::Ask;
use XMLTV::Lineup;
use File::Path;
use File::Basename;
use LWP::UserAgent;
use HTTP::Cache::Transparent;
use Encode qw/decode encode/;
use Date::Manip;
use HTML::Entities;
use IO::Scalar; # used for configuration to write channels to string

##############################################
#################### TODO ####################
##############################################

# - allow renaming of title based on given title and desc
#   (some docu series (Time Shift/Storyville) may drop the series 
#   title and use the episode/subtitle instead as the title.
#   This can break PVR rules.
#
# - revisit handling of DST changeovers to ensure
#   both changeovers are handled appropriately
#   and that attempts are made to handle incorrectly
#   flagged programmes
#
# - if a programme length is wildly wrong, override
#   the ignoring of later programs
#
# - look forward more than 1 programme when
#   looking for and correcting overlapping
#   programmes
#
# - include actor role data where available
#
# - include review details where available

###############################################
################## VARIABLES ##################
###############################################

# Grabber name
my $grabber_name = 'tv_grab_uk_rt';

# Grabber version
my $grabber_cvs_id = '$Id: tv_grab_uk_rt.in,v 1.342 2011/06/19 06:50:36 knowledgejunkie Exp $';
my $grabber_version;

if ($grabber_cvs_id =~ m!\$Id: [^,]+,v (\S+) ([0-9/: -]+)!) {
    $grabber_version = "$1, $2";
}
else {
    $grabber_version = "Unknown";
}

# Location of Radio Times channel index file
my $rt_root_dir = 'http://xmltv.radiotimes.com/xmltv';
my $rt_channels_uri = "$rt_root_dir/channels.dat";

# The format of the Radio Times source data (set to strict UTF-8)
my $source_encoding = "utf-8";
# Default XML output encoding to use (set to strict UTF-8)
my $xml_encoding    = "utf-8";

# Required to be displayed by Radio Times
my $rt_copyright 
      = "\n"
      . "     +-----------------------------------------------------+     \n"
      . "     | In accessing this XML feed, you agree that you will |     \n"
      . "     | only access its contents for your own personal and  |     \n"
      . "     |  non-commercial use and not for any commercial or   |     \n"
      . "     |  other purposes, including advertising or selling   |     \n"
      . "     |  any goods or services, including any third-party   |     \n"
      . "     |   software applications available to the general    |     \n"
      . "     |           public. <xmltv.radiotimes.com>            |     \n"
      . "     +-----------------------------------------------------+     \n"
      . "\n";

my %tv_attributes = (
    'source-info-name'    => 'Radio Times XMLTV Service',
    'source-info-url'     => 'http://www.radiotimes.com',
    'source-data-url'     => "$rt_channels_uri",
    'generator-info-name' => "XMLTV/$XMLTV::VERSION, $grabber_name $grabber_version",
    'generator-info-url'  => 'http://www.xmltv.org',
);

# Reciprocal XMLTV/RT ID hashes for the required channel_ids fields, allowing
# RT ID -> XMLTV ID and XMLTV ID -> RT ID lookups
my (%rt_to_xmltv, %xmltv_to_rt);
# Hashes for the optional channel_ids fields, keyed by XMLTV ID
my (%extra_dn, %icon_urls, %channel_offset, %broadcast_hours, %video_quality);

# Do the progress bars need a final update?
my $need_final_update;

#type id source-data-url generator-info-name generator-info-url
my %xmltv_lineup_attributes = (
    'type'                => 'DVB-T',
    'version'             => '1.00',
    'id'                  => 'freeview.co.uk',
    'source-data-url'     => 'tv_grab_uk_rt FreeView channels',
    'generator-info-name' => "XMLTV/$XMLTV::VERSION, $grabber_name $grabber_version",
    'generator-info-url'  => 'http://www.xmltv.org',
);
# Lineup writer
my $lineup_writer;

# Get default location to store cached listings data
my $default_cachedir = get_default_cachedir();

# Set up LWP::UserAgent
my $ua = LWP::UserAgent->new;
$ua->agent("xmltv/$XMLTV::VERSION");
$ua->env_proxy;

# Read all command line options 
my ( $opt, $conf ) = ParseOptions( {
    grabber_name => "$grabber_name",
    version => "$grabber_cvs_id",
    description => "United Kingdom/Republic of Ireland (Radio Times)",
    capabilities => [qw/baseline manualconfig cache preferredmethod tkconfig apiconfig lineups/],
    stage_sub => \&config_stage,
    listchannels_sub => \&list_channels,
    load_old_config_sub => \&load_old_config,
    preferredmethod => 'allatonce',
    defaults => { days => 15, offset => 0, quiet => 0, debug => 0 },
    list_lineups_sub => \&list_lineups,
    get_lineup_sub => \&get_lineup,
} );


################################################################
# At this point, grabbing routines take over from ParseOptions #
################################################################

# UTC required for XMLTV::DST
Date_Init('TZ=UTC');

die "Error: You cannot specify --quiet with --debug, exiting"
    if ($opt->{quiet} && $opt->{debug});

if (not defined( $conf->{channel} )) {
    print STDERR "No channels selected in configfile " .
                 $opt->{'config-file'} . "\n" .
                 "Please run the grabber with --configure.\n";
    exit 1;
}

# New-style config files must include a cachedir entry
if (not defined( $conf->{cachedir} )) {
    print STDERR "No cachedir defined in configfile " .
                 $opt->{'config-file'} . "\n" .
                 "Please run the grabber with --configure.\n";
    exit 1;
}

# Update encoding if seen in new-style config file
if (defined( $conf->{encoding} )) {
    $xml_encoding = $conf->{encoding}->[0];
}

# Enable title processing? Enable it by default if not explicitly disabled
my $title_processing;
if (defined( $conf->{'title-processing'} )) {
    $title_processing = $conf->{'title-processing'}->[0];
}
else {
    $title_processing = 'enabled';
}

# Initialise the cache-directory
init_cachedir( $conf->{cachedir}->[0] );

# Set cache options
#
# MaxAge set to 15 days as Radio Times provides 14 days of listings
# NoUpdate set to 1hr as Radio Times data updated once per day
#
HTTP::Cache::Transparent::init( {
    BasePath       => $conf->{cachedir}->[0],
    MaxAge         => 15*24,
    NoUpdate       => 60*60,
    Verbose        => $opt->{debug},
    ApproveContent => \&check_content_length,
    }
);

# Variables for programme title manipulation
my $have_title_data = 0;
my %non_title_info;           # key = title, value = title
my %mixed_title_subtitle;     # key = title, value = title
my @mixed_subtitle_title;     # array
my %reversed_title_subtitle;  # key = title, value = title
my %replacement_titles;       # key = old title, value = replacement title
my %replacement_episodes;     # key = title, value = hash (where key = old ep, value = new ep)
my %replacement_cats;         # key = title, value = category
my %replacement_title_eps;    # key = 'old_title . '|' . old_ep', value = (new_title, new_ep)
my %flagged_title_eps;        # key = old_title from title fixup routine 8
my %dotdotdot_titles;         # key = replacement title ending with '...' seen in title fixup routine 8
my %replacement_ep_from_desc; # key = title, value = hash (where key = desc, value = new ep)
my %uc_prog_titles;           # key = title, value = title
my %title_in_subtitle_fixed;  # key = 'title . '|' . episode', value = hashref (keys are title and episode)
my %title_in_subtitle_notfixed; # key = 'title . '|' . episode', value = hashref (keys are title and episode)

# Create global hash to store the programme titles for all programmes on all
# channels, as we will process this last after grabbing to determine any
# 'manufactured' titles which may include temporary 'seasonal' information
my %prog_titles;
# hash to store case/punctuation-insensitive variants of titles
my %case_insens_titles;

# Array to store any overlapping programmes that are detected
my @overlapping_progs;

# Hash to store bad character strings and their replacments that are used when
# processing the source data to remove mis-encoded UTF-8 characters
my %utf8_fixups;

# Create hashes to store names/urls of channels with occurences of mis-encoded
# UTF-8 data after our replacement routines have run
my %hasC27FBFchars;
my %hadEFBFBD;
my %hadC3AFC2BFC2BD;

# Create hashes to store uncategories programmes and available categories
# to potentially use for such programmes
my %uncategorised_progs;
my %categories;
my %cats_per_prog;

# Create hashes to store episode details that may still contain series, episode
# or part numbering after processing to handle these has been carried out
my %possible_series_nums;
my %possible_episode_nums;
my %possible_part_nums;

# Hash to store titles containing text that should likely be removed
my %title_text_to_remove;

# Hash to store details of empty source listings
my %empty_listings;

# Track problems during listings retrieval. Currently we exit(1) only if
# listings data is missing for any requested channels
my $warnings = 0;

# Output XMLTV library and grabber versions
if (!$opt->{quiet}) {
    say("Program/library version information:\n");
    say("XMLTV library version: $XMLTV::VERSION");
    say("$grabber_name version: $grabber_version");
    say("  libwww-perl version: $LWP::VERSION");
    say("  Date::Manip version: " . DateManipVersion(1) . "\n");
}

# Determine the modification time of the source data on the RT servers
my $rt_mod_time = get_mod_time($rt_channels_uri);
if ($rt_mod_time) {
    say("\nSource data last updated on: " . $rt_mod_time . "\n") if (!$opt->{quiet});
    $tv_attributes{'date'} = $rt_mod_time;
}

# Retrieve list of all channels currently available
my $available_channels = load_available_channels();
# Now ensure configured channels are still available to download
my $wanted_chs = check_configured_channels($available_channels);

###############################################
############### GRAB THE DATA #################
###############################################

# Configure output and write XMLTV data - header, channels, listings, and footer
my $writer;
setup_xmltv_writer();
write_xmltv_header();
write_channel_list($available_channels, $wanted_chs);
write_listings_data($available_channels, $wanted_chs);
write_xmltv_footer();

# Print out optional debug info for titles, bad utf-8 chars and categories
if ($opt->{debug}) {
    if ($title_processing eq 'enabled') {
        print_titles_with_colons();
        print_titles_with_hyphens();
        print_new_titles();

        print_uc_titles_post();
        print_title_variants();
        print_titles_inc_years();
        print_flagged_title_eps();
        print_dotdotdot_titles();
        print_title_in_subtitle();

        print_categories();
        print_uncategorised_progs();
        print_cats_per_prog();
    }
    
    print_possible_prog_numbering();
    print_overlapping_progs();
    
    print_misencoded_utf8_data();
    print_empty_listings();
}

# Give a useful exit status if data for some channels was not downloaded
if ($warnings) {
    if (!$opt->{quiet}) {
        say("\nFinished, but listings for some channels are missing. " .
            "Check error log.\n");
    }
    exit(1);
}
else {
    if (!$opt->{quiet}) {
        say("\nFinished!\n");
    }
    exit(0);
}

###############################################
################ SUBROUTINES ##################
###############################################

# Convenience debugging method
sub t {
    my ($message) = @_;
    if ($opt->{debug}) {
        print STDERR $message . "\n";
    }
}

# Convenience method for use with XMLTV::Memoize. Only return content
# after a successful response. We require access to the raw octets via
# $resp->content in order to be able to process the data for double and
# mis-encoded UTF-8 content. Calling $resp->decoded_content or using
# LWP::Simple::get() (versions of LWP >=5.827) would not permit this.
sub get_octets {
    my $resp = $ua->get(shift @_);
    if ($resp->is_error) {
        return undef;
    }
    else {
        return $resp->content;
    }
}

# Get the last-modified time of a successful HTTP Response object. Return
# undef on error
sub get_mod_time {
    my $resp = $ua->get(shift @_);
    if ($resp->is_error) {
        return undef;
    }
    else {
        return $resp->header('Last-Modified');
    }
}

# Return the digit equivalent of its word, i.e. "one" -> "1",
# or return the word if it appears to consist of only digits
sub word_to_digit {

    my $word = shift;
    return undef if !defined $word;
    return $word if $word =~ m/\d+/;
    for (lc $word) { 
        if    (m/^one$/)       { return 1 }
        elsif (m/^two$/)       { return 2 }
        elsif (m/^three$/)     { return 3 }
        elsif (m/^four$/)      { return 4 }
        elsif (m/^five$/)      { return 5 }
        elsif (m/^six$/)       { return 6 }
        elsif (m/^seven$/)     { return 7 }
        elsif (m/^eight$/)     { return 8 }
        elsif (m/^nine$/)      { return 9 }
        elsif (m/^ten$/)       { return 10 }
        elsif (m/^eleven$/)    { return 11 }
        elsif (m/^twelve$/)    { return 12 }
        elsif (m/^thirteen$/)  { return 13 }
        elsif (m/^fourteen$/)  { return 14 }
        elsif (m/^fifteen$/)   { return 15 }
        elsif (m/^sixteen$/)   { return 16 }
        elsif (m/^seventeen$/) { return 17 }
        elsif (m/^eighteen$/)  { return 18 }
        elsif (m/^nineteen$/)  { return 19 }
        elsif (m/^twenty$/)    { return 20 }
        # handle 1-8 in roman numberals
        elsif (m/^i$/)         { return 1 }
        elsif (m/^ii$/)        { return 2 }
        elsif (m/^iii$/)       { return 3 }
        elsif (m/^iv$/)        { return 4 }
        elsif (m/^v$/)         { return 5 }
        elsif (m/^vi$/)        { return 6 }
        elsif (m/^vii$/)       { return 7 }
        elsif (m/^viii$/)      { return 8 }
        # return undef if input unhandled
        else                  { return undef }
    }
}

# Display required copyright message from Radio Times
sub display_copyright {
    say("$rt_copyright");
}

sub get_default_cachedir {
    my $winhome = undef;
    if (defined $ENV{HOMEDRIVE} && defined $ENV{HOMEPATH}) {
        $winhome = $ENV{HOMEDRIVE} . $ENV{HOMEPATH};
    }
        
    my $home = $ENV{HOME} || $winhome || ".";
    my $dir = "$home/.xmltv/cache";
    t("Using '$dir' as cache-directory for XMLTV listings");
    return $dir;
}

sub init_cachedir {
    my $path = shift @_;
    if (! -d $path) {
        if (mkpath($path)) {
            t("Created cache-directory '$path'");
        }
        else {
            die "Error: Failed to create cache-directory $path: $@, exiting";
        }
    }
}

# Check whether data files on the RT website are empty but still online, or
# contain HTML/XML from the Radio Times' error page.
#
# These files will have a good HTTP response header as they exist, but they
# contain no data. Caching via HCT without checking for a non-zero content_size
# beforehand will therefore overwrite good data with bad. Any file having a
# content_length of 0 or seen to contain DOCTYPE info will not be cached and the 
# existing cached copy of the file will be used instead.
#
# Support for this functionality requires using at least the 1.0 version of
# HTTP::Cache::Transparent, which can be obtained from CPAN.
#
sub check_content_length {
    my $rt_file = shift @_;
    if ($rt_file->is_success) {
        # reject an empty (but available) file
        if ($rt_file->content_length == 0) {
            return 0;
        }
        # an empty source file containing only the RT disclaimer has a length
        # of approx 300 bytes
        elsif ($rt_file->content_length < 400) {
            return 0;
        }
        # reject a likely HTML error page
        elsif ($rt_file->content =~ m/DOCTYPE/) {
            return 0;
        }
        # cache a likely good file
        else {
            return 1;
        }
    }
    # reject file if retrieval failed
    else {
        return 0;
    }
}

sub config_stage {
    my ( $stage, $conf ) = @_;

    my $result;
    my $writer = new XMLTV::Configure::Writer( OUTPUT => \$result,
                                               encoding => $xml_encoding );

    $writer->start( { grabber => "$grabber_name" } );
    
    if ($stage eq 'start') {

        $writer->start_selectone( {
            id => 'encoding',
            title => [ [ 'Encoding', 'en' ] ],
            description => [
                [ "Select which output format to use",
                'en' ] ],
        } );
        $writer->write_option( {
            value => 'utf-8',
            text => [ [ 'UTF-8 (Unicode)', 'en' ] ],
        } );
        $writer->write_option( {
            value => 'iso-8859-1',
            text => [ [ 'ISO-8859-1 (Latin-1)', 'en' ] ],
        } );
        $writer->end_selectone();

        $writer->end('select-cachedir');
    }
    elsif ($stage eq 'select-cachedir') {
        $writer->write_string( {
            id => 'cachedir',
            title => [ [ 'Enter the directory to store the listings cache in', 'en' ] ],
            description => [
                [ "$grabber_name uses a cache with files that it has already " .
                'downloaded. Please specify where the cache shall be stored.',
                'en' ] ],
            default => $default_cachedir,
        } );
        $writer->end('select-title-processing');
    }
    elsif ($stage eq 'select-title-processing') {

        $writer->start_selectone( {
            id => 'title-processing',
            title => [ [ 'Enable title processing?', 'en' ] ],
            description => [
                [ "In a bid to provide more consistent listings data, $grabber_name " .
                'can further process programme and episode titles.',
                'en' ] ],
        } );
        $writer->write_option( {
            value => 'enabled',
            text => [ [ 'Enable title processing', 'en' ] ],
        } );
        $writer->write_option( {
            value => 'disabled',
            text => [ [ 'Disable title processing', 'en' ] ],
        } );
        $writer->end_selectone();

        $writer->end('select-postcode');
    }
    elsif ($stage eq 'select-postcode') {

        $writer->write_string( {
            id => 'postcode',
            title => [ [ 'Enter the first part of your postcode', 'en' ] ],
            description => [
                [ "$grabber_name can use the first part of your postcode in " .
                'order to determine which regional channels to display ' .
                'during configuration. Republic of Ireland users should enter ' .
                'the pseudo-postcode "EIRE". Entering "none" will disable this ' .
                'feature.',
                'en' ] ],
            default => 'none',
        } );
        $writer->end('select-platform');
    }
    elsif ($stage eq 'select-platform') {
        $writer->start_selectone( {
            id => 'platform',
            title => [ [ 'Select which TV platform you use', 'en' ] ],
            description => [
                [ "When choosing which channels to download listings for, $grabber_name " .
                'can show only those channels available on your TV platform.',
                'en' ] ],
        } );
        $writer->write_option( {
            value => 'none',
            text => [ [ 'None of these, I\'ll choose channels manually', 'en' ] ],
        } );
        $writer->write_option( {
            value => 'freeview',
            text => [ [ 'Freeview', 'en' ] ],
        } );
        $writer->write_option( {
            value => 'skydigital',
            text => [ [ 'Sky Digital', 'en' ] ],
        } );
        $writer->write_option( {
            value => 'virgintv',
            text => [ [ 'VirginMedia TV', 'en' ] ],
        } );
        $writer->write_option( {
            value => 'freesat',
            text => [ [ 'Freesat', 'en' ] ],
        } );
        $writer->write_option( {
            value => 'freesatfromsky',
            text => [ [ 'Freesat from Sky', 'en' ] ],
        } );
        $writer->write_option( {
            value => 'analogue',
            text => [ [ 'Analogue', 'en' ] ],
        } );
        $writer->end_selectone();

        # The select-channels stage must be the last stage called
        $writer->end( 'select-channels' );
    }
    else {
        die "Unknown stage $stage";
    }
    
    return $result;
}

sub list_channels {
    my ( $conf, $opt ) = @_;

    my $channels = load_available_channels();
    my $platform = $conf->{platform}->[0];
    my $postcode = $conf->{postcode}->[0];

    # During configuration or listing available channels, we can also filter
    # those channels which are available on the user's TV platform.
    #
    # If we find a usable platform identifier, we retrieve details on which 
    # channels are available on that platform, and remove those which are not 
    # available from the list of available channels
    if (defined $platform && lc $platform ne 'none') {
        # Retrieve hash of matched/unmatched channels 
        my $platform_chans = get_channels_by_platform($platform);

        # Have we matched any channels?
        if (defined $platform_chans->{matched} 
                && scalar @{ $platform_chans->{matched} } >= 1) {
            # Flag the matched channels as being available
            foreach my $matched_chan (@{ $platform_chans->{matched} } ) {
                $channels->{$matched_chan}{available} = 1;
            }
            # Remove channels not flagged as available from %channels hash
            foreach my $chan (keys %{$channels}) {
                if (defined $channels->{$chan}{available} 
                                && $channels->{$chan}{available} == 1) {
                    delete $channels->{$chan}{available};
                }
                else {
                    delete $channels->{$chan};
                }
            }
        }
    }

    # During configuration or listing available channels, we can only include
    # regional channels which are available in the configured postcode area.
    #
    # If we find a configured postcode, we retrieve details on which channels
    # are available, and remove those which are not available from the list of
    # available channels
    if (defined $postcode && lc $postcode ne 'none') {
        # Retrieve hash of matched/unmatched channels 
        my $reg_chans = get_channels_by_postcode($postcode);

        # Have we matched any channels?
        if (defined $reg_chans->{matched} 
                && scalar @{ $reg_chans->{matched} } >= 1) {
            # Remove the ummatched regional channels from the %channels hash
            foreach my $unmatched_chan (@{ $reg_chans->{unmatched} } ) {
                delete $channels->{$unmatched_chan};
            }
        }
    }

    my $result="";
    my $fh = new IO::Scalar \$result;
    my $oldfh = select( $fh );

    my %g_args = (OUTPUT => $fh);

    # Write XMLTV to $result, rather than STDOUT
    my $writer = new XMLTV::Writer(%g_args, encoding => $xml_encoding);
    $writer->start(\%tv_attributes);

    # It is perhaps better to sort the list of available channels by
    # display-name, rather than xmltv_id. First create a hash to store the
    # id->name mapping
    my %chan_id_to_name;

    # Only add the non-RT sourced timeshifted channels during configuration,
    # otherwise the configuration could include both Radio Times-sourced
    # timeshifted data, and the timeshifted data we create internally from a
    #regular +0 channel
    my $chan_name;
    foreach my $chan_id (keys % {$channels}) {
        $chan_name = $channels->{$chan_id}->{'display-name'}->[0]->[0];
        if ($chan_name !~ m/\(RT\)$/) {
            $chan_id_to_name{$chan_id} = $chan_name;
        }
    }

    # Create a sorted list of xmltv_ids in ascending order of the
    # corresponding display name (case-insensitive)
    my @chan_ids = sort {uc($chan_id_to_name{$a}) cmp uc($chan_id_to_name{$b})}
                        keys %chan_id_to_name;

    foreach my $channel (@chan_ids) {
        delete $channels->{$channel}{'rt_id'};
        $writer->write_channel( $channels->{$channel} );
    }
    
    $writer->end;
    select( $oldfh );
    $fh->close();

    return $result;
}

sub load_old_config {
    my ( $config_file ) = @_;
 
    if (!$opt->{quiet}) {
        say("Using old-style config file");
    }

    my @config_entries = XMLTV::Config_file::read_lines( $config_file );

    my $conf = {};
    # Use default cachedir as there was no support for choosing an alternative
    # cache directory before ParseOptions support was added to the grabber.
    $conf->{cachedir}->[0] = $default_cachedir;
    $conf->{channel} = [];

    CONFIG_ENTRY:
    foreach my $config_entry (@config_entries)
    {
        next CONFIG_ENTRY if (!defined $config_entry);
        next CONFIG_ENTRY if ($config_entry =~ m/^#/ || $config_entry =~ m/^\s*$/);
        if ($config_entry !~ m/^channel\s+(\S+)$/) {
            if (!$opt->{quiet}) {
                say("Bad line '$config_entry' in config file, skipping");
            }
            next CONFIG_ENTRY;
        }

        my( $command, $param ) = split( /\s+/, $config_entry, 2 );
        $param =~ tr/\n\r//d;
        $param =~ s/\s+$//;

        # We only support channel entries in the old-style config
        if ($command =~ m/^\s*channel\s*$/) {
            push @{$conf->{channel}}, $param;
        }
        else {
            die "Unknown command '$command' in config file $config_file"
        }
    }

    return $conf;
}

# Determine all currently available channels by reading the current Radio
# Times list of channels, and adding additional information from the
# grabber's channel_ids file. The content of both of these files is
# required in order to proceed with listings retrieval.
#
sub load_available_channels {
    # First we read in the XMLTV channel_ids file to provide additional
    # information (icon, display name) about available channels, and also
    # provide the information necessary for timeshifted and part-time channel
    # support.
    #
    # We use the hashes %rt_to_xmltv and %xmltv_to_rt to lookup the Radio 
    # Times and XMLTV channel IDs. These will deal sensibly with a new RT
    # channel that isn't yet mentioned in the channel_ids file.
    
    # Provide statistics for the number of usable, unusable, timeshifted,
    # part-time, and part-time timeshifted channels listed in channel_ids.
    my $num_good_channels = 0;
    my $num_bad_channels = 0;
    my $num_ts_channels = 0;
    my $num_pt_channels = 0;
    my $num_pt_ts_channels = 0;

    # Retrieve grabber's channel_ids file via XMLTV::Supplement
    my $xmltv_channel_ids = GetSupplement("$grabber_name", 'channel_ids');

    die "Error: XMLTV channel_ids data is missing, exiting" 
        if (!defined $xmltv_channel_ids || $xmltv_channel_ids eq '');

    my @lines = split /[\n\r]+/, $xmltv_channel_ids;

    t("\nExtended XMLTV channel information:\n");

    XMLTV_CHANID_ENTRY:
    foreach my $line (@lines) {
        # Skip blank lines. Comments are allowed if they are at the start 
        # of the line.
        next XMLTV_CHANID_ENTRY if ($line =~ m/^#/ || $line =~ m/^$/);
        my @fields = split /\|/, $line;
        # We need at least 2 fields (xmltv_id,rt_id) to run the grabber.
        # No check on maximum number of fields to support future updates
        # to channel_ids now we are using XMLTV::Supplement.
        if (scalar @fields < 2) {
            t("Wrong number of fields in XMLTV channel_ids entry:\n"
                    . "\t" . $line);
            next XMLTV_CHANID_ENTRY;
        }

        # The channel_ids fields are:
        # 1) XMLTV ID
        # 2) RT ID
        # 3) Channel name
        # 4) Channel icon URL
        # 5) Timeshift offset
        # 6) Broadcast hours
        # 7) Video quality
        #
        # The RT channels.dat provides a channel name, but it may be out of
        # date - here we provide an alternative or updated name if the 
        # channel name has changed
        my ($xmltv_id, $rt_id,          $extra_dn, 
            $icon_url, $channel_offset, $broadcast_hours,
            $video_quality) = @fields;

        # Flag timeshifted and part-time channels for stats
        my ($is_timeshift, $is_parttime);

        # Check for required XMLTV ID and RT ID fields, skip if missing
        if (!defined $xmltv_id  || $xmltv_id eq '') {
            t("Undefined XMLTV ID seen in channel_ids, skipping");
            next XMLTV_CHANID_ENTRY;
        }
        if ($xmltv_id !~ m/\w+\.\w+.*/) {
            t("Invalid XMLTV ID seen in channel_ids, skipping");
            next XMLTV_CHANID_ENTRY;
        }
        if (!defined $rt_id || $rt_id eq '') {
            t("Undefined RT ID seen in channel_ids, skipping");
            next XMLTV_CHANID_ENTRY;
        }
        if ($rt_id !~ m/^\d+$/) {
            t("Invalid RT ID seen in channel_ids, skipping");
            next XMLTV_CHANID_ENTRY;
        }
        
        # Check for duplicate RT IDs having same associated XMLTV ID. As part of
        # timeshifted/part-time channel support, we associate the same RT ID
        # with different XMLTV IDs
        foreach my $id (@{$rt_to_xmltv{$rt_id}}) {
            if (defined $id && $id eq $xmltv_id) {
                t("Radio Times ID '$rt_id' already seen in XMLTV " 
                  . "channel_ids file, skipping");
                next XMLTV_CHANID_ENTRY;
            }
        }

        # Check whether current XMLTV ID has already been seen
        if (defined $xmltv_to_rt{$xmltv_id}) {
            t("XMLTV ID '$xmltv_id' already seen in XMLTV channel_ids file, skipping");
            next XMLTV_CHANID_ENTRY;
        }

        # Store the XMLTV channel description, report if it is missing
        if (defined $extra_dn) {
            if ($extra_dn eq '' || $extra_dn !~ m/\w+/) {
                $extra_dn = undef;
                if (defined $extra_dn && $extra_dn !~ m/\(Do\ Not\ Use\)/) {
                    t("No XMLTV channel name associated with '$xmltv_id'");
                }
            }
            else {
                $extra_dn{$xmltv_id} = $extra_dn;
            }
        }
        
        # Check for channel icon
        if (defined $icon_url) {
            if ($icon_url eq '' || $icon_url !~ m/^http/) {
                $icon_url = undef;
                if (defined $extra_dn && $extra_dn !~ m/\(Do\ Not\ Use\)/) {
                    t("No channel icon associated with '$xmltv_id'");
                }
            }
            else {
                $icon_urls{$xmltv_id} = $icon_url;
            }
        }
        
        # Check for valid timeshift offset
        if (defined $channel_offset) {
            if ($channel_offset eq '' || $channel_offset !~ m/^(\+|\-)/) {
                $channel_offset = undef;
            }
            else {
                $channel_offset{$xmltv_id} = $channel_offset;
                if (defined $extra_dn && $extra_dn !~ m/\(Do\ Not\ Use\)/) {
                    t("Channel '$xmltv_id' has timeshift of '$channel_offset'");
                }
                $is_timeshift = 1;
            }
        }
        
        # Check for correct broadcast hours format (HHMM-HHMM)
        if (defined $broadcast_hours) {
            if ($broadcast_hours eq '' || $broadcast_hours !~ m/\d{4}-\d{4}/) {
                $broadcast_hours = undef;
            }
            else {
                $broadcast_hours{$xmltv_id} = $broadcast_hours;
                if (defined $extra_dn && $extra_dn !~ m/\(Do\ Not\ Use\)/) {
                    t("Channel '$xmltv_id' is on air '$broadcast_hours'");
                }
                $is_parttime = 1;
            }
        }

        # Check for presence of video quality information (SDTV or HDTV)
        if (defined $video_quality) {
            if ($video_quality eq '' || $video_quality !~ m/SDTV|HDTV/) {
                $video_quality = undef;
            }
            else {
                $video_quality{$xmltv_id} = $video_quality;
            }
        }

        # Handle multiple XMLTV IDs associated with a single RT ID. Required
        # after introduction of timeshifted and part-time channel support,
        # which map multiple XMLTV IDs to a single RT ID.
        push @{$rt_to_xmltv{$rt_id}}, $xmltv_id;
        $xmltv_to_rt{$xmltv_id} = $rt_id;

        # Update the counts of part-time and timeshifted channels
        if ($is_timeshift && $is_parttime) {
            $num_pt_ts_channels++;
        }
        elsif ($is_timeshift) {
            $num_ts_channels++;
        }
        elsif ($is_parttime) {
            $num_pt_channels++;
        }

        # Finally, update count of good/bad channels
        if ($extra_dn =~ m/\(Do\ Not\ Use\)/) {
            $num_bad_channels++;
        }
        else {
            $num_good_channels++;
        }
    }
    t("\n");
    # channel_ids processing finished

    die "Error: No usable XMLTV channel definitions seen in channel_ids, exiting"
        if (!defined $num_good_channels || $num_good_channels < 1);


    # Read in the Radio Times channels.dat file, which is supplied in UTF-8
    # format. We process the list of available channels and check for 
    # presence of duplicate IDs or names.
    #
    # Grab the octets
    t("Retrieving channel list from Radio Times website");
    my $rt_channels_dat = get_octets( $rt_channels_uri );

    die "Error: Radio Times channels.dat data is missing, exiting\n"
        . "Please check $rt_channels_uri"
        if (!defined $rt_channels_dat || $rt_channels_dat eq '');
        
    # Decode source UTF-8 octets, process for HTML entities, and encode 
    # into configured output encoding
    my $decoded_rt_channels_dat;
    t("\nDecoding channel data from $source_encoding octets into Perl's internal format");
    $decoded_rt_channels_dat = decode($source_encoding, $rt_channels_dat);
    t("Processing for HTML entities seen in the channel data");
    decode_entities($decoded_rt_channels_dat);
    t("Encoding channel data from Perl's internal format into $xml_encoding octets\n");
    $rt_channels_dat = encode($xml_encoding, $decoded_rt_channels_dat);

    my @rt_channels = split /\n/, $rt_channels_dat;
    my $num_rt_channels = scalar @rt_channels;

    $need_final_update = 0;
    my $chans_bar;
    if (!$opt->{quiet} && !$opt->{debug}) {
        $chans_bar = new XMLTV::ProgressBar({name   => 'Retrieving channels',
                                             count  => $num_rt_channels,
                                             ETA    => 'linear', });
    }

    # Hash to store details for <channel> elements
    my %channels;
    my (%seen_rt_id, %seen_name);
    my $num_good_rt_channels = 0;
    my $to_say = "";

    RT_CHANDAT_ENTRY:
    foreach my $rt_channel (@rt_channels) {
        chomp $rt_channel;
        # ignore empty line and disclaimer at start of file
        if ($rt_channel =~ m/^\s*$/ || $rt_channel =~ /^In accessing this XML feed/) {
            next RT_CHANDAT_ENTRY;
        }

        if ($rt_channel !~ m/^(\d+)\|(.+)/) {
            t("Bad entry '$rt_channel' seen in RT channels.dat, skipping");
            next RT_CHANDAT_ENTRY;
        }

        my ($rt_id, $rt_name) = ($1, $2);
        if ($seen_rt_id{$rt_id}++) {
            t("Duplicate channnel ID '$rt_id' seen in RT channels.dat, skipping");
            next RT_CHANDAT_ENTRY;
        }

        if ($seen_name{$rt_name}++) {
            t("Duplicate channel name '$rt_name' seen in RT channels.dat, skipping");
            next RT_CHANDAT_ENTRY;
        }
        
        # Check whether there is at least one XMLTV ID associated with the RT ID
        #
        # If the current RT channel has a known XMLTV ID, check it against known bad
        # channels and skip it if required. If the channel does not have an 
        # XMLTV ID, create one and continue.
        #
        my $xmltv_id = $rt_to_xmltv{$rt_id}[0];
        if (defined $xmltv_id) {
            # Skip any RT entries which have been flagged as bad in channel_ids file
            if ($extra_dn{ $rt_to_xmltv{$rt_id}[0] } =~ m/\(Do\ Not\ Use\)/) {
                t("Channel '$rt_name' ($rt_id) flagged as bad, skipping");
                $need_final_update = 1;
                next RT_CHANDAT_ENTRY;
            }
        }
        else {
            # Handle new channels available on RT site unknown to channel_ids file
            $to_say .= "Unknown channel '$rt_name'. Will configure as 'C$rt_id.radiotimes.com'\n";
            push @{$rt_to_xmltv{$rt_id}}, "C$rt_id.radiotimes.com";
        }

        foreach my $id (@{$rt_to_xmltv{$rt_id}}) {
            # Use a name for the channel if defined in our channel_ids file,
            # otherwise use the name supplied by the Radio Times.
            my @names = ();
            if (defined $extra_dn{$id}) {
                @names = ([ $extra_dn{$id} ]);
            }
            else {
                @names = ([ $rt_name ]);
            }

            # Add a URL for a channel icon if available.
            my @icon;
            my $icon_url = $icon_urls{$id};
            if ($icon_url) {
                @icon = { 'src' => $icon_url };
            }

            # Add the channel's details to the %channels hash, adding icon
            # details if available.
            if (@icon) {
                $channels{$id} = {
                    id             => $id,
                    rt_id          => $rt_id,
                    'display-name' => \@names,
                    'icon'         => \@icon,
                };
            }
            else {
                $channels{$id} = {
                    id             => $id,
                    rt_id          => $rt_id,
                    'display-name' => \@names,
                };
            }
        }
        # We have a usable channel definition at this point
        $num_good_rt_channels++;

        # Update the progres bar by one increment
        if (defined $chans_bar) {
            $chans_bar->update();
        }
    }

    die "Error: No usable Radio Times channel definitions available, exiting"
        if ($num_good_rt_channels < 1);

    if (defined $chans_bar) {
        # Only update the progress bar to 100% if we need to
        if ($need_final_update) {
            $chans_bar->update($num_rt_channels);
        }
        $chans_bar->finish();
        if (!$opt->{quiet}) {
            say( "\n" );
        }
    }

    if (!$opt->{quiet} && $to_say) {
        say( $to_say );
        say("\n  Please notify the maintainer to get the new channels added");
    }

    # Output statistics on the number of channels currently available
    if (!$opt->{quiet}) {
        say("\nThe Radio Times has usable data available for $num_good_rt_channels channels which we\n"
            . "can use to generate TV listings for regular and some timeshifted\n"
            . "channels. The tv_grab_uk_rt software also has support for an additional\n"
            . "$num_ts_channels timeshifted, $num_pt_channels part-time, and $num_pt_ts_channels part-time timeshifted channels\n"
            . "based on the Radio Times data.\n\n"
            . "In total, tv_grab_uk_rt currently supports $num_good_channels channels.\n");
    }

    # Report any channels listed in channel_ids not seen on the Radio Times
    # site
    if (!$opt->{quiet}) {
        XMLTV_ID:
        foreach my $xmltv_id (keys %xmltv_to_rt) {
            # Ignore channels flagged as bad in channel_ids
            next XMLTV_ID if ($extra_dn{$xmltv_id} =~ m/.*Do\ Not\ Use.*/);
            if (!exists $channels{$xmltv_id}) {
                say("XMLTV channel '$xmltv_id' ($xmltv_to_rt{$xmltv_id}) " 
                   . "not seen on RT site\n");
            }
        }
    }

    return \%channels;
}

# Check that the requested channels are available from the Radio Times
#
sub check_configured_channels {
    my ( $available_channels ) = @_;
    
    # List of channel IDs that we can download listings for after checking
    # $opt->{channel} against the current RT/XMLTV channel list.
    my @wanted_chs;

    t("Reading config file channel entries");

    WANTED_CHAN:
    foreach my $chan_id (@{$conf->{channel}}) {
        t("  Read channel '$chan_id'");
        if (!exists ${$available_channels}{$chan_id}) {
            if (!$opt->{quiet}) {
                say("  Configured channel '$chan_id' is unavailable");
            }
            next WANTED_CHAN;
        }
        push @wanted_chs, $chan_id;
    }
    my $num_req_chans = scalar @wanted_chs;
    die "Error: No configured channels are available, exiting"
        if (!defined $num_req_chans || $num_req_chans < 1);
    t("Finished reading $num_req_chans configured channels");

    return \@wanted_chs;
}

# Retrieve and process mappings on channel ID to postcodes for regional 
# channels.
#
# Takes a postcode as an argument and returns i) a list of matching XMLTV IDs
# and ii) a list of the remaining regional XMLTV IDs that were not matched
#
sub get_channels_by_postcode {
    my $conf_postcode = shift;

    my $xmltvids_postcodes = GetSupplement("$grabber_name", 'regional_channels_by_postcode');

    die "Error: XMLTV regional_channels_by_postcode data is missing, exiting" 
        if (!defined $xmltvids_postcodes || $xmltvids_postcodes eq '');

    my @lines = split /[\n\r]+/, $xmltvids_postcodes;

    # Hash to hold matched and unmatched XMLTV IDs
    my %reg_chans;

    XMLTV_POSTCODE_ENTRY:
    foreach my $line (@lines) {
        # Skip blank lines. Comments are allowed if they are at the start 
        # of the line.
        next XMLTV_POSTCODE_ENTRY if ($line =~ m/^#/ || $line =~ m/^$/);
        my @fields = split /\|/, $line;
        # We need 2 fields (xmltv_id,postcodes).
        if (scalar @fields != 2) {
            t("Wrong number of fields in XMLTV regional_channels_by_postcode entry:\n"
                    . "\t" . $line);
            next XMLTV_POSTCODE_ENTRY;
        }
        my ( $xmltv_id, $postcodes ) = @fields;

        # Check for required XMLTV ID and postcode fields, skip if missing
        if (!defined $xmltv_id  || $xmltv_id eq '') {
            t("Undefined XMLTV ID seen in regional_channels_by_postcode, skipping");
            next XMLTV_POSTCODE_ENTRY;
        }
        if ($xmltv_id !~ m/\w+\.\w+.*/) {
            t("Invalid XMLTV ID seen in regional_channels_by_postcode, skipping");
            next XMLTV_POSTCODE_ENTRY;
        }
        if (!defined $postcodes || $postcodes eq '') {
            t("Undefined postcode entry seen in regional_channels_by_postcode, skipping");
            next XMLTV_POSTCODE_ENTRY;
        }
        if (defined $xmltv_to_rt{$xmltv_id}) {
            t("Channel '$xmltv_id' has region-specific information available");
        }
        else {
            t("Channel '$xmltv_id' is not available, skipping ");
            next XMLTV_POSTCODE_ENTRY;
        }

        my @postcodes = split /,/, $postcodes;
        foreach my $postcode (@postcodes) {
            if (uc $postcode eq uc $conf_postcode) {
                push @{$reg_chans{matched}}, $xmltv_id;
                # match made, process next channel
                next XMLTV_POSTCODE_ENTRY;
            }
        }
        # No match made, so add to unmatched list
        push @{ $reg_chans{unmatched} }, $xmltv_id;
    }

    return \%reg_chans;
}

# Retrieve and process mappings on channel ID to TV platforms.
#
# Takes a platform ID as an argument and returns i) a list of matching XMLTV IDs
# and ii) a list of the remaining regional XMLTV IDs that were not matched
#
sub get_channels_by_platform {
    my $conf_platform = shift;

    my $xmltvids_platforms = GetSupplement("$grabber_name", 'channels_platforms');

    die "Error: XMLTV channels_platforms data is missing, exiting" 
        if (!defined $xmltvids_platforms || $xmltvids_platforms eq '');

    my @lines = split /[\n\r]+/, $xmltvids_platforms;

    # Hash to hold matched and unmatched XMLTV IDs
    my %platform_chans;

    XMLTV_PLATFORM_ENTRY:
    foreach my $line (@lines) {
        # Skip blank lines. Comments are allowed if they are at the start 
        # of the line.
        next XMLTV_PLATFORM_ENTRY if ($line =~ m/^#/ || $line =~ m/^$/);
        my @fields = split /\|/, $line;
        # We need 2 fields (xmltv_id,platform(s)).
        if (scalar @fields != 2) {
            t("Wrong number of fields in XMLTV channels_platforms entry:\n"
                    . "\t" . $line);
            next XMLTV_PLATFORM_ENTRY;
        }
        my ( $xmltv_id, $platforms ) = @fields;

        # Check for required XMLTV ID and platform fields, skip if missing
        if (!defined $xmltv_id  || $xmltv_id eq '') {
            t("Undefined XMLTV ID seen in channels_platforms, skipping");
            next XMLTV_PLATFORM_ENTRY;
        }
        if ($xmltv_id !~ m/\w+\.\w+.*/) {
            t("Invalid XMLTV ID seen in channels_platforms, skipping");
            next XMLTV_PLATFORM_ENTRY;
        }
        if (!defined $platforms || $platforms eq '') {
            t("Undefined platform entry seen in channels_platforms, skipping");
            next XMLTV_PLATFORM_ENTRY;
        }
        if (defined $xmltv_to_rt{$xmltv_id}) {
            t("Channel '$xmltv_id' has platform information available");
        }
        else {
            t("Channel '$xmltv_id' is not available, skipping ");
            next XMLTV_PLATFORM_ENTRY;
        }

        my @platforms = split /,/, $platforms;
        foreach my $platform (@platforms) {
            if (uc $platform eq uc $conf_platform) {
                push @{$platform_chans{matched}}, $xmltv_id;
                # match made, process next channel
                next XMLTV_PLATFORM_ENTRY;
            }
        }
        # No match made, so add to unmatched list
        push @{ $platform_chans{unmatched} }, $xmltv_id;
    }

    return \%platform_chans;
}

# Determine options for, and create XMLTV::Writer object
sub setup_xmltv_writer {
    # output options
    my %g_args = ();
    if (defined $opt->{output}) {
        t("\nOpening XML output file '$opt->{output}'\n");
        my $fh = new IO::File ">$opt->{output}";
        die "Error: Cannot write to '$opt->{output}', exiting" if (!$fh);
        %g_args = (OUTPUT => $fh);
    }

    # Determine how many days of listings are required and range-check, applying
    # default values if impossible. If --days or --offset is specified we must
    # ensure that values for days, offset and cutoff are passed to XMLTV::Writer
    my %d_args = ();
    if (defined $opt->{days} || defined $opt->{offset}) {
        if (defined $opt->{days}) {
            if ($opt->{days} < 1 || $opt->{days} > 15) {
                if (!$opt->{quiet}) {
                    say("Specified --days option is not possible (1-15). "
                      . "Retrieving all available listings.");
                }
                $opt->{days} = 15
            }
        }
        else {
            $opt->{days} = 15;
        }

        if (defined $opt->{offset}) {
            if ($opt->{offset} < 0 || $opt->{offset} > 14) {
                if (!$opt->{quiet}) {
                    say("Specified --offset option is not possible (0-14). "
                      . "Retrieving all available listings.");
                }
                $opt->{offset} = 0;
            }
        }
        else {
            $opt->{offset} = 0;
        }
        $d_args{days} = $opt->{days};
        $d_args{offset} = $opt->{offset};
        # We currently don't provide a --cutoff option
        $d_args{cutoff} = "000000";
    }

    t("Setting up XMLTV::Writer using \"" . $xml_encoding . "\" for output");
    $writer = new XMLTV::Writer(%g_args, %d_args, encoding => $xml_encoding);
}

sub write_xmltv_header {
    t("Writing XMLTV header");
    $writer->start(\%tv_attributes);
}

sub write_channel_list {
    my ( $available_channels, $wanted_chs ) = @_;
    
    t("Started writing <channel> elements");
    foreach my $chan_id (@{$wanted_chs}) {
        my %h = %{ ${$available_channels}{$chan_id} };
        delete $h{rt_id};
        $writer->write_channel(\%h);
    }
    t("Finished writing <channel> elements");
}

# Read in the prog_titles_to_process file
sub load_prog_titles_to_process {
    my $prog_titles_to_process = undef;
    # Retrieve prog_titles_to_process via XMLTV::Supplement
    $prog_titles_to_process 
            = GetSupplement("$grabber_name", 'prog_titles_to_process');

    if (defined $prog_titles_to_process) {
        my @prog_titles_to_process = split /[\n\r]+/, $prog_titles_to_process;

        t("\nTitle processing information:\n");
        PROG_TITLE_ENTRY:
        foreach my $line (@prog_titles_to_process) {
            # Comments are allowed if they are at the start of the line
            next PROG_TITLE_ENTRY if ($line =~ m/^#/);
            my @fields = split /\|/, $line;
            # Each entry requires 2 fields
            if (scalar @fields != 2) {
                t("Wrong number of fields in XMLTV prog_titles_to_process entry:\n"
                        . "\t" . $line);
                next PROG_TITLE_ENTRY;
            }
            # The prog_titles_to_process fields are:
            # 1) procesing code
            # 2) title/non-title text to process
            #
            my ($code, $process_text) = @fields;
            if (!defined $code || $code eq '' || $code !~ m/\d+/) {
                t("Invalid title processing code: " . $line . "'");
                next PROG_TITLE_ENTRY;
            }

            if (!defined $process_text || $process_text eq '' 
                                       || $process_text !~ m/\w+/) {
                t("Invalid title processing text: " . $line . "'");
                next PROG_TITLE_ENTRY;
            }

            my $idx_char = lc(substr ($process_text, 0, 1));
            
            # processing codes are documented in prog_titles_to_process file
            if ($code eq '1')  {
                push @{$non_title_info{$idx_char}}, $process_text;
                t("[1] Will remove '" . $process_text . "' from title " 
                  . "if found");
                $have_title_data = 1;
                next PROG_TITLE_ENTRY;
            }
            elsif ($code eq '2') {
                push @{$mixed_title_subtitle{$idx_char}}, $process_text;
                t("[2] Will check for subtitle after title for '" 
                  . $process_text . "'");
                $have_title_data = 1;
                next PROG_TITLE_ENTRY;
            }
            elsif ($code eq '3') {
                push @mixed_subtitle_title, $process_text;
                t("[3] Will check for subtitle before title for '" 
                  . $process_text . "'");
                $have_title_data = 1;
                next PROG_TITLE_ENTRY;
            }
            elsif ($code eq '4') {
                push @{$reversed_title_subtitle{$idx_char}}, $process_text;
                t("[4] Will check for reversed title/subtitle for '" 
                  . $process_text . "'");
                $have_title_data = 1;
                next PROG_TITLE_ENTRY;
            }
            elsif ($code eq '5') {
                my @fields = split( /~/, $process_text, 2);
                if (scalar @fields != 2) {
                    t("[5] Invalid number of fields (need 2) in processing text: '" . $process_text . "'");
                    next PROG_TITLE_ENTRY;
                }
                my( $old_title, $new_title ) = @fields;
                $replacement_titles{$old_title} = $new_title;
                t("[5] Will check for inconsistent title '" 
                  . $old_title . "'");
                $have_title_data = 1;
                next PROG_TITLE_ENTRY;
            }
            elsif ($code eq '6') {
                my @fields = split( /~/, $process_text, 2);
                if (scalar @fields != 2) {
                    t("[6] Invalid number of fields (need 2) in processing text: '" . $process_text . "'");
                    next PROG_TITLE_ENTRY;
                }
                my( $uncat_title, $cat ) = @fields;
                $replacement_cats{$uncat_title} = $cat;
                t("[6] Will assign title '" . $uncat_title 
                    . "' to category '" . $cat . "'");
                $have_title_data = 1;
                next PROG_TITLE_ENTRY;
            }
            elsif ($code eq '7') {
                my @fields = split( /~/, $process_text, 3);
                if (scalar @fields != 3) {
                    t("[7] Invalid number of fields (need 3) in processing text: '" . $process_text . "'");
                    next PROG_TITLE_ENTRY;
                }
                my( $ep_title, $old_ep, $new_ep ) = @fields;
                $replacement_episodes{$ep_title}->{$old_ep} = $new_ep;
                t("[7] Will check for inconsistent episode data '" . $old_ep 
                    . "' for title '" . $ep_title . "'");
                $have_title_data = 1;
                next PROG_TITLE_ENTRY;
            }
            elsif ($code eq '8') {
                my @fields = split( /~/, $process_text, 4);
                if (scalar @fields != 4) {
                    t("[8] Invalid number of fields (need 4) in processing text: '" . $process_text . "'");
                    next PROG_TITLE_ENTRY;
                }
                foreach my $field (@fields) {
                    $field = "" if !defined $field;
                }
                my( $old_title, $old_ep, $new_title, $new_ep ) = @fields;
                if ($old_title eq '' or $new_title eq '') {
                    t("[8] Ignoring fixup '" . $process_text . "' as old/new title not given");
                    next PROG_TITLE_ENTRY;
                }
                if ($old_title eq $new_title) {
                    t("[8]   Old/new title are the same - change to a type 7 title fixup: '" . $process_text . "'");
                }
                # remember old title so that we can output a debug list of
                # programmes that may also need to be handled via this fixup
                $flagged_title_eps{$old_title} = $old_title;
                
                my $key = ("" . $old_title . "|" . $old_ep);
                $replacement_title_eps{$key} = [$new_title, $new_ep];
                t("[8] Will update old title/subtitle '" . $old_title . ": " . $old_ep 
                    . "' to '" . $new_title . ": " . $new_ep . "'");

                # store titles that are being corrected with an existing "some title..." fixup
                # store the title without a leading "The" or "A" or the trailing "..." for later matching
                if ($new_title =~ m/^(?:The\s+|A\s+)?(.*)\.\.\.$/) {
                    $dotdotdot_titles{$1} = $new_title;
                }
                
                $have_title_data = 1;
                next PROG_TITLE_ENTRY;
            }
            elsif ($code eq '9') {
                my @fields = split( /~/, $process_text, 3);
                if (scalar @fields != 3) {
                    t("[9] Invalid number of fields (need 3) in processing text: '" . $process_text . "'");
                    next PROG_TITLE_ENTRY;
                }
                my( $title, $episode, $desc ) = @fields;
                $replacement_ep_from_desc{$title}->{$desc} = $episode;
                t("[9] Will update subtitle to '" . $episode . "' for title '" . $title 
                    . "' based on given description '" . $desc . "'");
                $have_title_data = 1;
                next PROG_TITLE_ENTRY;
            }
            else {
                t("Unknown code seen in prog_titles_to_process file," 
                  . " skipping entry '" . $line . "'");
                next PROG_TITLE_ENTRY;
            }
        }
    }
    else {
        if (!$opt->{quiet}) {
            say("Disabling title processing, no information found.");
        }
    }
    if (!$opt->{quiet}) {
        say("\n");
    }
}

# Read in the utf8_fixups file
sub load_utf8_fixups {
    my $utf8_fixups = undef;
    # Retrieve utf8_fixups via XMLTV::Supplement
    $utf8_fixups 
            = GetSupplement("$grabber_name", 'utf8_fixups');

    if (defined $utf8_fixups) {
        my @utf8_fixups = split /[\n\r]+/, $utf8_fixups;

        t("\nLoading UTF-8 fixups\n");
        UTF8_FIXUP_ENTRY:
        foreach my $line (@utf8_fixups) {
            # Comments are allowed if they are at the start of the line
            next UTF8_FIXUP_ENTRY if ($line =~ m/^#/);
            my @fields = split /\|/, $line;
            # Each entry requires 2 fields
            if (scalar @fields != 2) {
                t("Wrong number of fields in XMLTV UTF-8 fixup entry:\n"
                        . "\t" . $line);
                next UTF8_FIXUP_ENTRY;
            }
            
            # The utf8_fixups fields are:
            # 1) bad utf-8 characters to find and replace (as hex)
            # 2) the replacement characters (as hex)
            #
            my ($bad_chars, $replacement) = @fields;

            if (!defined $bad_chars || $bad_chars eq '') {
                t("Invalid UTF-8 fixup regex: '" . $line . "'");
                next UTF8_FIXUP_ENTRY;
            }
            if (!defined $replacement || $replacement eq '') {
                t("Invalid UTF-8 fixup replacement: '" . $line . "'");
                next UTF8_FIXUP_ENTRY;
            }
            # ignore unknown fixup formats
            if ($bad_chars !~ m/\\xEF\\xBF\\xBD/ 
                    && $bad_chars !~ m/\\xC3\\xAF\\xC2\\xBF\\xC2\\xBD/ 
                    && $bad_chars !~ m/^\\xC2\\x[8-9][0-9A-F]$/) {
                t("Ignoring UTF-8 fixup: '" . $line . "'");
                next UTF8_FIXUP_ENTRY;
            }
            # Remove the \x chars read from the file leaving a simple hex string
            # containing only [0-9A-F] chars
            $replacement =~ s/\\x//g;
            # Now convert each byte (2 hex chars) into its character equivalent
            $replacement =~ s/([0-9A-F][0-9A-F])/chr(hex($1))/eig;

            # Create hashes to store each type of fixup separately. This should
            # improve processing speed by restricting number of fixups checked.
            if ($bad_chars =~ m/\\xEF\\xBF\\xBD/) {
                $utf8_fixups{'EFBFBD'}{$bad_chars} = $replacement;
            }
            elsif ($bad_chars =~ m/\\xC3\\xAF\\xC2\\xBF\\xC2\\xBD/) {
                $utf8_fixups{'C3AFC2BFC2BD'}{$bad_chars} = $replacement;
            }
            elsif ($bad_chars =~ m/^\\xC2\\x[8-9][0-9A-F]$/) {
                $utf8_fixups{'C2809F'}{$bad_chars} = $replacement;
            }
                
            # Process the regex to get a character string to print. We use
            # the preserved hex string during processing
            my $bad_chars_chr = $bad_chars;
            $bad_chars_chr =~ s/\\x//g;
            $bad_chars_chr =~ s/([0-9A-F][0-9A-F])/chr(hex($1))/eig;
            t("UTF-8 fixup: will replace \"" . $bad_chars_chr . "\" with \"" 
                    . $replacement . "\" if seen");
            next UTF8_FIXUP_ENTRY;
        }
    }
    else {
        if (!$opt->{quiet}) {
            say("No additional UTF-8 fixups were found.");
        }
    }
    if (!$opt->{quiet}) {
        say("\n");
    }
}

# Tidy up any bad characters in the source data. Although the data is provided
# as UTF-8, the text may contain mis-encoded UTF-8 characters or the NULL 
# or other extraneous characters which should be corrected where possible.
#
sub process_utf8_fixups($$$) {

    # read in the data to be processed, a descriptive name and a URI for it
    my $page = shift;
    my $rt_name = shift;
    my $rt_listings_uri = shift;

    t("  Checking '$rt_name' listings data for bad UTF-8 chars...");
    for ($page) {
        # Programme entries containing RT reviews or updated information
        # may have erroneous CR+SP characters which we tidy up here
        #
        t("    Looking for CR+SP characters...");
        if (s/\x0D\x20//g) {
            t("      Removed CR+SP characters from '$rt_name' listings data");
        }

        # Fix double-encoded UTF-8 characters (4 bytes)
        # =============================================
        #
        # The ISO-8859-1 charset contains 256 codepoints (0x00-0xFF). When 
        # encoded into UTF-8, either 1 or 2 bytes are required to encode these 
        # characters as follows:
        # 
        # ISO-8859-1           UTF-8        Chars in    Bytes      Notes
        #    range         byte(s) range     Range     Required
        #
        #  0x00-0x1F     [00]-[1F]             32         1        Non-printing
        #  0x20-0x7F     [20]-[7F]             96         1        Printing
        #  0x80-0x9F     [C2][80]-[C2][9F]     32         2        Non-printing
        #  0xA0-0xBF     [C2][A0]-[C2][BF]     32         2        Printing
        #  0xC0-0xFF     [C3][80]-[C3][BF]     64         2        Printing
        #
        # A double-encoded UTF-8 character that uses 4 bytes (but should use 
        # only 2 if properly encoded) uses the first 2 bytes to contain the 
        # UTF-8 representation of the first byte of the proper UTF-8 
        # representation of the character, and the second 2 bytes to contain 
        # the UTF-8 representation of the second byte.
        #
        # E.g.:
        #
        # The data contains a double-encoded UTF-8 encoding of the A-grave 
        # character using 4 bytes. The correct UTF-8 encoding of this character 
        # is [C3][80]. The data actually contains the 4 bytes [C3][83][C2][80]. 
        # [C3][83] is the UTF-8 encoding of [C3], and [C2][80] is the UTF-8 
        # encoding of [80]. We therefore replace this 4-byte double-encoding 
        # with [C3][80] which is valid UTF-8 and can be successfully encoded 
        # into other character encodings if required.
        #
        # The range of Unicode codepoints encoded into 2 bytes in UTF-8 lie in the 
        # range [C2-DF][80-BF].
        #
        # http://en.wikipedia.org/wiki/ISO/IEC_8859-1
        # http://en.wikipedia.org/wiki/UTF-8
        # http://www.eki.ee/letter/
        #
        t("    Looking for double-encoded UTF-8 characters...");
        if (m/[\xC3][\x82-\x83][\xC2][\x80-\xBF]/) {
            # first capture each set of double-encoded UTF-8 bytes 
            # (4 in total, 2 for each "real" UTF-8 char) into a list
            my @double_bytes = ($page =~ m/[\xC3][\x82-\x83][\xC2][\x80-\xBF]/g);

            # get a unique list of the different doubly encoded bytes
            my %unique_double_bytes;
            foreach(@double_bytes) {
                $unique_double_bytes{$_} = $_;
            }
            # Get a list of unique 4-byte sequences
            @double_bytes = sort values %unique_double_bytes;
            foreach (@double_bytes) {
                t("      Found double-encoded bytes: " . $_);
            }
            # process the list, reading 2 pairs of bytes in each iteration
            foreach (@double_bytes) {
                /([\xC3][\x82-\x83])([\xC2][\x80-\xBF])/;
                my $badbytes_1 = $1;
                my $badbytes_2 = $2;
                # convert each pair of bytes from UTF-8 to ISO-8859-1 to get a single 
                # byte from the original pair
                my $goodbyte_1 = encode("iso-8859-1", decode("utf-8", $badbytes_1) );
                my $goodbyte_2 = encode("iso-8859-1", decode("utf-8", $badbytes_2) );
                # finally globally replace each group of 4 bad bytes with 
                # the 2 correct replacement bytes
                $page =~ s/$badbytes_1$badbytes_2/$goodbyte_1$goodbyte_2/g;
                t("      Replaced bad bytes '" . $badbytes_1 . $badbytes_2 
                                . "' with good bytes '" . $goodbyte_1 . $goodbyte_2 . "'");
            }
        }

        # Fix double-encoded UTF-8 General Punctuation characters (6 bytes)
        # =================================================================
        #
        # Occasionally in the listings we see double-encoded characters from
        # the Unicode General Punctuation range of characters. When encoded 
        # into UTF-8 these characters should require 3 bytes. However, when 
        # double-encoded they take 6 bytes. During their handling we replace 
        # them with their ASCII equivalents which are how the characters are 
        # usually included in the listings.
        #
        t("    Looking for double-encoded UTF-8 General Punctuation characters...");
        if (m/[\xC3][\xA2][\xC2][\x80-\x81]/) {
            t("      Replaced double-encoded 6-byte UTF-8 General Punctuation chars");
            s/\xC3\xA2\xC2\x80\xC2\x90/\x2D/g; # <2D> -> -
            s/\xC3\xA2\xC2\x80\xC2\x91/\x2D/g; # <2D> -> -
            s/\xC3\xA2\xC2\x80\xC2\x92/\x2D/g; # <2D> -> -
            s/\xC3\xA2\xC2\x80\xC2\x93/\x2D/g; # <2D> -> -
            s/\xC3\xA2\xC2\x80\xC2\x94/\x2D/g; # <2D> -> -
            s/\xC3\xA2\xC2\x80\xC2\x95/\x2D/g; # <2D> -> -
            s/\xC3\xA2\xC2\x80\xC2\x98/\x27/g; # <27> -> '
            s/\xC3\xA2\xC2\x80\xC2\x99/\x27/g; # <27> -> '
            s/\xC3\xA2\xC2\x80\xC2\x9A/\x27/g; # <27> -> '
            s/\xC3\xA2\xC2\x80\xC2\x9C/\x22/g; # <22> -> "
            s/\xC3\xA2\xC2\x80\xC2\x9D/\x22/g; # <22> -> "
            s/\xC3\xA2\xC2\x80\xC2\x9E/\x22/g; # <22> -> "
            s/\xC3\xA2\xC2\x80\xC2\x9F/\x22/g; # <22> -> "
            s/\xC3\xA2\xC2\x80\xC2\xA6/\x2E\x2E\x2E/g; # <2E><2E><2E> -> ...
        }

        # Fix mis-encoded UTF-8 characters (6/8 bytes)
        # ============================================
        #
        # Frequently seen in the data (especially in film listings) are completely 
        # mis-encoded sequences of UTF-8 characters. Each sequence of bad bytes 
        # starts with a correctly encoded 2 byte UTF-8 character but it then
        # followed by 2 or 3 mis-encoded ASCII-range characters. When encoded into
        # UTF-8 these ASCII chars should take 1 byte each, but in this situation
        # use 2 bytes which then fail to decode or display correctly.
        #

        # This fixup looks for mis-encoded character sequences in the range 
        # [C3][A0-AF][C2][80-BF][C2][80-BF] (6 bytes)
        #
        t("    Looking for mis-encoded [C3][A0-AF] bytes...");
        if (m/[\xC3][\xA0-\xAF][\xC2][\x80-\xBF][\xC2][\x80-\xBF]/) {
            # first capture each sequence of mis-encoded UTF-8 bytes 
            # (6 in total)
            my @misencoded_bytes = 
                    ($page =~ m/[\xC3][\xA0-\xAF][\xC2][\x80-\xBF][\xC2][\x80-\xBF]/g);
            # get a unique list of the different mis-encoded byte sequences
            my %unique_misencoded_bytes;
            MIS_ENC_BYTE:
            foreach (@misencoded_bytes) {
                # the Unicode Replacement Character is handled below, so ignore here
                # (when double-encoded, it will match the regex above)
                if (m/\xC3\xAF\xC2\xBF\xC2\xBD/) {
                    t("      Ignoring double-encoded Unicode Replacement Character (handled separately)");
                    next MIS_ENC_BYTE;
                }
                $unique_misencoded_bytes{$_} = $_;
            }
            # Get a new list of the unique 6-byte sequences
            @misencoded_bytes = sort values %unique_misencoded_bytes;
            foreach (@misencoded_bytes) {
                t("      Found mis-encoded bytes: " . $_);
            }
            # process the list, reading 4 bytes in each iteration. Bytes
            # 1 and 2 are correct and left untouched, bytes 4 and 6 are 
            # extracted and corrected before being output
            foreach (@misencoded_bytes) {
                /([\xC3][\xA0-\xAF])[\xC2]([\x80-\xBF])[\xC2]([\x80-\xBF])/;
                my $goodbytes = $1; # correct, and used in replacement
                my $badbyte_1 = $2; # incorrect byte value
                my $badbyte_2 = $3; # incorrect byte value
                # the bad bytes require 0x40 (DEC 64) to be subtracted from the char 
                # value. 0xA0 are a special case and always converted to regular
                # space char (0x20)
                my $goodbyte_1;
                if ($badbyte_1 !~ m/\xA0/) {
                    $goodbyte_1 = chr( (ord $badbyte_1) - 64);
                }
                else {
                    $goodbyte_1 = "\x20";
                }
                my $goodbyte_2;
                if ($badbyte_2 !~ m/\xA0/) {
                    $goodbyte_2 = chr( (ord $badbyte_2) - 64);
                }
                else {
                    $goodbyte_2 = "\x20";
                }
                # finally globally replace each sequence of bad bytes with 
                # the correct replacement bytes
                $page =~ s/$_/$goodbytes$goodbyte_1$goodbyte_2/g;
                t("      Replaced mis-encoded [C3][A0-AF] bytes '" . $_
                                . "' with bytes '" 
                                . $goodbytes . $goodbyte_1 . $goodbyte_2 . "'");
            }
        }

        # This fixup looks for mis-encoded character sequences in the range 
        # [C3][B0-BF][C2][80-BF][C2][80-BF][C2][80-BF] (8 bytes)
        #
        t("    Looking for mis-encoded [C3][B0-BF] bytes...");
        if (m/[\xC3][\xB0-\xBF][\xC2][\x80-\xBF][\xC2][\x80-\xBF][\xC2][\x80-\xBF]/) {
            # first capture each sequence of mis-encoded UTF-8 bytes 
            # (8 in total)
            my @misencoded_bytes = 
                    ($page =~ m/[\xC3][\xB0-\xBF][\xC2][\x80-\xBF][\xC2][\x80-\xBF][\xC2][\x80-\xBF]/g);
            # get a unique list of the different mis-encoded byte sequences
            my %unique_misencoded_bytes;
            foreach(@misencoded_bytes) {
                $unique_misencoded_bytes{$_} = $_;
            }
            # Get a new list of the unique 8-byte sequences
            @misencoded_bytes = sort values %unique_misencoded_bytes;
            foreach (@misencoded_bytes) {
                t("      Found mis-encoded bytes: " . $_);
            }
            # process the list, reading 5 bytes in each iteration. Bytes
            # 1 and 2 are correct and left untouched, bytes 4, 6 and 8 are 
            # extracted and corrected before being output
            foreach (@misencoded_bytes) {
                /([\xC3][\xB0-\xBF])[\xC2]([\x80-\xBF])[\xC2]([\x80-\xBF])[\xC2]([\x80-\xBF])/;
                my $goodbytes = $1; # correct, and used in replacement
                my $badbyte_1 = $2; # incorrect byte value
                my $badbyte_2 = $3; # incorrect byte value
                my $badbyte_3 = $4; # incorrect byte value
                # the bad bytes require 0x40 (DEC 64) to be subtracted from the char 
                # value. 0xA0 are a special case and always converted to regular
                # space char (0x20)
                my $goodbyte_1;
                if ($badbyte_1 !~ m/\xA0/) {
                    $goodbyte_1 = chr( (ord $badbyte_1) - 64);
                }
                else {
                    $goodbyte_1 = "\x20";
                }
                my $goodbyte_2;
                if ($badbyte_2 !~ m/\xA0/) {
                    $goodbyte_2 = chr( (ord $badbyte_2) - 64);
                }
                else {
                    $goodbyte_2 = "\x20";
                }
                my $goodbyte_3;
                if ($badbyte_3 !~ m/\xA0/) {
                    $goodbyte_3 = chr( (ord $badbyte_3) - 64);
                }
                else {
                    $goodbyte_3 = "\x20";
                }
                # finally globally replace each sequence of bad bytes with 
                # the correct replacement bytes
                $page =~ s/$_/$goodbytes$goodbyte_1$goodbyte_2$goodbyte_3/g;
                t("      Replaced mis-encoded [C3][B0-BF] bytes '" . $_
                                . "' with bytes '" 
                                . $goodbytes . $goodbyte_1 . $goodbyte_2 . $goodbyte_3 . "'");
            }
        }

        # Manual Replacements
        # ===================
        #
        # Here we replace specific sequences of characters seen in the source 
        # data that cannot be handled automatically above. These include
        # occurences of the Unicode Replace Character (single and double
        # encoded) and other mis-encoded characters.
        # 
        # We use a supplemental file to store these fixups to allow updating
        # without needing to update the grabber itself.
        #
        # Unicode Replacement Character (U+FFFD)
        # ======================================
        #
        # The UTF-8 source data may also contain the bytes [EF][BF][BD] which 
        # are the UTF-8 encoding of the Unicode Replacement Character U+FFFD.
        # It is likely that these are introduced during preparation of the 
        # listings data by the Radio Times, as any characters that cannot be 
        # understood are replaced by this character.
        #
        t("    Looking for Unicode Replacement Character...");
        if (m/\xEF\xBF\xBD/) {
            if (%utf8_fixups && exists $utf8_fixups{'EFBFBD'}) {
                foreach my $bad_chars (keys %{$utf8_fixups{'EFBFBD'}}) {
                    my $replacement = $utf8_fixups{'EFBFBD'}{$bad_chars};
                    # Search for the regex string and replace with char string
                    if ($page =~ s/$bad_chars/$replacement/g) {
                        t("      Replaced Unicode Replacement Character with \"" 
                                . $replacement . "\"");
                    }
                }
            }
            if ($page =~ s/\xEF\xBF\xBD/\x3F/g) {
                t("    After fixups, data for '$rt_name' still contains Unicode "
                        . "Replacement character. Replaced with \"?\"\n");
                $hadEFBFBD{$rt_name} = $rt_listings_uri;
            }
        }

        # Double-encoded Unicode Replacement Character (6 bytes)
        # ======================================================
        #
        # The correct encoding for the Unicode Replacement Character is 
        # [EF][BF][BD], however it has been seen double-encoded in the listings 
        # data as [C3][AF][C2][BF][C2][BD]. As with the normal replacement 
        # character, there is no way to determine which replacement character 
        # to use in this case, so we substitute a '?' char if we cannot handle 
        # the specific occurence. This error needs to have been seen at least 
        # once in source data to be able to construct a suitable fixup.
        #
        t("    Looking for double-encoded Unicode Replacement Character...");
        if (m/\xC3\xAF\xC2\xBF\xC2\xBD/) {
            if (%utf8_fixups && exists $utf8_fixups{'C3AFC2BFC2BD'}) {
                foreach my $bad_chars (keys %{$utf8_fixups{'C3AFC2BFC2BD'}}) {
                    my $replacement = $utf8_fixups{'C3AFC2BFC2BD'}{$bad_chars};
                    # Search for the regex string and replace with char string
                    if ($page =~ s/$bad_chars/$replacement/g) {
                        t("      Replaced double-encoded Unicode Replacement Character with \"" 
                                . $replacement . "\"");
                    }
                }
            }
            if ($page =~ s/\xC3\xAF\xC2\xBF\xC2\xBD/\x3F/g) {
                t("    After fixups, data for '$rt_name' still contains "
                       . "double-encoded Unicode Replacement character. "
                       . "Replaced with \"?\"\n");
                $hadC3AFC2BFC2BD{$rt_name} = $rt_listings_uri;
            }
        }

        # Mis-encoded characters in range [C2][80-9F]
        # ===========================================
        #
        # Single characters that are seen in the source data as bytes in the
        # range [C2][80-9F] that UTF-8 decode as non-printing characters
        # instead of their intended character.
        #
        t("    Looking for mis-encoded characters in range [C2][80-9F]...");
        if (m/\xC2[\x80-\x9F]/) {
            if (%utf8_fixups && exists $utf8_fixups{'C2809F'}) {
                foreach my $bad_chars (keys %{$utf8_fixups{'C2809F'}}) {
                    my $replacement = $utf8_fixups{'C2809F'}{$bad_chars};
                    # Search for the regex string and replace with char string
                    if ($page =~ s/$bad_chars/$replacement/g) {
                        t("      Replaced mis-encoded character with \"" 
                                . $replacement . "\"");
                    }
                }
            }
            if ($page =~ m/\xC2[\x80-\x9F]/g) {
                t("    After fixups, data for '$rt_name' still contains " 
                        . "characters in range [C2][7F-9F]\n");
                $hasC27FBFchars{$rt_name} = $rt_listings_uri;
            }
        }

        # Replacements for specific strings seen in source data
        # =====================================================
        #
        t("    Looking for specific strings to replace...");
        # Replacement for Pound Sterling symbol seen as {pound}
        if (s/\x7B\x70\x6F\x75\x6E\x64\x7D/\xC2\xA3/g) {
            t("      Replaced \"{pound}\" with Pound Sterling symbol");
        }
        # Replace any non-breaking (NBSP) space chars with regular spaces
        if (s/\xC2\xA0/\x20/g) {
            t("      Replaced non-breaking spaces with regular spaces");
        }

        # Finally, remove any remaining non-printing control characters (keep 
        # \t \n and \r). Refer to above table for ISO-8859-1 and UTF-8 Unicode 
        # encodings for chars.
        #
        # First, chars in UTF-8 range [00-1F] (ISO-8859-1 range [00-1F])
        if (s/[\x00-\x08\x0B-\x0C\x0E-\x1F]//g) {
            t("    Removed non-printing characters (range [00]-[1F]) from "
                    . "'$rt_name' listings data");
        }
        # Next, chars in UTF-8 range [C2][7F-9F] (ISO-8859-1 range [7F-9F])
        if (s/[\xC2][\x7F-\x9F]//g) {
            t("    Removed non-printing characters (range [C2][7F-9F]) from "
                    . "'$rt_name' listings data");
        }
    }

    return $page;
}

# Remove non-title text found in programme title. This text is placed at the 
# start of the 'real' title, separated from it by a colon.
#
# Text to try and match against the programme title is stored in a hash of arrays
# to shortcut the list of possible matches to those beginning with the same
# first character as the title. It would seem to be quicker to use a regex
# to match some amount of text up to colon character in the programme title,
# and then use a hash lookup against the matched text. However, there is no
# limit to the number of colons in the text to remove, so this approach cannot
# be used. NOTE: the method is used for several of the title consistency
# routines in order to speed up processing.
# 
sub process_non_title_info($) {
    my $prog = shift;
    
    if ($have_title_data && %non_title_info && $prog->{'_title'} =~ m/:/) {
        my $idx_char = lc(substr $prog->{'_title'}, 0, 1);
        NON_TITLE_TEXT:
        foreach my $non_title_info (@{$non_title_info{$idx_char}}) {
            if ($prog->{'_title'} =~ s/^(\Q$non_title_info\E)\s*:\s*//i) {
                t("  Removed '" . $non_title_info 
                  . "' from title. New title '" . $prog->{'_title'} . "'");
                last NON_TITLE_TEXT;
            }
        }
    }
}

# Some given programme titles contain both the title and episode data, 
# separated by a colon ($title:$episode) or a hyphen ($title - $episode). 
# Here we reassign the episode to the $episode element, leaving only the 
# programme's title in the $title element
#
sub process_mixed_title_subtitle($) {
    my $prog = shift;
    
    if ($have_title_data && %mixed_title_subtitle && $prog->{'_title'} =~ m/:|-/) {
        my $idx_char = lc(substr $prog->{'_title'}, 0, 1);
        MIXED_TITLE_SUBTITLE:
        foreach my $mixed_title_subtitle (@{$mixed_title_subtitle{$idx_char}}) {
            if ($prog->{'_title'} =~ m/^(\Q$mixed_title_subtitle\E)\s*(?::|-)\s*(.*)/) {
                # store the captured text
                my $new_title = $1;
                my $new_episode = $2;
                $prog->{'_titles_processed'} = 1;
                if (!defined $prog->{'_episode'}) {
                    t("  Moved '" . $new_episode . "' to sub-title,"
                      . " new title is '" . $new_title . "'");
                    $prog->{'_title'} = $new_title;
                    $prog->{'_episode'} = $new_episode;
                    last MIXED_TITLE_SUBTITLE;
                }
                elsif ($prog->{'_episode'} eq $new_episode) {
                    t("  Sub-title '" . $new_episode . "' seen in "
                      . "title already exists, new title is '"
                      . $new_title . "'");
                    $prog->{'_title'} = $new_title;
                    last MIXED_TITLE_SUBTITLE;
                }
                # concat subtitle after any episode numbering (x/y)
                elsif ($prog->{'_episode'} =~ m/^\d+\s*(?:&\d+)?\s*\/\s*\d+\s*(?:\.|\/|,|;|:)?\s*-?\s*(?:series\s*(\w+)\s*(?:;|\.)?)?\s*$/) {
                    t("  Concatenating sub-title '" . $new_episode 
                      . "' seen in title after existing episode numbering '" 
                      . $prog->{'_episode'} . "'");
                    $prog->{'_title'} = $new_title;
                    $prog->{'_episode'} = $prog->{'_episode'} . ": " . $new_episode;
                    last MIXED_TITLE_SUBTITLE;
                }
                else {
                    t("  Concatenating sub-title '" . $new_episode 
                      . "' seen in title with existing episode info '" 
                      . $prog->{'_episode'} . "'");
                    $prog->{'_title'} = $new_title;
                    $prog->{'_episode'} = $new_episode . ": " . $prog->{'_episode'};
                    last MIXED_TITLE_SUBTITLE;
                }
            }
        }
    }
}

# Some given programme titles contain both the episode and title data, 
# separated by a colon ($episode:$title) or a hyphen ($episode - $title). 
# Here we reassign the episode to the $episode element, leaving only the 
# programme's title in the $title element
#
sub process_mixed_subtitle_title($) {
    my $prog = shift;
    
    if ($have_title_data && @mixed_subtitle_title && $prog->{'_title'} =~ m/:|-/) {
        MIXED_SUBTITLE_TITLE:
        foreach my $mixed_subtitle_title (@mixed_subtitle_title) {
            if ($prog->{'_title'} =~ m/^(.*)\s*(?::|-)\s*(\Q$mixed_subtitle_title\E)/) {
                # store the captured text
                my $new_title = $2;
                my $new_episode = $1;
                $prog->{'_titles_processed'} = 1;
                if (!defined $prog->{'_episode'}) {
                    t("  Moved '" . $new_episode . "' to sub-title, " 
                      . "new title is '" . $new_title . "'");
                    $prog->{'_title'} = $new_title;
                    $prog->{'_episode'} = $new_episode;
                    last MIXED_SUBTITLE_TITLE;
                }
                elsif ($prog->{'_episode'} eq $new_episode) {
                    t("  Identical sub-title '" . $new_episode 
                      . "' also seen in title, new title is '" 
                      . $new_title . "'");
                    $prog->{'_title'} = $new_title;
                    last MIXED_SUBTITLE_TITLE;
                }
                # concat subtitle after any episode numbering (x/y)
                elsif ($prog->{'_episode'} =~ m/^\d+\s*(?:&\d+)?\s*\/\s*\d+\s*(?:\.|\/|,|;|:)?\s*-?\s*(?:series\s*(\w+)\s*(?:;|\.)?)?\s*$/) {
                    t("  Concatenating sub-title '" . $new_episode 
                      . "' seen in title after existing episode numbering '" 
                      . $prog->{'_episode'} . "'");
                    $prog->{'_title'} = $new_title;
                    $prog->{'_episode'} = $prog->{'_episode'} . ": " . $new_episode;
                    last MIXED_SUBTITLE_TITLE;
                }
                else {
                    t("  Concatenating sub-title '" . $new_episode 
                      . "' seen in title with existing episode info '" 
                      . $prog->{'_episode'} . "'");
                    $prog->{'_title'} = $new_title;
                    $prog->{'_episode'} = $new_episode . ": " . $prog->{'_episode'};
                    last MIXED_SUBTITLE_TITLE;
                }
            }
        }
    }
}

# Listings for some programmes may have reversed title and sub-title information
# ($title = 'real' episode and $episode = 'real' title. In order to create more 
# consistent data, we check for flagged programme titles and reverse the given 
# title and sub-title when found.
#
sub process_reversed_title_subtitle($) {
    my $prog = shift;
    
    if ($have_title_data && %reversed_title_subtitle && defined $prog->{'_episode'}) {
        my $idx_char = lc(substr $prog->{'_title'}, 0, 1);
        REVERSED_TITLE_SUBTITLE:
        foreach my $reversed_title_subtitle (@{$reversed_title_subtitle{$idx_char}}) {
            if ($reversed_title_subtitle eq $prog->{'_episode'}) {
                t("  Seen reversed title-subtitle for '" 
                  . $prog->{'_title'} . ":" . $prog->{'_episode'} . "' - reversing" );
                $prog->{'_episode'} = $prog->{'_title'};
                $prog->{'_title'} = $reversed_title_subtitle;
                t("  New title is '" . $prog->{'_title'} . "' and new " 
                  . "sub-title is '" . $prog->{'_episode'} . "'");
                $prog->{'_titles_processed'} = 1;
                last REVERSED_TITLE_SUBTITLE;
            }
        }
    }
}

# Process inconsistent titles, replacing any flagged bad titles with good 
# titles. A straightforward hash lookup against the programme title is used.
#
sub process_replacement_titles($) {
    my $prog = shift;
    if ($have_title_data && %replacement_titles) {
        my $bad_title = $prog->{'_title'};
        if (defined $replacement_titles{$bad_title}) {
            $prog->{'_title'} = $replacement_titles{$bad_title};
            t("  Replaced title '" . $bad_title . "' with '"
              . $prog->{'_title'} . "' for consistency");
            $prog->{'_titles_processed'} = 1;
        }
    }
}

# Process inconsistent episodes. The %replacement_episodes data structure 
# is a hash of hashes.
#
sub process_replacement_episodes($) {
    my $prog = shift;
    
    if ($have_title_data && %replacement_episodes && defined $prog->{'_episode'}) {
        my $bad_episode_title = $prog->{'_title'};
        my $bad_episode = $prog->{'_episode'};
        # First, check whether we have matched the programme title
        if (defined $replacement_episodes{$bad_episode_title}) {
            # Now look for a specific episode match for the title
            if (defined $replacement_episodes{$bad_episode_title}->{$bad_episode}) {
                $prog->{'_episode'} = $replacement_episodes{$bad_episode_title}->{$bad_episode};
                t("  Replaced episode info '" . $bad_episode . "' for title '" 
                  . $bad_episode_title . "' with '" . $prog->{'_episode'} . "' for consistency");
                $prog->{'_subtitles_processed'} = 1;
            }
        }
    }
}

# Allow arbitrary replacement of one title/episode pair with another.
# Intended to be used where previous title/episode replacement routines
# do not allow the desired correction (i.e. for one-off changes).
#
# *** THIS MUST BE USED WITH CARE! ***
#
sub process_replacement_titles_episodes($) {
    my $prog = shift;

    if ($have_title_data && %replacement_title_eps) {
        my $tmp_ep;
        my $tmp_ep_num;
        my $tmp_ep_num_text = '';
        # Handle potential undef episode value, as the empty string
        # was used in place of an undef episode during concatenation
        # in the replacement hash
        if (not defined $prog->{'_episode'}) {
            $tmp_ep = '';
        }
        # Also handle an episode number that may be present in source
        # data but not in replacement text
        elsif ($prog->{'_episode'} =~ m/^\d+\s*(?:&\d+)?\s*\/\s*\d+\s*(?:\.|\/|,|;|:)?\s*-?\s*(?:series\s*(\w+)\s*(?:;|\.)?)?\s*$/) {
            $tmp_ep = '';
            $tmp_ep_num = $prog->{'_episode'};
            $tmp_ep_num_text = " (Preserving existing numbering)";
        }
        else {
            $tmp_ep = $prog->{'_episode'};
        }
        my $key = "" . $prog->{'_title'} . "|" . $tmp_ep;
        # Check whether we have matched the old programme title/episode combo
        if (defined $replacement_title_eps{$key}) {
            # Now replace the old title/ep values with new ones
            my ($old_title, $old_ep) = ($prog->{'_title'}, $tmp_ep);
            my ($new_title, $new_ep) = @{$replacement_title_eps{$key}};
            # update the title
            $prog->{'_title'} = $new_title;
            # if new episode value is empty string, replace with undef;
            # otherwise use new value
            if ($new_ep eq '') {
                if (defined $tmp_ep_num) {
                    $prog->{'_episode'} = $tmp_ep_num;
                }
                else {
                    $prog->{'_episode'} = undef;
                }
            }
            else {
                if (defined $tmp_ep_num) {
                    $prog->{'_episode'} = $tmp_ep_num . ": " . $new_ep;
                }
                else {
                    $prog->{'_episode'} = $new_ep;
                }
            }
            t("  Replaced old title/ep '" . $old_title . ": " . $old_ep
                . "' with new title/ep '" . $new_title . ": " . $new_ep
                . "' for consistency" . $tmp_ep_num_text);
            $prog->{'_titles_processed'} = 1;
        }
    }
}

# Replace an inconsistent or missing episode subtitle based a given description.
# The description should therefore be unique for each episode of the programme.
# The %replacement_ep_from_desc data structure is a hash of hashes.
#
sub process_replacement_ep_from_desc($) {
    my $prog = shift;
    
    if ($have_title_data && %replacement_ep_from_desc && defined $prog->{'_desc'}) {
        my $bad_episode_title = $prog->{'_title'};
        my $bad_ep_desc = $prog->{'_desc'};
        # First, check whether we have matched the programme title
        if (defined $replacement_ep_from_desc{$bad_episode_title}) {
            # Now look for a specific desc match for the title
            if (defined $replacement_ep_from_desc{$bad_episode_title}->{$bad_ep_desc}) {
                my $old_ep;
                (defined $prog->{'_episode'}) ? ($old_ep = $prog->{'_episode'}) : ($old_ep = '');
                $prog->{'_episode'} = $replacement_ep_from_desc{$bad_episode_title}->{$bad_ep_desc};
                t("  Updated episode from '" . $old_ep . "' to '" . $prog->{'_episode'} 
                    . "' for title '" . $bad_episode_title . "', based on desc '");
                $prog->{'_subtitles_processed'} = 1;
            }
        }
    }
}

# Process programmes that may not be categorised, or are categorised with 
# various categories in the source data. Untitled programmes ("To Be Announced")
# and films are ignored. Different programmes having identical titles should not
# be replaced using this routine as it may cause such programmes to be given 
# inaccurate genres.
#
sub process_replacement_genres($) {
    my $prog = shift;

    if ($have_title_data && %replacement_cats && $prog->{'_title'} !~ m/^(To Be Announced|TBA)/i && !$prog->{'_film'}) {
        if (defined $replacement_cats{$prog->{'_title'}}) {
            $prog->{'_genre'} = $replacement_cats{$prog->{'_title'}};
            t("  Assigned title '" . $prog->{'_title'} . "' to category '" . $prog->{'_genre'} . "'");
        }
    }
}

# Extract series/episode numbering found in $prog->{'_sub_title'}. Series 
# and episode numbering are parsed out of the text and eventually made 
# available in the <episode-num> element, being stored in intermediary
# variables during processing.
#
sub extract_numbering_from_sub_title($) {
    my $prog = shift;

    if (defined $prog->{'_sub_title'}
             && $prog->{'_sub_title'} =~ m/\d+|series|episode/i) {
        
        # ) check for "x/y" format covering following formats
        #
        # "1"
        # "1/6"
        # "1/6, series 1"
        # "1, series 1"
        # "1&2/6, series 1" - second episode unused
        # "1&2"
        # "1 and 2/6, series 1"
        # "1A/6, series 1" - episode "part A or B" currently unused
        #            
        if ($prog->{'_sub_title'} =~
            s{
              ^                       # start at beginning of sub_title details
              (\d+)                   # CAPTURE the first number(s) found ($episode_num)
              (?:A|B)?                # ignore optional episode "part"
              \s*                     # ignore any whitespace
              (?:(?:&|and)\s*\d+)?    # check for "&2", "and 2" details relating to following episode
              \s*                     # ignore any whitespace
              \/?                     # forward slash
              \s*                     # ignore any whitespace
              (\d+)?                  # CAPTURE the second number(s) found ($num_episodes)
              \s*                     # ignore any whitespace
              (?:,)?                  # check for punctuation characters
              \s*                     # ignore any whitespace
              (?:series\s*(\d+))?     # check for series number information ($series_num)
              \s*                     # ignore any whitespace
              $                       # stop at end of sub_title details
            }
            {}ix ) {
                $prog->{'_episode_num'} = $1 - 1;
                # Check that source episode number is not greater than number of episodes
                # Rather than discard the episode number, we discard the total instead which
                # is more likely to be incorrect based on observation.
                if (defined $2) {
                    if ($1 <= $2) {
                        $prog->{'_num_episodes'} = $2;
                        t("    Episode number/total found: episode $1 of $2 (subtitle, x/y)");
                    }
                    else {
                        t("    Bad episode total found: episode $1 of $2, discarding total (subtitle, x/y)");
                    }
                }
                else {
                    t("    Episode number found: episode $1 (subtitle, x/y)");
                }
                if (defined $3) {
                    t("    Series number found: series $3 (subtitle, x/y)");
                    $prog->{'_series_num'} = $3 - 1;
                }
                $prog->{'_processed'} = 1;
        }
        
        # ) check for special case of "x/y/z, series n" format where two parts of a series have
        # been edited into a single programme for transmission. Only the first episode number
        # given is output in episode-num
        #
        # "1/2/6, series 1"
        #            
        elsif ($prog->{'_sub_title'} =~
            s{
              ^                       # start at beginning of sub_title details
              (\d+)                   # CAPTURE the first number(s) found ($episode_num)
              \s*                     # ignore any whitespace
              \/                      # forward slash
              \s*                     # ignore any whitespace
              (\d+)                   # CAPTURE the second number(s) found (unused at present)
              \s*                     # ignore any whitespace
              \/                      # forward slash
              \s*                     # ignore any whitespace
              (\d+)                   # CAPTURE the third number(s) found ($num_episodes)
              \s*                     # ignore any whitespace
              ,                       # check for punctuation characters
              \s*                     # ignore any whitespace
              series\s*(\d+)          # check for series number information ($series_num)
              \s*                     # ignore any whitespace
              $                       # stop at end of sub_title details
            }
            {}ix ) {
                $prog->{'_episode_num'} = $1 - 1;
                # Check that source episode number is not greater than number of episodes
                # Rather than discard the episode number, we discard the total instead which
                # is more likely to be incorrect based on observation.
                if (defined $3) {
                    if ($1 <= $3) {
                        $prog->{'_num_episodes'} = $3;
                        t("    Episode number/total found: episode $1 of $3 (subtitle, x/y/z. series n)");
                    }
                    else {
                        t("    Bad episode total found: episode $1 of $3, discarding total (subtitle, x/y/z, series n)");
                    }
                }
                if (defined $4) {
                    t("    Series number found: series $4 (subtitle, x/y/z, series n)");
                    $prog->{'_series_num'} = $4 - 1;
                }
                $prog->{'_processed'} = 1;
        }
        
        # ) check for "Series x" format covering following formats:
        #
        # "Series 1"
        #
        elsif ($prog->{'_sub_title'} =~
            s{
              ^                      # start at beginning of sub_title details
              (?:Series)             # ignore "Series" text
              \s*                    # ignore any whitespace
              (\d+)                  # CAPTURE the first number(s) found ($series_num)
              $                      # finish at end of sub_title details
            }
            {}ix ) {
                if (defined $1) {
                    t("    Series number found: series $1 (subtitle, series x)");
                    $prog->{'_series_num'} = $1 - 1;
                }
                $prog->{'_processed'} = 1;
        }
        
        # ) check for "Series  " format where series number is missing. Here
        # we remove the text from the sub_title field
        #
        elsif ($prog->{'_sub_title'} =~
            s{
              ^                      # start at beginning of sub_title details
              Series                 # "Series" text
              \s*                    # ignore any whitespace
              $                      # finish at end of sub_title details
            }
            {}ix ) {
                t("    Missing series number found (subtitle, series)");
        }

        # ) check for "Episode x" format covering following formats:
        #
        # "Episode 1"
        # "Episode one"
        #
        elsif ($prog->{'_sub_title'} =~
            s{
              ^                      # start at beginning of sub_title details
              (?:Episode|Ep|Epiosde) # ignore "Episode" text
              \s*                    # ignore any whitespace
              (\w+|\d+)              # CAPTURE the first number(s) found ($episode_num)
              $                      # finish at end of sub_title details
            }
            {}ix ) {
                my $digits = word_to_digit($1);
                if (defined $digits and $digits > 0) {
                    t("    Episode number found: episode $digits (parsed as $1, subtitle, episode x)");
                    $prog->{'_episode_num'} = $digits - 1;
                }
                $prog->{'_processed'} = 1;
        }
    }
}

# Extract series/episode numbering found in $prog->{'_episode'}. Series 
# and episode numbering are parsed out of the text and eventually made 
# available in the <episode-num> element, being stored in intermediary
# variables during processing as when parsing the $prog->{'_sub_title'}.
# With most numbering being parsed out of $prog->{'_sub_title'} directly
# from the source data, this routine will extract most numbering inserted
# through the title/episode update/consistency routines.
#
sub extract_numbering_from_episode($) {
    my $prog = shift;

    if (defined $prog->{'_episode'}
             && $prog->{'_episode'} =~ m/\d+|episode/i) {

        # ) check for "x/y" format covering following formats
        #
        # "1/6 - ..."
        # "1/6, series 1 - ..."
        # "1, series 1 - ..."
        # "1/6, series one - "...
        # "1, series one - ..."
        #            
        if ($prog->{'_episode'} =~
            s{
              ^                       # start at beginning of episode details
              (\d+)                   # CAPTURE the first number(s) found ($episode_num)
              \s*                     # ignore any whitespace
              \/?                     # forward slash
              \s*                     # ignore any whitespace
              (\d+)?                  # CAPTURE the second number(s) found ($num_episodes)
              \s*                     # ignore any whitespace
              (?:,)?                  # check for punctuation characters
              \s*                     # ignore any whitespace
              (?:series\s*(\w+|\d+))? # check for series number information ($series_num)
              \s*                     # ignore any whitespace
              (?:-)                   # hyphen to separate numbering from episode text
              \s*                     # ignore any whitespace
            }
            {}ix ) {
                $prog->{'_episode_num'} = $1 - 1;
                # Check that source episode number is not greater than number of episodes
                # Rather than discard the episode number, we discard the total instead which
                # is more likely to be incorrect based on observation.
                if (defined $2) {
                    if ($1 <= $2) {
                        $prog->{'_num_episodes'} = $2;
                        t("    Episode number/total found: episode $1 of $2 (subtitle, x/y)");
                    }
                    else {
                        t("    Bad episode total found: episode $1 of $2, discarding total (subtitle, x/y)");
                    }
                }
                else {
                    t("    Episode number found: episode $1 (episode, x/y)");
                }
                if (defined $3) {
                    my $digits = word_to_digit($3);
                    if (defined $digits and $digits > 0) {
                        t("    Series number found: series $digits (parsed as $3, episode, x/y)");
                        $prog->{'_series_num'} = $digits - 1;
                    }
                }
                $prog->{'_processed'} = 1;
        }

        # ) check for "Episode x" format covering following formats:
        #
        # "Episode 1"
        # "Episode one"
        #
        elsif ($prog->{'_episode'} =~
            s{
              ^                      # start at beginning of episode details
              (?:Episode|Ep)         # ignore "Episode" text
              \s*                    # ignore any whitespace
              (\w+|\d+)              # CAPTURE the first number(s) found ($episode_num)
              $                      # finish at end of episode details
            }
            {}ix ) {
                my $digits = word_to_digit($1);
                if (defined $digits and $digits > 0) {
                    t("    Episode number found: episode $digits (parsed as $1, episode, episode x)");
                    $prog->{'_episode_num'} = $digits - 1;
                }
                $prog->{'_processed'} = 1;
        }
    }
}

# Part numbering is parsed but unused. However, when part numbering is
# seen in the text it is processed to make its format consistent.
#
# FIXME should we export part number in <episode-num> and remove
# it from the text?
#
sub extract_part_numbering_from_episode($) {
    my $prog = shift;
    
    if (defined $prog->{'_episode'} 
             && $prog->{'_episode'} =~ m/Part|Pt|\d\s*$/i) {
        
        # this regex looks for part numbering in parentheses
        #
        # "Dead Man's Eleven (Part 1)"
        # "Dead Man's Eleven - (Part 1)"
        # "Dead Man's Eleven - (Part 1/2)"
        # "Dead Man's Eleven (Pt 1)"
        # "Dead Man's Eleven - (Pt. 1)"
        # "Dead Man's Eleven - (Pt. 1/2)"
        if ($prog->{'_episode'} =~
            m{
              ^                      # start at beginning of episode details
              (.*\b[\x21-\x2F\x3F]?) # CAPTURE the episode details before part numbering
              \s*                    # ignore any whitespace
              (?:,|;|:|-)?           # check for optional punctuation characters
              \s*                    # ignore any whitespace
              (?:\()                 # opening paren
              (?:Part|Pt(?:\.)?)     # check for Part/Pt text
              \s*                    # ignore any whitespace
              (\d+)                  # CAPTURE part number
              \s*                    # ignore any whitespace
              (?:\/\s*\d+)?          # ignore any total part number
              (?:\))                 # closing paren
              $                      # finish at end of episode details
             }ix )
        {
                    t("    Part number found: part $2 (regex #1)");
                    $prog->{'_episode'} = $1 . " (Part " . $2 . ")";
                    $prog->{'_part_num'} = $2 - 1;
                    $prog->{'_processed'} = 1;
        }
        
        # this regex looks for part numbering with no other episode information
        #
        # "Part 1"
        # "Part 1/3"
        # "Pt 2"
        # "Pt 2/3"
        # "Pt. 3"
        elsif ($prog->{'_episode'} =~
            m{
              ^                    # start at beginning of episode details
              (?:Part|Pt(?:\.)?)   # check for Part/Pt text
              \s*                  # ignore any whitespace
              (\d+)                # CAPTURE part number
              \s*                  # ignore any whitespace
              (?:\/\s*\d+)?        # ignore any total part number
              $                    # finish at end of episode details
             }ix )
        {
                    t("    Part number found: part $1 (regex #2)");
                    $prog->{'_episode'} = "Part " . $1;
                    $prog->{'_part_num'} = $1 - 1;
                    $prog->{'_processed'} = 1;
        }
        
        # this regex looks for bare part numbering after a comma, semicolon, 
        # colon or hyphen
        #
        # "Dead Man's Eleven - Part 1"
        # "Dead Man's Eleven: Part 1"
        # "Dead Man's Eleven; Pt 1"
        # "Dead Man's Eleven, Pt. 1"
        elsif ($prog->{'_episode'} =~
            m{
              ^                      # start at beginning of episode details
              (.*\b[\x21-\x2F\x3F]?) # CAPTURE the episode details before part numbering
              \s*                    # ignore any whitespace
              (?:,|;|:|-)            # punctuation characters
              \s*                    # ignore any whitespace
              (?:Part|Pt(?:\.)?)     # check for Part/Pt text
              \s*                    # ignore any whitespace
              (\d+)                  # CAPTURE part number
              \s*                    # ignore any whitespace
              (?:\/\s*\d+)?          # ignore any total part number
              $                      # finish at end of episode details
             }ix )
        {
                    t("    Part number found: part $2 (regex #3)");
                    $prog->{'_episode'} = $1 . " (Part " . $2 . ")";
                    $prog->{'_part_num'} = $2 - 1;
                    $prog->{'_processed'} = 1;
        }
        
        # this regex looks for part numbering immediately following episode info
        #
        # "Dead Man's Eleven Part 1"
        # "Dead Man's Eleven Pt 1"
        # "Dead Man's Eleven Pt 1/2"
        # "Dead Man's Eleven Pt. 1"
        elsif ($prog->{'_episode'} =~
            m{
              ^                      # start at beginning of episode details
              (.*\b[\x21-\x2F\x3F]?) # CAPTURE the episode details before part numbering
              \s*                    # ignore any whitespace
              (?:Part|Pt(?:\.)?)     # check for Part/Pt text
              \s*                    # ignore any whitespace
              (\d+)                  # CAPTURE part number
              \s*                    # ignore any whitespace
              (?:\/\s*\d+)?          # ignore any total part number
              $                      # finish at end of episode details
             }ix )
        {
                    t("    Part number found: part $2 (regex #4)");
                    $prog->{'_episode'} = $1 . " (Part " . $2 . ")";
                    $prog->{'_part_num'} = $2 - 1;
                    $prog->{'_processed'} = 1;
        }
        
        # this regex looks for a digit (conservatively between 1 and 6) following
        # the episode details, a colon and at least one space
        #
        # "Dead Man's Eleven: 1"
        elsif ($prog->{'_episode'} =~
            m{
              ^                    # start at beginning of episode details
              (.*)                 # CAPTURE the episode details before part numbering
              \s*                  # ignore any whitespace
              (?::)                # colon
              \s+                  # ignore any whitespace - min 1 space
              (\d{1})              # CAPTURE single digit part number between 1 and 6
              $                    # finish at end of episode details
             }ix )
        {
                    if ($2 ge 1 && $2 le 6) {
                        t("    Part number found: part $2 (regex #5, range 1-6)");
                        $prog->{'_episode'} = $1 . " (Part " . $2 . ")";
                        $prog->{'_part_num'} = $2 - 1;
                        $prog->{'_processed'} = 1;
                    }
        }
        
        # this regex looks for worded part numbering with no other episode information
        #
        # "Part One"
        # "Pt Two"
        # "Pt. Three"
        elsif ($prog->{'_episode'} =~
            m{
              ^                    # start at beginning of episode details
              (?:Part|Pt(?:\.)?)   # check for Part/Pt text
              \s+                  # ignore any whitespace
              (\w+)                # CAPTURE part number wording
              $                    # finish at end of episode details
             }ix )
        {
                    my $part_digits = word_to_digit($1);
                    if (defined $part_digits and $part_digits > 0) {
                        t("    Part number found: part $part_digits (regex #6, parsed as $1)");
                        $prog->{'_episode'} = "Part " . $part_digits;
                        $prog->{'_part_num'} = $part_digits - 1;
                        $prog->{'_processed'} = 1;
                    }
        }
        
        # this regex looks for bare part numbering after a comma, semicolon, 
        # colon or hyphen, where the numbering is given in words
        #
        # "Dead Man's Eleven - Part One"
        # "Dead Man's Eleven: Part One"
        # "Dead Man's Eleven; Pt One"
        # "Dead Man's Eleven, Pt. One"
        elsif ($prog->{'_episode'} =~
            m{
              ^                      # start at beginning of episode details
              (.*\b[\x21-\x2F\x3F]?) # CAPTURE the episode details before part numbering
              \s*                    # ignore any whitespace
              (?:,|;|:|-)            # punctuation characters
              \s*                    # ignore any whitespace
              (?:Part|Pt(?:\.)?)     # check for Part/Pt text
              \s+                    # ignore any whitespace
              (\w+)                  # CAPTURE part number wording
              $                      # finish at end of episode details
             }ix )
        {
                    my $part_digits = word_to_digit($2);
                    if (defined $part_digits and $part_digits > 0) {
                        t("    Part number found: part $part_digits (regex #7, parsed as $2)");
                        $prog->{'_episode'} = $1 . " (Part " . $part_digits . ")";
                        $prog->{'_part_num'} = $part_digits - 1;
                        $prog->{'_processed'} = 1;
                    }
        }
        
        # this regex looks for worded part numbering immediately following episode info
        #
        # "Dead Man's Eleven Part One"
        # "Dead Man's Eleven Pt One"
        # "Dead Man's Eleven Pt. One"
        elsif ($prog->{'_episode'} =~
            m{
              ^                      # start at beginning of episode details
              (.*\b[\x21-\x2F\x3F]?) # CAPTURE the episode details before part numbering
              \s*                    # ignore any whitespace
              (?:Part|Pt(?:\.)?)     # check for Part/Pt text
              \s*                    # ignore any whitespace
              (\w+)                  # CAPTURE part number wording
              $                      # finish at end of episode details
             }ix )
        {
                    my $part_digits = word_to_digit($2);
                    if (defined $part_digits and $part_digits > 0) {
                        t("    Part number found: part $part_digits (regex #8, parsed as $2)");
                        $prog->{'_episode'} = $1 . " (Part " . $part_digits . ")";
                        $prog->{'_part_num'} = $part_digits - 1;
                        $prog->{'_processed'} = 1;
                    }
        }
        
        # this regex looks for worded part numbering in parentheses
        #
        # "Dead Man's Eleven (Part One)"
        # "Dead Man's Eleven - (Part One)"
        # "Dead Man's Eleven (Pt One)"
        # "Dead Man's Eleven - (Pt. One)"
        elsif ($prog->{'_episode'} =~
            m{
              ^                      # start at beginning of episode details
              (.*\b[\x21-\x2F\x3F]?) # CAPTURE the episode details before part numbering
              \s*                    # ignore any whitespace
              (?:,|;|:|-)?           # check for optional punctuation characters
              \s*                    # ignore any whitespace
              (?:\()                 # opening paren
              (?:Part|Pt(?:\.)?)     # check for Part/Pt text
              \s*                    # ignore any whitespace
              (\w+)                  # CAPTURE part number wording
              \s*                    # ignore any whitespace
              (?:\))                 # closing paren
              $                      # finish at end of episode details
             }ix )
        {
                    my $part_digits = word_to_digit($2);
                    if (defined $part_digits and $part_digits > 0) {
                        t("    Part number found: part $part_digits (regex #9, parsed as $2)");
                        $prog->{'_episode'} = $1 . " (Part " . $part_digits . ")";
                        $prog->{'_part_num'} = $part_digits - 1;
                        $prog->{'_processed'} = 1;
                    }
        }
        
        # check for potential part numbering left unprocessed
        #
        # we do this at the end of the if-else because the (Part x) text is
        # not (yet) removed from the episode details, only made consistent
        elsif ($opt->{debug} && $prog->{'_episode'} =~ m/\b(Part|Pt(\.)?)(\d+|\s+\w+)/i) {
            t("    Possible part numbering still seen: " . $prog->{'_episode'});
            $possible_part_nums{$prog->{'_episode'}} = $prog->{'_episode'};
        }
    }
}

# Check for potential season numbering in title
#
sub extract_numbering_from_title($) {
    my $prog = shift;

    if ($prog->{'_title'} =~ m/Series|Season/i) {
        
        # this regex looks for season numbering in title with
        # in parentheses
        #
        # "Wheeler Dealers - (Series 1)"
        # "Wheeler Dealers (Season 1)"
        if ($prog->{'_title'} =~
            m{
              ^                      # start at beginning of title details
              (.*\b[\x21-\x2F\x3F]?) # CAPTURE the title details before season numbering
              \s*                    # ignore any whitespace
              (?:,|;|:|-)?           # check for optional punctuation characters
              \s*                    # ignore any whitespace
              (?:\()                 # opening paren
              (?:Series|Season)      # check for Part/Pt text
              \s*                    # ignore any whitespace
              (\d+)                  # CAPTURE season number
              (?:,)?                 # ignore comma if present
              \s*                    # ignore any whitespace
              (\d+)?                 # CAPTURE episode number if present
              \s*                    # ignore any whitespace
              (?:\))                 # closing paren
              $                      # finish at end of title details
             }ix )
        {
                    $prog->{'_title'} = $1;
                    if (defined $prog->{'_series_num'} && $prog->{'_series_num'} != $2) {
                        t("    Season number (" . $prog->{'_series_num'} . ") already defined. "
                            . "Ignoring different season number (" . $2 . ") in title.");
                    }
                    else {
                        t("    Season number found: Season $2 (title regex)");
                        $prog->{'_series_num'} = $2 - 1;
                    }
                    $prog->{'_episode_num'} = $3 - 1 if $3;
                    $prog->{'_processed'} = 1;
        }
        
        # this regex looks for season numbering in title without
        # parentheses
        #
        # "Wheeler Dealers Series 1"
        # "Wheeler Dealers Series 1, 3"
        elsif ($prog->{'_title'} =~
            m{
              ^                      # start at beginning of title details
              (.*\b[\x21-\x2F\x3F]?) # CAPTURE the title details before season numbering
              \s*                    # ignore any whitespace
              (?:,|;|:|-)?           # check for optional punctuation characters
              \s*                    # ignore any whitespace
              (?:Season|Series)      # check for Part/Pt text
              \s*                    # ignore any whitespace
              (\d+)                  # CAPTURE season number
              (?:,)?                 # ignore comma if present
              \s*                    # ignore any whitespace
              (\d+)?                 # CAPTURE episode number if present
              \s*                    # ignore any whitespace
              $                      # finish at end of title details
             }ix )
        {
                    $prog->{'_title'} = $1;
                    if (defined $prog->{'_series_num'} && $prog->{'_series_num'} != $2) {
                        t("    Season number (" . $prog->{'_series_num'} . ") already defined. "
                            . "Ignoring different season number (" . $2 . ") in title.");
                    }
                    else {
                        t("    Season number found: Season $2 (title regex)");
                        $prog->{'_series_num'} = $2 - 1;
                    }
                    $prog->{'_episode_num'} = $3 - 1 if $3;
                    $prog->{'_processed'} = 1;
        }
    }
}

# Check for potential season/episode numbering in description
#
sub extract_numbering_from_desc($) {
    my $prog = shift;
    
    # Extract episode and series info from start of description
    # "1/6. ..."
    # "1/6; series one. ..."
    if ($prog->{'_desc'} =~
        s{
          ^                    # start at beginning of episode details
          (\d+)                # CAPTURE the first number(s) found ($episode_num)
          \s*                  # ignore any whitespace
          (?:&\d+)?            # check for "&2" details relating to following episode
          \s*                  # ignore any whitespace
          \/                   # forward slash
          \s*                  # ignore any whitespace
          (\d+)                # CAPTURE the second number(s) found ($num_episodes)
          \s*                  # ignore any whitespace
          (?:\.|;)?            # check for punctuation characters
          \s*                  # ignore any whitespace
          (?:series\s+(\w+)\s*\.)? 
                               # check for series number information ($series_num)
          \s*                  # ignore any whitespace
        }
        {}ix ) {
            $prog->{'_episode_num'} = $1 - 1;
            $prog->{'_num_episodes'} = $2;
            t("    Episode number found: episode $1 of $2 (desc)");
            if (defined $3) {
                my $series_digits = word_to_digit($3);
                if (defined $series_digits and $series_digits > 0) {
                    t("    Series number found: series $series_digits (parsed as $3 from desc)");
                    $prog->{'_series_num'} = $series_digits - 1;
                    $prog->{'_processed'} = 1;
                }
            }
    }
}

# Download listings data for configured channels that are available
sub write_listings_data {
    my ( $available_channels, $wanted_chs ) = @_;
    
    my $num_req_chans = scalar @{$wanted_chs};

    if (!$opt->{quiet}) {
        display_copyright();
    }

    if (!$opt->{quiet}) {
        say("Will download listings for $num_req_chans configured channels\n");
    }

    my $listings_bar;
    if (!$opt->{quiet} && !$opt->{debug}) {
        $listings_bar = new XMLTV::ProgressBar({name  => 'Retrieving listings',
                                                count => $num_req_chans,
                                                ETA   => 'linear', });
    }

    # Was title processing enabled in config file?
    if ($title_processing eq 'enabled') {
        t("Extra title processing is enabled\n");
        load_prog_titles_to_process();
    }
    else {
        t("Extra title processing is disabled\n");
    }

    # Load the UTF-8 fixups
    load_utf8_fixups();

    # Hash to hold warnings of incorrect number of fields. The warning
    # is given once per listings file if noticed more than once
    my %warned_wrong_num_fields;

    # Reset check for final progress bar update
    $need_final_update = 0;

    # Process all of the channels we want listings for
    WANTED_CH:
    foreach my $ch (@{$wanted_chs}) {
        my $c = ${$available_channels}{$ch};
        my $xmltv_id = $c->{id};
        my $rt_id = $c->{rt_id};
        my $rt_name = $c->{'display-name'}->[0]->[0];
        if (!defined $rt_id) {
            t("No Radio Times ID for channel '$rt_name', skipping");
            next WANTED_CH;
        }

        # Create the channel's URL based on ID
        my $rt_listings_uri = "$rt_root_dir/$rt_id.dat";
        # Include the URL in any warn/die messages
        local $SIG{__DIE__} = sub { die "$rt_listings_uri: $_[0]" };
        local $SIG{__WARN__} = sub { warn "$rt_listings_uri: $_[0]" };

        # Read in the listings data for the channel as UTF-8 octets. We will 
        # process the raw octets before decoding them to Perl's internal
        # format below.
        t("\nRetrieving listings for '$rt_name'");
        my $page = get_octets( $rt_listings_uri );

        if (!defined $page || $page eq '') {
            if (!$opt->{quiet}) {
                say("No listings data available for '$rt_name' ($xmltv_id), skipping");
            }
            $warnings++;
            next WANTED_CH;
        }
        if (!$opt->{quiet}) {
            say("Processing listings for '$rt_name' ($xmltv_id)");
        }
        t("");
        if (defined $channel_offset{$xmltv_id}) {
            t("  Detected a channel offset of '$channel_offset{$xmltv_id}'" 
              . " for '$rt_name'");
        }

        # If the Radio Times name for the channel contains timezone information,
        # use it, otherwise set the timezone to default of UTC
        my $base_tz;
        if ($rt_name =~ m/\((UTC|GMT|CET)\)\s*$/) {
            $base_tz = $1;
            t("  Base timezone for utc_offset set to '$base_tz' (via channel name)\n");
        }
        else {
            $base_tz = 'UTC';
            t("  Base timezone for utc_offset set to 'UTC' (default)\n");
        }

        # correct UTF-8 errors in source data
        $page = process_utf8_fixups($page, $rt_name, $rt_listings_uri);

        # Decode source UTF-8 octets, process for HTML entities, and encode 
        # into configured output encoding
        my $decoded_page;
        t("\nDecoding listings data from $source_encoding octets into Perl's internal format");
        $decoded_page = decode($source_encoding, $page);
        t("Processing for HTML entities seen in the listings data");
        decode_entities($decoded_page);
        t("Encoding listings data from Perl's internal format into $xml_encoding octets\n");
        $page = encode($xml_encoding, $decoded_page);

        ##### From this point, $data is in octets #####

        # Start to process individual programme entries found in listings
        t("  Started writing <programme> elements for channel '$rt_name'\n");

        # list to store programme elements for writing when each channel is parsed
        my @programmes = ();

        # Track number of programmes per channel
        my $num_titles = 0;

        # Keep a reference to the previous programme that was processed. Set it
        # to undef initially as programmes are processed in transmission order.
        # We update it whenever a programme is successfully processed below.
        my $prev_prog = undef;

        PROGRAMME:
        foreach my $prog (split /\n/, $page) {

            # ignore empty line and disclaimer at start of each file
            if ($prog =~ m/^\s*$/ || $prog =~ m/^In accessing this XML feed/) {
                next PROGRAMME;
            }

            my @fields = split /\~/, $prog;
            if (scalar @fields != 23) {
                if ($warned_wrong_num_fields{$ch}++) {
                    t("  Wrong number of fields in line:\n  $prog\n");
                }
                next PROGRAMME;
            }
            # Remove any spaces at start/end of fields
            foreach my $field (@fields) {
                $field =~ s/^\s+//;
                $field =~ s/\s+$//;
                undef $field if !length $field;
            }
            
            # Description of Radio Times data fields (23 in total):
            #
            #  1            title - the programme title (text)
            #  2        sub_title - used to carry series/episode numbering (text)
            #  3          episode - used to carry the name/subtitle of an episode of the 
            #                       programme (text)
            #  4             year - the year of production (text)
            #  5         director - the programme's director(s) (text)
            #  6             cast - the programme's cast (may include character details) (text)
            #  7         premiere - whether this is a film's first showing (boolean)
            #  8             film - whether the programme is a film (boolean)
            #  9           repeat - whether the programme has been shown before (boolean)
            # 10        subtitles - whether subtitles are available (boolean)
            # 11       widescreen - whether the broadcast is 16:9 widescreen (boolean)
            # 12       new_series - whether the programme is the first episode in a 
            #                       series new (boolean)
            # 13      deaf_signed - whether in-vision signing is available (boolean)
            # 14  blank_and_white - whether the broadcast is not in colour (boolean)
            # 15      star_rating - a star rating between 0 and 5 for films (text)
            # 16      certificate - the BBFC certificate for the programme (text)
            # 17            genre - the genre of the programme (text)
            # 18             desc - a description of the programme. Can be a specific review 
            #                       by a Radio Times reviewer (text)
            # 19           choice - whether the programme is recommended by the 
            #                       Radio Times (boolean)
            # 20             date - the transmission date (text)
            # 21            start - the transmission start time for the programme (text)
            # 22             stop - the transmissions stop time for the programme (text)
            # 23    duration_mins - the duration of the programme in minutes (text)
            #

            # Hash to store all programme-specific variables. Initially store
            # the channel ID.
            my %prog = (channel => $ch);

            # Store fields against temp keys. We will assign values to the XMLTV
            # specific keys during processing. Key names starting with "_" are
            # ignored by XMLTV::Writer.
            ( $prog{'_title'},       $prog{'_sub_title'},       $prog{'_episode'},
              $prog{'_year'},        $prog{'_director'},        $prog{'_cast'},
              $prog{'_premiere'},    $prog{'_film'},            $prog{'_repeat'},
              $prog{'_subtitles'},   $prog{'_widescreen'},      $prog{'_new_series'},
              $prog{'_deaf_signed'}, $prog{'_black_and_white'}, $prog{'_star_rating'},
              $prog{'_certificate'}, $prog{'_genre'},           $prog{'_desc'},
              $prog{'_choice'},      $prog{'_date'},            $prog{'_start'},
              $prog{'_stop'},        $prog{'_duration_mins'},
            ) = @fields;

#FIXME need to update validity checks for hash keys/values

            if (!defined $prog{'_title'}) {
                t("  Missing title in entry '$prog', skipping");
                next PROGRAMME;
            }
            t("  Processing programme title '" . $prog{'_title'} . "'");

            if (!defined $prog{'_date'}) {
                t("  Missing date in entry '$prog', skipping");
                next PROGRAMME;
            }

            # Check the true/false fields for valid data
            foreach my $field ($prog{'_premiere'},    $prog{'_film'},
                               $prog{'_repeat'},      $prog{'_subtitles'},
                               $prog{'_widescreen'},  $prog{'_new_series'},
                               $prog{'_deaf_signed'}, $prog{'_black_and_white'},
                               $prog{'_choice'}, ) {
                if (!defined $field) {
                    t("  A required true/false value was undefined for '" . $prog{'_title'} . "', skipping");
                    next PROGRAMME;
                }
                elsif ($field eq 'true') {
                    $field = 1;
                }
                elsif ($field eq 'false') {
                    $field = 0;
                }
                else {
                    t("  A bad true/false value '$field' was seen for '" . $prog{'_title'} . "', skipping");
                    next PROGRAMME;
                }
            }

            # Check for any DST-related information the RT may include in the title
            # for a programme. If we find any explicit DST information we store it 
            # for use later and remove it from the title.
            if ($prog{'_title'} =~ s/^\((GMT|UTC|BST|UTC\+1)\)\s*//) {
                $prog{'_explicit_tz'} = $1;
            }

            # Remove any last-minute scheduling info inserted into regular 
            # description that will affect later regexes.
            if (defined $prog{'_desc'}) {
                $prog{'_desc'} =~ s/\s+/ /g;
                if ($prog{'_desc'} =~ s/\s?(?:UPDATED|UPADTED)\s+LISTING\s?(?:-|:|@)\s?(.*)$//i) {
                    $prog{'_updated_listing_info'} = $1;
                    t("  Removed updated listing information:\n"
                      . "    '" . $1 . "'");
                }
            }
            
            # Episode/series numbering is provided in the sub_title field in
            # the source data, which is parsed out if seen.
            # Retain episode $sub_title data if $episode contains only episode
            # numbering.
            if (defined $prog{'_sub_title'}) {
                extract_numbering_from_sub_title(\%prog);
                # sub_title should be empty after successful parsing
                if ($prog{'_sub_title'} eq '') {
                    $prog{'_sub_title'} = undef;
                }
                # text left in sub_title is most likely _episode info, so
                # move it to _episode
                else {
                    if (!defined $prog{'_episode'}) {
                        t("  Using sub-title '" . $prog{'_sub_title'} . "' as episode not given");
                        $prog{'_episode'} = $prog{'_sub_title'};
                        $prog{'_sub_title'} = undef;
                    }
                    else {
                        t("  Merging episode '" . $prog{'_episode'} . "' with sub_title '" . $prog{'_sub_title'} . "'");
                        $prog{'_episode'} = $prog{'_episode'} . ": " . $prog{'_sub_title'};
                        $prog{'_sub_title'} = undef;
                    }
                }

                # at this point, $prog{'_sub_title'} should be undefined with all 
                # text either parsed out or moved into $prog{'_episode'}
            }

            # Remove production year information from $episode for films
            if (defined $prog{'_episode'}) {
                if ($prog{'_film'} && $prog{'_episode'} =~ s/Prod Year (\d{4})//i) {
                    t("  Removed production year info from episode details");
                    $prog{'_episode'} = undef;
                    if (!defined $prog{'_year'}) {
                        $prog{'_year'} = $1;
                    }
                }
            }

            # Remove vertical bar/colon from end of title string. The v-bar
            # can interfere with title fixups (fixup file uses the v-bar as
            # as delimiter
            if ($prog{'_title'} =~ s/([|:])$//) {
                t("  Removed '" . $1 . "' from end of title");
            }

            # Title and episode processing. We process titles if the user has 
            # not explicitly disabled title processing during configuration
            # and we have supplement data to process programmes against.
            #
            # We run a series of search and replace routines to clean up 
            # the title/episode information. Leaving non-title information in 
            # the title, or having inconsistent title/episode formatting  will 
            # result in PVR applications being unable to consistently match 
            # programme titles and episode information.
            #
            # Some listings appear to be added without being processed upstream 
            # to provide subtitle (episode) information. The titles of these
            # programmes are uppercase and may contain season numbering. Here
            # we monitor these before further title processing is carried out.
            if ($prog{'_title'} eq uc($prog{'_title'})) {
                $uc_prog_titles{$prog{'_title'}} = $prog{'_title'};
            }

            # Debug output before any title processing takes place
            if (defined $prog{'_episode'}) {
                t("  Pre-processing title/episode details '" 
                        . $prog{'_title'} . " / " . $prog{'_episode'} . "'");
            }
            
            # 2010-04-16
            # Listings for a few programmes are being provided that contain the
            # programme title duplicated in the episode field, followed by
            # the 'real' episode in parentheses or following a colon.
            if (defined $prog{'_episode'}) {
                my $tmp_title = $prog{'_title'};
                my $tmp_episode = $prog{'_episode'};
                my $key = $tmp_title . "|" . $tmp_episode;

                # Remove the duplicated title from episode field if we find it
                if ($tmp_episode =~ m/^\Q$tmp_title\E\s+\((.+)\)$/ || $tmp_episode =~ m/^\Q$tmp_title\E:\s+(.+)$/) {
                    $prog{'_episode'} = $1;
                    t("    Removing title text from beginning of episode field");
                    if ($opt->{debug}) {
                        $title_in_subtitle_fixed{$key} = { 'title'    => $tmp_title,
                                                           'episode'  => $tmp_episode,
                                                         };
                    }
                }
            }
            
            # First, remove non-title text found in programme title.
            #
            # Applied to all titles in the source data (type = 1)
            process_non_title_info(\%prog);
            
            # Track when titles/subtitles have been updated to allow 
            # short-circuiting of title processing
            $prog{'_titles_processed'} = 0;
            $prog{'_subtitles_processed'} = 0;
           
            # Next, process titles to make them consistent
            #
            # One-off title and episode replacements (type = 8)
            if (!$prog{'_titles_processed'}) {
                process_replacement_titles_episodes(\%prog);
            }
            # Look for $title:$episode in source title (type = 2)
            if (!$prog{'_titles_processed'}) {
                process_mixed_title_subtitle(\%prog);
            }
            # Look for $episode:$title in source title (type = 3)
            if (!$prog{'_titles_processed'}) {
                process_mixed_subtitle_title(\%prog);
            }
            # Look for reversed title and subtitle information (type = 4)
            if (!$prog{'_titles_processed'}) {
                process_reversed_title_subtitle(\%prog);
            }
            # Look for inconsistent programme titles (type = 5)
            #
            # This fixup is applied to all titles (processed or not) to handle
            # titles split out in fixups of types 2-4 above
            process_replacement_titles(\%prog);

            # Next, process subtitles to make them consistent
            #
            # Look for inconsistent programme subtitles (type = 7)
            if (!$prog{'_subtitles_processed'}) {
                process_replacement_episodes(\%prog);
            }
            # Replace subtitle based on description (type = 9)
            if (!$prog{'_subtitles_processed'}) {
                process_replacement_ep_from_desc(\%prog);
            }

            # Last, provide/update a programme's category based on 'corrected' title
            #
            # Applied to all 'corrected' titles (type = 6)
            process_replacement_genres(\%prog);

            # Look for series/episode/part numbering in programme title/subtitle
            extract_numbering_from_episode(\%prog);
            extract_numbering_from_title(\%prog);
            extract_part_numbering_from_episode(\%prog);

            # after processing see if $title contains "season" text that should
            # probably be removed
            if ($opt->{debug} && $prog{'_title'} =~ m/season/i) {
                t("    Title text contains \"Season\":  " . $prog{'_title'});
                $title_text_to_remove{$prog{'_title'}} = $prog{'_title'};
            }

            # after processing see if $episode contains "series" text
            if ($opt->{debug} && defined $prog{'_episode'} && $prog{'_episode'} =~ m/series/i) {
                t("    Possible series numbering still seen:  " . $prog{'_episode'});
                $possible_series_nums{$prog{'_episode'}} = $prog{'_episode'};
            }

            # check for potential episode numbering left unprocessed
            if ($opt->{debug} && defined $prog{'_episode'}
                    && ($prog{'_episode'} =~ m/^\d{1,2}\D/ || $prog{'_episode'} =~ m/\D\d{1,2}$/) 
                    && $prog{'_episode'} !~ m/(Part|Pt(\.)?)(\d+|\s+\w+)/) {
                t("    Possible episode numbering still seen: " . $prog{'_episode'});
                $possible_episode_nums{$prog{'_episode'}} = $prog{'_episode'};
            }

            # Set $episode to undefined if empty/whitespace
            if (defined $prog{'_episode'} && $prog{'_episode'} =~ m/^\s*$/) {
                $prog{'_episode'} = undef;
            }

            # output updated title/episode information after processing
            if ($prog{'_processed'}) {
                my $ep_out = "<UNDEF>";
                if (defined $prog{'_episode'}) {
                    $prog{'_episode'} =~ s/\s+/ /g;  # tidy whitespace
                    $ep_out = $prog{'_episode'};
                }
                t("  Post-processing title/episode details '" 
                        . $prog{'_title'} . " / " . $ep_out . "'");
            }

            # Monitor for case/punctuation-insensitive title variations
            if ($opt->{debug}) {
                my $title_nopunc = lc $prog{'_title'};
                $title_nopunc =~ s/^the\s+//;
                $title_nopunc =~ s/(\s+and\s+|\s+&\s+)/ /g;
                $title_nopunc =~ s/\s+No 1s$//g;
                $title_nopunc =~ s/\s+No 1's$//g;
                $title_nopunc =~ s/\s+Number Ones$//g;
                $title_nopunc =~ s/' //g;
                $title_nopunc =~ s/'s/s/g;
                $title_nopunc =~ s/\W//g;
                # count number of each variant by genre and channel name
                my $tmp_genre;
                $tmp_genre = $prog{'_genre'}; $tmp_genre = "No Genre" if not defined $tmp_genre;
                $case_insens_titles{$title_nopunc}{$prog{'_title'}}{$tmp_genre}{$rt_name}++;
                $case_insens_titles{$title_nopunc}{$prog{'_title'}}{'count'}++;
            }
            
            # Check for title text still present in episode details
            if ($opt->{debug} && defined $prog{'_episode'} && $prog{'_episode'} =~ m/^\Q$prog{'_title'}\E.*$/) {
                my $key = $prog{'_title'} . "|" . $prog{'_episode'};
                $title_in_subtitle_notfixed{$key} = { 'title'    => $prog{'_title'},
                                                      'episode'  => $prog{'_episode'},
                                                    };
            }

            # For non-films which may contain subtitle/episode information in
            # the title, add the title to the list of programme titles for
            # later debugging
            if (!$prog{'_film'}) {
                $prog_titles{$prog{'_title'}} = $prog{'_title'};
            }

            # Occasionally film listings contain the title duplicated in the 
            # $episode field, so we remove it here
            if ($prog{'_film'} && defined $prog{'_episode'} 
                               && (uc $prog{'_title'} eq uc $prog{'_episode'})) {
                $prog{'_episode'} = undef;
            }

            # Write out the programme's post-processed title 
            $prog{title} = [ [ $prog{'_title'} ] ];
            
            # Write out the programme's episode title ($episode) if present
            if (defined $prog{'_episode'} && $prog{'_episode'} !~ m/^\s*$/) {
                $prog{'sub-title'} = [ [ $prog{'_episode'} ] ];
            }

            # Check episode description if present
            if (defined $prog{'_desc'}) {
                $prog{'_desc'} =~ s/\s+/ /g;

                # Check if desc starts with "New series..."
                if ($prog{'_desc'} =~ m/^New series/i) {
                    # set the premiere flag
                    $prog{premiere} = [ '' ];
                    
                    # Now check if desc starts with "New series [(x/y)]. "
                    # Remove text and preserve numbering for processing below
                    if ($prog{'_desc'} =~ m/^New series(\s*\d+\/\d+\s*)?\.\s*/i) {
                        $prog{'_desc'} =~ s/^New series\s*//i;
                        $prog{'_desc'} =~ s/^\s*\.\s*//i;
                    }
                }
                
                # Look for series/episode numbering in programme description
                extract_numbering_from_desc(\%prog);

                # Finally write out the non-empty description
                if ($prog{'_desc'} !~ m/^\s*$/) {
                    $prog{desc} = [ [ $prog{'_desc'}, 'en' ] ];
                }
            }

            # Write out episode numbering information extracted from $episode 
            # and $desc fields. At this stage, numbering has been corrected
            # for zero-indexed counts but needs to be validated before output.
            #
            # series number is zero-indexed
            if (!defined $prog{'_series_num'} || $prog{'_series_num'} < 0) {
                $prog{'_series_num'} = '';
            }
            # episode number is zero-indexed
            if (!defined $prog{'_episode_num'} || $prog{'_episode_num'} < 0) {
                $prog{'_episode_num'} = '';
            }
            # episode total is one-indexed and should always be greater than the
            # max episode number (which is zero-indexed)
            if (defined $prog{'_num_episodes'} 
                        && $prog{'_num_episodes'} > 0 
                        && $prog{'_num_episodes'} > $prog{'_episode_num'} ) {
                $prog{'_num_episodes'} = "/" . $prog{'_num_episodes'};
            }
            else {
                $prog{'_num_episodes'} = '';
            }
            # Write out the details if we have the series and/or episode numbers
            if ($prog{'_series_num'} ne '' || $prog{'_episode_num'} ne '') {
                $prog{'episode-num'} = [ [ "" . $prog{'_series_num'} . "."
                                              . $prog{'_episode_num'} 
                                              . $prog{'_num_episodes'} . "."
                                              . "", "xmltv_ns" ] ];
            }

            if (defined $prog{'_director'} && $prog{'_director'} !~ m/^\s*$/) {
                $prog{credits}{director} = [ $prog{'_director'} ];
            }

            # The Radio Times data includes cast information in 2 formats:
            #
            # a) pairings of 'character*actor' with subsequent pairings 
            #    separated by '|' - '*' does not appear in any text
            # b) a comma separated list of actors with no character details
            #
            # If 'Director' appears in the character entry, this is to be used 
            # as a regular cast member, not the programme's director
            if (defined $prog{'_cast'}) {
                my @cast;
                $prog{'_cast'} =~ s/\s+/ /g; # remove extra spaces
                $prog{'_cast'} =~ s/\|\|/\|/g;  # remove empty pipe-separated fields
                $prog{'_cast'} =~ s/,,/,/g;  # remove empty comma-separated fields
                
                # First we check for 'character*actor' entries
                if ($prog{'_cast'} =~ tr/*//) {
                    # Multiple 'character*actor'entries
                    if ($prog{'_cast'} =~ tr/|//) {
                        @cast = split /\|/, $prog{'_cast'};
                    }
                    # Single 'character*actor' entry
                    else {
                        push @cast, $prog{'_cast'};
                    }
                    
                    # Now process the list of cast entries
                    foreach my $cast (@cast) {
                        # Replace any actor given as Himself/Herself with the
                        # character name given
                        if ($cast =~ m/^(.*)[*](Himself|Herself)$/) {
                            $cast = "$1*$1";
                        }
                        # Remove the 'character*' portion of the entry
                        if ($cast !~ s/^.*[*]//) {
                            t("  Bad cast entry for '" . $prog{'_title'} . "': " . $cast);
                        }
                    }
                }
                # Next we check for CSV-style actor entries
                elsif ($prog{'_cast'} =~ tr/,//) {
                    @cast = split /,/, $prog{'_cast'};
                }
                # Finally we assume a single actor's name that contains neither 
                # '*' nor ','
                else {
                    push @cast, $prog{'_cast'};
                }
                # Trim whitespace from beginning/end of actor names
                foreach my $cast (@cast) {
                    $cast =~ s/^\s+//;
                    $cast =~ s/\s+$//;
                }
                $prog{credits}{actor} = \@cast;
            }

            if (defined $prog{'_year'}) {
                $prog{date} = $prog{'_year'};
            }
            if ($prog{'_film'}) {
                push @{$prog{category}}, [ 'Film', 'en' ];
            }
            if (defined $prog{'_genre'} && !$prog{'_film'}) {
                push @{$prog{category}}, [ $prog{'_genre'}, 'en' ];
                # check for unhelpful/duplicated categories
                if ($opt->{debug}) {
                    $categories{$prog{'_genre'}} = $prog{'_genre'};
                    if ($prog{'_genre'} =~ m/^(No Genre)$/ 
                            && $prog{'_title'} !~ m/^(To Be Announced|TBA|Close)$/) {
                        $uncategorised_progs{$prog{'_title'}} = $prog{'_title'};
                    }
                    $cats_per_prog{$prog{'_title'}}{$prog{'_genre'}}++;
                }
            }
            # check for missing categories
            if ($opt->{debug} && !defined $prog{'_genre'} && $prog{'_title'} !~ m/^(To Be Announced|TBA|Close)$/) {
                $uncategorised_progs{$prog{'_title'}} = $prog{'_title'};
            }
            if ($prog{'_widescreen'}) {
                $prog{video}{aspect} = '16:9';
            }
            if ($prog{'_black_and_white'}) {
                $prog{video}{colour} = 0;
            }
            # override aspect if channel is flagged as HDTV
            if (defined $video_quality{$xmltv_id}) {
                if ($video_quality{$xmltv_id} =~ m/HDTV/) {
                    $prog{video}{quality} = 'HDTV';
                    $prog{video}{aspect} = '16:9';
                }
                elsif ($video_quality{$xmltv_id} =~ m/SDTV/) {
                    $prog{video}{quality} = 'SDTV';
                }
            }
            if ($prog{'_repeat'}) {
                $prog{'previously-shown'} = {};
            }
            if ($prog{'_premiere'}) {
                $prog{premiere} = [ '' ];
            }
            if ($prog{'_new_series'}) {
                $prog{new} = 1;
            }
            if ($prog{'_subtitles'}) {
                push @{$prog{subtitles}}, {type=>'teletext'};
            }
            if ($prog{'_deaf_signed'}) {
                push @{$prog{subtitles}}, {type=>'deaf-signed'};
            }
            if (defined $prog{'_certificate'} && $prog{'_certificate'} !~ m/^\s*$/) {
                $prog{rating} = [ [ $prog{'_certificate'}, 'BBFC' ] ];
            }
            if (defined $prog{'_star_rating'}  && $prog{'_star_rating'} !~ m/^\s*$/ && $prog{'_film'}) {
                push @{$prog{'star-rating'}}, [ "" . $prog{'_star_rating'} . "/5", 'Radio Times Film Rating' ];
            }
            if ($prog{'_choice'}) {
                push @{$prog{'star-rating'}}, [ '1/1', 'Radio Times Recommendation' ];
            }

            # Broadcast date, start/stop times, and timezone adjustments.
            #
            # For each programme entry, the Radio Times data includes the 
            # date at start of broadcast, the start time and the stop time.
            #
            # The Radio Times sometimes explicitly flags a programme's start/stop
            # times as being in a specific timezone (GMT or BST). We parse this
            # information out when processing the programme's title and apply it
            # to the start time of any such programmes ($explicit_tz). Flagged
            # programmes are usually seen in the data in March and October, when
            # British Summer Times begins and ends.
            #
            # For the majority of programmes where the timezone is not flagged 
            # explicitly, we determine the UTC offset of the programme's start time
            # via XMLTV::DST::utc_offset().
            #
            # We then calculate the programme's stop time using the 
            # UTC-offset-corrected start time and its stated length. This allows 
            # us to handle occasions when programmes having mixed GMT/BST 
            # timings are not flagged.
            #
            # On the day of the  GMT->BST transition, any unflagged programme 
            # starting before 0100 +0000 generally has both start/stop times 
            # given in GMT (+0000) in the RT data (although this is not always 
            # the case).
            #
            # The Summer Time Order of 2002 defines British Summer Time as 
            # "...the period beginning at one o'clock, Greenwich mean time, in 
            # the morning of the last Sunday in March and ending at one o'clock, 
            # Greenwich mean time, in the morning of the last Sunday in October."
            #
            # utc_offset() will only provide the correct stop time if we ensure 
            # dates have the correct UTC offset applied. DateCalc will always use
            # TZ=+0000 when processing/displaying dates ( Date_Init('TZ=+0000') ) 
            # so we must also allow for this when adjusting dates and using this 
            # output with utc_offset (we employ UnixDate() to help).
            #
            my ($yyyy, $mm, $dd);
            my ($implicit_tz, $tz);
            
            # Check for valid date format
            if ($prog{'_date'} !~ m{(\d\d)/(\d\d)/(\d{4})$}) {
                t("  A bad date '" . $prog{'_date'} . "' was seen for '" . $prog{'_title'} . "', skipping");
                next PROGRAMME;
            }
            ($dd, $mm, $yyyy) = ($1, $2, $3);
            t("  Start time given as '" . $yyyy . "/" . $mm . "/" . $dd . " " . $prog{'_start'} 
                    . "', duration " . $prog{'_duration_mins'} . " mins");

            # Use BST information found in title, otherwise calculate it ourselves
            if (defined $prog{'_explicit_tz'}) {
                t("  Explicit timezone '" . $prog{'_explicit_tz'} . "' detected in title");
                $tz = $prog{'_explicit_tz'};
            }
            else {
                $prog{start} = utc_offset( "" . $yyyy . $mm . $dd . $prog{'_start'} . "", $base_tz );
                if ($prog{start} !~ m/([+-]\d{4})$/) {
                    t("  Bad UTC offset '" . $1 . "' detected for '" . $prog{'_title'} . "', skipping");
                    next PROGRAMME;
                }
                $implicit_tz = $1;
                t("  Implicit timezone calculated to be '" . $implicit_tz . "'");
                $tz = $implicit_tz;
            }

            # Calculate start time with correct UTC offset
            $prog{start} = utc_offset("" . $yyyy . $mm . $dd . $prog{'_start'} . " " . $tz . "", $base_tz);

#FIXME - need to work around stupid programme lengths e.g. 1500 minutes
            # Calculate stop time by adding length of programme to start time
            my $datecalc_stop
                = DateCalc(ParseDate("" . $yyyy . $mm . $dd . $prog{'_start'} . " " . $tz . ""),
                           ParseDateDelta($prog{'_duration_mins'} . "minutes")
                           );
            #t("  Stop time calculated as '$datecalc_stop' via DateCalc()");
            my $unixdate_stop
                = UnixDate($datecalc_stop, "%Y%m%d%H%M %z");
            #t("  Stop time formatted as  '$unixdate_stop' via UnixDate()");

            $prog{stop} = utc_offset($unixdate_stop, $base_tz);

            t("  " . $prog{start} . " - Start time");
            t("  " . $prog{stop} . " - Stop time");

            # Now we have determined the correct start/stop times for the programme
            # add any required timeshift defined in channel_ids and preserve the
            # correct timezone information
            #
            if (defined $channel_offset{$xmltv_id}) {
                my $timeshift = $channel_offset{$xmltv_id};
                my $start_ts = DateCalc( ParseDateString( $prog{start} ), $timeshift );
                my $stop_ts = DateCalc( ParseDateString( $prog{stop} ), $timeshift );
                $prog{start} = utc_offset( UnixDate( $start_ts, "%Y%m%d%H%M %z" ), $base_tz );
                $prog{stop} = utc_offset( UnixDate( $stop_ts, "%Y%m%d%H%M %z" ), $base_tz );
                t("  " . $prog{start} . " - Start time after applying '" . $timeshift . "' timeshift");
                t("  " . $prog{stop} . " - Stop time after applying '" . $timeshift . "' timeshift");
            }

            # Now check to see whether the channel broadcasting the programme is a
            # part-time channel, and if so, see whether this programme's timeslot 
            # times fall within the broadcast window. If a channel broadcasts
            # through the night, we also need to test against the next day's
            # broadcast times.
            #
            if (defined $broadcast_hours{$xmltv_id}) {
                $broadcast_hours{$xmltv_id} =~ m/(\d{4})-(\d{4})/;
                my ($chan_start, $chan_stop) = ($1, $2);
                $chan_start = utc_offset( "" . $yyyy . $mm . $dd . $chan_start . "", $base_tz );
                $chan_stop  = utc_offset( "" . $yyyy . $mm . $dd . $chan_stop. "", $base_tz );
                # Correct the stop time if it is earlier than the start time
                my $chan_stop_next_day = 0;
                if (Date_Cmp( $chan_start, $chan_stop ) > 0) {
                    $chan_stop_next_day = 1;
                    $chan_stop =  utc_offset( 
                                      UnixDate( 
                                          DateCalc( 
                                              ParseDateString($chan_stop), 
                                              ParseDateDelta("+ 1 day")
                                          ), 
                                          "%Y%m%d%H%M %z"),
                                      $base_tz
                                  );
                }

                # Include the current programme if its timeslot lies inside the
                # channel's broadcast window
                if (Date_Cmp($prog{start}, $chan_start) >= 0
                  && Date_Cmp($prog{stop}, $chan_stop) <= 0) {
                    t("  " . $chan_start . " - Start time of channel");
                    t("  " . $chan_stop . " - Stop time of channel");
                    t("  '" . $prog{'_title'} . "' shown whilst channel is on-air, adding");
                }
                # If the channel starts and stops broadcasting on the same 
                # calendar day and the programme's timeslot is outside the 
                # broadcast window, skip it
                elsif (( Date_Cmp( $prog{start}, $chan_start) < 0 
                                  || Date_Cmp($prog{stop}, $chan_stop) > 0 ) 
                            && $chan_stop_next_day == 0 ) {
                    t("  " . $chan_start . " - Start time of channel");
                    t("  " . $chan_stop . " - Stop time of channel");
                    t("  '" . $prog{'_title'} . "' shown whilst channel is off-air, skipping\n");
                    next PROGRAMME;
                }
                else {
                    # If the channel broadcasts through the night, and the channel
                    # start time is later than the stop time, it is possible for a
                    # program shown at or after midnight to result in the generation
                    # of incorrect channel start/stop times (shifted +1day forward).
                    # We therefore generate another pair of channel start/stop 
                    # times for the previous day to match against
                    #
                    $chan_start = utc_offset(
                                      UnixDate(
                                          DateCalc(
                                              ParseDateString($chan_start),
                                              ParseDateDelta("- 1 day")
                                          ),
                                          "%Y%m%d%H%M %z"),
                                      $base_tz
                                  );

                    $chan_stop  = utc_offset(
                                      UnixDate(
                                          DateCalc(
                                              ParseDateString($chan_stop),
                                              ParseDateDelta("- 1 day")
                                          ),
                                          "%Y%m%d%H%M %z"),
                                      $base_tz
                                  );

                    t("  " . $chan_start . " - Start time of channel");
                    t("  " . $chan_stop . " - Stop time of channel");

                    # Test again to see if the programme falls between the adjusted
                    # channel broadcast times
                    if (Date_Cmp($prog{start}, $chan_start) >= 0
                      && Date_Cmp($prog{stop}, $chan_stop) <= 0 ) {
                        t("  '" . $prog{'_title'} . "' shown whilst channel is on-air, adding");
                    } else {
                        t("  '" . $prog{'_title'} . "' shown whilst channel is off-air, skipping\n");
                        next PROGRAMME;
                    }
                }
            }

            # Compare the stated and calculated durations of the programme. Since
            # we use the given programme length to determine the stop time, any
            # problems here need investigating
            my $rt_prog_length = ParseDateDelta( $prog{'_duration_mins'} . " minutes" );
            my $real_prog_length = DateCalc( ParseDate( $prog{start} ),
                                             ParseDate( $prog{stop} ) );
            if ($rt_prog_length ne $real_prog_length) {
                t("  Calculated/stated programme durations do not agree for '" . $prog{'_title'} . "':");
                t("    Start time: '" . $prog{start} . "'\t\tCalculated:  '" . $real_prog_length . "'");
                t("    Stop time:  '" . $prog{stop} . "'\t\tRadio Times: '" . $rt_prog_length . "'");
            }

            # Check for a programme having zero length and skip it
            my $zero_prog_length = ParseDateDelta( "0 minutes" );
            if (($rt_prog_length eq $zero_prog_length) ||
                            ($real_prog_length eq $zero_prog_length)) {
                t("  Programme '" . $prog{'_title'} . "' has zero length, skipping");
                t("");
                next PROGRAMME;
            }

            # Check that this programme's start time is not earlier than the
            # previous programme's stop time. This is seen very infrequently
            # in the data, and might be due to last minute schedule changes
            # where the overlapping timings have not been noticed/corrected.
            #
            # If an overlap is detected, move the start time of the second 
            # programme to be the same as the stop time for the first programme. 
            # This has been chosen to try to ensure that the first programme 
            # does not lose its ending, which is almost always more important 
            # than the start of a programme.
            #
            # FIXME
            # However, examination of a recent occurence of an overlap showed 
            # that the start time of the second programme was correct, and that
            # the end time and duration of the previous programme was incorrect
            # so this behaviour may need to be reversed/altered if start times
            # are found to be consistently correct
            #
            if (defined $prev_prog && Date_Cmp($prog{start}, $prev_prog->{stop}) < 0) {
                # ignore programmes that are listed as starting before the 
                # previous programe has finished *and* finishing before or 
                # at the same time the previous programme has finished 
                # (seen with news/weather bulletins)
                if (Date_Cmp($prog{stop}, $prev_prog->{stop}) <= 0) {
                    t("  Programme '" . $prog{'_title'} . "' starts/stops during previous programme, ignoring");
                    t("");
                    push @overlapping_progs,
                            $prog{start} . " (" . $rt_name . "): " . $prog{'_title'}
                            . " starts/stops during previous programme, IGNORING";
                    next PROGRAMME;
                }
                # determine overlap
                my $delta = DateCalc( ParseDate( $prog{start} ),
                                      ParseDate( $prev_prog->{stop} ) );
                # get the overlap in minutes
                my $overlap = Delta_Format($delta, 1, "%mv");
                t("  Scheduling overlap detected (" . $overlap . " mins) with " 
                        . "previous programme '" . $prev_prog->{'_title'} . "'");
                # Record the overlap for debug output
                push @overlapping_progs, 
                        $prog{start} . " (" . $rt_name . "): Start of '" . $prog{'_title'}
                        . "' overlaps end of '" . $prev_prog->{'_title'}
                        . "' by " . $overlap . " minutes, ADJUSTING";
                $prog{start} = $prev_prog->{stop};
                t("  Adjusting start time of '" . $prog{'_title'} . "' to remove overlap");
                t("  " . $prog{start} . " - Start time after overlap adjustment");
                t("  " . $prog{stop} . " - Stop time after overlap adjustment");
            }

            # Finally, write the programme's XML data to programme list
            push @programmes, \%prog;
            # Update reference to previous programme
            $prev_prog = \%prog;
            
            $num_titles++;
            t("");
        }

        if ($num_titles < 1) {
            $empty_listings{$rt_name} = $rt_listings_uri;
            t("  No programmes found for '$rt_name' - check source file");
        }
        else {
            # Write the channel's programme elements to output
            foreach my $prog (@programmes) {
                $writer->write_programme($prog);
            }
            t("  Finished processing $num_titles <programme> elements for '$rt_name'");
        }
        
        t("Finished processing listings for '$rt_name' ($xmltv_id)\n");
        
        # Update the progres bar by one increment
        if (defined $listings_bar) {
            $listings_bar->update();
        }
    }

    if (defined $listings_bar) {
        # Only update the progress bar to 100% if we need to
        if ($need_final_update) {
            $listings_bar->update($num_req_chans);
        }
        $listings_bar->finish();
        if (!$opt->{quiet}) {
            say("\n");
        }
    }
}

sub write_xmltv_footer {
    t("\nWriting XMLTV footer\n");
    $writer->end;
}

# Debug subroutines follow...

sub print_titles_with_colons {
    if (%prog_titles) {
        my @titles_colons;
        my %precolon;  # store the title elements that appear before and
        my %postcolon; # after the first colon with the full title
        foreach my $title (sort keys %prog_titles) {
            if ($title =~ m/^([^:]+)\s*:\s*(.*)$/) {
                push @titles_colons, $title;
                push @{$precolon{$1}}, $title;
                push @{$postcolon{$2}}, $title;
            }
        }

        if (@titles_colons) {
            say("\nStart of list of titles containing colons");
            say("  " . $_) foreach @titles_colons;
            
            # now store the possible fixups if we see more than 1 title having 
            # common pre/post colon text.
            my @prefixups;
            foreach my $text (sort keys %precolon) {
                if (@{$precolon{$text}} > 1) {
                    push @prefixups, "2|" . $text
                }
            }
            if (@prefixups) {
                say("\nPossible fixups for title:episode :\n");
                say($_) foreach sort @prefixups;
                say("");
            }

            my @postfixups;
            foreach my $text (sort keys %postcolon) {
                if (@{$postcolon{$text}} > 1) {
                    push @postfixups, "3|" . $text
                }
            }
            if (@postfixups) {
                say("\nPossible fixups for episode:title :\n");
                say($_) foreach sort @postfixups;
                say("");
            }
            say("End of list of titles containing colons");
        }
    }
}

sub print_titles_with_hyphens {
    if (%prog_titles) {
        my @titles_hyphens;
        my @fixups;
        foreach my $title (sort keys %prog_titles) {
            if ($title =~ m/\s+-\s+/) {
                push @titles_hyphens, $title;
                my $idx_hyphen = index($title, "-");
                my $idx_colon = index($title, ":"); # -1 = no colon
                # Do not suggest title fixup if colon precedes hyphen
                if ($idx_colon == -1 || $idx_hyphen < $idx_colon) {
                    my $rec_title = $title;
                    $rec_title =~ s/\s+-\s+/: /;
                    push @fixups, "5|" . $title . "~" . $rec_title;
                }
            }
        }

        if (@titles_hyphens) {
            say("\nStart of list of titles containing hyphens");
            say("  " . $_) foreach @titles_hyphens;
            
            if (@fixups) {
                say("\nPossible fixups for hyphenated titles:\n");
                say($_) foreach sort @fixups;
                say("");
            }
            say("End of list of titles containing hyphens");
        }
    }
}

sub print_new_titles {
    if (%prog_titles) {
        my @titles_special;
        foreach my $title (sort keys %prog_titles) {
            push(@titles_special, $title) if ($title =~ m/Special\b/i);
        }
        if (@titles_special) {
            say("\nStart of list of titles containing \"Special\"");
            say("  " . $_) foreach @titles_special;
            say("End of list of titles containing \"Special\"");
        }
        
        my @titles_new;
        my @titles_premiere;
        my @titles_finale;
        my @titles_anniv;
        foreach my $title (sort keys %prog_titles) {
            push(@titles_new, $title) if ($title =~ m/^(All New|New)\b/i);
            push(@titles_premiere, $title) if ($title =~ m/Premiere\b/i);
            push(@titles_finale, $title) if ($title =~ m/Final\b/i);
            push(@titles_finale, $title) if ($title =~ m/Finale/i);
            push(@titles_anniv, $title) if ($title =~ m/Anniversary/i);
        }
        if (@titles_new || @titles_premiere || @titles_finale || @titles_anniv) {
            say("\nStart of list of titles containing \"New/Premiere/Finale/etc...\"");
            if (@titles_new) {
                say("  " . $_) foreach @titles_new;
                say("");
            }
            if (@titles_premiere) {
                say("  " . $_) foreach @titles_premiere;
                say("");
            }
            if (@titles_finale) {
                say("  " . $_) foreach @titles_finale;
                say("");
            }
            if (@titles_anniv) {
                say("  " . $_) foreach @titles_anniv;
                say("");
            }
            say("End of list of titles containing \"New/Premiere/Finale/etc...\"");
        }
        
        my @titles_day;
        my @titles_night;
        my @titles_week;
        foreach my $title (sort keys %prog_titles) {
            push(@titles_day, $title) if ($title =~ m/\bDay\b/i);
            push(@titles_night, $title) if ($title =~ m/\bNight\b/i);
            push(@titles_week, $title) if ($title =~ m/\bWeek\b/i);
        }
        if (@titles_day || @titles_night || @titles_week) {
            say("\nStart of list of titles containing \"Day/Night/Week\"");
            if (@titles_day) {
                say("  " . $_) foreach @titles_day;
                say("");
            }
            if (@titles_night) {
                say("  " . $_) foreach @titles_night;
                say("");
            }
            if (@titles_week) {
                say("  " . $_) foreach @titles_week;
                say("");
            }
            say("End of list of titles containing \"Day/Night/Week\"");
        }
        
        my @titles_christmas;
        my @titles_newyear;
        foreach my $title (sort keys %prog_titles) {
            push(@titles_christmas, $title) if ($title =~ m/\bChristmas\b/i);
            push(@titles_newyear, $title) if ($title =~ m/\bNew\s+Year/i);
        }
        if (@titles_christmas || @titles_newyear) {
            say("\nStart of list of titles containing \"Christmas/New Year\"");
            if (@titles_christmas) {
                say("  " . $_) foreach @titles_christmas;
                say("");
            }
            if (@titles_newyear) {
                say("  " . $_) foreach @titles_newyear;
                say("");
            }
            say("End of list of titles containing \"Christmas/New Year\"");
        }
        
        my @titles_bestof;
        my @titles_highlights;
        my @titles_results;
        my @titles_top;
        foreach my $title (sort keys %prog_titles) {
            push(@titles_bestof, $title) if ($title =~ m/Best of\b/i);
            push(@titles_highlights, $title) if ($title =~ m/Highlights/i);
            push(@titles_results, $title) if ($title =~ m/Results?/i);
            push(@titles_top, $title) if ($title =~ m/\bTop\b/i);
        }
        if (@titles_bestof || @titles_results || @titles_top) {
            say("\nStart of list of titles containing \"Results/Best of/Highlights/etc...\"");
            if (@titles_bestof) {
                say("  " . $_) foreach @titles_bestof;
                say("");
            }
            if (@titles_highlights) {
                say("  " . $_) foreach @titles_highlights;
                say("");
            }
            if (@titles_results) {
                say("  " . $_) foreach @titles_results;
                say("");
            }
            if (@titles_top) {
                say("  " . $_) foreach @titles_top;
                say("");
            }
            say("End of list of titles containing \"Results/Best of/Highlights/etc...\"");
        }
    }
}

sub print_misencoded_utf8_data {
    if (%hasC27FBFchars && scalar keys %hasC27FBFchars > 0) {
        say("\nStart of list of channels containing bad bytes in range [C2][7F-BF]");
        foreach my $chan (sort keys %hasC27FBFchars) {
            say("  $chan ($hasC27FBFchars{$chan})");
        }
        say("End of list of channels");
    }
    if (%hadEFBFBD && scalar keys %hadEFBFBD > 0) {
        say("\nStart of list of channels containing Unicode Replacement Character");
        foreach my $chan (sort keys %hadEFBFBD) {
            say("  $chan ($hadEFBFBD{$chan})");
        }
        say("End of list of channels");
    }
    if (%hadC3AFC2BFC2BD && scalar keys %hadC3AFC2BFC2BD > 0) {
        say("\nStart of list of channels containing double-encoded Unicode Replacement Character");
        foreach my $chan (sort keys %hadC3AFC2BFC2BD) {
            say("  $chan ($hadC3AFC2BFC2BD{$chan})");
        }
        say("End of list of channels");
    }
}

sub print_possible_prog_numbering {
    if (%possible_series_nums && scalar keys %possible_series_nums > 0) {
        say("\nStart of list of possible series numbering seen in listings");
        foreach my $poss (sort keys %possible_series_nums) {
            say("  $poss");
        }
        say("End of list of possible series numbering seen in listings");
    }
    if (%possible_episode_nums && scalar keys %possible_episode_nums > 0) {
        say("\nStart of list of possible episode numbering seen in listings");
        foreach my $poss (sort keys %possible_episode_nums) {
            say("  $poss");
        }
        say("End of list of possible episode numbering seen in listings");
    }
    if (%possible_part_nums && scalar keys %possible_part_nums > 0) {
        say("\nStart of list of possible part numbering seen in listings");
        foreach my $poss (sort keys %possible_part_nums) {
            say("  $poss");
        }
        say("End of list of possible part numbering seen in listings");
    }
    if (%title_text_to_remove && scalar keys %title_text_to_remove > 0) {
        say("\nStart of list of titles containing \"Season\"");
        foreach my $t (sort keys %title_text_to_remove) {
            say("  $t");
        }
        say("End of list of titles containing \"Season\"");
    }
}

sub print_overlapping_progs {
    if (@overlapping_progs) {
        say("\nStart of list of overlapping programmes seen in listings");
        foreach my $overlap (@overlapping_progs) {
            say("  " . $overlap);
        }
        say("End of list of overlapping programmes seen in listings");
    }
}

sub print_empty_listings {
    if (%empty_listings && scalar keys %empty_listings > 0) {
        say("\nStart of list of channels providing no listings");
        foreach my $chan (sort keys %empty_listings) {
            say("  $chan ($empty_listings{$chan})");
        }
        say("End of list of channels providing no listings");
    }
}

sub print_flagged_title_eps {
    if (%flagged_title_eps && scalar keys %flagged_title_eps > 0) {
        my %titles_to_output; # temp hash to store matches
        foreach my $flagged_title (sort keys %flagged_title_eps) {
            foreach my $title (sort keys %prog_titles) {
                if (lc $flagged_title eq lc $title) {
                    $titles_to_output{$flagged_title} = $flagged_title;
                }
            }
        }
        # only output something if at least 1 matching title
        if (%titles_to_output && scalar keys %titles_to_output > 0) {
            say("\nStart of list of titles that may need fixing individually");
            foreach my $title (sort keys %titles_to_output) {
                say("  $title");
            }
            say("End of list of titles that may need fixing individually");
        }
    }
}

sub print_dotdotdot_titles {
    if (%dotdotdot_titles && scalar keys %dotdotdot_titles > 0) {
        my %titles_to_output; # temp hash to store matches
        if (%prog_titles) {
            DOTDOTDOT_TITLE:
            # In %dotdotdot_titles, the key is the 'normalised' title to match, 
            # value is the full title to use in replacement
            foreach my $dotdotdot_title (sort keys %dotdotdot_titles) {
                PROG_TITLE:
                foreach my $title (sort keys %prog_titles) {
                    # ignore title having ellipses already
                    next PROG_TITLE if $title =~ m/.*\.\.\.$/;
                    
                    if ($title =~ m/\Q$dotdotdot_title\E/i) {
                        $titles_to_output{$title} = $dotdotdot_titles{$dotdotdot_title};
                    }
                }
            }
        }
        # only output something if at least 1 matching title
        if (%titles_to_output && scalar keys %titles_to_output > 0) {
            say("\nStart of list of potential \"...\" titles that may need fixing individually");
            foreach my $title (sort keys %titles_to_output) {
                say("  Title '$title' may need to be fixed based on fixup '$titles_to_output{$title}'");
            }
            say("End of list of potential \"...\" titles that may need fixing individually");
        }
    }
}

sub print_title_in_subtitle {
    if (%title_in_subtitle_fixed && scalar keys %title_in_subtitle_fixed > 0) {
        say("\nStart of list of programmes where title was removed from sub-title field");
        foreach my $prog_ref (sort keys %title_in_subtitle_fixed) {
            say("  $title_in_subtitle_fixed{$prog_ref}->{'title'} / $title_in_subtitle_fixed{$prog_ref}->{'episode'}");
        }
        say("\nEnd of list of programmes where title was removed from sub-title field");
    }
    if (%title_in_subtitle_notfixed && scalar keys %title_in_subtitle_notfixed > 0) {
        say("\nStart of list of programmes where title is still present in sub-title field");
        foreach my $prog_ref (sort keys %title_in_subtitle_notfixed) {
            say("  $title_in_subtitle_notfixed{$prog_ref}->{'title'} / $title_in_subtitle_notfixed{$prog_ref}->{'episode'}");
        }
        say("\nEnd of list of programmes where title is still present in sub-title field");
    }
}

sub print_uc_titles_pre {
    if (scalar keys %uc_prog_titles > 0) {
        say("\nStart of list of uppercase titles before processing");
        foreach my $title (sort keys %uc_prog_titles) {
            say("  $title");
        }
        say("End of list of uppercase titles before processing");
    }
}

sub print_uc_titles_post {
    if (%prog_titles) {
        my @titles_uc_post;
        foreach my $title (sort keys %prog_titles) {
            if ($title eq uc($title) && $title !~ m/^\d+$/) {
                push @titles_uc_post, $title;
            }
        }
        if (@titles_uc_post) {
            say("\nStart of list of uppercase titles after processing");
            say("  " . $_) foreach @titles_uc_post;
            say("End of list of uppercase titles after processing");
        }
    }
}

sub print_titles_inc_years {
    if (%prog_titles) {
        my @titles_years;
        foreach my $title (sort keys %prog_titles) {
            if ($title =~ m/\b\d{4}\b/) {
                push @titles_years, $title;
            }
        }
        if (@titles_years) {
            say("\nStart of list of titles including possible years");
            say("  " . $_) foreach @titles_years;
            say("End of list of titles including possible years");
        }
    }
}

sub print_title_variants {
    if (%prog_titles) {
        # iterate over each unique "normalised" title
        my @titles_variants;
        my @fixups;
        foreach my $unique_title (sort keys %case_insens_titles) {
            if (scalar keys %{$case_insens_titles{$unique_title}} > 1) {
                my %variants;
    
                # iterate over each actual title seen in listings
                foreach my $title (sort keys %{$case_insens_titles{$unique_title}}) {

                    # need to remove 'count' key before genre processing later
                    my $title_cnt = delete $case_insens_titles{$unique_title}{$title}{'count'};
                    # hash lists of title variants keyed on frequency
                    push @{$variants{$title_cnt}}, $title;
                    
                    my $line = "  $title (";
                    # iterate over each title's genres
                    foreach my $genre (sort keys %{$case_insens_titles{$unique_title}{$title}}) {
                        # iterate over each title's channel availability by genre
                        foreach my $chan (sort keys %{$case_insens_titles{$unique_title}{$title}{$genre}}) {
                            $line .= $genre . "/" . $chan . " [" . $case_insens_titles{$unique_title}{$title}{$genre}{$chan} . " occurences], ";
                        }
                    }
                    $line =~ s/,\s*$//; # remove last comma
                    $line .= ")";
                    push @titles_variants, $line;
                }
                push @titles_variants, "";

                # now find list of titles with highest freq and check if it contains
                # a single entry to use in suggested fixups
                my @title_freqs = sort {$a <=> $b} keys %variants;
                my $highest_freq = $title_freqs[-1];
                my $best_title;
                if (@{$variants{$highest_freq}} == 1) {
                    # grab the title and remove key from $case_insens_titles{$unique_title}
                    $best_title = shift @{$variants{$highest_freq}};
                    delete $case_insens_titles{$unique_title}{$best_title};
                    # now iterate over remaining variations of title and generate fixups
                    foreach my $rem_title (keys %{$case_insens_titles{$unique_title}}) {
                        push @fixups, "5|" . $rem_title . "~" . $best_title;
                    }
                }
            }
        }
        if (@titles_variants) {
            say("\nStart of possible title variations");
            say("  " . $_) foreach @titles_variants;
            if (@fixups) {
                say("\nPossible fixups for title variations:\n");
                say($_) foreach sort @fixups;
                say("");
            }
            say("End of possible title variations");
        }
    }
}

sub print_categories {
    if (%categories && scalar keys %categories > 0) {
        say("\nStart of list of programme categories seen");
        foreach my $category (sort keys %categories) {
            say("  $category");
        }
        say("End of list of programme categories seen");
    }
}

sub print_uncategorised_progs {
    if (%uncategorised_progs && scalar keys %uncategorised_progs > 0) {
        say("\nStart of list of uncategorised programmes");
        foreach my $title (sort keys %uncategorised_progs) {
            say("  $title");
        }
        say("End of list of uncategorised programmes");
    }
}

sub print_cats_per_prog {
    if (%cats_per_prog) {
        my @titles_cats;
        my @fixups;
        foreach my $title (sort keys %cats_per_prog) {
            if (scalar keys %{$cats_per_prog{$title}} > 1) {
                push @titles_cats, "  '" . $title . "' is categorised as:";
                my $best_cat_cnt = 1;
                my $best_cat = '';
                foreach my $cat (sort keys %{$cats_per_prog{$title}}) {
                    push @titles_cats, "    $cat (" . $cats_per_prog{$title}{$cat} . " occurences)";
                    if ($cats_per_prog{$title}{$cat} > $best_cat_cnt) {
                        $best_cat = $cat;
                        $best_cat_cnt = $cats_per_prog{$title}{$cat};
                    }
                }
                push @titles_cats, "";
                if ($best_cat_cnt > 1) {
                   push @fixups, "6|" . $title . "~" . $best_cat;
                }
            }
        }
        if (@titles_cats) {
            say("\nStart of programmes with multiple categories");
            say("  " . $_) foreach @titles_cats;
            if (@fixups) {
                say("\nPossible fixups for programme categories:\n");
                say($_) foreach sort @fixups;
                say("");
            }
            say("End of programmes with multiple categories");
        }
    }
}

#FIXME
#
# The following subs are subject to change and are currently proof of concept

sub list_lineups {
    say("test.freeview.co.uk|DVB-T|FreeView");
    say("test.freesat.co.uk|DVB-S|Freesat");
}

sub get_lineup {

    my $opt = shift;

    # get the lineup_id we're passed
    # my $lineup_id = shift;
    # say("Lineup provided was: " . $lineup_id);
 
    # Configure output and write XMLTV lineup data - header, name, channels 
    # and footer
    #
    #my $lineup_writer;
    setup_xmltv_lineup_writer($opt);
    write_xmltv_lineup_header();

    write_lineup_name();
    write_lineup_icon();
    write_lineup_channels();

    write_xmltv_lineup_footer();
}

# Determine options for, and create XMLTV::Lineup::Writer object
sub setup_xmltv_lineup_writer {

    my $opt = shift;

    # output options
    my %g_args = ();
    if (defined $opt->{output}) {
#        t("\nOpening XML output file '$opt->{output}'\n");
        my $fh = new IO::File ">$opt->{output}";
        die "Error: Cannot write to '$opt->{output}', exiting" if (!$fh);
        %g_args = (OUTPUT => $fh);
    }

#    t("Started writing XMLTV output using " . $xml_encoding . " encoding");
    $lineup_writer = new XMLTV::Lineup::Writer(%g_args, encoding => $xml_encoding);
}

sub write_xmltv_lineup_header {

#    t("Writing XMLTV lineup header");
    $lineup_writer->start(\%xmltv_lineup_attributes);
}

sub write_lineup_name {

    my @values = ( 'Freeview (UK)', 'en' );
    $lineup_writer->write_display_name(\@values);
}

sub write_lineup_icon {

    my %icon_url = ( 'src' => 'http://www.freeview.co.uk/logo.jpg' );
    $lineup_writer->write_lineup_icon(\%icon_url);
}

sub write_lineup_channels {
    
    my %ch = ( 'id' => 'bbc1.bbc.co.uk',
               'old-id' => [ 'old.bbc1.bbc.co.uk', 'evenolder.bbc1.bbc.co.uk' ],
               'display-name' => [ [ 'BBC ONE', 'en' ] ],
               'preferred-preset' => '1',
               'service-id' => '4165',
               'transport-id' => '4101',
               'network-id' => '9018',
               'icon' => [ { 'src' => 'http://www.bbc.co.uk/bbcone.jpg' } ],
               'homepage-url' => "http://www.bbc.co.uk/bbcone", );

    $lineup_writer->write_channel(\%ch);
}

sub write_xmltv_lineup_footer {
#    t("Writing XMLTV lineup footer\n");
    $lineup_writer->end;
}

__END__

=head1 NAME

tv_grab_uk_rt - Grab TV listings for United Kingdom/Republic of Ireland

=head1 SYNOPSIS

tv_grab_uk_rt --help
	
tv_grab_uk_rt --version

tv_grab_uk_rt --capabilities

tv_grab_uk_rt --description

tv_grab_uk_rt [--config-file FILE]
              [--days N] [--offset N]
              [--output FILE] [--quiet] [--debug]

tv_grab_uk_rt --configure [--config-file FILE]

tv_grab_uk_rt --configure-api [--stage NAME]
              [--config-file FILE] [--output FILE]

tv_grab_uk_rt --list-channels [--config-file FILE]
              [--output FILE] [--quiet] [--debug]

tv_grab_uk_rt --list-lineups

tv_grab_uk_rt --lineup LINEUP_ID

=head1 DESCRIPTION

Output TV listings in XMLTV format for many channels available in the 
United Kingdom and Republic of Ireland.  Source data comes from 
machine-readable files made available from the Radio Times website.

=head1 USAGE

First run B<tv_grab_uk_rt --configure> to choose which channels you want to 
receive listings for.  Then run B<tv_grab_uk_rt> (with optional arguments) to get
around 14 days of listings for your configured channels.

=head1 OPTIONS

B<--help> Print a help message and exit.

B<--version> Show the versions of the XMLTV libraries, the grabber and of
key modules used for processing listings.

B<--capabilities> Show which capabilities the grabber supports. For more
information, see L<http://wiki.xmltv.org/index.php/XmltvCapabilities>

B<--description> Show a brief description of the grabber.

B<--config-file FILE> Specify the name of the configuration file to use. 
If not specified, a default of B<~/.xmltv/tv_grab_uk_rt.conf> is used.  This 
is the file written by B<--configure> and read when grabbing.

B<--output FILE> When grabbing, write output to FILE rather than to standard
output.

B<--days N> When grabbing, grab N days of data instead of all available.
Supported values are 1-15.

B<--offset N> Start grabbing at today + N days. Supported values are 0-14.

Note that due to the format of the source data, tv_grab_uk_rt always downloads 
data for all available days and then filters for days specified with --days and 
--offset. Specifying --days and/or --offset in order to speed up downloads or
reduce data transfer will therefore not work.

B<--quiet> Suppress all progress messages normally written to standard error.

B<--debug> Provide detailed progress messages to standard error. Due to the
volume of debug information produced, it is not advised to use this option 
during normal grabber use.

B<--gui OPTION> Use this option to enable a graphical interface to be used.
OPTION may be 'Tk', or left blank for the best available choice.
Additional allowed values of OPTION are 'Term' for normal terminal output
(default) and 'TermNoProgressBar' to disable the use of Term::ProgressBar.

B<--configure> Prompt for which channels to download listings for, where to 
store the cache directory for retrieved listings, what character encoding
to use for output, and also specify regional and TV platform information.

B<--list-channels> Outputs a list of every channel available to the grabber
in XMLTV format.

B<--list-lineups> Outputs a list of every channel lineup available to the 
grabber in XMLTV format.

B<--lineup LINEUP_ID> Outputs the given channel lineup in XMLTV format for
the given LINEUP_ID.

=head1 SOURCE DATA TERMS OF USE

All data is the copyright of the Radio Times and the use of this data is 
restricted to personal use only. Commercial use of this data is forbidden. 
L<http://www.radiotimes.com/>

In accessing this XML feed, you agree that you will only access its contents 
for your own personal and non-commercial use and not for any commercial 
or other purposes, including advertising or selling any goods or services, 
including any third-party software applications available to the general public.

=head1 CHARACTER ENCODING

During configuration, the software asks the user to choose the character
encoding to be used for output. Current supported encodings are UTF-8 and 
ISO-8859-1. If any users would like support for other encodings, please contact
the maintainer.

=head1 TITLE PROCESSING

Over time, the listings may contain inconsistent programme details, such as
the programme title combined with episode details for some showings of a 
programme, but separate for others; or the episode title being given as the 
programme title, and the programme title given as the episode title. Some
programme titles may also change slightly over time, or across channels. 
Enabling title processing during configuration enables this software to 
process programme titles against a list of flagged titles. The 
software will correct such programme titles, which in turn should result in 
better performance of PVR software which rely on consistent programme data. 
Please be aware that enabling title processing will result in the grabber 
taking slightly longer to complete its operation due to the extra 
processing overhead.

=head1 REGIONAL CHANNELS

During configuration, the software asks for UK postcode information. The first
half of a user's postcode is used to determine which regional channels
are likely available in their locality, and only these matching regional 
channels are included in the list of available channels. A user can enter 
'none' during configuration to disable regional channel filtering. Users in 
the Republic of Ireland should use the pseudo-postcode 'EIRE' to enable 
filtering of regional channels. Errors are quite possible, so please report 
any incorrect channel/postcode information.

=head1 TV PLATFORMS

After specifying a postcode, the software will ask the user to select their
TV service platform (Freeview, Freesat, Virgin TV, etc) from a list. Selecting one 
of these entries will filter the channel selection list (shown at the end of 
the configuration phase) to show only those channels available on the chosen 
platform. If the user has entered a valid postcode, the channel list will 
also only include those regional channels available in the user's locality. 
Again, errors are quite possible, so please report any incorrect 
channel/platform information.

=head1 ERROR HANDLING

tv_grab_uk_rt will only terminate early if it is impossible to continue with grabbing
data. This can be due to a lack of channel configuration data, a bad/missing
configuration file, or filesystem permission problems. Running the grabber in
non-quiet mode should report why the grabber failed.

Non-fatal errors are reported during a grabber run, and can result in listings
for a channel being skipped either in part, or entirely. Progress messages
will state why data is missing when it is possible to do so. A non-zero exit 
status will normally be given when the grabber has encountered problems 
during listings retrieval.

=head1 ENVIRONMENT VARIABLES

The environment variable HOME can be set to change where the configuration
file is stored. All configuration is stored in $HOME/.xmltv/ by default. On
Windows it might be necessary to set HOME to a pathname containing no spaces.

The environment variable XMLTV_SUPPLEMENT can be set to change where the 
supplemental XMLTV files are retrieved from. By default, the file is 
retrieved from the XMLTV supplement server. See L<XMLTV::Supplement> for 
more information.

If you want the grabber to use customised local copies of the supplemental
files, you should set XMLTV_SUPPLEMENT to the path of the directory containing
a tv_grab_uk_rt/ directory containing the supplement files. For example, if
your local supplement files are stored in /usr/local/share/xmltv/tv_grab_uk_rt/
you should `export XMLTV_SUPPLEMENT="/usr/local/share/xmltv/"` before running the
grabber.

=head1 RADIO LISTINGS

Ironically, the Radio Times does not offer listings for radio. They 
have been asked about the possibility of adding radio listings, but stated 
that this would require significant development effort. It has not been
ruled out entirely, but is unlikely to be added soon.

Users who would like to obtain BBC radio listings in XMLTV format are advised 
to investigate a new grabber that obtains listings from the BBC Backstage 
service. See L<http://wiki.xmltv.org/index.php/BBC_Backstage> for more 
information.

=head1 LINEUPS

** Work in progress ** In order to allow more straightforward configuration
of programs that make use of XMLTV data, lineups contain a list of channels
and their names, EPG numbers and identifiers, to allow such programs to quickly
match XMLTV channel data against channels configured in the program. The
intention is to allow programs to be made aware of changes to a lineup and to
allow automated configuration of XMLTV listings.

=head1 MAILING LIST

You can subscribe to and read the XMLTV users mailing list by visiting 
L<http://lists.sourceforge.net/lists/listinfo/xmltv-users>. This is a source
of help and advice for new users. A searchable archive of the list is
available at L<http://news.gmane.org/gmane.comp.tv.xmltv.general>.

=head1 SEE ALSO

L<xmltv(5)>, L<http://wiki.xmltv.org>, L<http://www.radiotimes.com/>

=head1 BUGS

If you encounter a reproducible bug, please report it on the XMLTV bug 
tracker at L<http://sourceforge.net/tracker/?group_id=39046&atid=424135>, 
making sure you assign the bug to the tv_grab_uk_rt category. Please check
that the bug has not already been reported.

The source data on the Radio Times website is generated daily before 0800. 
Occasionally the source data may not get recreated, leaving 
the source files for some (or all) channels empty. Users are encouraged 
to wait at least 1 day before reporting an issue with missing listings, 
as they frequently reappear in the next update or later the same day. If listings continue to 
be missing from the Radio Times website, please report the fact on the XMLTV users 
mailing list.

There have been several occasions in the past when the Radio Times channel index has been
missing from the Radio Times website. This file is essential to being able to
run the grabber, as it contains the list of channels having available listings
data. If this file is missing or empty, and there is no locally-cached copy of
the file, it will not be possible to run the grabber. The file usually
regenerates automatically over the course of the next day, at which point it
will be possible to run the grabber successfully.

There is an ongoing issue with the Radio Times source data containing
mis-encoded UTF-8 characters, which are handled both  automatically in the grabber
and on a case-by-case basis when required. The issue has been reported to 
the Radio Times.

There are no other reported ongoing issues.

=head1 AUTHOR

Since 2007 the maintainer has been Nick Morrott (knowledgejunkie at gmail dot com).

The original author was Ed Avis (ed at membled dot com). Parts of this code
were copied from tv_grab_se_swedb by Mattias Holmlund, and from the XMLTV
wiki L<http://wiki.xmltv.org/>. Regional postcode information was kindly
made available from L<http://www.ukfree.tv>.

=cut

