package ksb::KDEXMLReader;

# Class: KDEXMLReader
#
# kde_projects.xml module-handling code.
# The core of this was graciously contributed by Allen Winter, and then
# touched-up and kdesrc-build'ed by myself -mpyne.
#
# In C++ terms this would be a singleton-class (as it uses package variables
# for everything due to XML::Parser limitations). So it is neither re-entrant
# nor thread-safe.

use strict;
use warnings;
use v5.10;

our $VERSION = '0.10';

use XML::Parser;

# Method: new
#
# Constructs a new KDEXMLReader. This doesn't contradict any part of the class
# documentation which claims this class is a singleton however. This should be
# called as a method (e.g. KDEXMLReader->new(...)).
#
# Parameters:
#  $inputHandle - Ref to filehandle to read from. Must implement _readline_ and
#  _eof_.
sub new
{
    my $class = shift;
    my $inputHandle = shift;

    my $self = {
        inputHandle => $inputHandle,
    };

    return bless ($self, $class);
}

sub inputHandle
{
    my $self = shift;
    return $self->{inputHandle};
}

my @nameStack = ();        # Used to assign full names to modules.
my %xmlGroupingIds;        # XML tags which group repositories.
my %repositories;          # Maps short names to repo info blocks
my $curRepository;         # ref to hash table when we are in a repo
my $inRepo = 0;            # >0 if we are actually in a repo element.
my $desiredProtocol = '';  # URL protocol desired (normally 'git')

# Note on $proj: A /-separated path is fine, in which case we look
# for the right-most part of the full path which matches all of searchProject.
# e.g. kde/kdebase/kde-runtime would be matched by a proj of either
# "kdebase/kde-runtime" or simply "kde-runtime".
sub getModulesForProject
{
    # These are the elements that can have <repo> under them AFAICS, and
    # participate in module naming. e.g. kde/calligra or
    # extragear/utils/kdesrc-build
    @xmlGroupingIds{qw/component module project/} = 1;

    my ($self, $proj, $protocol) = @_;

    # Sanity-check
    if ($proj eq '*' || !$proj) {
        die "You are trying to import all modules. This is unwise. Ensure " .
            "you do not have any use-module items with a bare '*'";
    }

    if (!%repositories) {
        @nameStack = ();
        $inRepo = 0;
        $curRepository = undef;
        $desiredProtocol = $protocol;

        my $parser = XML::Parser->new(
            Handlers =>
                {
                    Start => \&xmlTagStart,
                    End => \&xmlTagEnd,
                    Char => \&xmlCharData,
                },
        );

        # Will die if the XML is not well-formed.
        $parser->parse($self->inputHandle());
    }

    # A hash is used to hold results since the keys inherently form a set,
    # since we don't want dups.
    my %results;
    my $findResults = sub {
        for my $result (
            grep {
                _projectPathMatchesWildcardSearch(
                    $repositories{$_}->{'fullName'}, $proj)
            } keys %repositories)
        {
            $results{$result} = 1;
        };
    };

    # Wildcard matches happen as specified if asked for. Non-wildcard matches
    # have an implicit "$proj/*" search as well for compatibility with previous
    # use-modules
    if ($proj !~ /\*/) {
        # We have to do a search to account for over-specified module names
        # like phonon/phonon
        $findResults->();

        # Now setup for a wildcard search to find things like kde/kdelibs/baloo
        # if just 'kdelibs' is asked for.
        $proj .= '/*';
    }

    $findResults->();

    return @repositories{keys %results};
}

sub xmlTagStart
{
    my ($expat, $element, %attrs) = @_;

    if (exists $xmlGroupingIds{$element}) {
        push @nameStack, $attrs{'identifier'};
    }

    # This code used to check for direct descendants and filter them out.
    # Now there are better ways (kde-build-metadata/build-script-ignore and
    # the user can customize using ignore-modules), and this filter made it
    # more difficult to handle kde/kdelibs{,/nepomuk-{core,widgets}}, so leave
    # it out for now. See also bug 321667.
    if ($element eq 'repo')
    {
        # This flag is cleared by the <repo>-end handler, so this *should* be
        # logically impossible.
        die "We are already tracking a repository" if $inRepo > 0;

        $inRepo = 1;
        $curRepository = {
            'fullName' => join('/', @nameStack),
            'repo' => '',
            'name' => $nameStack[-1],
            'active' => 'false',
            'tarball' => '',
            'branch'   => '',
            'branches' => [ ],
            'branchtype' => '', # Either branch:stable or branch:trunk
        }; # Repo/Active/tarball to be added by char handler.

        $repositories{$nameStack[-1]} = $curRepository;
    }

    # Currently we only pull data while under a <repo> tag, so bail early if
    # we're not doing this to simplify later logic.
    return unless $inRepo;

    # Character data is integrated by the char handler. To avoid having it
    # dump all willy-nilly into our dict, we leave a flag for what the
    # resultant key should be.
    if ($element eq 'active') {
        $curRepository->{'needs'} = 'active';

        # Unset our default value since one is present in the XML
        $curRepository->{'active'} = '';
    }
    # For git repos we want to retain the repository data and any snapshot
    # tarballs available.
    elsif ($element eq 'url') {
        $curRepository->{'needs'} =
            #                     this proto       | needs this attr set
            $attrs{'protocol'} eq $desiredProtocol ? 'repo'    :
            $attrs{'protocol'} eq 'tarball'        ? 'tarball' : undef;
    }
    # i18n data gives us the defined stable and trunk branches.
    elsif ($element eq 'branch') {
        $curRepository->{'needs'} = 'branch';

        my $branchType = $attrs{'i18n'} // '';
        $curRepository->{'branchtype'} = "branch:$branchType" if $branchType;
    }
}

sub xmlTagEnd
{
    my ($expat, $element) = @_;

    if (exists $xmlGroupingIds{$element}) {
        pop @nameStack;
    }

    # If gathering data for char handler, stop now.
    if ($inRepo && defined $curRepository->{'needs'}) {
        delete $curRepository->{'needs'};
    }

    # Save all branches encountered, mark which ones are 'stable' and 'trunk'
    # for i18n purposes, as this keys into use-stable-kde handling.
    if ($element eq 'branch') {
        my $branch = $curRepository->{'branch'};
        push @{$curRepository->{'branches'}}, $branch;
        $curRepository->{'branch'} = '';

        my $branchType = $curRepository->{'branchtype'};
        $curRepository->{$branchType} = $branch if $branchType;
        $curRepository->{'branchtype'} = '';
    }

    if ($element eq 'repo' && $inRepo) {
        $inRepo = 0;
        $curRepository = undef;
    }
}

sub xmlCharData
{
    my ($expat, $utf8Data) = @_;

    # The XML::Parser manpage makes it clear that the char handler can be
    # called consecutive times with data for the same tag, so we use the
    # append operator and then clear our flag in xmlTagEnd.
    if ($curRepository && defined $curRepository->{'needs'}) {
        $curRepository->{$curRepository->{'needs'}} .= $utf8Data;
    }
}

# Utility subroutine, returns true if the given kde-project full path (e.g.
# kde/kdelibs/nepomuk-core) matches the given search item.
#
# The search item itself is based on path-components. Each path component in
# the search item must be present in the equivalent path component in the
# module's project path for a match. A '*' in a path component position for the
# search item matches any project path component.
#
# Finally, the search is pinned to search for a common suffix. E.g. a search
# item of 'kdelibs' would match a project path of 'kde/kdelibs' but not
# 'kde/kdelibs/nepomuk-core'. However 'kdelibs/*' would match
# 'kde/kdelibs/nepomuk-core'.
#
# First parameter is the full project path from the kde-projects database.
# Second parameter is the search item.
# Returns true if they match, false otherwise.
sub _projectPathMatchesWildcardSearch
{
    my ($projectPath, $searchItem) = @_;

    my @searchParts = split(m{/}, $searchItem);
    my @nameStack   = split(m{/}, $projectPath);

    if (scalar @nameStack >= scalar @searchParts) {
        my $sizeDifference = scalar @nameStack - scalar @searchParts;

        # We might have to loop if we somehow find the wrong start point for our search.
        # E.g. looking for a/b/* against a/a/b/c, we'd need to start with the second a.
        my $i = 0;
        while ($i <= $sizeDifference) {
            # Find our common prefix, then ensure the remainder matches item-for-item.
            for (; $i <= $sizeDifference; $i++) {
                last if $nameStack[$i] eq $searchParts[0];
            }

            return if $i > $sizeDifference; # Not enough room to find it now

            # At this point we have synched up nameStack to searchParts, ensure they
            # match item-for-item.
            my $found = 1;
            for (my $j = 0; $found && ($j < @searchParts); $j++) {
                return 1   if $searchParts[$j] eq '*'; # This always works
                $found = 0 if $searchParts[$j] ne $nameStack[$i + $j];
            }

            return 1 if $found; # We matched every item to the substring we found.
            $i++; # Try again
        }
    }

    return;
}

1;