You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 

285 lines
9.3 KiB

package ksb::KDEXMLReader;
# Class: KDEXMLReader
#
# kde_projects.xml module-handling code.
# The core of this was graciously contributed by Allen Winter, and then
# touched-up and kdesrc-build'ed by myself -mpyne.
#
# In C++ terms this would be a singleton-class (as it uses package variables
# for everything due to XML::Parser limitations). So it is neither re-entrant
# nor thread-safe.
use strict;
use warnings;
use 5.014;
our $VERSION = '0.10';
use XML::Parser;
# Method: new
#
# Constructs a new KDEXMLReader. This doesn't contradict any part of the class
# documentation which claims this class is a singleton however. This should be
# called as a method (e.g. KDEXMLReader->new(...)).
#
# Parameters:
# $inputHandle - Ref to filehandle to read from. Must implement _readline_ and
# _eof_.
sub new
{
my $class = shift;
my $inputHandle = shift;
my $self = {
inputHandle => $inputHandle,
};
return bless ($self, $class);
}
sub inputHandle
{
my $self = shift;
return $self->{inputHandle};
}
my @nameStack = (); # Used to assign full names to modules.
my %xmlGroupingIds; # XML tags which group repositories.
my %repositories; # Maps short names to repo info blocks
my $curRepository; # ref to hash table when we are in a repo
my $inRepo = 0; # >0 if we are actually in a repo element.
my $desiredProtocol = ''; # URL protocol desired (normally 'git')
# Note on $proj: A /-separated path is fine, in which case we look
# for the right-most part of the full path which matches all of searchProject.
# e.g. kde/kdebase/kde-runtime would be matched by a proj of either
# "kdebase/kde-runtime" or simply "kde-runtime".
sub getModulesForProject
{
# These are the elements that can have <repo> under them AFAICS, and
# participate in module naming. e.g. kde/calligra or
# extragear/utils/kdesrc-build
@xmlGroupingIds{qw/component module project/} = 1;
my ($self, $proj, $protocol) = @_;
# Sanity-check
if ($proj eq '*' || !$proj) {
die "You are trying to import all modules. This is unwise. Ensure " .
"you do not have any use-module items with a bare '*'";
}
if (!%repositories) {
@nameStack = ();
$inRepo = 0;
$curRepository = undef;
$desiredProtocol = $protocol;
my $parser = XML::Parser->new(
Handlers =>
{
Start => \&xmlTagStart,
End => \&xmlTagEnd,
Char => \&xmlCharData,
},
);
# Will die if the XML is not well-formed.
$parser->parse($self->inputHandle());
}
# A hash is used to hold results since the keys inherently form a set,
# since we don't want dups.
my %results;
my $findResults = sub {
for my $result (
grep {
_projectPathMatchesWildcardSearch(
$repositories{$_}->{'fullName'}, $proj)
} keys %repositories)
{
$results{$result} = 1;
};
};
# Wildcard matches happen as specified if asked for. Non-wildcard matches
# have an implicit "$proj/*" search as well for compatibility with previous
# use-modules
if ($proj !~ /\*/) {
# We have to do a search to account for over-specified module names
# like phonon/phonon
$findResults->();
# Now setup for a wildcard search to find things like kde/kdelibs/baloo
# if just 'kdelibs' is asked for.
$proj .= '/*';
}
$findResults->();
return @repositories{keys %results};
}
sub xmlTagStart
{
my ($expat, $element, %attrs) = @_;
if (exists $xmlGroupingIds{$element}) {
push @nameStack, $attrs{'identifier'};
}
# This code used to check for direct descendants and filter them out.
# Now there are better ways (kde-build-metadata/build-script-ignore and
# the user can customize using ignore-modules), and this filter made it
# more difficult to handle kde/kdelibs{,/nepomuk-{core,widgets}}, so leave
# it out for now. See also bug 321667.
if ($element eq 'repo')
{
# This flag is cleared by the <repo>-end handler, so this *should* be
# logically impossible.
die "We are already tracking a repository" if $inRepo > 0;
$inRepo = 1;
$curRepository = {
'fullName' => join('/', @nameStack),
'repo' => '',
'name' => $nameStack[-1],
'active' => 'false',
'tarball' => '',
'branch' => '',
'branches' => [ ],
'branchtype' => '', # Either branch:stable or branch:trunk
}; # Repo/Active/tarball to be added by char handler.
$repositories{$nameStack[-1]} = $curRepository;
}
# Currently we only pull data while under a <repo> tag, so bail early if
# we're not doing this to simplify later logic.
return unless $inRepo;
# Character data is integrated by the char handler. To avoid having it
# dump all willy-nilly into our dict, we leave a flag for what the
# resultant key should be.
if ($element eq 'active') {
$curRepository->{'needs'} = 'active';
# Unset our default value since one is present in the XML
$curRepository->{'active'} = '';
}
# For git repos we want to retain the repository data and any snapshot
# tarballs available.
elsif ($element eq 'url') {
$curRepository->{'needs'} =
# this proto | needs this attr set
$attrs{'protocol'} eq $desiredProtocol ? 'repo' :
$attrs{'protocol'} eq 'tarball' ? 'tarball' : undef;
}
# i18n data gives us the defined stable and trunk branches.
elsif ($element eq 'branch') {
$curRepository->{'needs'} = 'branch';
my $branchType = $attrs{'i18n'} // '';
$curRepository->{'branchtype'} = "branch:$branchType" if $branchType;
}
}
sub xmlTagEnd
{
my ($expat, $element) = @_;
if (exists $xmlGroupingIds{$element}) {
pop @nameStack;
}
# If gathering data for char handler, stop now.
if ($inRepo && defined $curRepository->{'needs'}) {
delete $curRepository->{'needs'};
}
# Save all branches encountered, mark which ones are 'stable' and 'trunk'
# for i18n purposes, as this keys into use-stable-kde handling.
if ($element eq 'branch') {
my $branch = $curRepository->{'branch'};
push @{$curRepository->{'branches'}}, $branch;
$curRepository->{'branch'} = '';
my $branchType = $curRepository->{'branchtype'};
$curRepository->{$branchType} = $branch if $branchType;
$curRepository->{'branchtype'} = '';
}
if ($element eq 'repo' && $inRepo) {
$inRepo = 0;
$curRepository = undef;
}
}
sub xmlCharData
{
my ($expat, $utf8Data) = @_;
# The XML::Parser manpage makes it clear that the char handler can be
# called consecutive times with data for the same tag, so we use the
# append operator and then clear our flag in xmlTagEnd.
if ($curRepository && defined $curRepository->{'needs'}) {
$curRepository->{$curRepository->{'needs'}} .= $utf8Data;
}
}
# Utility subroutine, returns true if the given kde-project full path (e.g.
# kde/kdelibs/nepomuk-core) matches the given search item.
#
# The search item itself is based on path-components. Each path component in
# the search item must be present in the equivalent path component in the
# module's project path for a match. A '*' in a path component position for the
# search item matches any project path component.
#
# Finally, the search is pinned to search for a common suffix. E.g. a search
# item of 'kdelibs' would match a project path of 'kde/kdelibs' but not
# 'kde/kdelibs/nepomuk-core'. However 'kdelibs/*' would match
# 'kde/kdelibs/nepomuk-core'.
#
# First parameter is the full project path from the kde-projects database.
# Second parameter is the search item.
# Returns true if they match, false otherwise.
sub _projectPathMatchesWildcardSearch
{
my ($projectPath, $searchItem) = @_;
my @searchParts = split(m{/}, $searchItem);
my @nameStack = split(m{/}, $projectPath);
if (scalar @nameStack >= scalar @searchParts) {
my $sizeDifference = scalar @nameStack - scalar @searchParts;
# We might have to loop if we somehow find the wrong start point for our search.
# E.g. looking for a/b/* against a/a/b/c, we'd need to start with the second a.
my $i = 0;
while ($i <= $sizeDifference) {
# Find our common prefix, then ensure the remainder matches item-for-item.
for (; $i <= $sizeDifference; $i++) {
last if $nameStack[$i] eq $searchParts[0];
}
return if $i > $sizeDifference; # Not enough room to find it now
# At this point we have synched up nameStack to searchParts, ensure they
# match item-for-item.
my $found = 1;
for (my $j = 0; $found && ($j < @searchParts); $j++) {
return 1 if $searchParts[$j] eq '*'; # This always works
$found = 0 if $searchParts[$j] ne $nameStack[$i + $j];
}
return 1 if $found; # We matched every item to the substring we found.
$i++; # Try again
}
}
return;
}
1;