You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
304 lines
9.8 KiB
304 lines
9.8 KiB
package ksb::KDEXMLReader; |
|
|
|
# Class: KDEXMLReader |
|
# |
|
# kde_projects.xml module-handling code. |
|
# The core of this was graciously contributed by Allen Winter, and then |
|
# touched-up and kdesrc-build'ed by myself -mpyne. |
|
# (By late 2015 this is mostly mpyne's fault -mpyne). |
|
|
|
use strict; |
|
use warnings; |
|
use 5.014; |
|
|
|
our $VERSION = '0.20'; |
|
|
|
use XML::Parser; |
|
|
|
# Method: new |
|
# |
|
# Constructs a new KDEXMLReader. This doesn't contradict any part of the class |
|
# documentation which claims this class is a singleton however. This should be |
|
# called as a method (e.g. KDEXMLReader->new(...)). |
|
# |
|
# Parameters: |
|
# $inputHandle - Ref to filehandle to read from. Must implement _readline_ and |
|
# _eof_. |
|
# $desiredProtocol - Normally 'git', but other protocols like 'http' can also |
|
# be preferred (e.g. for proxy compliance). |
|
sub new |
|
{ |
|
my $class = shift; |
|
my $inputHandle = shift; |
|
my $desiredProtocol = shift; |
|
|
|
my $self = { |
|
# Maps short names to repo info blocks |
|
repositories => { }, |
|
}; |
|
|
|
$self = bless ($self, $class); |
|
$self->_readProjectData($inputHandle, $desiredProtocol); |
|
|
|
return $self; |
|
} |
|
|
|
# XML tags which group repositories. |
|
my %xmlGroupingIds = ( |
|
component => 1, |
|
module => 1, |
|
project => 1, |
|
); |
|
|
|
# The 'main' method for this class. Reads in *all* KDE projects and notes |
|
# their details for later queries. |
|
# Be careful, can throw exceptions. |
|
sub _readProjectData |
|
{ |
|
my ($self, $inputHandle, $desiredProtocol) = @_; |
|
|
|
$desiredProtocol //= ''; |
|
my $auxRef = { |
|
# Used to assign full names to modules. |
|
nameStackRef => [], |
|
# >0 if we are actually in a repo element. |
|
inRepo => 0, |
|
# ref to hash table entry for current XML element. |
|
curRepository => undef, |
|
desiredProtocol => $desiredProtocol, |
|
repositoryRef => $self->{repositories}, |
|
}; |
|
|
|
my $parser = XML::Parser->new( |
|
Handlers => |
|
{ |
|
Start => sub { _xmlTagStart($auxRef, @_); }, |
|
End => sub { _xmlTagEnd ($auxRef, @_); }, |
|
Char => sub { _xmlCharData($auxRef, @_); }, |
|
}, |
|
); |
|
|
|
# Will die if the XML is not well-formed. |
|
$parser->parse($inputHandle); |
|
} |
|
|
|
# Note on $proj: A /-separated path is fine, in which case we look |
|
# for the right-most part of the full path which matches all of searchProject. |
|
# e.g. kde/kdebase/kde-runtime would be matched by a proj of either |
|
# "kdebase/kde-runtime" or simply "kde-runtime". |
|
sub getModulesForProject |
|
{ |
|
my ($self, $proj) = @_; |
|
|
|
my $repositoryRef = $self->{repositories}; |
|
my @results; |
|
my $findResults = sub { |
|
push @results, ( |
|
grep { |
|
_projectPathMatchesWildcardSearch( |
|
$repositoryRef->{$_}->{'fullName'}, $proj) |
|
} (keys %{$repositoryRef})); |
|
}; |
|
|
|
# Wildcard matches happen as specified if asked for. |
|
# Non-wildcard matches have an implicit "$proj/*" search as well for |
|
# compatibility with previous use-modules |
|
# Project specifiers ending in .git are forced to be non-wildcarded. |
|
if ($proj !~ /\*/ && $proj !~ /\.git$/) { |
|
# We have to do a search to account for over-specified module names |
|
# like phonon/phonon |
|
$findResults->(); |
|
|
|
# Now setup for a wildcard search to find things like kde/kdelibs/baloo |
|
# if just 'kdelibs' is asked for. |
|
$proj .= '/*'; |
|
} |
|
|
|
$proj =~ s/\.git$//; |
|
|
|
# If still no wildcard and no '/' then we can use direct lookup by module |
|
# name. |
|
if ($proj !~ /\*/ && $proj !~ /\// && exists $repositoryRef->{$proj}) { |
|
push @results, $proj; |
|
} |
|
else { |
|
$findResults->(); |
|
} |
|
|
|
return @{$repositoryRef}{@results}; |
|
} |
|
|
|
sub _xmlTagStart |
|
{ |
|
my ($aux, $expat, $element, %attrs) = @_; |
|
|
|
my $nameStackRef = $aux->{nameStackRef}; |
|
if (exists $xmlGroupingIds{$element}) { |
|
push @{$nameStackRef}, $attrs{'identifier'}; |
|
} |
|
|
|
my $curRepository = $aux->{curRepository}; |
|
my $inRepo = $aux->{inRepo}; |
|
|
|
# This code used to check for direct descendants and filter them out. |
|
# Now there are better ways (kde-build-metadata/build-script-ignore and |
|
# the user can customize using ignore-modules), and this filter made it |
|
# more difficult to handle kde/kdelibs{,/nepomuk-{core,widgets}}, so leave |
|
# it out for now. See also bug 321667. |
|
if ($element eq 'repo') |
|
{ |
|
# This flag is cleared by the <repo>-end handler, so this *should* be |
|
# logically impossible. |
|
die "We are already tracking a repository" if $inRepo > 0; |
|
|
|
$aux->{inRepo} = 1; |
|
my $name = ${$nameStackRef}[-1]; |
|
$curRepository = { |
|
'fullName' => join('/', @{$nameStackRef}), |
|
'repo' => '', |
|
'name' => $name, |
|
'active' => 'false', |
|
'tarball' => '', |
|
'branch' => '', |
|
'branches' => [ ], |
|
'branchtype' => '', # Either branch:stable or branch:trunk |
|
}; # Repo/Active/tarball to be added by char handler. |
|
|
|
$aux->{repositoryRef}->{$name} = $curRepository; |
|
$aux->{curRepository} = $curRepository; |
|
} |
|
|
|
# Currently we only pull data while under a <repo> tag, so bail early if |
|
# we're not doing this to simplify later logic. |
|
return unless $inRepo; |
|
|
|
my $desiredProtocol = $aux->{desiredProtocol}; |
|
|
|
# Character data is integrated by the char handler. To avoid having it |
|
# dump all willy-nilly into our dict, we leave a flag for what the |
|
# resultant key should be. |
|
if ($element eq 'active') { |
|
$curRepository->{'needs'} = 'active'; |
|
|
|
# Unset our default value since one is present in the XML |
|
$curRepository->{'active'} = ''; |
|
} |
|
# For git repos we want to retain the repository data and any snapshot |
|
# tarballs available. |
|
elsif ($element eq 'url') { |
|
$curRepository->{'needs'} = |
|
# this proto | needs this attr set |
|
$attrs{'protocol'} eq $desiredProtocol ? 'repo' : |
|
$attrs{'protocol'} eq 'tarball' ? 'tarball' : undef; |
|
} |
|
# i18n data gives us the defined stable and trunk branches. |
|
elsif ($element eq 'branch') { |
|
$curRepository->{'needs'} = 'branch'; |
|
|
|
my $branchType = $attrs{'i18n'} // ''; |
|
$curRepository->{'branchtype'} = "branch:$branchType" if $branchType; |
|
} |
|
} |
|
|
|
sub _xmlTagEnd |
|
{ |
|
my ($aux, $expat, $element) = @_; |
|
|
|
my $nameStackRef = $aux->{nameStackRef}; |
|
if (exists $xmlGroupingIds{$element}) { |
|
pop @{$nameStackRef}; |
|
} |
|
|
|
my $inRepo = $aux->{inRepo}; |
|
my $curRepository = $aux->{curRepository}; |
|
|
|
# If gathering data for char handler, stop now. |
|
if ($inRepo && defined $curRepository->{'needs'}) { |
|
delete $curRepository->{'needs'}; |
|
} |
|
|
|
# Save all branches encountered, mark which ones are 'stable' and 'trunk' |
|
# for i18n purposes, as this keys into use-stable-kde handling. |
|
if ($element eq 'branch') { |
|
my $branch = $curRepository->{'branch'}; |
|
push @{$curRepository->{'branches'}}, $branch; |
|
$curRepository->{'branch'} = ''; |
|
|
|
my $branchType = $curRepository->{'branchtype'}; |
|
$curRepository->{$branchType} = $branch if $branchType; |
|
$curRepository->{'branchtype'} = ''; |
|
} |
|
|
|
if ($element eq 'repo' && $inRepo) { |
|
$aux->{inRepo} = 0; |
|
$aux->{curRepository} = undef; |
|
} |
|
} |
|
|
|
sub _xmlCharData |
|
{ |
|
my ($aux, $expat, $utf8Data) = @_; |
|
my $curRepository = $aux->{curRepository}; |
|
|
|
# The XML::Parser manpage makes it clear that the char handler can be |
|
# called consecutive times with data for the same tag, so we use the |
|
# append operator and then clear our flag in _xmlTagEnd. |
|
if ($curRepository && defined $curRepository->{'needs'}) { |
|
$curRepository->{$curRepository->{'needs'}} .= $utf8Data; |
|
} |
|
} |
|
|
|
# Utility subroutine, returns true if the given kde-project full path (e.g. |
|
# kde/kdelibs/nepomuk-core) matches the given search item. |
|
# |
|
# The search item itself is based on path-components. Each path component in |
|
# the search item must be present in the equivalent path component in the |
|
# module's project path for a match. A '*' in a path component position for the |
|
# search item matches any project path component. |
|
# |
|
# Finally, the search is pinned to search for a common suffix. E.g. a search |
|
# item of 'kdelibs' would match a project path of 'kde/kdelibs' but not |
|
# 'kde/kdelibs/nepomuk-core'. However 'kdelibs/*' would match |
|
# 'kde/kdelibs/nepomuk-core'. |
|
# |
|
# First parameter is the full project path from the kde-projects database. |
|
# Second parameter is the search item. |
|
# Returns true if they match, false otherwise. |
|
sub _projectPathMatchesWildcardSearch |
|
{ |
|
my ($projectPath, $searchItem) = @_; |
|
|
|
my @searchParts = split(m{/}, $searchItem); |
|
my @nameStack = split(m{/}, $projectPath); |
|
|
|
if (scalar @nameStack >= scalar @searchParts) { |
|
my $sizeDifference = scalar @nameStack - scalar @searchParts; |
|
|
|
# We might have to loop if we somehow find the wrong start point for our search. |
|
# E.g. looking for a/b/* against a/a/b/c, we'd need to start with the second a. |
|
my $i = 0; |
|
while ($i <= $sizeDifference) { |
|
# Find our common prefix, then ensure the remainder matches item-for-item. |
|
for (; $i <= $sizeDifference; $i++) { |
|
last if $nameStack[$i] eq $searchParts[0]; |
|
} |
|
|
|
return if $i > $sizeDifference; # Not enough room to find it now |
|
|
|
# At this point we have synched up nameStack to searchParts, ensure they |
|
# match item-for-item. |
|
my $found = 1; |
|
for (my $j = 0; $found && ($j < @searchParts); $j++) { |
|
return 1 if $searchParts[$j] eq '*'; # This always works |
|
$found = 0 if $searchParts[$j] ne $nameStack[$i + $j]; |
|
} |
|
|
|
return 1 if $found; # We matched every item to the substring we found. |
|
$i++; # Try again |
|
} |
|
} |
|
|
|
return; |
|
} |
|
|
|
1;
|
|
|