lyrics fetcher: fix fetchers and improve formatting of lyrics

master
Andrzej Rybczak 9 years ago
parent a53c574edd
commit 888b6bc1c9
  1. 1
      NEWS
  2. 21
      src/lyrics_fetcher.cpp
  3. 4
      src/lyrics_fetcher.h
  4. 4
      src/utility/html.cpp

@ -5,6 +5,7 @@ ncmpcpp-0.7.6 (????-??-??)
* Opening playlist editor when there is no MPD playlists directory no longer freezes the application.
* Added info about behavior of MPD_HOST and MPD_PORT environment variables to man page.
* Tilde will now be expanded to home directory in visualizer_fifo_path, execute_on_song_change and external_editor configuration variables.
* Fixed lyricwiki and justsomelyrics fetchers.
ncmpcpp-0.7.5 (2016-08-17)
* Action chains can now be used for seeking.

@ -25,7 +25,9 @@
#include <cstdlib>
#include <cstring>
#include <boost/algorithm/string/join.hpp>
#include <boost/algorithm/string/replace.hpp>
#include <boost/algorithm/string/split.hpp>
#include <boost/algorithm/string/trim.hpp>
#include <boost/regex.hpp>
@ -105,6 +107,15 @@ std::vector<std::string> LyricsFetcher::getContent(const char *regex_, const std
// Generic cleanup applied to fetched lyrics text, in place:
//  1. strips HTML tags via stripHtmlTags(),
//  2. trims leading/trailing whitespace from every line,
//  3. collapses runs of consecutive empty lines into a single empty line,
//  4. rejoins the lines with '\n' and trims the whole result.
void LyricsFetcher::postProcess(std::string &data) const
{
	stripHtmlTags(data);
	// Remove indentation from each line and collapse multiple newlines into one.
	std::vector<std::string> lines;
	boost::split(lines, data, boost::is_any_of("\r\n"));
	for (auto &line : lines)
		boost::trim(line);
	// std::unique only moves the kept elements to the front and returns the new
	// logical end; it does not shrink the container. The tail has to be erased
	// explicitly, otherwise the leftover elements are still joined into the result.
	auto last = std::unique(lines.begin(), lines.end(), [](const std::string &a, const std::string &b) {
		return a.empty() && b.empty();
	});
	lines.erase(last, lines.end());
	data = boost::algorithm::join(lines, "\n");
	boost::trim(data);
}
@ -126,7 +137,7 @@ LyricsFetcher::Result LyricwikiFetcher::fetch(const std::string &artist, const s
return result;
}
auto lyrics = getContent("<div class='lyricbox'>(.*?)<!--", data);
auto lyrics = getContent("<div class='lyricbox'>(.*?)</div>", data);
if (lyrics.empty())
{
@ -224,6 +235,14 @@ void Sing365Fetcher::postProcess(std::string &data) const
/**********************************************************************/
// Post-processing for justsomelyrics.com results: runs the text through
// unescapeHtmlUtf8() and then applies the base-class cleanup.
void JustSomeLyricsFetcher::postProcess(std::string &data) const
{
// NOTE(review): presumably decodes HTML entities into UTF-8 text; confirm
// against the definition of unescapeHtmlUtf8().
data = unescapeHtmlUtf8(data);
// Delegate the rest of the cleanup to the generic implementation.
LyricsFetcher::postProcess(data);
}
/**********************************************************************/
void MetrolyricsFetcher::postProcess(std::string &data) const
{
// some of lyrics have both \n chars and <br />, html tags

@ -108,7 +108,9 @@ struct JustSomeLyricsFetcher : public GoogleLyricsFetcher
virtual const char *name() const OVERRIDE { return "justsomelyrics.com"; }
protected:
virtual const char *regex() const OVERRIDE { return "<div class=\"content.*?</div>(.*?)</div>"; }
virtual const char *regex() const OVERRIDE { return "<div class=\"content.*?</div>\\s*</div>(.*?)<div"; }
virtual void postProcess(std::string &data) const OVERRIDE;
};
struct AzLyricsFetcher : public GoogleLyricsFetcher

@ -66,7 +66,7 @@ void stripHtmlTags(std::string &s)
for (size_t i = s.find("<"); i != std::string::npos; i = s.find("<"))
{
size_t j = s.find(">", i)+1;
if (s.compare(i, j-i, "<p>") == 0 || s.compare(i, j-i, "</p>") == 0)
if (s.compare(i, std::min(3ul, j-i), "<p ") == 0 || s.compare(i, j-i, "</p>") == 0)
s.replace(i, j-i, "\n");
else
s.replace(i, j-i, "");
@ -87,4 +87,4 @@ void stripHtmlTags(std::string &s)
else if (s[i] == '\t')
s[i] = ' ';
}
}
}

Loading…
Cancel
Save