|
|
|
|
@ -18,6 +18,7 @@ |
|
|
|
|
* 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. * |
|
|
|
|
***************************************************************************/ |
|
|
|
|
|
|
|
|
|
#include "config.h" |
|
|
|
|
#include "curl_handle.h" |
|
|
|
|
|
|
|
|
|
#ifdef HAVE_CURL_CURL_H |
|
|
|
|
@ -26,6 +27,7 @@ |
|
|
|
|
#include <cstring> |
|
|
|
|
#include <boost/algorithm/string/replace.hpp> |
|
|
|
|
#include <boost/algorithm/string/trim.hpp> |
|
|
|
|
#include <boost/regex.hpp> |
|
|
|
|
|
|
|
|
|
#include "charset.h" |
|
|
|
|
#include "lyrics_fetcher.h" |
|
|
|
|
@ -35,16 +37,10 @@ |
|
|
|
|
LyricsFetcher *lyricsPlugins[] = |
|
|
|
|
{ |
|
|
|
|
new LyricwikiFetcher(), |
|
|
|
|
new LyricsvipFetcher(), |
|
|
|
|
new Sing365Fetcher(), |
|
|
|
|
new LoloLyricsFetcher(), |
|
|
|
|
new LyriczzFetcher(), |
|
|
|
|
new SonglyricsFetcher(), |
|
|
|
|
new LyricsmaniaFetcher(), |
|
|
|
|
new LyricstimeFetcher(), |
|
|
|
|
new MetrolyricsFetcher(), |
|
|
|
|
new JustSomeLyricsFetcher(), |
|
|
|
|
new LyrcComArFetcher(), |
|
|
|
|
new InternetLyricsFetcher(), |
|
|
|
|
0 |
|
|
|
|
}; |
|
|
|
|
@ -56,7 +52,7 @@ LyricsFetcher::Result LyricsFetcher::fetch(const std::string &artist, const std: |
|
|
|
|
Result result; |
|
|
|
|
result.first = false; |
|
|
|
|
|
|
|
|
|
std::string url = getURL(); |
|
|
|
|
std::string url = this->url(); |
|
|
|
|
boost::replace_all(url, "%artist%", artist.c_str()); |
|
|
|
|
boost::replace_all(url, "%title%", title.c_str()); |
|
|
|
|
|
|
|
|
|
@ -69,35 +65,40 @@ LyricsFetcher::Result LyricsFetcher::fetch(const std::string &artist, const std: |
|
|
|
|
return result; |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
bool parse_ok = getContent(getOpenTag(), getCloseTag(), data); |
|
|
|
|
auto lyrics = getContent(regex(), data); |
|
|
|
|
|
|
|
|
|
if (!parse_ok || notLyrics(data)) |
|
|
|
|
if (lyrics.empty() || notLyrics(data)) |
|
|
|
|
{ |
|
|
|
|
result.second = msgNotFound; |
|
|
|
|
return result; |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
postProcess(data); |
|
|
|
|
data.clear(); |
|
|
|
|
for (auto it = lyrics.begin(); it != lyrics.end(); ++it) |
|
|
|
|
{ |
|
|
|
|
postProcess(*it); |
|
|
|
|
if (!it->empty()) |
|
|
|
|
{ |
|
|
|
|
data += *it; |
|
|
|
|
if (it != lyrics.end()-1) |
|
|
|
|
data += "\n\n----------\n\n"; |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
result.second = data; |
|
|
|
|
result.first = true; |
|
|
|
|
return result; |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
bool LyricsFetcher::getContent(const char *open_tag, const char *close_tag, std::string &data) |
|
|
|
|
std::vector<std::string> LyricsFetcher::getContent(const char *regex, const std::string &data) |
|
|
|
|
{ |
|
|
|
|
size_t a, b; |
|
|
|
|
if ((a = data.find(open_tag)) != std::string::npos) |
|
|
|
|
{ |
|
|
|
|
a += strlen(open_tag); |
|
|
|
|
if ((b = data.find(close_tag, a)) != std::string::npos) |
|
|
|
|
data = data.substr(a, b-a); |
|
|
|
|
else |
|
|
|
|
return false; |
|
|
|
|
} |
|
|
|
|
else |
|
|
|
|
return false; |
|
|
|
|
return true; |
|
|
|
|
std::vector<std::string> result; |
|
|
|
|
boost::regex rx(regex); |
|
|
|
|
auto first = boost::sregex_iterator(data.begin(), data.end(), rx); |
|
|
|
|
auto last = boost::sregex_iterator(); |
|
|
|
|
for (; first != last; ++first) |
|
|
|
|
result.push_back(first->str(1)); |
|
|
|
|
return result; |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
void LyricsFetcher::postProcess(std::string &data) |
|
|
|
|
@ -124,23 +125,36 @@ LyricsFetcher::Result LyricwikiFetcher::fetch(const std::string &artist, const s |
|
|
|
|
return result; |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
bool parse_ok = getContent("'17'/></a></div>", "<!--", data); |
|
|
|
|
auto lyrics = getContent("<div class='lyricbox'><div class='rtMatcher'>.*?</div>(.*?)<!--", data); |
|
|
|
|
|
|
|
|
|
if (!parse_ok) |
|
|
|
|
if (lyrics.empty()) |
|
|
|
|
{ |
|
|
|
|
result.second = msgNotFound; |
|
|
|
|
return result; |
|
|
|
|
} |
|
|
|
|
data = unescapeHtmlUtf8(data); |
|
|
|
|
if (data.find("Unfortunately, we are not licensed to display the full lyrics for this song at the moment.") != std::string::npos) |
|
|
|
|
std::transform(lyrics.begin(), lyrics.end(), lyrics.begin(), unescapeHtmlUtf8); |
|
|
|
|
bool license_restriction = std::any_of(lyrics.begin(), lyrics.end(), [](const std::string &s) { |
|
|
|
|
return s.find("Unfortunately, we are not licensed to display the full lyrics for this song at the moment.") != std::string::npos; |
|
|
|
|
}); |
|
|
|
|
if (license_restriction) |
|
|
|
|
{ |
|
|
|
|
result.second = "Licence restriction"; |
|
|
|
|
return result; |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
boost::replace_all(data, "<br />", "\n"); |
|
|
|
|
stripHtmlTags(data); |
|
|
|
|
boost::trim(data); |
|
|
|
|
data.clear(); |
|
|
|
|
for (auto it = lyrics.begin(); it != lyrics.end(); ++it) |
|
|
|
|
{ |
|
|
|
|
boost::replace_all(*it, "<br />", "\n"); |
|
|
|
|
stripHtmlTags(*it); |
|
|
|
|
boost::trim(*it); |
|
|
|
|
if (!it->empty()) |
|
|
|
|
{ |
|
|
|
|
data += *it; |
|
|
|
|
if (it != lyrics.end()-1) |
|
|
|
|
data += "\n\n----------\n\n"; |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
result.second = data; |
|
|
|
|
result.first = true; |
|
|
|
|
@ -163,8 +177,8 @@ LyricsFetcher::Result GoogleLyricsFetcher::fetch(const std::string &artist, cons |
|
|
|
|
std::string search_str = artist; |
|
|
|
|
search_str += "+"; |
|
|
|
|
search_str += title; |
|
|
|
|
search_str += "+"; |
|
|
|
|
search_str += getSiteKeyword(); |
|
|
|
|
search_str += "+%2B"; |
|
|
|
|
search_str += siteKeyword(); |
|
|
|
|
|
|
|
|
|
std::string google_url = "http://www.google.com/search?hl=en&ie=UTF-8&oe=UTF-8&q="; |
|
|
|
|
google_url += search_str; |
|
|
|
|
@ -179,15 +193,15 @@ LyricsFetcher::Result GoogleLyricsFetcher::fetch(const std::string &artist, cons |
|
|
|
|
return result; |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
bool found_url = getContent("<A HREF=\"", "\">here</A>", data); |
|
|
|
|
auto urls = getContent("<A HREF=\"(.*?)\">here</A>", data); |
|
|
|
|
|
|
|
|
|
if (!found_url || !isURLOk(data)) |
|
|
|
|
if (urls.empty() || !isURLOk(urls[0])) |
|
|
|
|
{ |
|
|
|
|
result.second = msgNotFound; |
|
|
|
|
return result; |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
data = unescapeHtmlUtf8(data); |
|
|
|
|
data = unescapeHtmlUtf8(urls[0]); |
|
|
|
|
//result.second = data;
|
|
|
|
|
//return result;
|
|
|
|
|
|
|
|
|
|
@ -197,25 +211,7 @@ LyricsFetcher::Result GoogleLyricsFetcher::fetch(const std::string &artist, cons |
|
|
|
|
|
|
|
|
|
bool GoogleLyricsFetcher::isURLOk(const std::string &url) |
|
|
|
|
{ |
|
|
|
|
return url.find(getSiteKeyword()) != std::string::npos; |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
/**********************************************************************/ |
|
|
|
|
|
|
|
|
|
bool LyricstimeFetcher::isURLOk(const std::string &url) |
|
|
|
|
{ |
|
|
|
|
// it sometimes returns list of all artists that begin
|
|
|
|
|
// with a given letter, e.g. www.lyricstime.com/A.html, which
|
|
|
|
|
// is 25 chars long, so we want longer.
|
|
|
|
|
return GoogleLyricsFetcher::isURLOk(url) && url.length() > 25; |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
void LyricstimeFetcher::postProcess(std::string &data) |
|
|
|
|
{ |
|
|
|
|
// lyricstime.com uses iso-8859-1 as the encoding
|
|
|
|
|
// so we need to convert obtained lyrics to utf-8
|
|
|
|
|
data = Charset::toUtf8From(data, "iso-8859-1"); |
|
|
|
|
LyricsFetcher::postProcess(data); |
|
|
|
|
return url.find(siteKeyword()) != std::string::npos; |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
/**********************************************************************/ |
|
|
|
|
@ -253,33 +249,6 @@ void LyricsmaniaFetcher::postProcess(std::string &data) |
|
|
|
|
|
|
|
|
|
/**********************************************************************/ |
|
|
|
|
|
|
|
|
|
void SonglyricsFetcher::postProcess(std::string &data) |
|
|
|
|
{ |
|
|
|
|
// throw away [ ... lyrics are found on www.songlyrics.com ] info.
|
|
|
|
|
// there is +2 instead of +1 in third line because there is extra
|
|
|
|
|
// space after ] we also want to get rid of
|
|
|
|
|
size_t i = data.find('['), j = data.find(']'); |
|
|
|
|
if (i != std::string::npos && i != std::string::npos) |
|
|
|
|
data.replace(i, j-i+2, ""); |
|
|
|
|
data = unescapeHtmlUtf8(data); |
|
|
|
|
LyricsFetcher::postProcess(data); |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**********************************************************************/ |
|
|
|
|
|
|
|
|
|
void LyricsvipFetcher::postProcess(std::string &data) |
|
|
|
|
{ |
|
|
|
|
// throw away <div> with ad
|
|
|
|
|
size_t i = data.find("<div class=\"ad\""), j = data.find("</div>"); |
|
|
|
|
if (i != std::string::npos && i != std::string::npos) |
|
|
|
|
data.replace(i, j-i+const_strlen("</div>"), ""); |
|
|
|
|
data = unescapeHtmlUtf8(data); |
|
|
|
|
LyricsFetcher::postProcess(data); |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
/**********************************************************************/ |
|
|
|
|
|
|
|
|
|
LyricsFetcher::Result InternetLyricsFetcher::fetch(const std::string &artist, const std::string &title) |
|
|
|
|
{ |
|
|
|
|
GoogleLyricsFetcher::fetch(artist, title); |
|
|
|
|
|