You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
298 lines
8.4 KiB
298 lines
8.4 KiB
/*************************************************************************** |
|
* Copyright (C) 2008-2012 by Andrzej Rybczak * |
|
* electricityispower@gmail.com * |
|
* * |
|
* This program is free software; you can redistribute it and/or modify * |
|
* it under the terms of the GNU General Public License as published by * |
|
* the Free Software Foundation; either version 2 of the License, or * |
|
* (at your option) any later version. * |
|
* * |
|
* This program is distributed in the hope that it will be useful, * |
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of * |
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * |
|
* GNU General Public License for more details. * |
|
* * |
|
* You should have received a copy of the GNU General Public License * |
|
* along with this program; if not, write to the * |
|
* Free Software Foundation, Inc., * |
|
* 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. * |
|
***************************************************************************/ |
|
|
|
#include "curl_handle.h" |
|
|
|
#ifdef HAVE_CURL_CURL_H |
|
|
|
#include <cstdlib> |
|
#include <cstring> |
|
|
|
#include "charset.h" |
|
#include "lyrics_fetcher.h" |
|
#include "utility/html.h" |
|
#include "utility/string.h" |
|
|
|
// Table of all available lyrics fetchers. The array is terminated with
// a null entry so that code iterating over it can detect the end without
// knowing the number of elements.
// NOTE(review): presumably the fetchers are tried in the order listed
// here - confirm against the code that walks this table.
LyricsFetcher *lyricsPlugins[] =
{
	// fetcher that downloads directly from the site
	new LyricwikiFetcher(),

	// remaining fetchers
	new LyricsvipFetcher(),
	new Sing365Fetcher(),
	new LoloLyricsFetcher(),
	new LyriczzFetcher(),
	new SonglyricsFetcher(),
	new LyricsmaniaFetcher(),
	new LyricstimeFetcher(),
	new MetrolyricsFetcher(),
	new JustSomeLyricsFetcher(),
	new LyrcComArFetcher(),

	// never yields lyrics, only reports a site that may contain them
	new InternetLyricsFetcher(),

	// end-of-table marker
	0
};
|
|
|
// Message stored in Result::second when a page was downloaded
// but no lyrics could be extracted from it.
const char LyricsFetcher::msgNotFound[] = "Not found";
|
|
|
// Generic fetching routine: builds the address from the template returned
// by getURL() (substituting %artist% and %title%), downloads it and cuts
// out the part between getOpenTag() and getCloseTag().
//
// Returned pair: first tells whether lyrics were obtained; second holds
// the post-processed lyrics on success, otherwise either the curl error
// description or msgNotFound.
LyricsFetcher::Result LyricsFetcher::fetch(const std::string &artist, const std::string &title)
{
	Result outcome;
	outcome.first = false;

	std::string address = getURL();
	replace(address, "%artist%", artist.c_str());
	replace(address, "%title%", title.c_str());

	std::string page;
	const CURLcode status = Curl::perform(page, address);
	if (status != CURLE_OK)
	{
		// download failed, report what curl has to say about it
		outcome.second = curl_easy_strerror(status);
		return outcome;
	}

	const bool extracted = getContent(getOpenTag(), getCloseTag(), page);
	if (extracted && !notLyrics(page))
	{
		postProcess(page);
		outcome.second = page;
		outcome.first = true;
	}
	else
	{
		// either the tags were not there or the fetcher
		// recognized the content as something else than lyrics
		outcome.second = msgNotFound;
	}
	return outcome;
}
|
|
|
// Reduces data to the text located between the first occurrence of
// open_tag and the first occurrence of close_tag that follows it
// (both tags excluded).
//
// Returns true if both tags were found. If any of them is missing,
// false is returned and data is left untouched.
bool LyricsFetcher::getContent(const char *open_tag, const char *close_tag, std::string &data)
{
	const size_t open_pos = data.find(open_tag);
	if (open_pos == std::string::npos)
		return false;

	// content starts right after the opening tag
	const size_t content_begin = open_pos + strlen(open_tag);
	const size_t content_end = data.find(close_tag, content_begin);
	if (content_end == std::string::npos)
		return false;

	data = data.substr(content_begin, content_end-content_begin);
	return true;
}
|
|
|
void LyricsFetcher::postProcess(std::string &data) |
|
{ |
|
stripHtmlTags(data); |
|
trim(data); |
|
} |
|
|
|
/***********************************************************************/ |
|
|
|
// Two-stage fetch: the generic LyricsFetcher::fetch() yields an address
// in Result::second, which is then downloaded and parsed here to get
// the actual lyrics.
//
// Returned pair: first tells whether lyrics were obtained; second holds
// the lyrics on success, otherwise the curl error description,
// msgNotFound or "Licence restriction".
LyricsFetcher::Result LyricwikiFetcher::fetch(const std::string &artist, const std::string &title)
{
	LyricsFetcher::Result outcome = LyricsFetcher::fetch(artist, title);
	if (!outcome.first)
		return outcome; // first stage failed, pass its message on

	// second stage has to succeed on its own
	outcome.first = false;

	std::string page;
	const CURLcode status = Curl::perform(page, outcome.second);
	if (status != CURLE_OK)
	{
		outcome.second = curl_easy_strerror(status);
		return outcome;
	}

	if (!getContent("'17'/></a></div>", "<!--", page))
	{
		outcome.second = msgNotFound;
		return outcome;
	}

	page = unescapeHtmlUtf8(page);

	// the site puts this notice in place of lyrics it is not allowed to show
	const char *licence_notice = "Unfortunately, we are not licensed to display the full lyrics for this song at the moment.";
	if (page.find(licence_notice) != std::string::npos)
	{
		outcome.second = "Licence restriction";
		return outcome;
	}

	// line breaks have to be converted before tags are stripped
	replace(page, "<br />", "\n");
	stripHtmlTags(page);
	trim(page);

	outcome.second = page;
	outcome.first = true;
	return outcome;
}
|
|
|
// Returns true if data contains "action=edit", in which case
// LyricsFetcher::fetch() reports the lyrics as not found.
bool LyricwikiFetcher::notLyrics(const std::string &data)
{
	const bool has_edit_action = data.find("action=edit") != std::string::npos;
	return has_edit_action;
}
|
|
|
/**********************************************************************/ |
|
|
|
// Looks for the lyrics page using google's "I'm Feeling Lucky" search
// for "artist+title+site keyword", extracts the address google points
// to and, if isURLOk() accepts it, hands it over to the generic
// LyricsFetcher::fetch().
//
// Returned pair: first tells whether lyrics were obtained; second holds
// the lyrics on success, otherwise the curl error description or
// msgNotFound.
LyricsFetcher::Result GoogleLyricsFetcher::fetch(const std::string &artist, const std::string &title)
{
	Result outcome;
	outcome.first = false;

	// artist and title are appended as they are, nothing is escaped here
	std::string google_url = "http://www.google.com/search?hl=en&ie=UTF-8&oe=UTF-8&q=";
	google_url += artist;
	google_url += "+";
	google_url += title;
	google_url += "+";
	google_url += getSiteKeyword();
	google_url += "&btnI=I%27m+Feeling+Lucky";

	// NOTE(review): the address is passed twice, the third argument
	// is presumably the referer - confirm in curl_handle.h
	std::string page;
	const CURLcode status = Curl::perform(page, google_url, google_url);
	if (status != CURLE_OK)
	{
		outcome.second = curl_easy_strerror(status);
		return outcome;
	}

	// on success page is reduced to the address of the target site
	const bool has_link = getContent("<A HREF=\"", "\">here</A>", page);
	if (has_link && isURLOk(page))
	{
		page = unescapeHtmlUtf8(page);

		// URL is set from a buffer owned by the local string, which
		// stays alive for the duration of the call below
		URL = page.c_str();
		return LyricsFetcher::fetch("", "");
	}

	outcome.second = msgNotFound;
	return outcome;
}
|
|
|
bool GoogleLyricsFetcher::isURLOk(const std::string &url) |
|
{ |
|
return url.find(getSiteKeyword()) != std::string::npos; |
|
} |
|
|
|
/**********************************************************************/ |
|
|
|
bool LyricstimeFetcher::isURLOk(const std::string &url) |
|
{ |
|
// it sometimes returns list of all artists that begin |
|
// with a given letter, e.g. www.lyricstime.com/A.html, which |
|
// is 25 chars long, so we want longer. |
|
return GoogleLyricsFetcher::isURLOk(url) && url.length() > 25; |
|
} |
|
|
|
void LyricstimeFetcher::postProcess(std::string &data) |
|
{ |
|
// lyricstime.com uses iso-8859-1 as the encoding |
|
// so we need to convert obtained lyrics to utf-8 |
|
IConv::convertFromTo("iso-8859-1", "utf-8", data); |
|
LyricsFetcher::postProcess(data); |
|
} |
|
|
|
/**********************************************************************/ |
|
|
|
void MetrolyricsFetcher::postProcess(std::string &data) |
|
{ |
|
// throw away [ from ... ] info |
|
size_t i = data.find('['), j = data.find(']'); |
|
if (i != std::string::npos && i != std::string::npos) |
|
data.replace(i, j-i+1, ""); |
|
// some of lyrics have both \n chars and <br />, html tags |
|
// are always present whereas \n chars are not, so we need to |
|
// throw them away to avoid having line breaks doubled. |
|
replace(data, " ", ""); |
|
replace(data, "<br />", "\n"); |
|
data = unescapeHtmlUtf8(data); |
|
LyricsFetcher::postProcess(data); |
|
} |
|
|
|
bool MetrolyricsFetcher::isURLOk(const std::string &url) |
|
{ |
|
// it sometimes return link to sitemap.xml, which is huge so we need to discard it |
|
return GoogleLyricsFetcher::isURLOk(url) && url.find("sitemap") == std::string::npos; |
|
} |
|
|
|
/**********************************************************************/ |
|
|
|
void LyricsmaniaFetcher::postProcess(std::string &data) |
|
{ |
|
// lyricsmania.com uses iso-8859-1 as the encoding |
|
// so we need to convert obtained lyrics to utf-8 |
|
IConv::convertFromTo("iso-8859-1", "utf-8", data); |
|
LyricsFetcher::postProcess(data); |
|
} |
|
|
|
/**********************************************************************/ |
|
|
|
void SonglyricsFetcher::postProcess(std::string &data) |
|
{ |
|
// throw away [ ... lyrics are found on www.songlyrics.com ] info. |
|
// there is +2 instead of +1 in third line because there is extra |
|
// space after ] we also want to get rid of |
|
size_t i = data.find('['), j = data.find(']'); |
|
if (i != std::string::npos && i != std::string::npos) |
|
data.replace(i, j-i+2, ""); |
|
data = unescapeHtmlUtf8(data); |
|
LyricsFetcher::postProcess(data); |
|
} |
|
|
|
|
|
/**********************************************************************/ |
|
|
|
void LyricsvipFetcher::postProcess(std::string &data) |
|
{ |
|
// throw away <div> with ad |
|
size_t i = data.find("<div class=\"ad\""), j = data.find("</div>"); |
|
if (i != std::string::npos && i != std::string::npos) |
|
data.replace(i, j-i+const_strlen("</div>"), ""); |
|
data = unescapeHtmlUtf8(data); |
|
LyricsFetcher::postProcess(data); |
|
} |
|
|
|
/**********************************************************************/ |
|
|
|
// Never returns lyrics. Runs the google search only to learn the address
// it leads to and reports that address in Result::second, with
// Result::first always set to false.
LyricsFetcher::Result InternetLyricsFetcher::fetch(const std::string &artist, const std::string &title)
{
	// result of the search is ignored on purpose, the call is made
	// for the address that isURLOk() stores in URL along the way.
	// NOTE(review): if the search fails before isURLOk() is reached,
	// URL seems to keep whatever it held before this call - confirm
	// whether a stale address can be reported here.
	GoogleLyricsFetcher::fetch(artist, title);

	LyricsFetcher::Result outcome;
	outcome.second = "The following site may contain lyrics for this song: ";
	outcome.second += URL;
	outcome.first = false;
	return outcome;
}
|
|
|
bool InternetLyricsFetcher::isURLOk(const std::string &url) |
|
{ |
|
URL = url; |
|
return false; |
|
} |
|
|
|
#endif // HAVE_CURL_CURL_H |
|
|
|
|