|
|
|
@ -62,29 +62,22 @@ void unescapeHtmlEntities(std::string &s) |
|
|
|
|
|
|
|
|
|
|
|
void stripHtmlTags(std::string &s) |
|
|
|
void stripHtmlTags(std::string &s) |
|
|
|
{ |
|
|
|
{ |
|
|
|
bool erase = 0; |
|
|
|
bool is_p, is_slash_p; |
|
|
|
for (size_t i = s.find("<"); i != std::string::npos; i = s.find("<")) |
|
|
|
for (size_t i = s.find("<"); i != std::string::npos; i = s.find("<")) |
|
|
|
{ |
|
|
|
{ |
|
|
|
size_t j = s.find(">", i)+1; |
|
|
|
size_t j = s.find(">", i); |
|
|
|
if (s.compare(i, std::min(3ul, j-i), "<p ") == 0 || s.compare(i, j-i, "</p>") == 0) |
|
|
|
if (j != std::string::npos) |
|
|
|
s.replace(i, j-i, "\n"); |
|
|
|
|
|
|
|
else |
|
|
|
|
|
|
|
s.replace(i, j-i, ""); |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
unescapeHtmlEntities(s); |
|
|
|
|
|
|
|
for (size_t i = 0; i < s.length(); ++i) |
|
|
|
|
|
|
|
{ |
|
|
|
|
|
|
|
if (erase) |
|
|
|
|
|
|
|
{ |
|
|
|
{ |
|
|
|
s.erase(s.begin()+i); |
|
|
|
++j; |
|
|
|
erase = 0; |
|
|
|
is_p = s.compare(i, j-i, "<p ") == 0 || s.compare(i, j-i, "<p>") == 0; |
|
|
|
} |
|
|
|
is_slash_p = s.compare(i, j-i, "</p>") == 0; |
|
|
|
if (s[i] == 13) // ascii code for windows line ending, get rid of this shit
|
|
|
|
if (is_p || is_slash_p) |
|
|
|
{ |
|
|
|
s.replace(i, j-i, "\n"); |
|
|
|
s[i] = '\n'; |
|
|
|
else |
|
|
|
erase = 1; |
|
|
|
s.replace(i, j-i, ""); |
|
|
|
} |
|
|
|
} |
|
|
|
else if (s[i] == '\t') |
|
|
|
else |
|
|
|
s[i] = ' '; |
|
|
|
break; |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
unescapeHtmlEntities(s); |
|
|
|
} |
|
|
|
} |
|
|
|
|