Merged revisions 782964-782967,782969-783127,783129-783243,783245,783248-783302,783304-783477,783479-783847 via svnmerge from

https://vkrause@svn.kde.org/home/kde/branches/kdepim/enterprise/kdepim

........
  r782964 | vkrause | 2008-03-06 18:31:59 +0100 (Thu, 06 Mar 2008) | 7 lines
  
  Fix corruption of binary attachments introduced in revision 771698.
  
  The actual problem here is that EncodingDetector::decode(_const_ QByteArray &data)
  replaces all 0x00 bytes in data with 0x20 even though the parameter is declared const.
  
  CCMAIL: ahartmetz@gmail.com
........
  r783567 | ahartmetz | 2008-03-08 19:21:35 +0100 (Sat, 08 Mar 2008) | 1 line
  
  Remove some of the weirder methods of EncodingDetector and make the one method public that we actually want: analyze(). analyze() also does not alter its input...
........
  r783847 | ahartmetz | 2008-03-09 20:22:17 +0100 (Sun, 09 Mar 2008) | 1 line
  
  fix return value of scriptForLanguageCode() if the language code is unknown
........

svn path=/branches/KDE/3.5/kdepim/; revision=784040
wilder-work
Volker Krause 18 years ago
parent f706dfb9d1
commit 4e2b1fef63
  1. 136
      encodingdetector.cpp
  2. 40
      encodingdetector.h
  3. 4
      kmcomposewin.cpp

@ -809,34 +809,6 @@ static int findXMLEncoding(const QCString &str, int &encodingLength)
return pos;
}
/**
 * Overwrites NUL bytes in @p data with spaces, in place, and reports whether
 * any replacement was made.
 *
 * If is16Bit(d->m_codec) is true, the buffer is examined in two-byte steps and
 * only a pair of zero bytes is treated as a NUL; the second byte of the pair
 * is overwritten. Otherwise each examined zero byte is overwritten.
 *
 * @param data buffer to scan; modified in place
 * @param len  number of bytes in @p data
 * @returns true if at least one byte was replaced
 */
bool EncodingDetector::processNull(char *data, int len)
{
bool bin=false;
if(is16Bit(d->m_codec))
{
// Visit odd indices only, i.e. the byte pairs (0,1), (2,3), ...
for (int i=1; i < len; i+=2)
{
if ((data[i]=='\0') && (data[i-1]=='\0'))
{
bin=true;
// Only the second byte of the pair is changed; the first stays 0.
data[i]=' ';
}
}
return bin;
}
// replace '\0' by spaces, for buggy pages
// NOTE(review): i starts at len-1 and is pre-decremented before the first
// test, so the scan covers indices len-2 .. 0 and never looks at the last
// byte -- confirm whether skipping it is intentional.
int i = len-1;
while(--i>=0)
{
if(data[i]==0)
{
bin=true;
data[i]=' ';
}
}
return bin;
}
bool EncodingDetector::errorsIfUtf8 (const char* data, int length)
{
@ -902,7 +874,6 @@ static const unsigned char highest5Bits = 0xF8;
return false;
}
/**
 * Default constructor: allocates the private data object @c d.
 */
EncodingDetector::EncodingDetector() : d(new EncodingDetectorPrivate)
{
}
@ -1007,111 +978,9 @@ bool EncodingDetector::setEncoding(const char *_encoding, EncodingChoiceSource t
return true;
}
/**
 * Converts @p len bytes of @p data to a QString using d->m_decoder.
 *
 * analyze() is run on the data only if d->m_analyzeCalled is not yet set;
 * the flag is then set so later calls skip the analysis.
 *
 * NOTE(review): although @p data is declared const, it is passed to
 * processNull() through a const_cast, so NUL bytes in the caller's buffer
 * are overwritten with spaces.
 *
 * @param data bytes to decode
 * @param len  number of bytes in @p data
 * @returns the decoded text
 */
QString EncodingDetector::decode(const char *data, int len)
{
// Casts away const: this modifies the caller's buffer in place.
processNull(const_cast<char *>(data),len);
if (!d->m_analyzeCalled)
{
analyze(data,len);
d->m_analyzeCalled=true;
}
return d->m_decoder->toUnicode(data,len);
}
/**
 * Overload of decode() taking a QByteArray.
 *
 * analyze() is run on the data only if d->m_analyzeCalled is not yet set;
 * the flag is then set so later calls skip the analysis.
 *
 * NOTE(review): although @p data is a const reference, its storage is passed
 * to processNull() through a const_cast, so NUL bytes in the array are
 * overwritten with spaces.
 *
 * @param data bytes to decode
 * @returns the decoded text
 */
QString EncodingDetector::decode(const QByteArray &data)
{
// Casts away const: this modifies the contents of the caller's array.
processNull(const_cast<char *>(data.data()),data.size());
if (!d->m_analyzeCalled)
{
analyze(data.data(),data.size());
d->m_analyzeCalled=true;
}
return d->m_decoder->toUnicode(data, data.size());
}
/**
 * Decodes incoming data, holding it back in
 * d->m_bufferForDefferedEncDetection until the detection is considered good
 * enough to start producing output.
 *
 * Output starts (d->m_writtingHappened is set) when analyze() returns true and
 * either d->m_seenBody is set or d->m_source is neither AutoDetectedEncoding
 * nor DefaultEncoding. Once data has already been buffered, output also
 * starts when the buffer grows beyond MAX_BUFFER.
 * After that point every call decodes its chunk directly.
 *
 * NUL bytes are replaced by spaces before decoding, either via processNull()
 * on the caller's buffer (through a const_cast) or via replace() on the
 * internal buffer.
 *
 * @param data next chunk of input
 * @param len  number of bytes in @p data
 * @returns the decoded text, or an empty QString while data is still being
 *          buffered
 */
QString EncodingDetector::decodeWithBuffering(const char *data, int len)
{
#ifdef DECODE_DEBUG
kWarning() << "EncodingDetector: decoding "<<len<<" bytes";
#endif
if (d->m_writtingHappened)
{
#ifdef DECODE_DEBUG
kWarning() << "EncodingDetector: d->m_writtingHappened "<< d->m_codec->name();
#endif
// Output has already started: decode this chunk directly.
// Casts away const: this modifies the caller's buffer in place.
processNull(const_cast<char *>(data),len);
return d->m_decoder->toUnicode(data, len);
}
else
{
if (d->m_bufferForDefferedEncDetection.isEmpty())
{
// Nothing buffered yet: try to decide on this first chunk alone.
if (analyze(data,len)
&& (d->m_seenBody
|| !(d->m_source==AutoDetectedEncoding
||d->m_source==DefaultEncoding
)
)
)//dontWannaSeeHead()
{
#ifdef DECODE_DEBUG
kWarning() << "EncodingDetector: m_writtingHappened first time "<< d->m_codec->name();
#endif
// Casts away const: this modifies the caller's buffer in place.
processNull(const_cast<char *>(data),len);
d->m_writtingHappened=true;
return d->m_decoder->toUnicode(data, len);
}
else
{
#ifdef DECODE_DEBUG
kWarning() << "EncodingDetector: begin deffer";
#endif
// NOTE(review): data is stored without len; presumably only the bytes
// up to the first NUL are copied -- confirm against the buffer's type.
d->m_bufferForDefferedEncDetection=data;
}
}
else
{
// NOTE(review): appended without len (see the assignment above), and
// analyze() below only sees the new chunk, not the accumulated buffer.
d->m_bufferForDefferedEncDetection+=data;
if ( (analyze(data,len)
&& (d->m_seenBody
|| !(d->m_source==AutoDetectedEncoding
||d->m_source==DefaultEncoding
)
)
) || d->m_bufferForDefferedEncDetection.length()>MAX_BUFFER
)//dontWannaSeeHead()
{
// Start output: decode everything buffered so far, then empty the buffer.
d->m_writtingHappened=true;
d->m_bufferForDefferedEncDetection.replace('\0',' ');
QString result(d->m_decoder->toUnicode(d->m_bufferForDefferedEncDetection,
d->m_bufferForDefferedEncDetection.size()));
d->m_bufferForDefferedEncDetection.resize(0);
#ifdef DECODE_DEBUG
kWarning() << "EncodingDetector: m_writtingHappened in the middle " << d->m_codec->name();
#endif
return result;
}
}
}
// Still buffering: nothing to hand out yet.
return QString();
}
QString EncodingDetector::flush()
bool EncodingDetector::analyze(const QByteArray &data)
{
if (d->m_bufferForDefferedEncDetection.isEmpty())
return QString();
d->m_bufferForDefferedEncDetection.replace('\0',' ');
QString result(d->m_decoder->toUnicode(d->m_bufferForDefferedEncDetection,
d->m_bufferForDefferedEncDetection.size()));
d->m_bufferForDefferedEncDetection.resize(0);
#ifdef DECODE_DEBUG
kWarning() << "EncodingDetector:flush() "<< d->m_bufferForDefferedEncDetection.length()<<" bytes "<< d->m_codec->name();
#endif
return result;
return analyze( data.data(), data.size() );
}
bool EncodingDetector::analyze(const char *data, int len)
@ -1501,6 +1370,7 @@ EncodingDetector::AutoDetectScript EncodingDetector::scriptForLanguageCode(const
if ( lc.startsWith( QString::fromAscii( langStr ) ) )
return pango_script_for_lang[i].scripts[0];
}
return None;
}
#undef DECODE_DEBUG

@ -125,32 +125,16 @@ public:
EncodingChoiceSource encodingChoiceSource() const;
/**
* The main class method
*
* Calls protected analyze() only the first time of the whole object life
*
* Replaces all null chars with spaces.
*/
QString decode(const char *data, int len);
QString decode(const QByteArray &data);
//* You don't need to call analyze() if you use this method.
/**
* Convenience method that uses buffering. It waits for full html head to be buffered
* (i.e. calls analyze every time until it returns true).
*
* Replaces all null chars with spaces.
*
* @returns Decoded data, or empty string, if there was not enough data for accurate detection
* @see flush()
* Analyze text data.
* @returns true if there was enough data for accurate detection
*/
QString decodeWithBuffering(const char *data, int len);
bool analyze( const char *data, int len );
/**
* Convenience method to be used with decodeForHtml. Flushes buffer.
* @see decodeForHtml()
* Analyze text data.
* @returns true if there was enough data for accurate detection
*/
QString flush();
bool analyze( const QByteArray &data );
/**
* Takes lang name _after_ it was i18n()'ed
@ -161,12 +145,6 @@ public:
static bool hasAutoDetectionForScript(AutoDetectScript);
protected:
/**
* This nice method will kill all 0 bytes (or double bytes)
* and remember if this was a binary or not ;)
*/
bool processNull(char* data,int length);
/**
* Check if we are really utf8. Taken from kate
*
@ -176,12 +154,6 @@ protected:
*/
bool errorsIfUtf8 (const char* data, int length);
/**
* Analyze text data.
* @returns true if there was enough data for accurate detection
*/
bool analyze (const char *data, int len);
/**
* @returns QTextDecoder for detected encoding
*/

@ -2758,9 +2758,9 @@ void KMComposeWin::slotAttachFileResult(KIO::Job *job)
EncodingDetector ed;
KLocale *loc = KGlobal::locale();
ed.setAutoDetectLanguage( EncodingDetector::scriptForLanguageCode ( loc->language() ) );
ed.decode( ( *it ).data );
ed.analyze( (*it).data );
partCharset = ed.encoding();
if (partCharset.isEmpty()) //shouldn't happen
if ( partCharset.isEmpty() ) //shouldn't happen
partCharset = mCharset;
}

Loading…
Cancel
Save