Remove some of the weirder methods of EncodingDetector and make the one method public that we actually want: analyze(). analyze() also does not alter its input...

svn path=/branches/kdepim/enterprise/kdepim/; revision=783567
wilder-work
Andreas Hartmetz 18 years ago
parent 78cf645ab7
commit 0a5ed6d612
  1. 135
      encodingdetector.cpp
  2. 40
      encodingdetector.h
  3. 7
      kmcomposewin.cpp

@ -809,34 +809,6 @@ static int findXMLEncoding(const QCString &str, int &encodingLength)
return pos;
}
/**
 * Overwrites NUL bytes in @p data with spaces and reports whether any
 * replacement took place.
 *
 * When is16Bit(d->m_codec) is true the buffer is walked in two-byte steps and
 * only a pair of consecutive zero bytes counts; the second byte of such a pair
 * is the one replaced. Otherwise the bytes are examined one at a time from
 * index len-2 down to index 0 -- the byte at index len-1 is never inspected.
 *
 * @param data buffer that is modified in place
 * @param len  number of bytes in @p data
 * @returns true if at least one byte was replaced
 */
bool EncodingDetector::processNull(char *data, int len)
{
    bool replacedAny = false;

    if (!is16Bit(d->m_codec))
    {
        // replace '\0' by spaces, for buggy pages
        for (int pos = len - 2; pos >= 0; --pos)
        {
            if (data[pos] != 0)
                continue;
            replacedAny = true;
            data[pos] = ' ';
        }
        return replacedAny;
    }

    // 16-bit codec: look at aligned byte pairs (0,1), (2,3), ...
    for (int second = 1; second < len; second += 2)
    {
        const int first = second - 1;
        if ((data[second] == '\0') && (data[first] == '\0'))
        {
            replacedAny = true;
            data[second] = ' ';
        }
    }
    return replacedAny;
}
bool EncodingDetector::errorsIfUtf8 (const char* data, int length)
{
@ -902,7 +874,6 @@ static const unsigned char highest5Bits = 0xF8;
return false;
}
/**
 * Default constructor: allocates the EncodingDetectorPrivate instance that
 * the d pointer refers to.
 */
EncodingDetector::EncodingDetector()
    : d(new EncodingDetectorPrivate)
{
}
@ -1007,111 +978,9 @@ bool EncodingDetector::setEncoding(const char *_encoding, EncodingChoiceSource t
return true;
}
/**
 * Decodes @p len bytes starting at @p data with the current decoder.
 *
 * Before decoding, processNull() is run on the caller's buffer with its
 * constness cast away, so NUL bytes in @p data are overwritten with spaces.
 * analyze() is invoked only while d->m_analyzeCalled is still false; the flag
 * is set afterwards.
 *
 * @param data input bytes (modified in place, despite being const)
 * @param len  number of bytes in @p data
 * @returns the result of d->m_decoder->toUnicode() for the buffer
 */
QString EncodingDetector::decode(const char *data, int len)
{
    char *writable = const_cast<char *>(data);
    processNull(writable, len);

    const bool needsAnalysis = !d->m_analyzeCalled;
    if (needsAnalysis)
    {
        analyze(data, len);
        d->m_analyzeCalled = true;
    }

    return d->m_decoder->toUnicode(data, len);
}
/**
 * Decodes the contents of @p data with the current decoder.
 *
 * Before decoding, processNull() is run on the array's storage with its
 * constness cast away, so NUL bytes inside @p data are overwritten with
 * spaces. analyze() is invoked only while d->m_analyzeCalled is still false;
 * the flag is set afterwards.
 *
 * @param data input bytes (modified in place, despite being const)
 * @returns the result of d->m_decoder->toUnicode() for the array
 */
QString EncodingDetector::decode(const QByteArray &data)
{
    char *writable = const_cast<char *>(data.data());
    processNull(writable, data.size());

    const bool needsAnalysis = !d->m_analyzeCalled;
    if (needsAnalysis)
    {
        analyze(data.data(), data.size());
        d->m_analyzeCalled = true;
    }

    return d->m_decoder->toUnicode(data, data.size());
}
/**
 * Decodes a chunk of data, holding output back until detection has settled.
 *
 * Once d->m_writtingHappened is set, every chunk is NUL-processed in place and
 * decoded directly. Before that, analyze() is run on each incoming chunk; the
 * chunk (or the accumulated buffer) is decoded as soon as analyze() returns
 * true and either d->m_seenBody is set or d->m_source is neither
 * AutoDetectedEncoding nor DefaultEncoding. When chunks are already being
 * buffered, exceeding MAX_BUFFER also forces decoding. In all other cases the
 * chunk is stored in d->m_bufferForDefferedEncDetection and an empty QString
 * is returned.
 *
 * @param data input bytes; on the direct-decoding paths NUL bytes are
 *             overwritten in place via const_cast
 * @param len  number of bytes in @p data
 * @returns decoded text, or an empty QString while data is being buffered
 */
QString EncodingDetector::decodeWithBuffering(const char *data, int len)
{
#ifdef DECODE_DEBUG
kWarning() << "EncodingDetector: decoding "<<len<<" bytes";
#endif
// Output was already produced earlier: no more buffering, decode right away.
if (d->m_writtingHappened)
{
#ifdef DECODE_DEBUG
kWarning() << "EncodingDetector: d->m_writtingHappened "<< d->m_codec->name();
#endif
processNull(const_cast<char *>(data),len);
return d->m_decoder->toUnicode(data, len);
}
else
{
// Nothing buffered yet: this is the first chunk (or everything before it was flushed).
if (d->m_bufferForDefferedEncDetection.isEmpty())
{
// Decode immediately only if analysis succeeded and the result is trusted:
// either the body was seen or the encoding did not come from
// auto-detection / the default.
if (analyze(data,len)
&& (d->m_seenBody
|| !(d->m_source==AutoDetectedEncoding
||d->m_source==DefaultEncoding
)
)
)//dontWannaSeeHead()
{
#ifdef DECODE_DEBUG
kWarning() << "EncodingDetector: m_writtingHappened first time "<< d->m_codec->name();
#endif
processNull(const_cast<char *>(data),len);
d->m_writtingHappened=true;
return d->m_decoder->toUnicode(data, len);
}
else
{
#ifdef DECODE_DEBUG
kWarning() << "EncodingDetector: begin deffer";
#endif
// NOTE(review): the assignment receives only the pointer, not len --
// presumably the buffer type treats data as NUL-terminated; confirm
// against the declaration of m_bufferForDefferedEncDetection.
d->m_bufferForDefferedEncDetection=data;
}
}
else
{
// NOTE(review): same as above -- the append does not take len into account.
d->m_bufferForDefferedEncDetection+=data;
// analyze() is given only the new chunk here, not the accumulated buffer.
// The buffer is decoded when the same trust condition as above holds, or
// unconditionally once it has grown beyond MAX_BUFFER.
if ( (analyze(data,len)
&& (d->m_seenBody
|| !(d->m_source==AutoDetectedEncoding
||d->m_source==DefaultEncoding
)
)
) || d->m_bufferForDefferedEncDetection.length()>MAX_BUFFER
)//dontWannaSeeHead()
{
d->m_writtingHappened=true;
// NULs are replaced in the internal buffer; the caller's data is left alone on this path.
d->m_bufferForDefferedEncDetection.replace('\0',' ');
QString result(d->m_decoder->toUnicode(d->m_bufferForDefferedEncDetection,
d->m_bufferForDefferedEncDetection.size()));
// Empty the buffer so the next call takes the direct-decoding path cleanly.
d->m_bufferForDefferedEncDetection.resize(0);
#ifdef DECODE_DEBUG
kWarning() << "EncodingDetector: m_writtingHappened in the middle " << d->m_codec->name();
#endif
return result;
}
}
}
// Still buffering: nothing to hand out yet.
return QString();
}
QString EncodingDetector::flush()
bool EncodingDetector::analyze(const QByteArray &data)
{
if (d->m_bufferForDefferedEncDetection.isEmpty())
return QString();
d->m_bufferForDefferedEncDetection.replace('\0',' ');
QString result(d->m_decoder->toUnicode(d->m_bufferForDefferedEncDetection,
d->m_bufferForDefferedEncDetection.size()));
d->m_bufferForDefferedEncDetection.resize(0);
#ifdef DECODE_DEBUG
kWarning() << "EncodingDetector:flush() "<< d->m_bufferForDefferedEncDetection.length()<<" bytes "<< d->m_codec->name();
#endif
return result;
return analyze( data.data(), data.size() );
}
bool EncodingDetector::analyze(const char *data, int len)

@ -125,32 +125,16 @@ public:
EncodingChoiceSource encodingChoiceSource() const;
/**
* The main class method
*
* Calls protected analyze() only the first time of the whole object life
*
* Replaces all null chars with spaces.
*/
QString decode(const char *data, int len);
QString decode(const QByteArray &data);
//* You don't need to call analyze() if you use this method.
/**
* Convenience method that uses buffering. It waits for full html head to be buffered
* (i.e. calls analyze every time until it returns true).
*
* Replaces all null chars with spaces.
*
* @returns Decoded data, or empty string, if there was not enough data for accurate detection
* @see flush()
* Analyze text data.
* @returns true if there was enough data for accurate detection
*/
QString decodeWithBuffering(const char *data, int len);
bool analyze( const char *data, int len );
/**
* Convenience method to be used with decodeForHtml. Flushes buffer.
* @see decodeForHtml()
* Analyze text data.
* @returns true if there was enough data for accurate detection
*/
QString flush();
bool analyze( const QByteArray &data );
/**
* Takes lang name _after_ it was i18n()'ed
@ -161,12 +145,6 @@ public:
static bool hasAutoDetectionForScript(AutoDetectScript);
protected:
/**
* This nice method will kill all 0 bytes (or double bytes)
* and remember if this was a binary or not ;)
*/
bool processNull(char* data,int length);
/**
* Check if we are really utf8. Taken from kate
*
@ -176,12 +154,6 @@ protected:
*/
bool errorsIfUtf8 (const char* data, int length);
/**
* Analyze text data.
* @returns true if there was enough data for accurate detection
*/
bool analyze (const char *data, int len);
/**
* @returns QTextDecoder for detected encoding
*/

@ -2758,12 +2758,9 @@ void KMComposeWin::slotAttachFileResult(KIO::Job *job)
EncodingDetector ed;
KLocale *loc = KGlobal::locale();
ed.setAutoDetectLanguage( EncodingDetector::scriptForLanguageCode ( loc->language() ) );
// ### FIXME decode(_const_ QByteArray &) replaces 0x00 bytes by 0x20 in the parameter
// despite being const, which obviously breaks binary attachments.
QByteArray copy = (*it).data.copy();
ed.decode( copy );
ed.analyze( (*it).data );
partCharset = ed.encoding();
if (partCharset.isEmpty()) //shouldn't happen
if ( partCharset.isEmpty() ) //shouldn't happen
partCharset = mCharset;
}

Loading…
Cancel
Save