From 4e2b1fef631249b0114b3bf898cc92402c88f5cc Mon Sep 17 00:00:00 2001 From: Volker Krause Date: Mon, 10 Mar 2008 14:10:35 +0000 Subject: [PATCH] Merged revisions 782964-782967,782969-783127,783129-783243,783245,783248-783302,783304-783477,783479-783847 via svnmerge from https://vkrause@svn.kde.org/home/kde/branches/kdepim/enterprise/kdepim ........ r782964 | vkrause | 2008-03-06 18:31:59 +0100 (Thu, 06 Mar 2008) | 7 lines Fix corruption of binary attachments introduced in revision 771698. The actual problem here is that EncodingDetector::decode(_const_ QByteArray &data) replaces all 0x00 bytes in data with 0x20 even though data is declared const. CCMAIL: ahartmetz@gmail.com ........ r783567 | ahartmetz | 2008-03-08 19:21:35 +0100 (Sat, 08 Mar 2008) | 1 line Remove some of the weirder methods of EncodingDetector and make the one method public that we actually want: analyze(). analyze() also does not alter its input... ........ r783847 | ahartmetz | 2008-03-09 20:22:17 +0100 (Sun, 09 Mar 2008) | 1 line fix return value of scriptForLanguageCode() if the language code is unknown ........ 
svn path=/branches/KDE/3.5/kdepim/; revision=784040 --- encodingdetector.cpp | 136 +------------------------------------------ encodingdetector.h | 40 ++----------- kmcomposewin.cpp | 4 +- 3 files changed, 11 insertions(+), 169 deletions(-) diff --git a/encodingdetector.cpp b/encodingdetector.cpp index 9c5c92f1f..e5881d6f7 100644 --- a/encodingdetector.cpp +++ b/encodingdetector.cpp @@ -809,34 +809,6 @@ static int findXMLEncoding(const QCString &str, int &encodingLength) return pos; } -bool EncodingDetector::processNull(char *data, int len) -{ - bool bin=false; - if(is16Bit(d->m_codec)) - { - for (int i=1; i < len; i+=2) - { - if ((data[i]=='\0') && (data[i-1]=='\0')) - { - bin=true; - data[i]=' '; - } - } - return bin; - } - // replace '\0' by spaces, for buggy pages - int i = len-1; - while(--i>=0) - { - if(data[i]==0) - { - bin=true; - data[i]=' '; - } - } - return bin; -} - bool EncodingDetector::errorsIfUtf8 (const char* data, int length) { @@ -902,7 +874,6 @@ static const unsigned char highest5Bits = 0xF8; return false; } - EncodingDetector::EncodingDetector() : d(new EncodingDetectorPrivate) { } @@ -1007,111 +978,9 @@ bool EncodingDetector::setEncoding(const char *_encoding, EncodingChoiceSource t return true; } -QString EncodingDetector::decode(const char *data, int len) -{ - processNull(const_cast(data),len); - if (!d->m_analyzeCalled) - { - analyze(data,len); - d->m_analyzeCalled=true; - } - - return d->m_decoder->toUnicode(data,len); -} - -QString EncodingDetector::decode(const QByteArray &data) -{ - processNull(const_cast(data.data()),data.size()); - if (!d->m_analyzeCalled) - { - analyze(data.data(),data.size()); - d->m_analyzeCalled=true; - } - - return d->m_decoder->toUnicode(data, data.size()); -} - -QString EncodingDetector::decodeWithBuffering(const char *data, int len) -{ -#ifdef DECODE_DEBUG - kWarning() << "EncodingDetector: decoding "<m_writtingHappened) - { -#ifdef DECODE_DEBUG - kWarning() << "EncodingDetector: d->m_writtingHappened "<< 
d->m_codec->name(); -#endif - processNull(const_cast(data),len); - return d->m_decoder->toUnicode(data, len); - } - else - { - if (d->m_bufferForDefferedEncDetection.isEmpty()) - { - if (analyze(data,len) - && (d->m_seenBody - || !(d->m_source==AutoDetectedEncoding - ||d->m_source==DefaultEncoding - ) - ) - )//dontWannaSeeHead() - { -#ifdef DECODE_DEBUG - kWarning() << "EncodingDetector: m_writtingHappened first time "<< d->m_codec->name(); -#endif - processNull(const_cast(data),len); - d->m_writtingHappened=true; - return d->m_decoder->toUnicode(data, len); - } - else - { -#ifdef DECODE_DEBUG - kWarning() << "EncodingDetector: begin deffer"; -#endif - d->m_bufferForDefferedEncDetection=data; - } - } - else - { - d->m_bufferForDefferedEncDetection+=data; - if ( (analyze(data,len) - && (d->m_seenBody - || !(d->m_source==AutoDetectedEncoding - ||d->m_source==DefaultEncoding - ) - ) - ) || d->m_bufferForDefferedEncDetection.length()>MAX_BUFFER - )//dontWannaSeeHead() - { - d->m_writtingHappened=true; - d->m_bufferForDefferedEncDetection.replace('\0',' '); - QString result(d->m_decoder->toUnicode(d->m_bufferForDefferedEncDetection, - d->m_bufferForDefferedEncDetection.size())); - d->m_bufferForDefferedEncDetection.resize(0); -#ifdef DECODE_DEBUG - kWarning() << "EncodingDetector: m_writtingHappened in the middle " << d->m_codec->name(); -#endif - return result; - } - } - } - - return QString(); -} - -QString EncodingDetector::flush() +bool EncodingDetector::analyze(const QByteArray &data) { - if (d->m_bufferForDefferedEncDetection.isEmpty()) - return QString(); - - d->m_bufferForDefferedEncDetection.replace('\0',' '); - QString result(d->m_decoder->toUnicode(d->m_bufferForDefferedEncDetection, - d->m_bufferForDefferedEncDetection.size())); - d->m_bufferForDefferedEncDetection.resize(0); -#ifdef DECODE_DEBUG - kWarning() << "EncodingDetector:flush() "<< d->m_bufferForDefferedEncDetection.length()<<" bytes "<< d->m_codec->name(); -#endif - return result; + return 
analyze( data.data(), data.size() ); } bool EncodingDetector::analyze(const char *data, int len) @@ -1501,6 +1370,7 @@ EncodingDetector::AutoDetectScript EncodingDetector::scriptForLanguageCode(const if ( lc.startsWith( QString::fromAscii( langStr ) ) ) return pango_script_for_lang[i].scripts[0]; } + return None; } #undef DECODE_DEBUG diff --git a/encodingdetector.h b/encodingdetector.h index 2d192e4c6..51ccb4695 100644 --- a/encodingdetector.h +++ b/encodingdetector.h @@ -125,32 +125,16 @@ public: EncodingChoiceSource encodingChoiceSource() const; /** - * The main class method - * - * Calls protected analyze() only the first time of the whole object life - * - * Replaces all null chars with spaces. - */ - QString decode(const char *data, int len); - QString decode(const QByteArray &data); - - //* You don't need to call analyze() if you use this method. - /** - * Convenience method that uses buffering. It waits for full html head to be buffered - * (i.e. calls analyze every time until it returns true). - * - * Replaces all null chars with spaces. - * - * @returns Decoded data, or empty string, if there was not enough data for accurate detection - * @see flush() + * Analyze text data. + * @returns true if there was enough data for accurate detection */ - QString decodeWithBuffering(const char *data, int len); + bool analyze( const char *data, int len ); /** - * Convenience method to be used with decodeForHtml. Flushes buffer. - * @see decodeForHtml() + * Analyze text data. + * @returns true if there was enough data for accurate detection */ - QString flush(); + bool analyze( const QByteArray &data ); /** * Takes lang name _after_ it were i18n()'ed @@ -161,12 +145,6 @@ public: static bool hasAutoDetectionForScript(AutoDetectScript); protected: - /** - * This nice method will kill all 0 bytes (or double bytes) - * and remember if this was a binary or not ;) - */ - bool processNull(char* data,int length); - /** * Check if we are really utf8. 
Taken from kate * @@ -176,12 +154,6 @@ protected: */ bool errorsIfUtf8 (const char* data, int length); - /** - * Analyze text data. - * @returns true if there was enough data for accurate detection - */ - bool analyze (const char *data, int len); - /** * @returns QTextDecoder for detected encoding */ diff --git a/kmcomposewin.cpp b/kmcomposewin.cpp index 97e7d038a..1c9a077ec 100644 --- a/kmcomposewin.cpp +++ b/kmcomposewin.cpp @@ -2758,9 +2758,9 @@ void KMComposeWin::slotAttachFileResult(KIO::Job *job) EncodingDetector ed; KLocale *loc = KGlobal::locale(); ed.setAutoDetectLanguage( EncodingDetector::scriptForLanguageCode ( loc->language() ) ); - ed.decode( ( *it ).data ); + ed.analyze( (*it).data ); partCharset = ed.encoding(); - if (partCharset.isEmpty()) //shouldn't happen + if ( partCharset.isEmpty() ) //shouldn't happen partCharset = mCharset; }