Summary of this patch:
  * encodingdetector.cpp / encodingdetector.h: remove decode() (both
    overloads), decodeWithBuffering(), flush() and the protected
    processNull(); move analyze(const char *, int) from the protected to the
    public section and add an analyze(const QByteArray &) overload that
    forwards to it; add a final "return None;" to scriptForLanguageCode().
  * kmcomposewin.cpp: KMComposeWin::slotAttachFileResult() calls
    ed.analyze() instead of ed.decode() before reading ed.encoding().

NOTE(review): this patch was recovered from a copy whose line breaks and
indentation had been collapsed. Hunk line counts match the @@ headers, but
leading whitespace and the position of blank context lines are reconstructed,
so apply with whitespace-insensitive matching (e.g. "git apply
--ignore-whitespace" or "patch -l"). The "<char *>" template arguments and the
'<<len<<" bytes";' debug line were restored by hand -- TODO confirm against
the original commit.

diff --git a/encodingdetector.cpp b/encodingdetector.cpp
index 9c5c92f1f..e5881d6f7 100644
--- a/encodingdetector.cpp
+++ b/encodingdetector.cpp
@@ -809,34 +809,6 @@ static int findXMLEncoding(const QCString &str, int &encodingLength)
     return pos;
 }
 
-bool EncodingDetector::processNull(char *data, int len)
-{
-    bool bin=false;
-    if(is16Bit(d->m_codec))
-    {
-        for (int i=1; i < len; i+=2)
-        {
-            if ((data[i]=='\0') && (data[i-1]=='\0'))
-            {
-                bin=true;
-                data[i]=' ';
-            }
-        }
-        return bin;
-    }
-    // replace '\0' by spaces, for buggy pages
-    int i = len-1;
-    while(--i>=0)
-    {
-        if(data[i]==0)
-        {
-            bin=true;
-            data[i]=' ';
-        }
-    }
-    return bin;
-}
-
 bool EncodingDetector::errorsIfUtf8 (const char* data, int length)
 {
 
@@ -902,7 +874,6 @@ static const unsigned char highest5Bits = 0xF8;
     return false;
 }
 
-
 EncodingDetector::EncodingDetector() : d(new EncodingDetectorPrivate)
 {
 }
@@ -1007,111 +978,9 @@ bool EncodingDetector::setEncoding(const char *_encoding, EncodingChoiceSource t
     return true;
 }
 
-QString EncodingDetector::decode(const char *data, int len)
-{
-    processNull(const_cast<char *>(data),len);
-    if (!d->m_analyzeCalled)
-    {
-        analyze(data,len);
-        d->m_analyzeCalled=true;
-    }
-
-    return d->m_decoder->toUnicode(data,len);
-}
-
-QString EncodingDetector::decode(const QByteArray &data)
-{
-    processNull(const_cast<char *>(data.data()),data.size());
-    if (!d->m_analyzeCalled)
-    {
-        analyze(data.data(),data.size());
-        d->m_analyzeCalled=true;
-    }
-
-    return d->m_decoder->toUnicode(data, data.size());
-}
-
-QString EncodingDetector::decodeWithBuffering(const char *data, int len)
-{
-#ifdef DECODE_DEBUG
-    kWarning() << "EncodingDetector: decoding "<<len<<" bytes";
-#endif
-    if (d->m_writtingHappened)
-    {
-#ifdef DECODE_DEBUG
-        kWarning() << "EncodingDetector: d->m_writtingHappened "<< d->m_codec->name();
-#endif
-        processNull(const_cast<char *>(data),len);
-        return d->m_decoder->toUnicode(data, len);
-    }
-    else
-    {
-        if (d->m_bufferForDefferedEncDetection.isEmpty())
-        {
-            if (analyze(data,len)
-                && (d->m_seenBody
-                    || !(d->m_source==AutoDetectedEncoding
-                        ||d->m_source==DefaultEncoding
-                        )
-                    )
-                )//dontWannaSeeHead()
-            {
-#ifdef DECODE_DEBUG
-                kWarning() << "EncodingDetector: m_writtingHappened first time "<< d->m_codec->name();
-#endif
-                processNull(const_cast<char *>(data),len);
-                d->m_writtingHappened=true;
-                return d->m_decoder->toUnicode(data, len);
-            }
-            else
-            {
-#ifdef DECODE_DEBUG
-                kWarning() << "EncodingDetector: begin deffer";
-#endif
-                d->m_bufferForDefferedEncDetection=data;
-            }
-        }
-        else
-        {
-            d->m_bufferForDefferedEncDetection+=data;
-            if ( (analyze(data,len)
-                && (d->m_seenBody
-                    || !(d->m_source==AutoDetectedEncoding
-                        ||d->m_source==DefaultEncoding
-                        )
-                    )
-                ) || d->m_bufferForDefferedEncDetection.length()>MAX_BUFFER
-                )//dontWannaSeeHead()
-            {
-                d->m_writtingHappened=true;
-                d->m_bufferForDefferedEncDetection.replace('\0',' ');
-                QString result(d->m_decoder->toUnicode(d->m_bufferForDefferedEncDetection,
-                    d->m_bufferForDefferedEncDetection.size()));
-                d->m_bufferForDefferedEncDetection.resize(0);
-#ifdef DECODE_DEBUG
-                kWarning() << "EncodingDetector: m_writtingHappened in the middle " << d->m_codec->name();
-#endif
-                return result;
-            }
-        }
-    }
-
-    return QString();
-}
-
-QString EncodingDetector::flush()
+bool EncodingDetector::analyze(const QByteArray &data)
 {
-    if (d->m_bufferForDefferedEncDetection.isEmpty())
-        return QString();
-
-    d->m_bufferForDefferedEncDetection.replace('\0',' ');
-    QString result(d->m_decoder->toUnicode(d->m_bufferForDefferedEncDetection,
-        d->m_bufferForDefferedEncDetection.size()));
-    d->m_bufferForDefferedEncDetection.resize(0);
-#ifdef DECODE_DEBUG
-    kWarning() << "EncodingDetector:flush() "<< d->m_bufferForDefferedEncDetection.length()<<" bytes "<< d->m_codec->name();
-#endif
-    return result;
+    return analyze( data.data(), data.size() );
 }
 
 bool EncodingDetector::analyze(const char *data, int len)
@@ -1501,6 +1370,7 @@ EncodingDetector::AutoDetectScript EncodingDetector::scriptForLanguageCode(const
         if ( lc.startsWith( QString::fromAscii( langStr ) ) )
             return pango_script_for_lang[i].scripts[0];
     }
+    return None;
 }
 
 #undef DECODE_DEBUG
diff --git a/encodingdetector.h b/encodingdetector.h
index 2d192e4c6..51ccb4695 100644
--- a/encodingdetector.h
+++ b/encodingdetector.h
@@ -125,32 +125,16 @@ public:
     EncodingChoiceSource encodingChoiceSource() const;
 
     /**
-     * The main class method
-     *
-     * Calls protected analyze() only the first time of the whole object life
-     *
-     * Replaces all null chars with spaces.
-     */
-    QString decode(const char *data, int len);
-    QString decode(const QByteArray &data);
-
-    //* You don't need to call analyze() if you use this method.
-    /**
-     * Convenience method that uses buffering. It waits for full html head to be buffered
-     * (i.e. calls analyze every time until it returns true).
-     *
-     * Replaces all null chars with spaces.
-     *
-     * @returns Decoded data, or empty string, if there was not enough data for accurate detection
-     * @see flush()
+     * Analyze text data.
+     * @returns true if there was enough data for accurate detection
      */
-    QString decodeWithBuffering(const char *data, int len);
+    bool analyze( const char *data, int len );
 
     /**
-     * Convenience method to be used with decodeForHtml. Flushes buffer.
-     * @see decodeForHtml()
+     * Analyze text data.
+     * @returns true if there was enough data for accurate detection
      */
-    QString flush();
+    bool analyze( const QByteArray &data );
 
     /**
      * Takes lang name _after_ it were i18n()'ed
@@ -161,12 +145,6 @@ public:
     static bool hasAutoDetectionForScript(AutoDetectScript);
 
 protected:
-    /**
-     * This nice method will kill all 0 bytes (or double bytes)
-     * and remember if this was a binary or not ;)
-     */
-    bool processNull(char* data,int length);
-
     /**
      * Check if we are really utf8. Taken from kate
      *
@@ -176,12 +154,6 @@ protected:
      */
     bool errorsIfUtf8 (const char* data, int length);
 
-    /**
-     * Analyze text data.
-     * @returns true if there was enough data for accurate detection
-     */
-    bool analyze (const char *data, int len);
-
     /**
      * @returns QTextDecoder for detected encoding
      */
diff --git a/kmcomposewin.cpp b/kmcomposewin.cpp
index 97e7d038a..1c9a077ec 100644
--- a/kmcomposewin.cpp
+++ b/kmcomposewin.cpp
@@ -2758,9 +2758,9 @@ void KMComposeWin::slotAttachFileResult(KIO::Job *job)
       EncodingDetector ed;
       KLocale *loc = KGlobal::locale();
       ed.setAutoDetectLanguage( EncodingDetector::scriptForLanguageCode ( loc->language() ) );
-      ed.decode( ( *it ).data );
+      ed.analyze( (*it).data );
       partCharset = ed.encoding();
-      if (partCharset.isEmpty()) //shouldn't happen
+      if ( partCharset.isEmpty() ) //shouldn't happen
         partCharset = mCharset;
     }
 