You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 
 

72 lines
1.7 KiB

/*
SPDX-FileCopyrightText: 2013 Azat Khuzhin <a3at.mail@gmail.com>
SPDX-License-Identifier: GPL-2.0-or-later
*/
#include "document.h"
#include <QDataStream>
#include <QFile>
#include <QTextCodec>
#include <KEncodingProber>
#include <QDebug>
#include "debug_txt.h"
using namespace Txt;
Document::Document(const QString &fileName)
{
#ifdef TXT_DEBUG
qCDebug(OkularTxtDebug) << "Opening file" << fileName;
#endif
QFile plainFile(fileName);
if (!plainFile.open(QIODevice::ReadOnly | QIODevice::Text)) {
qCDebug(OkularTxtDebug) << "Can't open file" << plainFile.fileName();
return;
}
const QByteArray buffer = plainFile.readAll();
setPlainText(toUnicode(buffer));
}
Document::~Document()
{
}
QString Document::toUnicode(const QByteArray &array)
{
QByteArray encoding;
KEncodingProber prober(KEncodingProber::Universal);
int charsFeeded = 0;
int chunkSize = 3000; // ~= number of symbols in page.
// Try to detect encoding.
while (encoding.isEmpty() && charsFeeded < array.size()) {
prober.feed(array.mid(charsFeeded, chunkSize));
charsFeeded += chunkSize;
// No more data to feed - take what we have
if (array.size() <= chunkSize) {
encoding = prober.encoding();
}
if (prober.confidence() >= 0.5) {
encoding = prober.encoding();
break;
}
}
if (encoding.isEmpty()) {
return QString();
}
qCDebug(OkularTxtDebug) << "Detected" << prober.encoding() << "encoding"
<< "based on" << charsFeeded << "chars";
return QTextCodec::codecForName(encoding)->toUnicode(array);
}
Q_LOGGING_CATEGORY(OkularTxtDebug, "org.kde.okular.generators.txt", QtWarningMsg)