feat(core): add charset operations
1. introduce library icu 2. add CharsetOperator
This commit is contained in:
parent
9293cd93c0
commit
1e51f0839c
@ -44,9 +44,9 @@ set(UTILS_DIR ${CMAKE_SOURCE_DIR}/utils)
|
||||
set(GPGME_LIB_DIR ${UTILS_DIR}/gpgme/lib)
|
||||
|
||||
# link third-party libraries
|
||||
target_link_libraries(gpgfrontend_core config++)
|
||||
target_link_libraries(gpgfrontend_core PUBLIC config++)
|
||||
if (NOT LINUX)
|
||||
target_link_libraries(gpgfrontend_core config++ intl)
|
||||
target_link_libraries(gpgfrontend_core PUBLIC config++ intl)
|
||||
endif ()
|
||||
|
||||
# easyloggingpp
|
||||
@ -55,28 +55,38 @@ target_include_directories(gpgfrontend_core PUBLIC
|
||||
target_sources(gpgfrontend_core PUBLIC
|
||||
${CMAKE_SOURCE_DIR}/third_party/easyloggingpp/src/easylogging++.cc)
|
||||
# qt-aes
|
||||
target_sources(gpgfrontend_core PUBLIC
|
||||
target_sources(gpgfrontend_core PRIVATE
|
||||
${CMAKE_SOURCE_DIR}/third_party/qt-aes/qaesencryption.cpp)
|
||||
|
||||
# encoding detect library
|
||||
aux_source_directory(${CMAKE_SOURCE_DIR}/third_party/encoding-detect ENCODING_DETECT_SOURCE_CODE)
|
||||
target_sources(gpgfrontend_core PUBLIC ${ENCODING_DETECT_SOURCE_CODE})
|
||||
|
||||
# icu
|
||||
# ucsdet_* comes from ICU's i18n library, while ucnv_* and u_errorName come
# from the common (uc) library, so both components are requested and linked.
if (APPLE)
  # Homebrew installs icu4c keg-only. Its prefix differs between Intel
  # (/usr/local) and Apple Silicon (/opt/homebrew), so probe both unless the
  # user already supplied ICU_ROOT.
  if (NOT DEFINED ICU_ROOT)
    foreach (icu_prefix /opt/homebrew/opt/icu4c /usr/local/opt/icu4c)
      if (EXISTS "${icu_prefix}/include")
        set(ICU_ROOT "${icu_prefix}")
        break()
      endif ()
    endforeach ()
  endif ()
  find_package(ICU REQUIRED COMPONENTS i18n uc)
else ()
  find_package(ICU 70.0 REQUIRED COMPONENTS i18n uc)
endif ()
target_link_libraries(gpgfrontend_core PRIVATE ICU::i18n ICU::uc)
|
||||
|
||||
# link gnupg libraries
|
||||
target_link_libraries(gpgfrontend_core gpgme assuan gpg-error)
|
||||
target_link_libraries(gpgfrontend_core PUBLIC gpgme assuan gpg-error)
|
||||
# link openssl
|
||||
target_link_libraries(gpgfrontend_core OpenSSL::SSL OpenSSL::Crypto)
|
||||
target_link_libraries(gpgfrontend_core PUBLIC OpenSSL::SSL OpenSSL::Crypto)
|
||||
# link boost libraries
|
||||
target_link_libraries(gpgfrontend_core ${Boost_LIBRARIES})
|
||||
target_link_libraries(gpgfrontend_core PUBLIC ${Boost_LIBRARIES})
|
||||
|
||||
# link libarchive
|
||||
target_link_libraries(gpgfrontend_core archive)
|
||||
target_link_libraries(gpgfrontend_core PRIVATE archive)
|
||||
|
||||
# link json
|
||||
target_link_libraries(gpgfrontend_core
|
||||
nlohmann_json::nlohmann_json)
|
||||
PUBLIC nlohmann_json::nlohmann_json)
|
||||
# link Qt core
|
||||
target_link_libraries(gpgfrontend_core Qt5::Core)
|
||||
target_link_libraries(gpgfrontend_core PUBLIC Qt5::Core)
|
||||
|
||||
# set up pch
|
||||
target_precompile_headers(gpgfrontend_core
|
||||
@ -92,7 +102,7 @@ if (MINGW)
|
||||
target_link_libraries(gpgfrontend_core wsock32)
|
||||
elseif (APPLE)
|
||||
message(STATUS "Link GPG Static Library For macOS")
|
||||
target_link_libraries(gpgfrontend_core dl)
|
||||
target_link_libraries(gpgfrontend_core PUBLIC dl)
|
||||
if (XCODE_BUILD)
|
||||
set_target_properties(gpgfrontend_core
|
||||
PROPERTIES
|
||||
|
136
src/core/function/CharsetOperator.cpp
Normal file
136
src/core/function/CharsetOperator.cpp
Normal file
@ -0,0 +1,136 @@
|
||||
/**
|
||||
* Copyright (C) 2021 Saturneric
|
||||
*
|
||||
* This file is part of GpgFrontend.
|
||||
*
|
||||
* GpgFrontend is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* GpgFrontend is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with GpgFrontend. If not, see <https://www.gnu.org/licenses/>.
|
||||
*
|
||||
* The initial version of the source code is inherited from
|
||||
* the gpg4usb project, which is under GPL-3.0-or-later.
|
||||
*
|
||||
* All the source code of GpgFrontend was modified and released by
|
||||
* Saturneric<eric@bktus.com> starting on May 12, 2021.
|
||||
*
|
||||
* SPDX-License-Identifier: GPL-3.0-or-later
|
||||
*
|
||||
*/
|
||||
|
||||
#include "core/function/CharsetOperator.h"
|
||||
|
||||
#include <unicode/ucnv.h>
|
||||
#include <unicode/ucsdet.h>
|
||||
#include <unicode/ustring.h>
|
||||
#include <unicode/utypes.h>
|
||||
|
||||
#include <cstddef>
|
||||
#include <memory>
|
||||
#include <string>
|
||||
|
||||
#include "easylogging++.h"
|
||||
|
||||
/**
 * @brief Guess the character set of a raw byte buffer with ICU's charset
 * detector.
 *
 * @param buffer bytes to inspect
 * @return CharsetInfo tuple of (charset name, language, confidence). Any
 * field that could not be determined is reported as "unknown" (strings) or 0
 * (confidence).
 */
GpgFrontend::CharsetOperator::CharsetInfo GpgFrontend::CharsetOperator::Detect(
    const std::string &buffer) {
  UErrorCode status = U_ZERO_ERROR;

  // Own the detector so it is closed on every return path.
  std::unique_ptr<UCharsetDetector, decltype(&ucsdet_close)> detector(
      ucsdet_open(&status), &ucsdet_close);

  // Check the status of ucsdet_open itself; it must not be reset beforehand.
  if (U_FAILURE(status) || detector == nullptr) {
    LOG(ERROR) << "Failed to open charset detector: " << u_errorName(status);
    return {"unknown", "unknown", 0};
  }

  LOG(INFO) << "Detecting charset buffer:" << buffer.size() << "bytes";

  // ICU takes the length as int32_t; refuse buffers that would not fit
  // instead of letting the length wrap around.
  if (buffer.size() > static_cast<std::size_t>(INT32_MAX)) {
    LOG(ERROR) << "Buffer is too large for charset detection: "
               << buffer.size();
    return {"unknown", "unknown", 0};
  }

  status = U_ZERO_ERROR;
  ucsdet_setText(detector.get(), buffer.data(),
                 static_cast<int32_t>(buffer.size()), &status);
  if (U_FAILURE(status)) {
    LOG(ERROR) << "Failed to set text to charset detector: "
               << u_errorName(status);
    return {"unknown", "unknown", 0};
  }

  status = U_ZERO_ERROR;
  const UCharsetMatch *match = ucsdet_detect(detector.get(), &status);

  // A null match means the detector found no plausible charset.
  if (U_FAILURE(status) || match == nullptr) return {"unknown", "unknown", 0};

  status = U_ZERO_ERROR;
  const char *raw_name = ucsdet_getName(match, &status);
  if (U_FAILURE(status) || raw_name == nullptr) {
    return {"unknown", "unknown", 0};
  }
  // Copy right away: the returned C strings are only used while the detector
  // is still alive.
  const std::string name(raw_name);

  status = U_ZERO_ERROR;
  const int confidence = ucsdet_getConfidence(match, &status);
  if (U_FAILURE(status)) return {name, "unknown", 0};

  status = U_ZERO_ERROR;
  const char *raw_language = ucsdet_getLanguage(match, &status);
  if (U_FAILURE(status) || raw_language == nullptr) {
    return {name, "unknown", confidence};
  }
  const std::string language(raw_language);

  LOG(INFO) << "Detected charset: " << name << language << confidence;
  return {name, language, confidence};
}
|
||||
|
||||
bool GpgFrontend::CharsetOperator::Convert2Utf8(const std::string &buffer,
|
||||
std::string &out_buffer,
|
||||
std::string from_charset_name) {
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
const auto from_encode = std::string("utf-8");
|
||||
const auto to_encode = from_charset_name;
|
||||
|
||||
LOG(INFO) << "Converting buffer:" << buffer.size();
|
||||
|
||||
// test if the charset is supported
|
||||
UConverter *conv = ucnv_open(from_encode.c_str(), &status);
|
||||
ucnv_close(conv);
|
||||
if (U_FAILURE(status)) {
|
||||
LOG(ERROR) << "Failed to open converter: " << u_errorName(status) << ":"
|
||||
<< from_encode;
|
||||
return false;
|
||||
}
|
||||
|
||||
// test if the charset is supported
|
||||
conv = ucnv_open(to_encode.c_str(), &status);
|
||||
ucnv_close(conv);
|
||||
if (U_FAILURE(status)) {
|
||||
LOG(ERROR) << "Failed to open converter: " << u_errorName(status) << ":"
|
||||
<< to_encode;
|
||||
return false;
|
||||
}
|
||||
|
||||
status = U_ZERO_ERROR;
|
||||
int32_t target_limit = 0, target_capacity = 0;
|
||||
|
||||
target_capacity =
|
||||
ucnv_convert(from_encode.c_str(), to_encode.c_str(), nullptr,
|
||||
target_limit, buffer.data(), buffer.size(), &status);
|
||||
|
||||
if (status == U_BUFFER_OVERFLOW_ERROR) {
|
||||
status = U_ZERO_ERROR;
|
||||
target_limit = target_capacity + 1;
|
||||
out_buffer.clear();
|
||||
out_buffer.resize(target_capacity);
|
||||
target_capacity =
|
||||
ucnv_convert(from_encode.c_str(), to_encode.c_str(), out_buffer.data(),
|
||||
out_buffer.size(), buffer.data(), buffer.size(), &status);
|
||||
}
|
||||
|
||||
if (U_FAILURE(status)) {
|
||||
LOG(ERROR) << "Failed to convert to utf-8: " << u_errorName(status);
|
||||
return false;
|
||||
}
|
||||
|
||||
LOG(INFO) << "Converted buffer:" << out_buffer.size() << "bytes";
|
||||
return true;
|
||||
}
|
49
src/core/function/CharsetOperator.h
Normal file
49
src/core/function/CharsetOperator.h
Normal file
@ -0,0 +1,49 @@
|
||||
/**
|
||||
* Copyright (C) 2021 Saturneric
|
||||
*
|
||||
* This file is part of GpgFrontend.
|
||||
*
|
||||
* GpgFrontend is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* GpgFrontend is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with GpgFrontend. If not, see <https://www.gnu.org/licenses/>.
|
||||
*
|
||||
* The initial version of the source code is inherited from
|
||||
* the gpg4usb project, which is under GPL-3.0-or-later.
|
||||
*
|
||||
* All the source code of GpgFrontend was modified and released by
|
||||
* Saturneric<eric@bktus.com> starting on May 12, 2021.
|
||||
*
|
||||
* SPDX-License-Identifier: GPL-3.0-or-later
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef GPGFRONTEND_CHARSETDETECTOR_H
|
||||
#define GPGFRONTEND_CHARSETDETECTOR_H
|
||||
|
||||
#include <string>
#include <tuple>

#include "core/GpgFrontendCore.h"
|
||||
|
||||
namespace GpgFrontend {
|
||||
|
||||
class GPGFRONTEND_CORE_EXPORT CharsetOperator {
|
||||
public:
|
||||
using CharsetInfo = std::tuple<std::string, std::string, int>;
|
||||
|
||||
static CharsetInfo Detect(const std::string &buffer);
|
||||
|
||||
static bool Convert2Utf8(const std::string &buffer, std::string &out_buffer,
|
||||
std::string from_charset_name);
|
||||
};
|
||||
} // namespace GpgFrontend
|
||||
|
||||
#endif // GPGFRONTEND_CHARSETDETECTOR_H
|
Loading…
x
Reference in New Issue
Block a user