aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorSaturneric <[email protected]>2022-07-23 12:27:31 +0000
committerSaturneric <[email protected]>2022-07-23 12:27:31 +0000
commite8c8e13835ef6faa7d6122cd89f4915cccac3d49 (patch)
tree85efb71562e00fc116e8789fd9d9bdd1d4f3fbcd
parentfeat(ui): add tab to show gnupg info (diff)
downloadGpgFrontend-dev/2.0.8/main.tar.gz
GpgFrontend-dev/2.0.8/main.zip
refactor(third_party): remove encoding-detectdev/2.0.8/main
-rw-r--r--third_party/encoding-detect/TextEncodingDetect.cpp313
-rw-r--r--third_party/encoding-detect/TextEncodingDetect.h85
2 files changed, 0 insertions, 398 deletions
diff --git a/third_party/encoding-detect/TextEncodingDetect.cpp b/third_party/encoding-detect/TextEncodingDetect.cpp
deleted file mode 100644
index 22ae5897..00000000
--- a/third_party/encoding-detect/TextEncodingDetect.cpp
+++ /dev/null
@@ -1,313 +0,0 @@
-//
-// Copyright 2015-2016 Jonathan Bennett <[email protected]>
-//
-// https://www.autoitscript.com
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-
-// Includes
-#include "TextEncodingDetect.h"
-
-using namespace AutoIt::Common;
-
-// Byte-order-mark signatures. The arrays are local to this translation unit;
-// the class-level pointers defined underneath give the TextEncodingDetect
-// members access to them.
-static const unsigned char TextEncodingDetect_UTF16_BOM_LE[] = {0xFF, 0xFE};
-static const unsigned char TextEncodingDetect_UTF16_BOM_BE[] = {0xFE, 0xFF};
-static const unsigned char TextEncodingDetect_UTF8_BOM[] = {0xEF, 0xBB, 0xBF};
-
-const unsigned char *TextEncodingDetect::utf16_bom_le_ =
-    &TextEncodingDetect_UTF16_BOM_LE[0];
-const unsigned char *TextEncodingDetect::utf16_bom_be_ =
-    &TextEncodingDetect_UTF16_BOM_BE[0];
-const unsigned char *TextEncodingDetect::utf8_bom_ =
-    &TextEncodingDetect_UTF8_BOM[0];
-
-///////////////////////////////////////////////////////////////////////////////
-// Constructor()
-// Default constructor
-///////////////////////////////////////////////////////////////////////////////
-
-// Builds a detector with the stock heuristics:
-//  - a NUL byte makes ANSI/ASCII/UTF8 detection treat the buffer as binary;
-//  - the null-distribution UTF-16 check uses 70% as its "expected" threshold
-//    and 10% as its "unexpected" threshold.
-TextEncodingDetect::TextEncodingDetect()
-    : null_suggests_binary_(true),
-      utf16_expected_null_percent_(70),
-      utf16_unexpected_null_percent_(10) {}
-
-///////////////////////////////////////////////////////////////////////////////
-// Set the percentages used in utf16 detection using nulls.
-///////////////////////////////////////////////////////////////////////////////
-
-// Sets the "unexpected" threshold of the null-distribution UTF-16 check: the
-// byte position (odd or even) that should NOT hold nulls must have a null
-// ratio below this percentage.
-//   percent - new threshold; values outside 1..99 are silently ignored.
-// Previously this setter wrote the *expected* threshold by mistake.
-void TextEncodingDetect::SetUtf16UnexpectedNullPercent(int percent) {
-  if (percent > 0 && percent < 100) utf16_unexpected_null_percent_ = percent;
-}
-
-// Sets the "expected" threshold of the null-distribution UTF-16 check: the
-// byte position (odd or even) that SHOULD hold nulls must have a null ratio
-// above this percentage.
-//   percent - new threshold; values outside 1..99 are silently ignored.
-// Previously this setter wrote the *unexpected* threshold by mistake.
-void TextEncodingDetect::SetUtf16ExpectedNullPercent(int percent) {
-  if (percent > 0 && percent < 100) utf16_expected_null_percent_ = percent;
-}
-
-///////////////////////////////////////////////////////////////////////////////
-// Simple function to return the length of the BOM for a particular encoding
-// mode.
-///////////////////////////////////////////////////////////////////////////////
-
-// Maps an encoding to the byte length of its BOM: 2 for the UTF-16 BOM
-// variants, 3 for UTF-8 with BOM, and 0 for every other encoding.
-int TextEncodingDetect::GetBOMLengthFromEncodingMode(Encoding encoding) {
-  switch (encoding) {
-    case UTF16_BE_BOM:
-    case UTF16_LE_BOM:
-      return 2;
-    case UTF8_BOM:
-      return 3;
-    default:
-      return 0;
-  }
-}
-
-///////////////////////////////////////////////////////////////////////////////
-// Checks if a buffer contains a valid BOM and returns the encoding based on it.
-// Returns encoding "None" if there is no BOM.
-///////////////////////////////////////////////////////////////////////////////
-
-// Inspects the first bytes of pBuffer for a byte-order mark.
-//   pBuffer - bytes to inspect; only read when size >= 2.
-//   size    - number of readable bytes in pBuffer.
-// Returns UTF16_LE_BOM, UTF16_BE_BOM or UTF8_BOM when the matching signature
-// is present, otherwise None.
-TextEncodingDetect::Encoding TextEncodingDetect::CheckBOM(
-    const unsigned char *pBuffer, size_t size) {
-  // Every signature is at least two bytes long.
-  if (size < 2) return None;
-
-  const unsigned char first = pBuffer[0];
-  const unsigned char second = pBuffer[1];
-
-  if (first == utf16_bom_le_[0] && second == utf16_bom_le_[1])
-    return UTF16_LE_BOM;
-
-  if (first == utf16_bom_be_[0] && second == utf16_bom_be_[1])
-    return UTF16_BE_BOM;
-
-  const bool has_utf8_bom = size >= 3 && first == utf8_bom_[0] &&
-                            second == utf8_bom_[1] &&
-                            pBuffer[2] == utf8_bom_[2];
-  return has_utf8_bom ? UTF8_BOM : None;
-}
-
-///////////////////////////////////////////////////////////////////////////////
-// Checks if a buffer contains a valid BOM and returns the encoding based on it.
-// If it doesn't contain a BOM it tries to guess what the encoding is or
-// "None" if it just looks like binary data.
-///////////////////////////////////////////////////////////////////////////////
-
-// Determines the encoding of a buffer, guessing when no BOM is present.
-//   pBuffer - bytes to analyse.
-//   size    - number of readable bytes in pBuffer.
-// Returns the first conclusive verdict from the probes below; if none of them
-// recognises the data the result is ANSI, or None when the buffer contains a
-// NUL byte and null_suggests_binary_ is set.
-TextEncodingDetect::Encoding TextEncodingDetect::DetectEncoding(
-    const unsigned char *pBuffer, size_t size) const {
-  // Probes run in a fixed order (BOM, UTF-8, UTF-16 via newlines, UTF-16 via
-  // null distribution); each one only runs if the previous gave no answer.
-  Encoding verdict = CheckBOM(pBuffer, size);
-  if (verdict == None) verdict = CheckUTF8(pBuffer, size);
-  if (verdict == None) verdict = CheckUTF16NewlineChars(pBuffer, size);
-  if (verdict == None) verdict = CheckUTF16ASCII(pBuffer, size);
-  if (verdict != None) return verdict;
-
-  // Nothing matched: ANSI unless a NUL byte is present and the caller wants
-  // NULs to be treated as a sign of binary data.
-  const bool has_nulls = DoesContainNulls(pBuffer, size);
-  return (has_nulls && null_suggests_binary_) ? None : ANSI;
-}
-
-///////////////////////////////////////////////////////////////////////////////
-// Checks if a buffer contains valid utf8. Returns:
-// None - not valid utf8
-// UTF8_NOBOM - valid utf8 encodings and multibyte sequences
-// ASCII - Only data in the 0-127 range.
-///////////////////////////////////////////////////////////////////////////////
-
-// Scans a buffer for well-formed UTF-8 lead/continuation byte patterns.
-//   pBuffer - bytes to analyse.
-//   size    - number of readable bytes in pBuffer.
-// Returns:
-//   None       - an invalid byte was found, or a NUL byte was found while
-//                null_suggests_binary_ is set
-//   ASCII      - every byte was in the 0-127 range
-//   UTF8_NOBOM - at least one multibyte lead byte was seen and all sequences
-//                checked were well formed
-TextEncodingDetect::Encoding TextEncodingDetect::CheckUTF8(
-    const unsigned char *pBuffer, size_t size) const {
-  // Lead byte (decimal)   Sequence width
-  //   0-127               1 byte   (0xxxxxxx)
-  //   194-223             2 bytes  (110xxxxx 10xxxxxx)
-  //   224-239             3 bytes  (1110xxxx 10xxxxxx 10xxxxxx)
-  //   240-244             4 bytes  (11110xxx 10xxxxxx 10xxxxxx 10xxxxxx)
-  // Continuation bytes are in the range 128-191. Any other lead byte is
-  // rejected.
-
-  bool only_saw_ascii_range = true;
-  size_t pos = 0;
-
-  while (pos < size) {
-    const unsigned char lead = pBuffer[pos++];
-    int more_chars = 0;  // continuation bytes still owed by this sequence
-
-    if (lead == 0 && null_suggests_binary_) {
-      return None;
-    } else if (lead <= 127) {
-      more_chars = 0;
-    } else if (lead >= 194 && lead <= 223) {
-      more_chars = 1;
-    } else if (lead >= 224 && lead <= 239) {
-      more_chars = 2;
-    } else if (lead >= 240 && lead <= 244) {
-      more_chars = 3;
-    } else {
-      return None;  // Not utf8
-    }
-
-    // A multibyte lead byte is itself outside the ASCII range, so note it
-    // here rather than only once a continuation byte follows. Otherwise a
-    // buffer that ends on a lead byte would be reported as ASCII.
-    if (more_chars > 0) only_saw_ascii_range = false;
-
-    // Validate the continuation bytes. A sequence cut short by the end of the
-    // buffer is tolerated: the loop simply stops.
-    // NOTE(review): presumably so a prefix of a larger file can be analysed —
-    // confirm against callers.
-    while (more_chars > 0 && pos < size) {
-      const unsigned char trail = pBuffer[pos++];
-      if (trail < 128 || trail > 191) return None;  // Not utf8
-
-      --more_chars;
-    }
-  }
-
-  // Only valid sequences were seen. Pure 0-127 data cannot be claimed as
-  // UTF-8, so it is reported as ASCII and the caller decides.
-  return only_saw_ascii_range ? ASCII : UTF8_NOBOM;
-}
-
-///////////////////////////////////////////////////////////////////////////////
-// Checks if a buffer contains text that looks like utf16 by scanning for
-// newline chars that would be present even in non-english text.
-// Returns:
-// None - not valid utf16
-// UTF16_LE_NOBOM - looks like utf16 le
-// UTF16_BE_NOBOM - looks like utf16 be
-///////////////////////////////////////////////////////////////////////////////
-
-// Looks for CR/LF characters encoded as 16-bit units to guess UTF-16
-// endianness.
-//   pBuffer - bytes to analyse, read as consecutive two-byte pairs.
-//   size    - number of readable bytes in pBuffer; a trailing odd byte is
-//             never read.
-// Returns UTF16_LE_NOBOM or UTF16_BE_NOBOM when newline units of exactly one
-// byte order were found, otherwise None.
-TextEncodingDetect::Encoding TextEncodingDetect::CheckUTF16NewlineChars(
-    const unsigned char *pBuffer, size_t size) {
-  if (size < 2) return None;
-
-  bool saw_le_newline = false;
-  bool saw_be_newline = false;
-
-  // Stop one byte early so that pos + 1 is always inside the buffer.
-  const size_t pair_limit = size - 1;
-
-  for (size_t pos = 0; pos < pair_limit; pos += 2) {
-    const unsigned char first = pBuffer[pos];
-    const unsigned char second = pBuffer[pos + 1];
-
-    if (first == 0) {
-      // 00 0A / 00 0D: big-endian newline
-      if (second == 0x0a || second == 0x0d) saw_be_newline = true;
-    } else if (second == 0) {
-      // 0A 00 / 0D 00: little-endian newline
-      if (first == 0x0a || first == 0x0d) saw_le_newline = true;
-    }
-
-    // Newlines in both byte orders contradict each other: not utf16
-    if (saw_le_newline && saw_be_newline) return None;
-  }
-
-  if (saw_le_newline) return UTF16_LE_NOBOM;
-  if (saw_be_newline) return UTF16_BE_NOBOM;
-  return None;
-}
-
-///////////////////////////////////////////////////////////////////////////////
-// Checks if a buffer contains text that looks like utf16. This is based on
-// the distribution of nulls, a useful signal in ASCII/script-like text.
-// Returns:
-// None - not valid utf16
-// UTF16_LE_NOBOM - looks like utf16 le
-// UTF16_BE_NOBOM - looks like utf16 be
-///////////////////////////////////////////////////////////////////////////////
-
-// Guesses UTF-16 endianness from how NUL bytes are spread over even and odd
-// byte offsets.
-//   pBuffer - bytes to analyse.
-//   size    - number of readable bytes in pBuffer.
-// Returns:
-//   UTF16_LE_NOBOM - odd offsets exceed the expected-null percentage while
-//                    even offsets stay below the unexpected-null percentage
-//   UTF16_BE_NOBOM - the mirror image (nulls on even offsets)
-//   None           - neither pattern holds, or the buffer is shorter than one
-//                    16-bit unit
-TextEncodingDetect::Encoding TextEncodingDetect::CheckUTF16ASCII(
-    const unsigned char *pBuffer, size_t size) const {
-  // Fewer than two bytes cannot hold a UTF-16 code unit. Without this guard
-  // an empty buffer divided zero by zero below, and a single NUL byte scored
-  // an even-null ratio of 2.0 and was reported as UTF-16 BE.
-  if (size < 2) return None;
-
-  size_t num_even_nulls = 0;
-  size_t num_odd_nulls = 0;
-
-  for (size_t pos = 0; pos < size; ++pos) {
-    if (pBuffer[pos] != 0) continue;
-
-    if (pos % 2 == 0)
-      ++num_even_nulls;
-    else
-      ++num_odd_nulls;
-  }
-
-  // Each parity owns roughly size / 2 offsets, hence the factor of two.
-  const double even_null_ratio = (num_even_nulls * 2.0) / size;
-  const double odd_null_ratio = (num_odd_nulls * 2.0) / size;
-  const double expected_null_threshold = utf16_expected_null_percent_ / 100.0;
-  const double unexpected_null_threshold =
-      utf16_unexpected_null_percent_ / 100.0;
-
-  // Lots of odd nulls, low number of even nulls
-  if (even_null_ratio < unexpected_null_threshold &&
-      odd_null_ratio > expected_null_threshold)
-    return UTF16_LE_NOBOM;
-
-  // Lots of even nulls, low number of odd nulls
-  if (odd_null_ratio < unexpected_null_threshold &&
-      even_null_ratio > expected_null_threshold)
-    return UTF16_BE_NOBOM;
-
-  // Don't know
-  return None;
-}
-
-///////////////////////////////////////////////////////////////////////////////
-// Checks if a buffer contains any nulls. Used to check for binary vs text data.
-///////////////////////////////////////////////////////////////////////////////
-
-// Reports whether any of the first `size` bytes of pBuffer is zero. The scan
-// stops at the first NUL byte found.
-bool TextEncodingDetect::DoesContainNulls(const unsigned char *pBuffer,
-                                          size_t size) {
-  for (size_t index = 0; index < size; ++index) {
-    if (pBuffer[index] == 0) return true;
-  }
-
-  return false;
-}
diff --git a/third_party/encoding-detect/TextEncodingDetect.h b/third_party/encoding-detect/TextEncodingDetect.h
deleted file mode 100644
index 6d861716..00000000
--- a/third_party/encoding-detect/TextEncodingDetect.h
+++ /dev/null
@@ -1,85 +0,0 @@
-#pragma once
-#ifndef TEXT_ENCODING_DETECT_H_
-#define TEXT_ENCODING_DETECT_H_
-
-//
-// Copyright 2015 Jonathan Bennett <[email protected]>
-//
-// https://www.autoitscript.com
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-
-// Includes
-#include <stddef.h>
-
-namespace AutoIt::Common {
-
-// Heuristic detector for the text encoding of a raw byte buffer. It first
-// looks for a byte-order mark and otherwise guesses from UTF-8 byte patterns
-// and from the placement of NUL bytes.
-class TextEncodingDetect {
- public:
-  enum Encoding {
-    None,            // Unknown or binary
-    ANSI,            // 0-255
-    ASCII,           // 0-127
-    UTF8_BOM,        // UTF8 with BOM
-    UTF8_NOBOM,      // UTF8 without BOM
-    UTF16_LE_BOM,    // UTF16 LE with BOM
-    UTF16_LE_NOBOM,  // UTF16 LE without BOM
-    UTF16_BE_BOM,    // UTF16-BE with BOM
-    UTF16_BE_NOBOM,  // UTF16-BE without BOM
-  };
-
-  TextEncodingDetect();
-  ~TextEncodingDetect() = default;
-
-  // Not copyable. Deleting the operations makes misuse a compile-time error
-  // instead of the link-time error the old private, undefined declarations
-  // produced.
-  TextEncodingDetect(const TextEncodingDetect &) = delete;
-  TextEncodingDetect &operator=(const TextEncodingDetect &) = delete;
-
-  // Just check if there is a BOM and return the matching encoding (None when
-  // no BOM is present).
-  static Encoding CheckBOM(const unsigned char *pBuffer, size_t size);
-
-  // Check the BOM and, if there is none, guess the encoding.
-  Encoding DetectEncoding(const unsigned char *pBuffer, size_t size) const;
-
-  // Just return the BOM length in bytes of a given mode (0 when the mode has
-  // no BOM).
-  static int GetBOMLengthFromEncodingMode(Encoding encoding);
-
-  // Chooses whether a NUL byte makes the ANSI/ASCII/UTF8 checks report None.
-  void SetNullSuggestsBinary(bool null_suggests_binary) {
-    null_suggests_binary_ = null_suggests_binary;
-  }
-
-  // Adjust the percentages used by the null-distribution UTF-16 heuristic.
-  void SetUtf16UnexpectedNullPercent(int percent);
-  void SetUtf16ExpectedNullPercent(int percent);
-
- private:
-  // BOM signatures; defined in the implementation file.
-  static const unsigned char *utf16_bom_le_;
-  static const unsigned char *utf16_bom_be_;
-  static const unsigned char *utf8_bom_;
-
-  bool null_suggests_binary_;
-  int utf16_expected_null_percent_;
-  int utf16_unexpected_null_percent_;
-
-  // Check for valid UTF8 with no BOM
-  Encoding CheckUTF8(const unsigned char *pBuffer, size_t size) const;
-
-  // Check for valid UTF16 with no BOM via control chars
-  static Encoding CheckUTF16NewlineChars(const unsigned char *pBuffer,
-                                         size_t size);
-
-  // Check for valid UTF16 with no BOM via null distribution
-  Encoding CheckUTF16ASCII(const unsigned char *pBuffer, size_t size) const;
-
-  // Check for nulls
-  static bool DoesContainNulls(const unsigned char *pBuffer, size_t size);
-};
-
-}  // namespace AutoIt::Common
-
-//////////////////////////////////////////////////////////////////////
-
-#endif