summaryrefslogtreecommitdiff
path: root/gedit/gedit-smart-charset-converter.c
diff options
context:
space:
mode:
Diffstat (limited to 'gedit/gedit-smart-charset-converter.c')
-rwxr-xr-xgedit/gedit-smart-charset-converter.c422
1 files changed, 0 insertions, 422 deletions
diff --git a/gedit/gedit-smart-charset-converter.c b/gedit/gedit-smart-charset-converter.c
deleted file mode 100755
index e32b0b17..00000000
--- a/gedit/gedit-smart-charset-converter.c
+++ /dev/null
@@ -1,422 +0,0 @@
-/*
- * gedit-smart-charset-converter.c
- * This file is part of gedit
- *
- * Copyright (C) 2009 - Ignacio Casal Quinteiro
- *
- * gedit is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * gedit is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with gedit; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin St, Fifth Floor,
- * Boston, MA 02110-1301 USA
- */
-
-#include "gedit-smart-charset-converter.h"
-#include "gedit-debug.h"
-#include "gedit-document.h"
-
-#include <gio/gio.h>
-#include <glib/gi18n.h>
-
-/* Looks up the GeditSmartCharsetConverterPrivate data attached to @object. */
-#define GEDIT_SMART_CHARSET_CONVERTER_GET_PRIVATE(object)(G_TYPE_INSTANCE_GET_PRIVATE((object), GEDIT_TYPE_SMART_CHARSET_CONVERTER, GeditSmartCharsetConverterPrivate))
-
-/* Per-instance private state of the smart charset converter. */
-struct _GeditSmartCharsetConverterPrivate
-{
- /* Converter to UTF-8 selected by guess_encoding(); NULL before a guess
-    has been made and whenever the input is passed through as UTF-8. */
- GCharsetConverter *charset_conv;
-
- /* Shallow copy of the candidate list handed to the constructor. */
- GSList *encodings;
- /* Node of `encodings` most recently handed out by get_encoding(). */
- GSList *current_encoding;
-
- /* Set when the input is copied to the output unchanged. */
- guint is_utf8 : 1;
- /* Set by guess_encoding() when the candidate list has a single entry;
-    that entry is then accepted without a trial conversion. */
- guint use_first : 1;
-};
-
-/* Forward declaration: the type definition macro below refers to it. */
-static void gedit_smart_charset_converter_iface_init (GConverterIface *iface);
-
-/* Defines the type as a GObject subclass that implements GConverter. */
-G_DEFINE_TYPE_WITH_CODE (GeditSmartCharsetConverter, gedit_smart_charset_converter,
- G_TYPE_OBJECT,
- G_IMPLEMENT_INTERFACE (G_TYPE_CONVERTER,
- gedit_smart_charset_converter_iface_init))
-
-/*
- * GObject finalize handler: releases the candidate-encoding list and then
- * chains up to the parent class.
- */
-static void
-gedit_smart_charset_converter_finalize (GObject *object)
-{
-	GeditSmartCharsetConverterPrivate *priv;
-
-	priv = GEDIT_SMART_CHARSET_CONVERTER (object)->priv;
-
-	/* Only the list nodes are freed here, not the data they point to. */
-	g_slist_free (priv->encodings);
-
-	gedit_debug_message (DEBUG_UTILS, "finalizing smart charset converter");
-
-	G_OBJECT_CLASS (gedit_smart_charset_converter_parent_class)->finalize (object);
-}
-
-/*
- * GObject dispose handler: drops the reference held on the charset
- * converter (if any) and then chains up to the parent class.
- */
-static void
-gedit_smart_charset_converter_dispose (GObject *object)
-{
-	GeditSmartCharsetConverterPrivate *priv;
-
-	priv = GEDIT_SMART_CHARSET_CONVERTER (object)->priv;
-
-	/* Clearing the pointer after the unref keeps a repeated dispose
-	 * from releasing the converter twice. */
-	if (priv->charset_conv != NULL)
-	{
-		g_object_unref (priv->charset_conv);
-		priv->charset_conv = NULL;
-	}
-
-	gedit_debug_message (DEBUG_UTILS, "disposing smart charset converter");
-
-	G_OBJECT_CLASS (gedit_smart_charset_converter_parent_class)->dispose (object);
-}
-
-/*
- * Class initialiser: installs the dispose/finalize handlers and reserves
- * room for the instance-private structure.
- */
-static void
-gedit_smart_charset_converter_class_init (GeditSmartCharsetConverterClass *klass)
-{
-	GObjectClass *gobject_class = G_OBJECT_CLASS (klass);
-
-	gobject_class->dispose = gedit_smart_charset_converter_dispose;
-	gobject_class->finalize = gedit_smart_charset_converter_finalize;
-
-	g_type_class_add_private (gobject_class,
-	                          sizeof (GeditSmartCharsetConverterPrivate));
-}
-
-/*
- * Instance initialiser: binds the private structure and puts every field
- * into its "nothing guessed yet" state.
- */
-static void
-gedit_smart_charset_converter_init (GeditSmartCharsetConverter *smart)
-{
-	GeditSmartCharsetConverterPrivate *priv;
-
-	priv = GEDIT_SMART_CHARSET_CONVERTER_GET_PRIVATE (smart);
-	smart->priv = priv;
-
-	priv->charset_conv = NULL;
-	priv->encodings = NULL;
-	priv->current_encoding = NULL;
-	priv->is_utf8 = FALSE;
-	priv->use_first = FALSE;
-
-	gedit_debug_message (DEBUG_UTILS, "initializing smart charset converter");
-}
-
-/*
- * Advances the candidate cursor and returns the encoding it now points at.
- *
- * The first call (cursor unset) yields the head of the candidate list; each
- * further call yields the following entry. Once the list is exhausted the
- * cursor is left at NULL and NULL is returned.
- */
-static const GeditEncoding *
-get_encoding (GeditSmartCharsetConverter *smart)
-{
-	GeditSmartCharsetConverterPrivate *priv = smart->priv;
-	GSList *node;
-
-	node = (priv->current_encoding == NULL)
-	       ? priv->encodings
-	       : g_slist_next (priv->current_encoding);
-
-	priv->current_encoding = node;
-
-	if (node == NULL)
-	{
-		/* FIXME: when the fallback gets used, do not give up here:
-		 * set use_first to TRUE, rewind current_encoding to the head
-		 * of the list and return the first encoding instead. */
-		return NULL;
-	}
-
-	return (const GeditEncoding *) node->data;
-}
-
-/*
- * try_convert:
- * @converter: candidate converter to UTF-8; NULL is rejected
- * @inbuf: sample of the raw input
- * @inbuf_size: number of bytes available in @inbuf
- *
- * Runs a trial conversion of the sample into a scratch buffer and reports
- * whether @converter looks suitable for it.
- *
- * A conversion that stops on an incomplete multibyte sequence is still
- * accepted, because the sample is only a slice of the document and may be
- * cut in the middle of a character. Any other error, an error result that
- * comes without a GError, a converter that makes no progress, or output
- * that is not valid UTF-8 rejects the candidate.
- *
- * The converter is left in whatever state the trial put it in; the caller
- * is expected to reset it before real use.
- *
- * Returns: TRUE if the sample could be converted to valid UTF-8.
- */
-static gboolean
-try_convert (GCharsetConverter *converter,
-             const void        *inbuf,
-             gsize              inbuf_size)
-{
-	/* Byte-typed alias: pointer arithmetic on void * is not standard C. */
-	const gchar *in = inbuf;
-	GError *err = NULL;
-	GConverterResult res;
-	gsize nread = 0;
-	gsize nwritten = 0;
-	gsize out_size;
-	gchar *out;
-	gboolean ret;
-
-	if (converter == NULL || inbuf == NULL || inbuf_size == 0)
-	{
-		return FALSE;
-	}
-
-	/* The scratch buffer is four times the input; refuse sizes for which
-	 * that product would wrap around. */
-	if (inbuf_size > G_MAXSIZE / 4)
-	{
-		return FALSE;
-	}
-
-	out_size = inbuf_size * 4;
-	out = g_malloc (out_size);
-
-	do
-	{
-		/* Reset per call so a failed call cannot leave stale counts. */
-		gsize bytes_read = 0;
-		gsize bytes_written = 0;
-
-		if (nwritten >= out_size)
-		{
-			/* No room left: g_converter_convert() rejects an
-			 * empty output window, so treat this as a failure. */
-			res = G_CONVERTER_ERROR;
-			break;
-		}
-
-		res = g_converter_convert (G_CONVERTER (converter),
-		                           in + nread,
-		                           inbuf_size - nread,
-		                           out + nwritten,
-		                           out_size - nwritten,
-		                           G_CONVERTER_INPUT_AT_END,
-		                           &bytes_read,
-		                           &bytes_written,
-		                           &err);
-
-		if (res == G_CONVERTER_CONVERTED &&
-		    bytes_read == 0 && bytes_written == 0)
-		{
-			/* A converter that makes no progress would spin forever. */
-			res = G_CONVERTER_ERROR;
-			break;
-		}
-
-		nread += bytes_read;
-		nwritten += bytes_written;
-	} while (res != G_CONVERTER_FINISHED && res != G_CONVERTER_ERROR && err == NULL);
-
-	if (err != NULL)
-	{
-		/* FIXME We can get partial input while guessing the
-		   encoding because we just take some amount of text
-		   to guess from. GConverter implementations report this in
-		   the G_IO_ERROR domain; the GConvertError variant is also
-		   accepted for converters that use that domain. */
-		ret = g_error_matches (err, G_IO_ERROR, G_IO_ERROR_PARTIAL_INPUT) ||
-		      g_error_matches (err, G_CONVERT_ERROR, G_CONVERT_ERROR_PARTIAL_INPUT);
-
-		g_error_free (err);
-	}
-	else
-	{
-		/* An error result without a GError is still a failure. */
-		ret = (res != G_CONVERTER_ERROR);
-	}
-
-	/* FIXME: Check the remainder? */
-	if (ret && !g_utf8_validate (out, nwritten, NULL))
-	{
-		ret = FALSE;
-	}
-
-	g_free (out);
-
-	return ret;
-}
-
-/*
- * guess_encoding:
- * @smart: the converter whose candidate list is walked
- * @inbuf: first block of raw input
- * @inbuf_size: number of bytes available in @inbuf
- *
- * Walks the candidate encodings, starting after the current cursor, and
- * picks the first one that fits the block.
- *
- * - An empty block is treated as UTF-8.
- * - If the list has a single entry it is accepted without being tested.
- * - The UTF-8 candidate is accepted when the block validates, or when the
- *   invalid tail is shorter than 6 bytes (taken to be a character cut by
- *   the block boundary).
- * - Any other candidate is accepted when a trial conversion succeeds;
- *   candidates for which no converter can be created are skipped.
- *
- * On return, priv->current_encoding points at the chosen candidate, or is
- * NULL when every candidate was rejected.
- *
- * Returns: a new, reset converter to UTF-8 owned by the caller, or NULL
- * when the input is UTF-8 (priv->is_utf8 is then set) or nothing matched.
- */
-static GCharsetConverter *
-guess_encoding (GeditSmartCharsetConverter *smart,
-                const void                 *inbuf,
-                gsize                       inbuf_size)
-{
-	GCharsetConverter *conv = NULL;
-
-	if (inbuf == NULL || inbuf_size == 0)
-	{
-		smart->priv->is_utf8 = TRUE;
-		return NULL;
-	}
-
-	if (smart->priv->encodings != NULL &&
-	    smart->priv->encodings->next == NULL)
-	{
-		smart->priv->use_first = TRUE;
-	}
-
-	/* We just check the first block */
-	while (TRUE)
-	{
-		const GeditEncoding *enc;
-
-		/* Drop the converter of the candidate rejected last round. */
-		if (conv != NULL)
-		{
-			g_object_unref (conv);
-			conv = NULL;
-		}
-
-		/* We get an encoding from the list */
-		enc = get_encoding (smart);
-
-		/* if it is NULL we didn't guess anything */
-		if (enc == NULL)
-		{
-			break;
-		}
-
-		gedit_debug_message (DEBUG_UTILS, "trying charset: %s",
-		                     gedit_encoding_get_charset (enc));
-
-		if (enc == gedit_encoding_get_utf8 ())
-		{
-			gsize remainder;
-			const gchar *end;
-
-			if (g_utf8_validate (inbuf, inbuf_size, &end) ||
-			    smart->priv->use_first)
-			{
-				smart->priv->is_utf8 = TRUE;
-				break;
-			}
-
-			/* Check if the end is less than one char */
-			remainder = inbuf_size - (end - (const gchar *) inbuf);
-			if (remainder < 6)
-			{
-				smart->priv->is_utf8 = TRUE;
-				break;
-			}
-
-			continue;
-		}
-
-		conv = g_charset_converter_new ("UTF-8",
-		                                gedit_encoding_get_charset (enc),
-		                                NULL);
-
-		/* With a single candidate we use it without testing it */
-		if (smart->priv->use_first)
-		{
-			break;
-		}
-
-		/* The conversion from this charset is not supported: move on
-		 * to the next candidate instead of ending the search. */
-		if (conv == NULL)
-		{
-			continue;
-		}
-
-		/* Try to convert */
-		if (try_convert (conv, inbuf, inbuf_size))
-		{
-			break;
-		}
-	}
-
-	if (conv != NULL)
-	{
-		/* The trial conversion may have left state behind. */
-		g_converter_reset (G_CONVERTER (conv));
-
-		/* FIXME: uncomment this when we want to use the fallback
-		g_charset_converter_set_use_fallback (conv, TRUE);*/
-	}
-
-	return conv;
-}
-
-/*
- * GConverter::convert implementation.
- *
- * On the first call the encoding is guessed from @inbuf. Afterwards the
- * data is either copied through unchanged (input is UTF-8) or handed to
- * the charset converter that was selected.
- *
- * In the pass-through case at most MIN (@inbuf_size, @outbuf_size) bytes
- * are copied. G_CONVERTER_FINISHED / G_CONVERTER_FLUSHED are only reported
- * once the whole input has been consumed, so that a small output buffer
- * does not make the caller drop the unread tail.
- *
- * Returns: the conversion result; G_CONVERTER_ERROR with @error set to
- * GEDIT_DOCUMENT_ERROR_ENCODING_AUTO_DETECTION_FAILED when no candidate
- * encoding fits, or whatever the underlying charset converter reports.
- */
-static GConverterResult
-gedit_smart_charset_converter_convert (GConverter       *converter,
-                                       const void       *inbuf,
-                                       gsize             inbuf_size,
-                                       void             *outbuf,
-                                       gsize             outbuf_size,
-                                       GConverterFlags   flags,
-                                       gsize            *bytes_read,
-                                       gsize            *bytes_written,
-                                       GError          **error)
-{
-	GeditSmartCharsetConverter *smart = GEDIT_SMART_CHARSET_CONVERTER (converter);
-
-	/* Guess the encoding if we didn't make it yet */
-	if (smart->priv->charset_conv == NULL &&
-	    !smart->priv->is_utf8)
-	{
-		smart->priv->charset_conv = guess_encoding (smart, inbuf, inbuf_size);
-
-		/* If nothing changed, no candidate encoding matched */
-		if (smart->priv->charset_conv == NULL &&
-		    !smart->priv->is_utf8)
-		{
-			/* FIXME: Add a different domain when we kill gedit_convert */
-			g_set_error_literal (error, GEDIT_DOCUMENT_ERROR,
-			                     GEDIT_DOCUMENT_ERROR_ENCODING_AUTO_DETECTION_FAILED,
-			                     _("It is not possible to detect the encoding automatically"));
-			return G_CONVERTER_ERROR;
-		}
-	}
-
-	/* Now if the encoding is utf8 just redirect the input to the output */
-	if (smart->priv->is_utf8)
-	{
-		gsize size = MIN (inbuf_size, outbuf_size);
-
-		/* inbuf may be NULL when there is nothing to copy */
-		if (size > 0)
-		{
-			memcpy (outbuf, inbuf, size);
-		}
-
-		*bytes_read = size;
-		*bytes_written = size;
-
-		/* Input is still pending: the caller has to call again */
-		if (size < inbuf_size)
-		{
-			return G_CONVERTER_CONVERTED;
-		}
-
-		if (flags & G_CONVERTER_INPUT_AT_END)
-		{
-			return G_CONVERTER_FINISHED;
-		}
-
-		if (flags & G_CONVERTER_FLUSH)
-		{
-			return G_CONVERTER_FLUSHED;
-		}
-
-		return G_CONVERTER_CONVERTED;
-	}
-
-	/* Otherwise the text needs a real conversion, which is delegated to
-	   the charset converter */
-	return g_converter_convert (G_CONVERTER (smart->priv->charset_conv),
-	                            inbuf,
-	                            inbuf_size,
-	                            outbuf,
-	                            outbuf_size,
-	                            flags,
-	                            bytes_read,
-	                            bytes_written,
-	                            error);
-}
-
-/*
- * GConverter::reset implementation: forgets the guessed encoding so that
- * the next convert call guesses again from the head of the candidate list.
- */
-static void
-gedit_smart_charset_converter_reset (GConverter *converter)
-{
-	GeditSmartCharsetConverterPrivate *priv;
-
-	priv = GEDIT_SMART_CHARSET_CONVERTER (converter)->priv;
-
-	priv->current_encoding = NULL;
-	priv->is_utf8 = FALSE;
-
-	if (priv->charset_conv == NULL)
-	{
-		return;
-	}
-
-	g_object_unref (priv->charset_conv);
-	priv->charset_conv = NULL;
-}
-
-/* Fills in the GConverter interface vtable with this type's handlers. */
-static void
-gedit_smart_charset_converter_iface_init (GConverterIface *iface)
-{
-	iface->reset = gedit_smart_charset_converter_reset;
-	iface->convert = gedit_smart_charset_converter_convert;
-}
-
-/*
- * gedit_smart_charset_converter_new:
- * @candidate_encodings: non-empty list of encodings to try, in order
- *
- * Creates a converter that will pick one of @candidate_encodings. The list
- * is copied shallowly: the nodes are duplicated, the data pointers are
- * shared with the caller's list.
- *
- * Returns: a new converter, or NULL if @candidate_encodings is NULL.
- */
-GeditSmartCharsetConverter *
-gedit_smart_charset_converter_new (GSList *candidate_encodings)
-{
-	GeditSmartCharsetConverter *self;
-
-	g_return_val_if_fail (candidate_encodings != NULL, NULL);
-
-	self = g_object_new (GEDIT_TYPE_SMART_CHARSET_CONVERTER, NULL);
-	self->priv->encodings = g_slist_copy (candidate_encodings);
-
-	return self;
-}
-
-/*
- * gedit_smart_charset_converter_get_guessed:
- * @smart: a #GeditSmartCharsetConverter
- *
- * Returns: the encoding the candidate cursor currently points at; failing
- * that, the UTF-8 encoding if the input is being treated as UTF-8; NULL
- * otherwise.
- */
-const GeditEncoding *
-gedit_smart_charset_converter_get_guessed (GeditSmartCharsetConverter *smart)
-{
-	GeditSmartCharsetConverterPrivate *priv;
-
-	g_return_val_if_fail (GEDIT_IS_SMART_CHARSET_CONVERTER (smart), NULL);
-
-	priv = smart->priv;
-
-	if (priv->current_encoding != NULL)
-	{
-		return (const GeditEncoding *) priv->current_encoding->data;
-	}
-
-	if (!priv->is_utf8)
-	{
-		return NULL;
-	}
-
-	return gedit_encoding_get_utf8 ();
-}
-
-/*
- * gedit_smart_charset_converter_get_num_fallbacks:
- * @smart: a #GeditSmartCharsetConverter
- *
- * Returns: the number of fallbacks the underlying charset converter has
- * applied so far, or 0 when no charset converter is in use. Callers that
- * only test the result for being non-zero keep working as before.
- */
-guint
-gedit_smart_charset_converter_get_num_fallbacks (GeditSmartCharsetConverter *smart)
-{
-	g_return_val_if_fail (GEDIT_IS_SMART_CHARSET_CONVERTER (smart), 0);
-
-	if (smart->priv->charset_conv == NULL)
-	{
-		return 0;
-	}
-
-	/* Report the real count rather than collapsing it to 0/1. */
-	return g_charset_converter_get_num_fallbacks (smart->priv->charset_conv);
-}
-