summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSlava Aseev <[email protected]>2019-07-18 17:15:11 +0300
committerVictor Kareh <[email protected]>2019-08-02 07:43:29 -0400
commitdd4a31a1eb6fdd0fd4c22276de864d83c98830a2 (patch)
treec67b0d4d55610bfb1c221f43c5011c9adda5762f
parentd638f84c9dfbbb8cca2625cd431921f52e603c02 (diff)
downloadcaja-dd4a31a1eb6fdd0fd4c22276de864d83c98830a2.tar.bz2
caja-dd4a31a1eb6fdd0fd4c22276de864d83c98830a2.tar.xz
Support querying files by contained text
ODF files are also supported via odt2txt.
-rw-r--r--libcaja-private/caja-query.c12
-rw-r--r--libcaja-private/caja-query.h3
-rw-r--r--libcaja-private/caja-search-engine-simple.c128
-rw-r--r--src/caja-query-editor.c50
4 files changed, 192 insertions, 1 deletions
diff --git a/libcaja-private/caja-query.c b/libcaja-private/caja-query.c
index e7baf72f..1064fd16 100644
--- a/libcaja-private/caja-query.c
+++ b/libcaja-private/caja-query.c
@@ -39,6 +39,7 @@ struct CajaQueryDetails
GList *tags;
gint64 timestamp;
gint64 size;
+ char *contained_text;
};
static void caja_query_class_init (CajaQueryClass *class);
@@ -485,3 +486,14 @@ gint64 caja_query_get_size(CajaQuery *query)
{
return query->details->size;
}
+
+/**
+ * caja_query_set_contained_text:
+ * @query: query whose contained-text criterion is replaced
+ * @text: text to store, or NULL to clear the criterion; the string is copied
+ *
+ * Replaces the text stored in the query's contained_text field. The previous
+ * value is released.
+ */
+void caja_query_set_contained_text (CajaQuery *query, const char *text)
+{
+    /* Duplicate before freeing so the call stays valid even if @text points
+     * at the string currently owned by the query. */
+    char *copy = g_strdup (text);
+
+    g_free (query->details->contained_text);
+    query->details->contained_text = copy;
+}
+
+/* Returns a newly allocated copy of the query's contained_text field; the
+ * caller releases it with g_free(). */
+char *caja_query_get_contained_text (CajaQuery *query)
+{
+    const char *stored = query->details->contained_text;
+
+    return g_strdup (stored);
+}
diff --git a/libcaja-private/caja-query.h b/libcaja-private/caja-query.h
index a1ec2864..117e813b 100644
--- a/libcaja-private/caja-query.h
+++ b/libcaja-private/caja-query.h
@@ -75,4 +75,7 @@ void caja_query_set_timestamp (CajaQuery *query, gint64 sec);
gint64 caja_query_get_size (CajaQuery *query);
void caja_query_set_size (CajaQuery *query, gint64 size);
+/* Returns a newly allocated copy of the contained text; free it with g_free(). */
+char * caja_query_get_contained_text (CajaQuery *query);
+/* Stores a private copy of text as the query's contained text. */
+void caja_query_set_contained_text (CajaQuery *query, const char *text);
+
#endif /* CAJA_QUERY_H */
diff --git a/libcaja-private/caja-search-engine-simple.c b/libcaja-private/caja-search-engine-simple.c
index 40abeb3a..f9c6498d 100644
--- a/libcaja-private/caja-search-engine-simple.c
+++ b/libcaja-private/caja-search-engine-simple.c
@@ -38,6 +38,7 @@ typedef struct
CajaSearchEngineSimple *engine;
GCancellable *cancellable;
+ char *contained_text;
GList *mime_types;
GList *tags;
char **words;
@@ -129,6 +130,7 @@ search_thread_data_new (CajaSearchEngineSimple *engine,
data->mime_types = caja_query_get_mime_types (query);
data->timestamp = caja_query_get_timestamp (query);
data->size = caja_query_get_size (query);
+ data->contained_text = caja_query_get_contained_text (query);
data->cancellable = g_cancellable_new ();
@@ -147,6 +149,7 @@ search_thread_data_free (SearchThreadData *data)
g_list_free_full (data->tags, g_free);
g_list_free_full (data->mime_types, g_free);
g_list_free_full (data->uri_hits, g_free);
+ g_free (data->contained_text);
g_free (data);
}
@@ -342,6 +345,102 @@ file_has_all_tags (GFileInfo *info, GList *tags)
return TRUE;
}
+/**
+ * check_odt2txt:
+ *
+ * Runs "odt2txt --version" synchronously and inspects the result to decide
+ * whether the odt2txt converter can be used.
+ *
+ * Returns: TRUE when the command could be spawned, reported a zero status
+ * and printed output starting with "odt2txt"; FALSE otherwise.
+ */
+static inline gboolean
+check_odt2txt (void)
+{
+    gchar *output = NULL;
+    /* Deliberately not called "exit", which would shadow the libc function. */
+    int wait_status = 0;
+    gboolean available;
+
+    available = g_spawn_command_line_sync ("odt2txt --version", &output, NULL, &wait_status, NULL)
+                && wait_status == 0
+                && output != NULL
+                && g_str_has_prefix (output, "odt2txt");
+
+    g_free (output);
+    return available;
+}
+
+/**
+ * read_odt:
+ * @filepath: path of the document handed to odt2txt
+ *
+ * Runs "odt2txt <filepath>" synchronously and captures its standard output.
+ *
+ * Returns: the captured output as a newly allocated string (free with
+ * g_free()), or NULL when @filepath is NULL, the program could not be
+ * spawned, or it reported a non-zero status.
+ */
+static inline gchar *
+read_odt (const char *filepath)
+{
+    /* The path travels as its own argv element instead of being pasted into
+     * a command line, so quotes, backslashes or spaces in a file name cannot
+     * break parsing or smuggle extra arguments to odt2txt. The spawn call
+     * does not modify the strings, so dropping const here is harmless. */
+    gchar *argv[] = { (gchar *) "odt2txt", (gchar *) filepath, NULL };
+    gchar *output = NULL;
+    int wait_status = 0;
+
+    if (filepath == NULL)
+        return NULL;
+
+    /* G_SPAWN_SEARCH_PATH matches what g_spawn_command_line_sync() uses. */
+    if (!g_spawn_sync (NULL, argv, NULL, G_SPAWN_SEARCH_PATH, NULL, NULL,
+                       &output, NULL, &wait_status, NULL)
+        || wait_status != 0)
+    {
+        g_free (output);
+        return NULL;
+    }
+
+    return output;
+}
+
+/* Normalizes str with G_NORMALIZE_DEFAULT and lower-cases the result.
+ * Returns a newly allocated string (free with g_free()), or NULL when
+ * normalization produced no string. */
+static inline gchar *
+utf8_normalize_strdown (const char *str)
+{
+    gchar *canonical = g_utf8_normalize (str, -1, G_NORMALIZE_DEFAULT);
+    gchar *result;
+
+    if (canonical == NULL)
+        return NULL;
+
+    result = g_utf8_strdown (canonical, -1);
+    g_free (canonical);
+
+    return result;
+}
+
+/**
+ * is_file_has_str:
+ * @filepath: path of the file to inspect; may be NULL
+ * @str: text to look for; NULL or "" means no constraint
+ * @mime_type: content type of the file; may be NULL
+ * @odt2txt_available: whether odt2txt may be used for non-plain-text files
+ *
+ * Checks whether the file's text contains @str. Files whose type is a
+ * subtype of "text/plain" are read directly; every other type is converted
+ * with read_odt(). Both sides are normalized and lower-cased before the
+ * substring search.
+ *
+ * Returns: TRUE when @str is NULL/empty or was found; FALSE when it was not
+ * found or the text could not be obtained.
+ */
+static inline gboolean
+is_file_has_str (
+    const char *filepath,
+    const char *str,
+    const char *mime_type,
+    gboolean odt2txt_available)
+{
+    gboolean found = FALSE;
+    gchar *contents = NULL;
+    gchar *lower_contents = NULL;
+    gchar *lower_str = NULL;
+
+    /* No search text: nothing to filter on. */
+    if (str == NULL || str[0] == '\0') {
+        return TRUE;
+    }
+
+    /* The caller obtains the path from g_file_get_path() and the type from
+     * the file info, either of which can be NULL; report "no match" instead
+     * of handing NULL to GLib. */
+    if (filepath == NULL || mime_type == NULL) {
+        return FALSE;
+    }
+
+    if (g_content_type_is_mime_type (mime_type, "text/plain")) {
+        if (!g_file_get_contents (filepath, &contents, NULL, NULL)) {
+            contents = NULL;
+        }
+    }
+    else if (odt2txt_available) {
+        contents = read_odt (filepath);
+    }
+    else {
+        g_warning ("Can't search in file '%s'. odt2txt not found.", filepath);
+    }
+
+    if (contents != NULL) {
+        lower_str = utf8_normalize_strdown (str);
+        lower_contents = utf8_normalize_strdown (contents);
+
+        /* Either conversion can yield NULL; that counts as "not found". */
+        found = lower_str != NULL
+                && lower_contents != NULL
+                && strstr (lower_contents, lower_str) != NULL;
+    }
+
+    g_free (contents);
+    g_free (lower_str);
+    g_free (lower_contents);
+
+    return found;
+}
+
static void
visit_directory (GFile *dir, SearchThreadData *data)
{
@@ -358,9 +457,11 @@ visit_directory (GFile *dir, SearchThreadData *data)
GTimeVal result;
gchar *attributes;
GString *attr_string;
+ gchar *filepath = NULL;
+ gboolean odt2txt_available = FALSE;
attr_string = g_string_new (STD_ATTRIBUTES);
- if (data->mime_types != NULL) {
+ if (data->mime_types != NULL || data->contained_text != NULL) {
g_string_append (attr_string, "," G_FILE_ATTRIBUTE_STANDARD_CONTENT_TYPE);
}
if (data->tags != NULL) {
@@ -374,6 +475,10 @@ visit_directory (GFile *dir, SearchThreadData *data)
g_string_append (attr_string, "," G_FILE_ATTRIBUTE_STANDARD_SIZE);
}
+ if (data->contained_text != NULL) {
+ odt2txt_available = check_odt2txt();
+ }
+
attributes = g_string_free (attr_string, FALSE);
enumerator = g_file_enumerate_children (dir, (const char*)attributes, 0,
data->cancellable, NULL);
@@ -456,6 +561,26 @@ visit_directory (GFile *dir, SearchThreadData *data)
child = g_file_get_child (dir, g_file_info_get_name (info));
+ if (hit && data->contained_text) {
+ mime_type = g_file_info_get_content_type (info);
+
+ if (g_content_type_is_mime_type (mime_type, "text/plain") ||
+ g_content_type_equals (mime_type, "application/vnd.oasis.opendocument.text") ||
+ g_content_type_equals (mime_type, "application/vnd.oasis.opendocument.text-template") ||
+ g_content_type_equals (mime_type, "application/vnd.oasis.opendocument.spreadsheet") ||
+ g_content_type_equals (mime_type, "application/vnd.oasis.opendocument.spreadsheet-template") ||
+ g_content_type_equals (mime_type, "application/vnd.oasis.opendocument.presentation") ||
+ g_content_type_equals (mime_type, "application/vnd.oasis.opendocument.presentation-template")
+ ) {
+ g_free (filepath);
+ filepath = g_file_get_path (child);
+ hit = is_file_has_str (filepath, data->contained_text, mime_type, odt2txt_available);
+ }
+ else {
+ hit = FALSE;
+ }
+ }
+
if (hit)
{
data->uri_hits = g_list_prepend (data->uri_hits, g_file_get_uri (child));
@@ -495,6 +620,7 @@ next:
g_object_unref (info);
}
+ g_free (filepath);
g_object_unref (enumerator);
}
diff --git a/src/caja-query-editor.c b/src/caja-query-editor.c
index 55dff943..5fbfddc9 100644
--- a/src/caja-query-editor.c
+++ b/src/caja-query-editor.c
@@ -57,6 +57,7 @@ typedef enum
CAJA_QUERY_EDITOR_ROW_TAGS,
CAJA_QUERY_EDITOR_ROW_TIME_MODIFIED,
CAJA_QUERY_EDITOR_ROW_SIZE,
+ CAJA_QUERY_EDITOR_ROW_CONTAINED_TEXT,
CAJA_QUERY_EDITOR_ROW_LAST
} CajaQueryEditorRowType;
@@ -157,6 +158,13 @@ static void size_row_free_data(CajaQueryEditorRow *row);
static void size_add_rows_from_query(CajaQueryEditor *editor,
CajaQuery *query);
+static GtkWidget *contained_text_row_create_widgets(CajaQueryEditorRow *row);
+static void contained_text_row_add_to_query(CajaQueryEditorRow *row,
+ CajaQuery *query);
+static void contained_text_row_free_data(CajaQueryEditorRow *row);
+static void contained_text_add_rows_from_query(CajaQueryEditor *editor,
+ CajaQuery *query);
+
static CajaQueryEditorRowOps row_type[] =
{
{
@@ -193,6 +201,13 @@ static CajaQueryEditorRowOps row_type[] =
size_row_add_to_query,
size_row_free_data,
size_add_rows_from_query
+ },
+ {
+ N_("Contained text"),
+ contained_text_row_create_widgets,
+ contained_text_row_add_to_query,
+ contained_text_row_free_data,
+ contained_text_add_rows_from_query
}
};
@@ -1323,6 +1338,41 @@ static void size_add_rows_from_query(CajaQueryEditor *editor, CajaQuery *query)
{
}
+/* Creates the text entry for a "Contained text" row: sets its tooltip and
+ * placeholder, shows it, packs it into the row's hbox and connects its
+ * "activate" signal to go_search_cb with the row's editor as user data.
+ * Returns the entry. */
+static GtkWidget *
+contained_text_row_create_widgets (CajaQueryEditorRow *row)
+{
+    GtkWidget *text_entry;
+
+    text_entry = gtk_entry_new ();
+
+    gtk_widget_set_tooltip_text (text_entry, _("Matches files that contains specified text."));
+    gtk_entry_set_placeholder_text (GTK_ENTRY (text_entry), _("Matches files that contains specified text."));
+
+    gtk_widget_show (text_entry);
+    gtk_box_pack_start (GTK_BOX (row->hbox), text_entry, TRUE, TRUE, 0);
+
+    g_signal_connect (text_entry, "activate", G_CALLBACK (go_search_cb), row->editor);
+
+    return text_entry;
+}
+
+/* Copies the current text of the row's entry widget into the query as its
+ * contained-text criterion. */
+static void
+contained_text_row_add_to_query (CajaQueryEditorRow *row, CajaQuery *query)
+{
+    caja_query_set_contained_text (query,
+                                   gtk_entry_get_text (GTK_ENTRY (row->type_widget)));
+}
+
+/* "Free data" hook registered in row_type[] for contained-text rows.
+ * Currently does nothing. */
+static void
+contained_text_row_free_data (CajaQueryEditorRow *row)
+{
+}
+
+/* "Add rows from query" hook registered in row_type[] for contained-text
+ * rows. Currently does nothing, so no row is created from the query here.
+ * NOTE(review): presumably a query's contained text is therefore not shown
+ * again when the editor is populated from it -- confirm against the callers. */
+static void
+contained_text_add_rows_from_query (CajaQueryEditor *editor, CajaQuery *query)
+{
+}
static CajaQueryEditorRowType
get_next_free_type (CajaQueryEditor *editor)