/* * Implements search and copy functionality for Djvu files. * Copyright (C) 2006 Michael Hofmann <mh21@piware.de> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2, or (at your option) * any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ #include <config.h> #include <string.h> #include <glib.h> #include <libdjvu/miniexp.h> #include "djvu-text-page.h" /** * djvu_text_page_selection_process: * @page: #DjvuTextPage instance * @p: s-expression to append * @delimit: character/word/... delimiter * * Appends the string in @p to the page text. * * Returns: whether the end was not reached in this s-expression */ static gboolean djvu_text_page_selection_process (DjvuTextPage *page, miniexp_t p, int delimit) { if (page->text || p == page->start) { char *token_text = (char *) miniexp_to_str (miniexp_nth (5, p)); if (page->text) { char *new_text = g_strjoin (delimit & 2 ? "\n" : delimit & 1 ? " " : NULL, page->text, token_text, NULL); g_free (page->text); page->text = new_text; } else page->text = g_strdup (token_text); if (p == page->end) return FALSE; } return TRUE; } /** * djvu_text_page_selection: * @page: #DjvuTextPage instance * @p: tree to append * @delimit: character/word/... delimiter * * Walks the tree in @p and appends the text with * djvu_text_page_selection_process() for all s-expressions * between the start and end fields. * * Returns: whether the end was not reached in this subtree */ static gboolean djvu_text_page_selection (DjvuTextPage *page, miniexp_t p, int delimit) { g_return_val_if_fail (miniexp_consp (p) && miniexp_symbolp (miniexp_car (p)), FALSE); if (miniexp_car (p) != page->char_symbol) delimit |= miniexp_car (p) == page->word_symbol ? 1 : 2; miniexp_t deeper = miniexp_cddr (miniexp_cdddr (p)); while (deeper != miniexp_nil) { miniexp_t str = miniexp_car (deeper); if (miniexp_stringp (str)) { if (!djvu_text_page_selection_process (page, p, delimit)) return FALSE; } else { if (!djvu_text_page_selection (page, str, delimit)) return FALSE; } delimit = 0; deeper = miniexp_cdr (deeper); } return TRUE; } static void djvu_text_page_limits_process (DjvuTextPage *page, miniexp_t p, EvRectangle *rect) { EvRectangle current; current.x1 = miniexp_to_int (miniexp_nth (1, p)); current.y1 = miniexp_to_int (miniexp_nth (2, p)); current.x2 = miniexp_to_int (miniexp_nth (3, p)); current.y2 = miniexp_to_int (miniexp_nth (4, p)); if (current.x2 >= rect->x1 && current.y1 <= rect->y2 && current.x1 <= rect->x2 && current.y2 >= rect->y1) { if (page->start == miniexp_nil) page->start = p; page->end = p; } } static void djvu_text_page_limits (DjvuTextPage *page, miniexp_t p, EvRectangle *rect) { g_return_if_fail (miniexp_consp (p) && miniexp_symbolp (miniexp_car (p))); miniexp_t deeper = miniexp_cddr (miniexp_cdddr (p)); while (deeper != miniexp_nil) { miniexp_t str = miniexp_car (deeper); if (miniexp_stringp (str)) djvu_text_page_limits_process (page, p, rect); else djvu_text_page_limits (page, str, rect); deeper = miniexp_cdr (deeper); } } char * djvu_text_page_copy (DjvuTextPage *page, EvRectangle *rectangle) { char* text; page->start = miniexp_nil; page->end = miniexp_nil; djvu_text_page_limits (page, page->text_structure, rectangle); djvu_text_page_selection (page, page->text_structure, 0); /* Do not free the string */ text = page->text; page->text = NULL; return text; } /** * djvu_text_page_position: * @page: #DjvuTextPage instance * @position: index in the page text * * Returns the closest s-expression that contains the given position in * the page text. * * Returns: closest s-expression */ static miniexp_t djvu_text_page_position (DjvuTextPage *page, int position) { GArray *links = page->links; int low = 0; int hi = links->len - 1; int mid = 0; g_return_val_if_fail (hi >= 0, miniexp_nil); /* Shamelessly copied from GNU classpath */ while (low <= hi) { mid = (low + hi) >> 1; DjvuTextLink *link = &g_array_index (links, DjvuTextLink, mid); if (link->position == position) break; else if (link->position > position) hi = --mid; else low = mid + 1; } return g_array_index (page->links, DjvuTextLink, mid).pair; } /** * djvu_text_page_union: * @target: first rectangle and result * @source: second rectangle * * Calculates the bounding box of two rectangles and stores the reuslt * in the first. */ static void djvu_text_page_union (EvRectangle *target, EvRectangle *source) { if (source->x1 < target->x1) target->x1 = source->x1; if (source->x2 > target->x2) target->x2 = source->x2; if (source->y1 < target->y1) target->y1 = source->y1; if (source->y2 > target->y2) target->y2 = source->y2; } /** * djvu_text_page_sexpr_process: * @page: #DjvuTextPage instance * @p: s-expression to append * @start: first s-expression in the selection * @end: last s-expression in the selection * * Appends the rectangle defined by @p to the internal bounding box rectangle. * * Returns: whether the end was not reached in this s-expression */ static gboolean djvu_text_page_sexpr_process (DjvuTextPage *page, miniexp_t p, miniexp_t start, miniexp_t end) { if (page->bounding_box || p == start) { EvRectangle *new_rectangle = ev_rectangle_new (); new_rectangle->x1 = miniexp_to_int (miniexp_nth (1, p)); new_rectangle->y1 = miniexp_to_int (miniexp_nth (2, p)); new_rectangle->x2 = miniexp_to_int (miniexp_nth (3, p)); new_rectangle->y2 = miniexp_to_int (miniexp_nth (4, p)); if (page->bounding_box) { djvu_text_page_union (page->bounding_box, new_rectangle); g_free (new_rectangle); } else page->bounding_box = new_rectangle; if (p == end) return FALSE; } return TRUE; } /** * djvu_text_page_sexpr: * @page: #DjvuTextPage instance * @p: tree to append * @start: first s-expression in the selection * @end: last s-expression in the selection * * Walks the tree in @p and extends the rectangle with * djvu_text_page_process() for all s-expressions between @start and @end. * * Returns: whether the end was not reached in this subtree */ static gboolean djvu_text_page_sexpr (DjvuTextPage *page, miniexp_t p, miniexp_t start, miniexp_t end) { g_return_val_if_fail (miniexp_consp (p) && miniexp_symbolp (miniexp_car (p)), FALSE); miniexp_t deeper = miniexp_cddr (miniexp_cdddr (p)); while (deeper != miniexp_nil) { miniexp_t str = miniexp_car (deeper); if (miniexp_stringp (str)) { if (!djvu_text_page_sexpr_process (page, p, start, end)) return FALSE; } else { if (!djvu_text_page_sexpr (page, str, start, end)) return FALSE; } deeper = miniexp_cdr (deeper); } return TRUE; } /** * djvu_text_page_box: * @page: #DjvuTextPage instance * @start: first s-expression in the selection * @end: last s-expression in the selection * * Builds a rectangle that contains all s-expressions in the given range. */ static EvRectangle * djvu_text_page_box (DjvuTextPage *page, miniexp_t start, miniexp_t end) { page->bounding_box = NULL; djvu_text_page_sexpr (page, page->text_structure, start, end); return page->bounding_box; } /** * djvu_text_page_append_search: * @page: #DjvuTextPage instance * @p: tree to append * @case_sensitive: do not ignore case * @delimit: insert spaces because of higher (sentence/paragraph/...) break * * Appends the tree in @p to the internal text string. */ static void djvu_text_page_append_text (DjvuTextPage *page, miniexp_t p, gboolean case_sensitive, gboolean delimit) { char *token_text; g_return_if_fail (miniexp_consp (p) && miniexp_symbolp (miniexp_car (p))); delimit |= page->char_symbol != miniexp_car (p); miniexp_t deeper = miniexp_cddr (miniexp_cdddr (p)); while (deeper != miniexp_nil) { miniexp_t data = miniexp_car (deeper); if (miniexp_stringp (data)) { DjvuTextLink link; link.position = page->text == NULL ? 0 : strlen (page->text); link.pair = p; g_array_append_val (page->links, link); token_text = (char *) miniexp_to_str (data); if (!case_sensitive) token_text = g_utf8_casefold (token_text, -1); if (page->text == NULL) page->text = g_strdup (token_text); else { char *new_text = g_strjoin (delimit ? " " : NULL, page->text, token_text, NULL); g_free (page->text); page->text = new_text; } if (!case_sensitive) g_free (token_text); } else djvu_text_page_append_text (page, data, case_sensitive, delimit); delimit = FALSE; deeper = miniexp_cdr (deeper); } } /** * djvu_text_page_search: * @page: #DjvuTextPage instance * @text: text to search * @case_sensitive: do not ignore case * * Searches the page for the given text. The results list has to be * externally freed afterwards. */ void djvu_text_page_search (DjvuTextPage *page, const char *text, gboolean case_sensitive) { char *haystack = page->text; char *search_text; int search_len; EvRectangle *result; if (page->links->len == 0) return; search_len = strlen (text); if (case_sensitive) search_text = g_strdup (text); else search_text = g_utf8_casefold (text, search_len); while ((haystack = strstr (haystack, search_text)) != NULL) { int start_p = haystack - page->text; miniexp_t start = djvu_text_page_position (page, start_p); int end_p = start_p + search_len - 1; miniexp_t end = djvu_text_page_position (page, end_p); result = djvu_text_page_box (page, start, end); g_assert (result); page->results = g_list_prepend (page->results, result); haystack = haystack + search_len; } page->results = g_list_reverse (page->results); g_free (search_text); } /** * djvu_text_page_prepare_search: * @page: #DjvuTextPage instance * @case_sensitive: do not ignore case * * Indexes the page text and prepares the page for subsequent searches. */ void djvu_text_page_prepare_search (DjvuTextPage *page, gboolean case_sensitive) { djvu_text_page_append_text (page, page->text_structure, case_sensitive, FALSE); } /** * djvu_text_page_new: * @text: S-expression of the page text * * Creates a new page to search. * * Returns: new #DjvuTextPage instance */ DjvuTextPage * djvu_text_page_new (miniexp_t text) { DjvuTextPage *page; page = g_new0 (DjvuTextPage, 1); page->links = g_array_new (FALSE, FALSE, sizeof (DjvuTextLink)); page->char_symbol = miniexp_symbol ("char"); page->word_symbol = miniexp_symbol ("word"); page->text_structure = text; return page; } /** * djvu_text_page_free: * @page: #DjvuTextPage instance * * Frees the given #DjvuTextPage instance. */ void djvu_text_page_free (DjvuTextPage *page) { g_free (page->text); g_array_free (page->links, TRUE); g_free (page); }