/*
 * Implements search and copy functionality for Djvu files.
 * Copyright (C) 2006 Michael Hofmann <mh21@piware.de>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2, or (at your option)
 * any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 */

#include <config.h>
#include <string.h>
#include <glib.h>
#include <libdjvu/miniexp.h>
#include "djvu-text-page.h"


/**
 * djvu_text_page_selection_process:
 * @page: #DjvuTextPage instance
 * @p: s-expression to append
 * @delimit: character/word/... delimiter
 * 
 * Appends the string in @p to the page text.
 * 
 * Returns: whether the end was not reached in this s-expression
 */
static gboolean
djvu_text_page_selection_process (DjvuTextPage *page, 
                                  miniexp_t     p,
                                  int           delimit)
{
	if (page->text || p == page->start) {
		char *token_text = (char *) miniexp_to_str (miniexp_nth (5, p));
		if (page->text) {
			char *new_text =
			    g_strjoin (delimit & 2 ? "\n" : 
			    	       delimit & 1 ? " " : NULL,
				       page->text, token_text,
				       NULL);
			g_free (page->text);
			page->text = new_text;
		} else
			page->text = g_strdup (token_text);
		if (p == page->end) 
			return FALSE;
	}
	return TRUE;
}

/**
 * djvu_text_page_selection:
 * @page: #DjvuTextPage instance
 * @p: tree to append
 * @delimit: character/word/... delimiter
 * 
 * Walks the tree in @p and appends the text with
 * djvu_text_page_selection_process() for all s-expressions 
 * between the start and end fields.
 * 
 * Returns: whether the end was not reached in this subtree
 */
static gboolean
djvu_text_page_selection (DjvuTextPage *page, 
		          miniexp_t     p,
		          int           delimit)
{
	g_return_val_if_fail (miniexp_consp (p) && miniexp_symbolp
			      (miniexp_car (p)), FALSE);

	if (miniexp_car (p) != page->char_symbol) 
		delimit |= miniexp_car (p) == page->word_symbol ? 1 : 2;
		
	miniexp_t deeper = miniexp_cddr (miniexp_cdddr (p));
	while (deeper != miniexp_nil) {
		miniexp_t str = miniexp_car (deeper);
		if (miniexp_stringp (str)) {
			if (!djvu_text_page_selection_process
			    (page, p, delimit))
				return FALSE;
		} else {
			if (!djvu_text_page_selection
			    (page, str, delimit))
				return FALSE;
		}
		delimit = 0;
		deeper = miniexp_cdr (deeper);
	}
	return TRUE;
}

static void
djvu_text_page_limits_process (DjvuTextPage *page,
			       miniexp_t     p, 
			       EvRectangle  *rect)
{
	EvRectangle current;
	
	current.x1 = miniexp_to_int (miniexp_nth (1, p));
	current.y1 = miniexp_to_int (miniexp_nth (2, p));
	current.x2 = miniexp_to_int (miniexp_nth (3, p));
	current.y2 = miniexp_to_int (miniexp_nth (4, p));
	if (current.x2 >= rect->x1 && current.y1 <= rect->y2 &&
	    current.x1 <= rect->x2 && current.y2 >= rect->y1) {
	    	if (page->start == miniexp_nil)
	    		page->start = p;
	    	page->end = p;
	}
}


static void
djvu_text_page_limits (DjvuTextPage *page,
			  miniexp_t     p, 
			  EvRectangle  *rect)
{
	g_return_if_fail (miniexp_consp (p) && 
			  miniexp_symbolp (miniexp_car (p)));

	miniexp_t deeper = miniexp_cddr (miniexp_cdddr (p));
	while (deeper != miniexp_nil) {
		miniexp_t str = miniexp_car (deeper);
		if (miniexp_stringp (str))
			djvu_text_page_limits_process (page, p, rect);
		else
			djvu_text_page_limits (page, str, rect);

		deeper = miniexp_cdr (deeper);
	}
}

char *
djvu_text_page_copy (DjvuTextPage *page, 
		     EvRectangle  *rectangle)
{
	char* text;
	
	page->start = miniexp_nil;
	page->end = miniexp_nil;
	djvu_text_page_limits (page, page->text_structure, rectangle);
	djvu_text_page_selection (page, page->text_structure, 0);
	
	/* Do not free the string */	  
	text = page->text;
	page->text = NULL;
	
	return text;
}

/**
 * djvu_text_page_position:
 * @page: #DjvuTextPage instance
 * @position: index in the page text
 * 
 * Returns the closest s-expression that contains the given position in 
 * the page text.
 * 
 * Returns: closest s-expression
 */
static miniexp_t
djvu_text_page_position (DjvuTextPage *page, 
			 int           position)
{
	GArray *links = page->links;
	int low = 0;
	int hi = links->len - 1;
	int mid = 0;

	g_return_val_if_fail (hi >= 0, miniexp_nil);

	/* Shamelessly copied from GNU classpath */
	while (low <= hi) {
		mid = (low + hi) >> 1;
		DjvuTextLink *link =
		    &g_array_index (links, DjvuTextLink, mid);
		if (link->position == position)
			break;
		else if (link->position > position)
			hi = --mid;
		else
			low = mid + 1;
	}

	return g_array_index (page->links, DjvuTextLink, mid).pair;
}

/**
 * djvu_text_page_union:
 * @target: first rectangle and result
 * @source: second rectangle
 * 
 * Calculates the bounding box of two rectangles and stores the reuslt 
 * in the first.
 */
static void
djvu_text_page_union (EvRectangle *target, 
		      EvRectangle *source)
{
	if (source->x1 < target->x1)
		target->x1 = source->x1;
	if (source->x2 > target->x2)
		target->x2 = source->x2;
	if (source->y1 < target->y1)
		target->y1 = source->y1;
	if (source->y2 > target->y2)
		target->y2 = source->y2;
}

/**
 * djvu_text_page_sexpr_process:
 * @page: #DjvuTextPage instance
 * @p: s-expression to append
 * @start: first s-expression in the selection
 * @end: last s-expression in the selection
 * 
 * Appends the rectangle defined by @p to the internal bounding box rectangle.
 * 
 * Returns: whether the end was not reached in this s-expression
 */
static gboolean
djvu_text_page_sexpr_process (DjvuTextPage *page, 
                              miniexp_t     p,
                              miniexp_t     start, 
                              miniexp_t     end)
{
	if (page->bounding_box || p == start) {
		EvRectangle *new_rectangle = ev_rectangle_new ();
		new_rectangle->x1 = miniexp_to_int (miniexp_nth (1, p));
		new_rectangle->y1 = miniexp_to_int (miniexp_nth (2, p));
		new_rectangle->x2 = miniexp_to_int (miniexp_nth (3, p));
		new_rectangle->y2 = miniexp_to_int (miniexp_nth (4, p));
		if (page->bounding_box) {
			djvu_text_page_union (page->bounding_box,
					      new_rectangle);
			g_free (new_rectangle);
		} else
			page->bounding_box = new_rectangle;
		if (p == end)
			return FALSE;
	}
	return TRUE;
}

/**
 * djvu_text_page_sexpr:
 * @page: #DjvuTextPage instance
 * @p: tree to append
 * @start: first s-expression in the selection
 * @end: last s-expression in the selection
 * 
 * Walks the tree in @p and extends the rectangle with 
 * djvu_text_page_process() for all s-expressions between @start and @end.
 * 
 * Returns: whether the end was not reached in this subtree
 */
static gboolean
djvu_text_page_sexpr (DjvuTextPage *page, 
		      miniexp_t p,
		      miniexp_t start, 
		      miniexp_t end)
{
	g_return_val_if_fail (miniexp_consp (p) && miniexp_symbolp
			      (miniexp_car (p)), FALSE);

	miniexp_t deeper = miniexp_cddr (miniexp_cdddr (p));
	while (deeper != miniexp_nil) {
		miniexp_t str = miniexp_car (deeper);
		if (miniexp_stringp (str)) {
			if (!djvu_text_page_sexpr_process
			    (page, p, start, end))
				return FALSE;
		} else {
			if (!djvu_text_page_sexpr
			    (page, str, start, end))
				return FALSE;
		}
		deeper = miniexp_cdr (deeper);
	}
	return TRUE;
}

/**
 * djvu_text_page_box:
 * @page: #DjvuTextPage instance
 * @start: first s-expression in the selection
 * @end: last s-expression in the selection
 * 
 * Builds a rectangle that contains all s-expressions in the given range.
 */
static EvRectangle *
djvu_text_page_box (DjvuTextPage *page,
		    miniexp_t     start, 
		    miniexp_t     end)
{
	page->bounding_box = NULL;
	djvu_text_page_sexpr (page, page->text_structure, start, end);
	return page->bounding_box;
}

/**
 * djvu_text_page_append_search:
 * @page: #DjvuTextPage instance
 * @p: tree to append
 * @case_sensitive: do not ignore case
 * @delimit: insert spaces because of higher (sentence/paragraph/...) break
 * 
 * Appends the tree in @p to the internal text string. 
 */
static void
djvu_text_page_append_text (DjvuTextPage *page,
			    miniexp_t     p, 
			    gboolean      case_sensitive, 
			    gboolean      delimit)
{
	char *token_text;
	
	g_return_if_fail (miniexp_consp (p) && 
			  miniexp_symbolp (miniexp_car (p)));

	delimit |= page->char_symbol != miniexp_car (p);
	
	miniexp_t deeper = miniexp_cddr (miniexp_cdddr (p));
	while (deeper != miniexp_nil) {
		miniexp_t data = miniexp_car (deeper);
		if (miniexp_stringp (data)) {
			DjvuTextLink link;
			link.position = page->text == NULL ? 0 :
			    strlen (page->text);
			link.pair = p;
			g_array_append_val (page->links, link);

			token_text = (char *) miniexp_to_str (data);
			if (!case_sensitive)
				token_text = g_utf8_casefold (token_text, -1);
			if (page->text == NULL)
				page->text = g_strdup (token_text);
			else {
				char *new_text =
				    g_strjoin (delimit ? " " : NULL,
					       page->text, token_text,
					       NULL);
				g_free (page->text);
				page->text = new_text;
			}
			if (!case_sensitive)
				g_free (token_text);
		} else
			djvu_text_page_append_text (page, data, 
						    case_sensitive, delimit);
		delimit = FALSE;
		deeper = miniexp_cdr (deeper);
	}
}

/**
 * djvu_text_page_search:
 * @page: #DjvuTextPage instance
 * @text: text to search
 * 
 * Searches the page for the given text. The results list has to be 
 * externally freed afterwards.
 */
void 
djvu_text_page_search (DjvuTextPage *page, 
		       const char   *text)
{
	char *haystack = page->text;
	int search_len;
	EvRectangle *result;
	if (page->links->len == 0)
		return;

	search_len = strlen (text);
	while ((haystack = strstr (haystack, text)) != NULL) {
		int start_p = haystack - page->text;
		miniexp_t start = djvu_text_page_position (page, start_p);
		int end_p = start_p + search_len - 1;
		miniexp_t end = djvu_text_page_position (page, end_p);
		result = djvu_text_page_box (page, start, end);
		g_assert (result);
		page->results = g_list_prepend (page->results, result);
		haystack = haystack + search_len;
	}
	page->results = g_list_reverse (page->results);
}


/**
 * djvu_text_page_prepare_search:
 * @page: #DjvuTextPage instance
 * @case_sensitive: do not ignore case
 * 
 * Indexes the page text and prepares the page for subsequent searches.
 */
void
djvu_text_page_prepare_search (DjvuTextPage *page,
	       		       gboolean      case_sensitive)
{
	djvu_text_page_append_text (page, page->text_structure, 
				    case_sensitive, FALSE);	
}

/**
 * djvu_text_page_new:
 * @text: S-expression of the page text
 * 
 * Creates a new page to search. 
 * 
 * Returns: new #DjvuTextPage instance
 */
DjvuTextPage *
djvu_text_page_new (miniexp_t text)
{
	DjvuTextPage *page;

	page = g_new0 (DjvuTextPage, 1);
	page->links = g_array_new (FALSE, FALSE, sizeof (DjvuTextLink));
	page->char_symbol = miniexp_symbol ("char");
	page->word_symbol = miniexp_symbol ("word");
	page->text_structure = text;
	return page;
}

/**
 * djvu_text_page_free:
 * @page: #DjvuTextPage instance
 * 
 * Frees the given #DjvuTextPage instance.
 */
void 
djvu_text_page_free (DjvuTextPage *page)
{
	g_free (page->text);
	g_array_free (page->links, TRUE);
	g_free (page);
}