summary | refs | log | tree | commit | diff
path: root/backend
diff options
context:
space:
mode:
author: rootavish <[email protected]> 2014-08-06 15:34:02 +0530
committer: rootavish <[email protected]> 2014-08-06 15:34:02 +0530
commit: 56880392a6678ccec12bbec016939597acd49b07 (patch)
tree: fd127f796f5127a63f564a5dea82d64f422a9580 /backend
parent: 5f3f572777cfcb64a92a4671fb2aff5faa5cefa4 (diff)
download: atril-56880392a6678ccec12bbec016939597acd49b07.tar.bz2
download: atril-56880392a6678ccec12bbec016939597acd49b07.tar.xz
Searching in epub documents
I added the capability to search through documents. The search is quite buggy, and we'll be taking care of each bug one at a time.
Diffstat (limited to 'backend')
-rw-r--r-- backend/epub/epub-document.c 92
1 files changed, 58 insertions, 34 deletions
diff --git a/backend/epub/epub-document.c b/backend/epub/epub-document.c
index e9443c09..3a574151 100644
--- a/backend/epub/epub-document.c
+++ b/backend/epub/epub-document.c
@@ -29,14 +29,13 @@
#include "ev-document-misc.h"
#include <libxml/parser.h>
#include <libxml/xmlmemory.h>
-
+#include <libxml/HTMLparser.h>
#include <config.h>
#include <glib/gi18n.h>
#include <glib/gstdio.h>
#include <gtk/gtk.h>
-#include <stdio.h>
/*For strcasestr(),strstr()*/
#include <string.h>
@@ -130,47 +129,72 @@ epub_document_thumbnails_get_thumbnail (EvDocumentThumbnails *document,
}
static gboolean
+in_tag(const char* found)
+{
+ const char* bracket = found ;
+
+ /* Since the dump starts with the body tag, '<' is always the first
+ * character in the haystack, so the backwards scan below always stops.
+ */
+ while (*bracket != '<') {
+ bracket--;
+ if (*bracket == '>') {
+ /* We encountered a closing '>' before an opening '<' */
+ return FALSE ;
+ }
+ }
+
+ return TRUE;
+}
+
+static int
+get_substr_count(const char * haystack,const char *needle,gboolean case_sensitive)
+{
+ const char* tmp = haystack ;
+ char* (*string_compare_function)(const char*,const char*);
+ int count=0;
+ if (case_sensitive) {
+ string_compare_function = strstr ;
+ }
+ else {
+ string_compare_function = strcasestr;
+ }
+
+ while ((tmp=string_compare_function(tmp,needle))) {
+ if (!in_tag(tmp)) {
+ count++;
+ }
+ tmp = tmp + strlen(needle);
+ }
+
+ return count;
+}
+
+static guint
epub_document_check_hits(EvDocumentFind *document_find,
EvPage *page,
const gchar *text,
gboolean case_sensitive)
{
gchar *filepath = g_filename_from_uri((gchar*)page->backend_page,NULL,NULL);
- FILE *fp = fopen(filepath,"r");
- GString *buffer;
- gchar *found ;
-
- while (!feof(fp)) {
- gchar c;
- gint pos=0;
- buffer = g_string_sized_new (1024);
-
- while ((c = fgetc(fp)) != '\n' && !feof(fp)) {
- g_string_insert_c(buffer,pos++,c);
- }
+ htmlDocPtr htmldoc = xmlParseFile(filepath);
+ htmlNodePtr htmltag = xmlDocGetRootElement(htmldoc);
+ int count=0;
+ htmlNodePtr bodytag = htmltag->xmlChildrenNode;
- g_string_insert_c(buffer,pos,'\0');
-
- if (case_sensitive) {
- if ((found = strstr(buffer->str,text)) != NULL) {
- g_string_free(buffer,TRUE);
- fclose(fp);
- return TRUE;
- }
- }
- else {
-
- if ( (found = strcasestr(buffer->str,text)) != NULL) {
- g_string_free(buffer,TRUE);
- fclose(fp);
- return TRUE;
- }
- }
- g_string_free(buffer,TRUE);
+ while ( xmlStrcmp(bodytag->name,(xmlChar*)"body") ) {
+ bodytag = bodytag->next;
}
- fclose(fp);
- return FALSE;
+ xmlBufferPtr bodybuffer = xmlBufferCreate();
+ xmlNodeDump(bodybuffer,htmldoc,bodytag,0,1);
+
+ count = get_substr_count((char*)bodybuffer->content,text,case_sensitive);
+
+ xmlBufferFree(bodybuffer);
+ xmlFreeDoc(htmldoc);
+
+ return count;
}
static gboolean