From 5f3f572777cfcb64a92a4671fb2aff5faa5cefa4 Mon Sep 17 00:00:00 2001 From: rootavish Date: Sun, 3 Aug 2014 12:23:21 +0530 Subject: Table of contents(index) for epub The documents can finally be read alongside a TOC, I tested it with a variety of documents before this commit, so I presume it should work with all. Also fixed the part where I had screwed up PDF annotations owing to a wrong call. In the next commit, fully functional searching would be up, using HTML parser of libxml in combination with webkit. Also some other minor changes. --- backend/epub/epub-document.c | 472 +++++++++++++++++++++++++++++++++---------- 1 file changed, 361 insertions(+), 111 deletions(-) (limited to 'backend/epub/epub-document.c') diff --git a/backend/epub/epub-document.c b/backend/epub/epub-document.c index 56d7e67d..e9443c09 100644 --- a/backend/epub/epub-document.c +++ b/backend/epub/epub-document.c @@ -25,6 +25,7 @@ #include "unzip.h" #include "ev-document-thumbnails.h" #include "ev-document-find.h" +#include "ev-document-links.h" #include "ev-document-misc.h" #include #include @@ -34,17 +35,10 @@ #include #include -#if GTK_CHECK_VERSION(3, 0, 0) - #include -#else - #include -#endif - #include #include -/*For strcasestr()*/ - +/*For strcasestr(),strstr()*/ #include typedef enum _xmlParseReturnType @@ -59,6 +53,12 @@ typedef struct _contentListNode { gint index ; }contentListNode; +typedef struct _linknode { + gchar *pagelink; + gchar *linktext; + guint page; +}linknode; + typedef struct _EpubDocumentClass EpubDocumentClass; struct _EpubDocumentClass @@ -79,17 +79,25 @@ struct _EpubDocument unzFile epubDocument ; /*The (sub)directory that actually houses the document*/ gchar* documentdir; + /*Stores the table of contents*/ + GList *index; + /*Document title, for the sidebar links*/ + gchar *docTitle; }; static void epub_document_document_thumbnails_iface_init (EvDocumentThumbnailsInterface *iface); -static void epub_document_find_iface_init (EvDocumentFindInterface *iface); +static void epub_document_document_find_iface_init (EvDocumentFindInterface *iface); +static void epub_document_document_links_iface_init (EvDocumentLinksInterface *iface); EV_BACKEND_REGISTER_WITH_CODE (EpubDocument, epub_document, { EV_BACKEND_IMPLEMENT_INTERFACE (EV_TYPE_DOCUMENT_THUMBNAILS, epub_document_document_thumbnails_iface_init); EV_BACKEND_IMPLEMENT_INTERFACE (EV_TYPE_DOCUMENT_FIND, - epub_document_find_iface_init); + epub_document_document_find_iface_init); + EV_BACKEND_IMPLEMENT_INTERFACE (EV_TYPE_DOCUMENT_LINKS, + epub_document_document_links_iface_init); + } ); static void @@ -165,6 +173,108 @@ epub_document_check_hits(EvDocumentFind *document_find, return FALSE; } +static gboolean +epub_document_links_has_document_links(EvDocumentLinks *document_links) +{ + EpubDocument *epub_document = EPUB_DOCUMENT(document_links); + + g_return_if_fail(EPUB_IS_DOCUMENT(epub_document)); + + if (!epub_document->index) + return FALSE; + + return TRUE; +} + + +typedef struct _LinksCBStruct { + GtkTreeModel *model; + GtkTreeIter *parent; +}LinksCBStruct; + +static void +epub_document_make_tree_entry(linknode* ListData,LinksCBStruct* UserData) +{ + GtkTreeIter tree_iter; + EvLink *link = NULL; + gboolean expand; + char *title_markup; + + //These are all children of the document title, and have no chlidren nodes + expand = FALSE; + + EvLinkDest *ev_dest = NULL; + EvLinkAction *ev_action; + + /* We shall use a EV_LINK_DEST_TYPE_PAGE for page links, + * and a EV_LINK_DEST_TYPE_HLINK(custom) for refs on a page of type url#label + * because we need both dest and page label for this. + */ + + if (g_strrstr(ListData->pagelink,"#") == NULL) { + ev_dest = ev_link_dest_new_page(ListData->page); + } + else { + ev_dest = ev_link_dest_new_hlink((gchar*)ListData->pagelink,ListData->page); + } + + ev_action = ev_link_action_new_dest (ev_dest); + + link = ev_link_new((gchar*)ListData->linktext,ev_action); + + gtk_tree_store_append (GTK_TREE_STORE (UserData->model), &tree_iter,(UserData->parent)); + title_markup = g_strdup((gchar*)ListData->linktext); + + gtk_tree_store_set (GTK_TREE_STORE (UserData->model), &tree_iter, + EV_DOCUMENT_LINKS_COLUMN_MARKUP, title_markup, + EV_DOCUMENT_LINKS_COLUMN_LINK, link, + EV_DOCUMENT_LINKS_COLUMN_EXPAND, expand, + -1); + + g_free (title_markup); + g_object_unref (link); +} + +static GtkTreeModel * +epub_document_links_get_links_model(EvDocumentLinks *document_links) +{ + GtkTreeModel *model = NULL; + + g_return_val_if_fail (EPUB_IS_DOCUMENT (document_links), NULL); + + EpubDocument *epub_document = EPUB_DOCUMENT(document_links); + + model = (GtkTreeModel*) gtk_tree_store_new (EV_DOCUMENT_LINKS_COLUMN_NUM_COLUMNS, + G_TYPE_STRING, + G_TYPE_OBJECT, + G_TYPE_BOOLEAN, + G_TYPE_STRING); + + LinksCBStruct linkStruct; + linkStruct.model = model; + EvLink *link = ev_link_new(epub_document->docTitle, + ev_link_action_new_dest(ev_link_dest_new_page(0))); + GtkTreeIter parent; + + linkStruct.parent = &parent; + + gtk_tree_store_append (GTK_TREE_STORE (model), &parent,NULL); + + gtk_tree_store_set (GTK_TREE_STORE (model), &parent, + EV_DOCUMENT_LINKS_COLUMN_MARKUP, epub_document->docTitle, + EV_DOCUMENT_LINKS_COLUMN_LINK, link, + EV_DOCUMENT_LINKS_COLUMN_EXPAND, TRUE, + -1); + + g_object_unref(link); + + if (epub_document->index) { + g_list_foreach (epub_document->index,(GFunc)epub_document_make_tree_entry,&linkStruct); + } + + return model; +} + static void epub_document_document_thumbnails_iface_init (EvDocumentThumbnailsInterface *iface) { @@ -173,11 +283,18 @@ epub_document_document_thumbnails_iface_init (EvDocumentThumbnailsInterface *ifa } static void -epub_document_find_iface_init (EvDocumentFindInterface *iface) +epub_document_document_find_iface_init (EvDocumentFindInterface *iface) { iface->check_for_hits = epub_document_check_hits; } +static void +epub_document_document_links_iface_init(EvDocumentLinksInterface *iface) +{ + iface->has_document_links = epub_document_links_has_document_links; + iface->get_links_model = epub_document_links_get_links_model; +} + static gboolean epub_document_save (EvDocument *document, const char *uri, @@ -190,7 +307,7 @@ epub_document_save (EvDocument *document, static int epub_document_get_n_pages (EvDocument *document) -{ +{ EpubDocument *epub_document = EPUB_DOCUMENT (document); if (epub_document-> contentList == NULL) @@ -404,6 +521,7 @@ static void xml_free_doc() { xmlFreeDoc(xmldocument); + xmldocument = NULL; } static gboolean @@ -750,18 +868,30 @@ get_uri_to_content(const gchar* uri,GError ** error,EpubDocument *epub_document) } g_string_free(absolutepath,TRUE); g_free(directorybuffer); + xml_free_doc(); return content_uri ; } +static gboolean +link_present_on_page(const gchar* link,const gchar *page_uri) +{ + if (g_strrstr(link, page_uri)) { + return TRUE; + } + else { + return FALSE; + } +} + static GList* -setup_document_content_list(const gchar* content_uri, GError** error,gchar *documentdir) +setup_document_content_list(const gchar* content_uri, GError** error,gchar *documentdir,GList *docindex) { GList* newlist = NULL ; GError * err = NULL ; gint indexcounter= 1; xmlNodePtr manifest,spine,itemrefptr,itemptr ; gboolean errorflag = FALSE; - + GList *indexcopy = docindex,*indexcopyiter = docindex; gchar* relativepath ; GString* absolutepath = g_string_new(NULL); @@ -815,6 +945,7 @@ setup_document_content_list(const gchar* content_uri, GError** error,gchar *docu /*Parse the spine for remaining itemrefs*/ do { + indexcopyiter = indexcopy ; /*for the first time that we enter the loop, if errorflag is set we break*/ if ( errorflag ) { @@ -850,8 +981,21 @@ setup_document_content_list(const gchar* content_uri, GError** error,gchar *docu errorflag =TRUE; break; } - + newnode->index = indexcounter++ ; + + /* NOTE:Because the TOC is not always in a sorted manner, we need to check all remaining pages every time. + */ + while (indexcopyiter != NULL) { + linknode *linkdata = indexcopyiter->data; + + if (link_present_on_page(linkdata->pagelink,newnode->value)) { + linkdata->page = newnode->index - 1; + indexcopy = indexcopy->next; + } + indexcopyiter = indexcopyiter->next; + } + newlist = g_list_prepend(newlist,newnode); } itemrefptr = itemrefptr->next ; @@ -878,6 +1022,7 @@ setup_document_content_list(const gchar* content_uri, GError** error,gchar *docu } newlist = g_list_reverse(newlist); g_string_free(absolutepath,TRUE); + xml_free_doc(); return newlist ; } @@ -893,113 +1038,109 @@ free_tree_nodes(gpointer data) } static void -epub_document_init (EpubDocument *epub_document) +free_link_nodes(gpointer data) { - epub_document->archivename = NULL ; - epub_document->tmp_archive_dir = NULL ; - epub_document->contentList = NULL ; - epub_document->documentdir = NULL; + linknode* dataptr = data ; + g_free(dataptr->pagelink); + g_free(dataptr->linktext); + g_free(dataptr); } -typedef struct _linknode { - guint page; - gchar *linktext; -}linknode; - -static void -setup_document_index(EpubDocument *epub_document,const gchar* contentUri) +static gchar* +get_toc_file_name(gchar *containeruri) { - linknode *index; + gchar *containerfilename = g_filename_from_uri(containeruri,NULL,NULL); -} + open_xml_document(containerfilename); -static gboolean -epub_document_load (EvDocument* document, - const char* uri, - GError** error) -{ - EpubDocument *epub_document = EPUB_DOCUMENT(document); - GError* err = NULL ; - gchar* containeruri ; - GString *containerpath ; - gchar* contentOpfUri ; - if ( check_mime_type (uri,&err) == FALSE ) - { - /*Error would've been set by the function*/ - g_propagate_error(error,err); - return FALSE; - } + set_xml_root_node(NULL); - extract_epub_from_container (uri,epub_document,&err); + xmlNodePtr manifest = xml_get_pointer_to_node((xmlChar*)"manifest",NULL,NULL); + xmlNodePtr spine = xml_get_pointer_to_node((xmlChar*)"spine",NULL,NULL); - if ( err ) - { - g_propagate_error( error,err ); - return FALSE; - } + xmlChar *ncx = xml_get_data_from_node(spine,XML_ATTRIBUTE,(xmlChar*)"toc"); + xmlretval = NULL; + xml_parse_children_of_node(manifest,(xmlChar*)"item",(xmlChar*)"id",ncx); - /*FIXME : can this be different, ever?*/ - containerpath = g_string_new(epub_document->tmp_archive_dir); - g_string_append_printf(containerpath,"/META-INF/container.xml"); - containeruri = g_filename_to_uri(containerpath->str,NULL,&err); + gchar* tocfilename = (gchar*)xml_get_data_from_node(xmlretval,XML_ATTRIBUTE,(xmlChar*)"href"); + xml_free_doc(); - if ( err ) - { - g_propagate_error(error,err); - return FALSE; - } - contentOpfUri = get_uri_to_content (containeruri,&err,epub_document); + return tocfilename; +} - if ( contentOpfUri == NULL ) - { - g_propagate_error(error,err); - return FALSE; +static GList* +setup_document_index(EpubDocument *epub_document,gchar *containeruri) +{ + GString *tocpath = g_string_new(epub_document->documentdir); + gchar *tocfilename = get_toc_file_name(containeruri); + GList *index = NULL; + g_string_append_printf (tocpath,"/%s",tocfilename); + GString *pagelink; + open_xml_document(tocpath->str); + g_string_free(tocpath,TRUE); + set_xml_root_node((xmlChar*)"ncx"); + + xmlNodePtr docTitle = xml_get_pointer_to_node((xmlChar*)"docTitle",NULL,NULL); + xmlretval = NULL; + xml_parse_children_of_node(docTitle,(xmlChar*)"text",NULL,NULL); + + while (epub_document->docTitle == NULL && xmlretval != NULL) { + epub_document->docTitle = (gchar*)xml_get_data_from_node(xmlretval,XML_KEYWORD,NULL); + xmlretval = xmlretval->next; } + xmlNodePtr navMap = xml_get_pointer_to_node((xmlChar*)"navMap",NULL,NULL); + xmlretval = NULL; + xml_parse_children_of_node(navMap,(xmlChar*)"navPoint",NULL,NULL); + + xmlNodePtr navPoint = xmlretval; + + do { + + if ( !xmlStrcmp(navPoint->name,(xmlChar*)"navPoint")) { + xmlretval = NULL; + xml_parse_children_of_node(navPoint,(xmlChar*)"navLabel",NULL,NULL); + xmlNodePtr navLabel = xmlretval; + xmlretval = NULL; + gchar *fragment=NULL,*end=NULL; + GString *uri = NULL; + + xml_parse_children_of_node(navLabel,(xmlChar*)"text",NULL,NULL); + linknode *newnode = g_new0(linknode,1); + newnode->linktext = NULL; + while (newnode->linktext == NULL) { + newnode->linktext = (gchar*)xml_get_data_from_node(xmlretval,XML_KEYWORD,NULL); + xmlretval = xmlretval->next; + } + xmlretval = NULL; + xml_parse_children_of_node(navPoint,(xmlChar*)"content",NULL,NULL); + pagelink = g_string_new(epub_document->documentdir); + newnode->pagelink = (gchar*)xml_get_data_from_node(xmlretval,XML_ATTRIBUTE,(xmlChar*)"src"); + g_string_append_printf(pagelink,"/%s",newnode->pagelink); + xmlFree(newnode->pagelink); + + if ((end = g_strrstr(pagelink->str,"#")) != NULL) { + fragment = g_strdup(g_strrstr(pagelink->str,"#")); + *end = '\0'; + } + uri = g_string_new(g_filename_to_uri(pagelink->str,NULL,NULL)); + g_string_free(pagelink,TRUE); + + if (fragment) { + g_string_append(uri,fragment); + } - xml_free_doc() ; - - epub_document->contentList = setup_document_content_list (contentOpfUri,&err,epub_document->documentdir); - - if ( xmldocument != NULL ) - xml_free_doc (); - - if ( epub_document->contentList == NULL ) - { - g_propagate_error(error,err); - return FALSE; - } + newnode->pagelink = g_strdup(uri->str); + g_string_free(uri,TRUE); + index = g_list_prepend(index,newnode); + } + + navPoint = navPoint->next; - return TRUE ; -} + } while(navPoint != NULL); -static void -epub_document_finalize (GObject *object) -{ - EpubDocument *epub_document = EPUB_DOCUMENT (object); - - if (epub_document->epubDocument != NULL) { - if (epub_remove_temporary_dir (epub_document->tmp_archive_dir) == -1) - g_warning (_("There was an error deleting ā€œ%sā€."), - epub_document->tmp_archive_dir); - } - - if ( epub_document->contentList ) { - g_list_free_full(epub_document->contentList,(GDestroyNotify)free_tree_nodes); - epub_document->contentList = NULL; - } - if ( epub_document->tmp_archive_dir) { - g_free (epub_document->tmp_archive_dir); - epub_document->tmp_archive_dir = NULL; - } - if ( epub_document->archivename) { - g_free (epub_document->archivename); - epub_document->archivename = NULL; - } - if ( epub_document->documentdir) { - g_free (epub_document->documentdir); - epub_document->documentdir = NULL; - } - G_OBJECT_CLASS (epub_document_parent_class)->finalize (object); + xml_free_doc(); + + return g_list_reverse(index); } static EvDocumentInfo* @@ -1035,9 +1176,6 @@ epub_document_get_info(EvDocument *document) EV_DOCUMENT_INFO_LINEARIZED | EV_DOCUMENT_INFO_N_PAGES ; - if ( xmldocument != NULL ) - xml_free_doc(); - infofile = g_filename_from_uri(uri,NULL,&error); if ( error ) return epubinfo; @@ -1085,6 +1223,9 @@ epub_document_get_info(EvDocument *document) g_free(uri); g_string_free(containerpath,TRUE); g_string_free(buffer,TRUE); + + if (xmldocument) + xml_free_doc(); return epubinfo ; } @@ -1099,6 +1240,115 @@ epub_document_get_page(EvDocument *document, return page ; } + +static gboolean +epub_document_load (EvDocument* document, + const char* uri, + GError** error) +{ + EpubDocument *epub_document = EPUB_DOCUMENT(document); + GError* err = NULL ; + gchar* containeruri ; + GString *containerpath ; + gchar* contentOpfUri ; + if ( check_mime_type (uri,&err) == FALSE ) + { + /*Error would've been set by the function*/ + g_propagate_error(error,err); + return FALSE; + } + + extract_epub_from_container (uri,epub_document,&err); + + if ( err ) + { + g_propagate_error( error,err ); + return FALSE; + } + + /*FIXME : can this be different, ever?*/ + containerpath = g_string_new(epub_document->tmp_archive_dir); + g_string_append_printf(containerpath,"/META-INF/container.xml"); + containeruri = g_filename_to_uri(containerpath->str,NULL,&err); + + if ( err ) + { + g_propagate_error(error,err); + return FALSE; + } + contentOpfUri = get_uri_to_content (containeruri,&err,epub_document); + + if ( contentOpfUri == NULL ) + { + g_propagate_error(error,err); + return FALSE; + } + + epub_document->index = setup_document_index(epub_document,contentOpfUri); + + epub_document->contentList = setup_document_content_list (contentOpfUri,&err,epub_document->documentdir,epub_document->index); + + if ( epub_document->contentList == NULL ) + { + g_propagate_error(error,err); + return FALSE; + } + + return TRUE ; +} + +static void +epub_document_init (EpubDocument *epub_document) +{ + epub_document->archivename = NULL ; + epub_document->tmp_archive_dir = NULL ; + epub_document->contentList = NULL ; + epub_document->documentdir = NULL; + epub_document->index = NULL; + epub_document->docTitle = NULL; +} + +static void +epub_document_finalize (GObject *object) +{ + EpubDocument *epub_document = EPUB_DOCUMENT (object); + + if (epub_document->epubDocument != NULL) { + if (epub_remove_temporary_dir (epub_document->tmp_archive_dir) == -1) + g_warning (_("There was an error deleting ā€œ%sā€."), + epub_document->tmp_archive_dir); + } + + if ( epub_document->contentList ) { + g_list_free_full(epub_document->contentList,(GDestroyNotify)free_tree_nodes); + epub_document->contentList = NULL; + } + + if (epub_document->index) { + g_list_free_full(epub_document->index,(GDestroyNotify)free_link_nodes); + epub_document->index = NULL; + } + + if ( epub_document->tmp_archive_dir) { + g_free (epub_document->tmp_archive_dir); + epub_document->tmp_archive_dir = NULL; + } + + if (epub_document->docTitle) { + g_free(epub_document->docTitle); + epub_document->docTitle = NULL; + } + if ( epub_document->archivename) { + g_free (epub_document->archivename); + epub_document->archivename = NULL; + } + if ( epub_document->documentdir) { + g_free (epub_document->documentdir); + epub_document->documentdir = NULL; + } + G_OBJECT_CLASS (epub_document_parent_class)->finalize (object); +} + static void epub_document_class_init (EpubDocumentClass *klass) { @@ -1111,4 +1361,4 @@ epub_document_class_init (EpubDocumentClass *klass) ev_document_class->get_n_pages = epub_document_get_n_pages; ev_document_class->get_info = epub_document_get_info; ev_document_class->get_page = epub_document_get_page; -} +} \ No newline at end of file -- cgit v1.2.1