From 3fd3e5708b0566b1392fa5e7f37a18de137a43e0 Mon Sep 17 00:00:00 2001 From: rootavish Date: Sat, 7 Jun 2014 05:38:54 +0530 Subject: Redefined most XML parsing functions,more helpers This commit as compared to previous ones has much more changes as: > The xml parsing functions were not generic enough to my needs and needed to be redefined as well as renamed . > I also set up the document tree for the document using these parsers.All that is now left is to serve these to a webview. > Fixed a typo in epub-document.h TODO: integrate with Atril frontend. --- backend/epub/epub-document.c | 414 ++++++++++++++++++++++++++++++++++++++----- backend/epub/epub-document.h | 2 +- 2 files changed, 367 insertions(+), 49 deletions(-) diff --git a/backend/epub/epub-document.c b/backend/epub/epub-document.c index ec40a3b9..826ccf74 100644 --- a/backend/epub/epub-document.c +++ b/backend/epub/epub-document.c @@ -1,27 +1,92 @@ #include "ev-file-helpers.h" #include "epub-document.h" #include "unzip.h" + #include #include + #include + #include +#include /* A variable to hold the path where we extact our ePub */ -static gchar* tmp_dir ; +static gchar* tmp_dir = NULL; /* A variable to hold our epubDocument , for unzip purposes */ static unzFile epubDocument ; +typedef enum _xmlParseReturnType +{ + xmlattribute = 0, + xmlkeyword = 1 + +}xmlParseReturnType; + +struct _DocumentTreeNode { + gchar* key ; /* idref attribute of a spine itemref */ + gchar* value ; /* file URI built from the href of the manifest item with that id */ +}; + +typedef struct _DocumentTreeNode DocumentTreeNode; + +/*Prototypes for some future functions*/ +static gboolean +extract_one_file (GError ** error); + +static gboolean +check_mime_type (const gchar* uri, + GError** error); + +static gboolean +extract_epub_from_container (const gchar* uri, + GError ** error); + +static gboolean +open_xml_document (const gchar* filename); + +static gboolean +set_xml_root_node (xmlChar* rootname); + +static xmlNodePtr +xml_get_pointer_to_node (xmlChar* parserfor, + xmlChar* attributename, + xmlChar* attributevalue); +static void 
+xml_parse_children_of_node (xmlNodePtr parent, + xmlChar* parserfor, + xmlChar* attributename, + xmlChar* attributevalue); + +static gboolean +xml_check_attribute_value (xmlNode* node, + xmlChar * attributename, + xmlChar* attributevalue); + +static xmlChar* +xml_get_data_from_node (xmlNodePtr node, + xmlParseReturnType rettype, + xmlChar* attributename); + +static void +xml_free_doc(); + +static void +free_tree_nodes (gpointer data); + +static GList* +setup_document_tree (const gchar* content_uri, + GError** error); + /*Global variables for XML parsing*/ -static xmlDocPtr xmldocument ; -static xmlNodePtr xmlroot ; -static xmlChar* xmlkey ; -static xmlChar* retval ; +static xmlDocPtr xmldocument ; +static xmlNodePtr xmlroot ; +static xmlNodePtr retval ; /* result slot written by the node-search helpers below */ /* **Functions to parse the xml files. **Open a XML document for reading */ -gboolean +static gboolean open_xml_document ( const gchar* filename ) { xmldocument = xmlParseFile(filename); @@ -40,8 +105,8 @@ open_xml_document ( const gchar* filename ) *Check if the root value is same as rootname . *if supplied rootvalue = NULL ,just set root to rootnode . 
**/ -gboolean -check_xml_root_node(xmlChar* rootname) +static gboolean +set_xml_root_node(xmlChar* rootname) { xmlroot = xmlDocGetRootElement(xmldocument); @@ -66,44 +131,52 @@ check_xml_root_node(xmlChar* rootname) } } -xmlChar* -parse_xml_children(xmlChar* parserfor, - XMLparsereturntype rettype, - xmlChar* attributename ) +static xmlNodePtr +xml_get_pointer_to_node(xmlChar* parserfor, + xmlChar* attributename, + xmlChar* attributevalue ) { - xmlNodePtr topchild,children ; + xmlNodePtr topchild,children ; retval = NULL ; + + if ( !xmlStrcmp( xmlroot->name, parserfor) ) + { + return xmlroot ; + } + topchild = xmlroot->xmlChildrenNode ; - while ( topchild != NULL ) - { + while ( topchild != NULL ) + { if ( !xmlStrcmp(topchild->name,parserfor) ) { - if ( rettype == xmlattribute ) + if ( xml_check_attribute_value(topchild,attributename,attributevalue) == TRUE ) { - retval = xmlGetProp(children,attributename); + retval = topchild; return retval; } - else + else { - retval = xmlNodeListGetString(xmldocument,topchild->xmlChildrenNode, 1); - return retval ; + /*No need to parse children node*/ + topchild = topchild->next ; + continue ; } } - parse_children( topchild , parserfor,rettype,attributename) ; + + xml_parse_children_of_node(topchild , parserfor, attributename, attributevalue) ; topchild = topchild->next ; - } + } return retval ; } static void -parse_children(xmlNodePtr parent, - xmlChar* parserfor, - XMLparsereturntype rettype, - xmlChar* attributename ) +xml_parse_children_of_node(xmlNodePtr parent, + xmlChar* parserfor, + xmlChar* attributename, + xmlChar* attributevalue ) { xmlNodePtr child = parent->xmlChildrenNode ; @@ -111,15 +184,17 @@ parse_children(xmlNodePtr parent, { if ( !xmlStrcmp(child->name,parserfor)) { - if ( rettype == xmlattribute ) + if ( xml_check_attribute_value(child,attributename,attributevalue) == TRUE ) { - retval = xmlGetProp(child,attributename); + retval = child; + return ; } - else + else { - retval = 
xmlNodeListGetString(xmldocument,child->xmlChildrenNode, 1); + /*No need to parse children node*/ + child = child->next ; + continue ; } - return ; } /*return already if we have retval set*/ @@ -128,17 +203,47 @@ parse_children(xmlNodePtr parent, return ; } - parse_children(child,parserfor,rettype,attributename) ; + xml_parse_children_of_node(child,parserfor,attributename,attributevalue) ; child = child->next ; } } static void -xml_free_all() +xml_free_doc() { xmlFreeDoc(xmldocument); - xmlFree(retval); - xmlFree(xmlkey); +} + +static gboolean +xml_check_attribute_value(xmlNode* node, + xmlChar * attributename, + xmlChar* attributevalue) +{ + xmlChar* attributefromfile ; + if ( attributename == NULL || attributevalue == NULL ) + { + return TRUE ; /* no attribute filter supplied: every node matches */ + } + else if ( !xmlStrcmp(( attributefromfile = xmlGetProp(node,attributename)), + attributevalue) ) + { + xmlFree(attributefromfile); + return TRUE ; + } + xmlFree(attributefromfile); + return FALSE ; +} + +static xmlChar* +xml_get_data_from_node(xmlNodePtr node,xmlParseReturnType rettype,xmlChar* attributename) +{ + xmlChar* datastring ; + if ( rettype == xmlattribute ) + datastring= xmlGetProp(node,attributename); + else + datastring= xmlNodeListGetString(xmldocument,node->xmlChildrenNode, 1); + + return datastring; } static gboolean @@ -166,15 +271,11 @@ check_mime_type(const gchar* uri,GError** error) } else { - if (err) { - g_propagate_error (error, err); - } - else { - g_set_error_literal (error, - EV_DOCUMENT_ERROR, - EV_DOCUMENT_ERROR_INVALID, - _("Not an ePub document")); - } + g_set_error_literal (error, + EV_DOCUMENT_ERROR, + EV_DOCUMENT_ERROR_INVALID, + _("Not an ePub document")); + return FALSE; } } @@ -272,7 +373,7 @@ extract_epub_from_container (const gchar* uri, GError ** error) return TRUE ; } -gboolean +static gboolean extract_one_file(GError ** error) { GFile * outfile ; @@ -296,8 +397,7 @@ extract_one_file(GError ** error) directory++; 
gfilepath = g_string_new(tmp_dir) ; - 
g_string_append(gfilepath,"/"); - g_string_append(gfilepath,currentfilename); + g_string_append_printf(gfilepath,"/%s",(gchar*)currentfilename); /*if we encounter a directory, make a directory inside our temporary folder.*/ if (directory != NULL && *directory == '\0') @@ -321,8 +421,226 @@ extract_one_file(GError ** error) } unzCloseCurrentFile (epubDocument) ; - g_string_free(gfilepath); + g_string_free(gfilepath,TRUE); g_free(currentfilename); g_free(buffer); } + +static gchar* get_uri_to_content(const gchar* uri,GError ** error) +{ + GError * err = NULL ; + gchar* containerpath = g_filename_from_uri(uri,NULL,&err); + GString* absolutepath = g_string_new(NULL); + gchar* content_uri ; + xmlNodePtr rootfileNode ; + xmlChar* relativepath; + if ( !containerpath ) + { + if (err) { + g_propagate_error (error,err); + } + else { + g_set_error_literal (error, + EV_DOCUMENT_ERROR, + EV_DOCUMENT_ERROR_INVALID, + _("could not retrieve container file")); + } + return NULL ; + } + + if ( open_xml_document(containerpath) == FALSE ) + { + g_set_error_literal(error, + EV_DOCUMENT_ERROR, + EV_DOCUMENT_ERROR_INVALID, + _("could not open container file")); + + return NULL ; + } + + if ( set_xml_root_node("container") == FALSE) { + + g_set_error_literal(error, + EV_DOCUMENT_ERROR, + EV_DOCUMENT_ERROR_INVALID, + _("container file is corrupt")); + return NULL ; + } + + if ( (rootfileNode = xml_get_pointer_to_node("rootfile","media-type","application/oebps-package+xml")) == NULL) + { + g_set_error_literal(error, + EV_DOCUMENT_ERROR, + EV_DOCUMENT_ERROR_INVALID, + _("epub file is invalid or corrput")); + return NULL ; + } + + relativepath = xml_get_data_from_node(rootfileNode,xmlattribute,(xmlChar*)"full-path") ; + + if ( relativepath == NULL ) + { + g_set_error_literal(error, + EV_DOCUMENT_ERROR, + EV_DOCUMENT_ERROR_INVALID, + _("epub file is corrupt,no container")); + return NULL ; + } + g_string_printf(absolutepath,"%s/%s",tmp_dir,relativepath); + + content_uri = 
g_filename_to_uri(absolutepath->str,NULL,&err); + if ( !content_uri ) { + if (err) { + g_propagate_error (error,err); + } + else { + g_set_error_literal (error, + EV_DOCUMENT_ERROR, + EV_DOCUMENT_ERROR_INVALID, + _("could not retrieve container file")); + } + return NULL ; + } + g_string_free(absolutepath,TRUE); /* absolutepath is a GString: release it and its buffer with g_string_free, not free() */ + + return content_uri ; +} + +static GList* +setup_document_tree(const gchar* content_uri, GError** error) +{ + GList* newlist = NULL ; + GError * err = NULL ; + gchar* contentOpf="/home/rootavish/Downloads/zlib/progit/content.opf"; /* NOTE(review): fixed developer path; the content_uri argument is never read */ + xmlNodePtr manifest,spine,itemrefptr,itemptr ; + gboolean errorflag = FALSE; + + gchar* relativepath ; + GString* absolutepath = g_string_new(NULL); + + if ( open_xml_document(contentOpf) == FALSE ) + { + g_set_error_literal(error, + EV_DOCUMENT_ERROR, + EV_DOCUMENT_ERROR_INVALID, + _("could not parse content manifest")); + + return NULL ; + } + if ( set_xml_root_node("package") == FALSE) { + + g_set_error_literal(error, + EV_DOCUMENT_ERROR, + EV_DOCUMENT_ERROR_INVALID, + _("content file is invalid")); + return NULL ; + } + + if ( ( spine = xml_get_pointer_to_node("spine",NULL,NULL) )== NULL ) + { + g_set_error_literal(error, + EV_DOCUMENT_ERROR, + EV_DOCUMENT_ERROR_INVALID, + _("epub file has no spine")); + return NULL ; + } + + if ( ( manifest = xml_get_pointer_to_node("manifest",NULL,NULL) )== NULL ) + { + g_set_error_literal(error, + EV_DOCUMENT_ERROR, + EV_DOCUMENT_ERROR_INVALID, + _("epub file has no manifest")); + return NULL ; + } + + retval = NULL ; + + /*Get first instance of itemref from the spine*/ + xml_parse_children_of_node(spine,"itemref",NULL,NULL); + + if ( retval != NULL ) + itemrefptr = retval ; + else + { + errorflag=TRUE; + } + /*Parse the spine for remaining itemrefs*/ + do + { + /*for the first time that we enter the loop, if 
errorflag is set we break*/ + if ( errorflag ) + { + break; + } + if ( xmlStrcmp(itemrefptr->name,"itemref") == 0) + { + DocumentTreeNode* newnode = g_malloc0(sizeof(*newnode)); /* size of the struct, not of the pointer */ 
+ newnode->key = xml_get_data_from_node(itemrefptr,xmlattribute,(xmlChar*)"idref"); + + if ( newnode->key == NULL ) + { + errorflag =TRUE; /* NOTE(review): newnode is not yet in newlist, so the cleanup below does not release it on this or the later early breaks */ + break; + } + retval=NULL ; + xml_parse_children_of_node(manifest,"item","id",newnode->key); + + if ( retval != NULL ) + { + itemptr = retval ; + } + else + { + errorflag=TRUE; + break; + } + relativepath = xml_get_data_from_node(itemptr,xmlattribute,(xmlChar*)"href"); + + g_string_assign(absolutepath,tmp_dir); + g_string_append_printf(absolutepath,"/%s",relativepath); + newnode->value = g_filename_to_uri(absolutepath->str,NULL,&err); + if ( newnode->value == NULL ) + { + errorflag =TRUE; + break; + } + newlist = g_list_prepend(newlist,newnode); /* NOTE(review): prepending yields reverse spine order - confirm the caller expects that */ + } + itemrefptr = itemrefptr->next ; + } + while ( itemrefptr != NULL ); + + if ( errorflag ) + { + if ( err ) + { + g_propagate_error(error,err); + } + else + { + g_set_error_literal(error, + EV_DOCUMENT_ERROR, + EV_DOCUMENT_ERROR_INVALID, + _("Could not set up document tree for loading, some files missing")); + } + /*free any nodes that were set up and return empty*/ + g_string_free(absolutepath,TRUE); + g_list_free_full(newlist,(GDestroyNotify)free_tree_nodes); + return NULL ; + } + + g_string_free(absolutepath,TRUE); + return newlist ; + +} + +static void +free_tree_nodes(gpointer data) +{ + DocumentTreeNode* dataptr = data ; + g_free(dataptr->value); + xmlFree(dataptr->key); /* key is obtained through xmlGetProp, so it is released with libxml2's xmlFree */ + g_free(dataptr); +} \ No newline at end of file diff --git a/backend/epub/epub-document.h b/backend/epub/epub-document.h index 5fff2514..db9c99e5 100644 --- a/backend/epub/epub-document.h +++ b/backend/epub/epub-document.h @@ -11,7 +11,7 @@ G_BEGIN_DECLS typedef struct _ePubDocument ePubDocument; -GType 
ePub_document_get_type (void) G_GNUC_CONST; +GType epub_document_get_type (void) G_GNUC_CONST; G_MODULE_EXPORT GType register_atril_backend (GTypeModule *module); -- cgit v1.2.1