summary | refs | log | tree | commit | diff
path: root/backend/epub
diff options
context:
space:
mode:
author: rootavish <[email protected]> 2014-06-07 05:38:54 +0530
committer: rootavish <[email protected]> 2014-06-07 05:38:54 +0530
commit: 3fd3e5708b0566b1392fa5e7f37a18de137a43e0 (patch)
tree: 41ff633ba7e666c95f1e48aa6f3a0b9493beab87 /backend/epub
parent: 415b008bff9de46176ed39daa2ffece45cdd2506 (diff)
download: atril-3fd3e5708b0566b1392fa5e7f37a18de137a43e0.tar.bz2
          atril-3fd3e5708b0566b1392fa5e7f37a18de137a43e0.tar.xz
Redefined most XML parsing functions, more helpers
This commit contains many more changes than the previous ones:
> The XML parsing functions were not generic enough for my needs and had to be redefined as well as renamed.
> I also set up the document tree for the document using these parsers. All that is now left is to serve these to a webview.
> Fixed a typo in epub-document.h.
TODO: integrate with the Atril frontend.
Diffstat (limited to 'backend/epub')
-rw-r--r-- backend/epub/epub-document.c 414
-rw-r--r-- backend/epub/epub-document.h 2
2 files changed, 367 insertions, 49 deletions
diff --git a/backend/epub/epub-document.c b/backend/epub/epub-document.c
index ec40a3b9..826ccf74 100644
--- a/backend/epub/epub-document.c
+++ b/backend/epub/epub-document.c
@@ -1,27 +1,92 @@
#include "ev-file-helpers.h"
#include "epub-document.h"
#include "unzip.h"
+
#include <libxml/parser.h>
#include <libxml/xmlmemory.h>
+
#include <config.h>
+
#include <glib/gi18n.h>
+#include <glib/gstdio.h>
/* A variable to hold the path where we extact our ePub */
-static gchar* tmp_dir ;
+static gchar* tmp_dir = NULL;
/* A variable to hold our epubDocument , for unzip purposes */
static unzFile epubDocument ;
+typedef enum _xmlParseReturnType
+{
+ xmlattribute = 0,
+ xmlkeyword = 1
+
+}xmlParseReturnType;
+
+struct _DocumentTreeNode {
+ gchar* key ;
+ gchar* value ;
+};
+
+typedef struct _DocumentTreeNode DocumentTreeNode;
+
+/*Prototypes for some future functions*/
+static gboolean
+extract_one_file (GError ** error);
+
+static gboolean
+check_mime_type (const gchar* uri,
+ GError** error);
+
+static gboolean
+extract_epub_from_container (const gchar* uri,
+ GError ** error);
+
+static gboolean
+open_xml_document (const gchar* filename);
+
+static gboolean
+set_xml_root_node (xmlChar* rootname);
+
+static xmlNodePtr
+xml_get_pointer_to_node (xmlChar* parserfor,
+ xmlChar* attributename,
+ xmlChar* attributevalue);
+static void
+xml_parse_children_of_node (xmlNodePtr parent,
+ xmlChar* parserfor,
+ xmlChar* attributename,
+ xmlChar* attributevalue);
+
+static gboolean
+xml_check_attribute_value (xmlNode* node,
+ xmlChar * attributename,
+ xmlChar* attributevalue);
+
+static xmlChar*
+xml_get_data_from_node (xmlNodePtr node,
+ xmlParseReturnType rettype,
+ xmlChar* attributename);
+
+static void
+xml_free_doc();
+
+static void
+free_tree_nodes (gpointer data);
+
+static GList*
+setup_document_tree (const gchar* content_uri,
+ GError** error);
+
/*Global variables for XML parsing*/
-static xmlDocPtr xmldocument ;
-static xmlNodePtr xmlroot ;
-static xmlChar* xmlkey ;
-static xmlChar* retval ;
+static xmlDocPtr xmldocument ;
+static xmlNodePtr xmlroot ;
+static xmlNodePtr retval ;
/*
**Functions to parse the xml files.
**Open a XML document for reading
*/
-gboolean
+static gboolean
open_xml_document ( const gchar* filename )
{
xmldocument = xmlParseFile(filename);
@@ -40,8 +105,8 @@ open_xml_document ( const gchar* filename )
*Check if the root value is same as rootname .
*if supplied rootvalue = NULL ,just set root to rootnode .
**/
-gboolean
-check_xml_root_node(xmlChar* rootname)
+static gboolean
+set_xml_root_node(xmlChar* rootname)
{
xmlroot = xmlDocGetRootElement(xmldocument);
@@ -66,44 +131,52 @@ check_xml_root_node(xmlChar* rootname)
}
}
-xmlChar*
-parse_xml_children(xmlChar* parserfor,
- XMLparsereturntype rettype,
- xmlChar* attributename )
+static xmlNodePtr
+xml_get_pointer_to_node(xmlChar* parserfor,
+ xmlChar* attributename,
+ xmlChar* attributevalue )
{
- xmlNodePtr topchild,children ;
+ xmlNodePtr topchild,children ;
retval = NULL ;
+
+ if ( !xmlStrcmp( xmlroot->name, parserfor) )
+ {
+ return xmlroot ;
+ }
+
topchild = xmlroot->xmlChildrenNode ;
- while ( topchild != NULL )
- {
+ while ( topchild != NULL )
+ {
if ( !xmlStrcmp(topchild->name,parserfor) )
{
- if ( rettype == xmlattribute )
+ if ( xml_check_attribute_value(topchild,attributename,attributevalue) == TRUE )
{
- retval = xmlGetProp(children,attributename);
+ retval = topchild;
return retval;
}
- else
+ else
{
- retval = xmlNodeListGetString(xmldocument,topchild->xmlChildrenNode, 1);
- return retval ;
+ /*No need to parse children node*/
+ topchild = topchild->next ;
+ continue ;
}
}
- parse_children( topchild , parserfor,rettype,attributename) ;
+
+ xml_parse_children_of_node(topchild , parserfor, attributename, attributevalue) ;
topchild = topchild->next ;
- }
+ }
return retval ;
}
static void
-parse_children(xmlNodePtr parent,
- xmlChar* parserfor,
- XMLparsereturntype rettype,
- xmlChar* attributename )
+xml_parse_children_of_node(xmlNodePtr parent,
+ xmlChar* parserfor,
+ xmlChar* attributename,
+ xmlChar* attributevalue )
{
xmlNodePtr child = parent->xmlChildrenNode ;
@@ -111,15 +184,17 @@ parse_children(xmlNodePtr parent,
{
if ( !xmlStrcmp(child->name,parserfor))
{
- if ( rettype == xmlattribute )
+ if ( xml_check_attribute_value(child,attributename,attributevalue) == TRUE )
{
- retval = xmlGetProp(child,attributename);
+ retval = child;
+ return ;
}
- else
+ else
{
- retval = xmlNodeListGetString(xmldocument,child->xmlChildrenNode, 1);
+ /*No need to parse children node*/
+ child = child->next ;
+ continue ;
}
- return ;
}
/*return already if we have retval set*/
@@ -128,17 +203,47 @@ parse_children(xmlNodePtr parent,
return ;
}
- parse_children(child,parserfor,rettype,attributename) ;
+ xml_parse_children_of_node(child,parserfor,attributename,attributevalue) ;
child = child->next ;
}
}
static void
-xml_free_all()
+xml_free_doc()
{
xmlFreeDoc(xmldocument);
- xmlFree(retval);
- xmlFree(xmlkey);
+}
+
+static gboolean
+xml_check_attribute_value(xmlNode* node,
+ xmlChar * attributename,
+ xmlChar* attributevalue)
+{
+ xmlChar* attributefromfile ;
+ if ( attributename == NULL || attributevalue == NULL )
+ {
+ return TRUE ;
+ }
+ else if ( !xmlStrcmp(( attributefromfile = xmlGetProp(node,attributename)),
+ attributevalue) )
+ {
+ xmlFree(attributefromfile);
+ return TRUE ;
+ }
+ xmlFree(attributefromfile);
+ return FALSE ;
+}
+
+static xmlChar*
+xml_get_data_from_node(xmlNodePtr node,xmlParseReturnType rettype,xmlChar* attributename)
+{
+ xmlChar* datastring ;
+ if ( rettype == xmlattribute )
+ datastring= xmlGetProp(node,attributename);
+ else
+ datastring= xmlNodeListGetString(xmldocument,node->xmlChildrenNode, 1);
+
+ return datastring;
}
static gboolean
@@ -166,15 +271,11 @@ check_mime_type(const gchar* uri,GError** error)
}
else
{
- if (err) {
- g_propagate_error (error, err);
- }
- else {
- g_set_error_literal (error,
- EV_DOCUMENT_ERROR,
- EV_DOCUMENT_ERROR_INVALID,
- _("Not an ePub document"));
- }
+ g_set_error_literal (error,
+ EV_DOCUMENT_ERROR,
+ EV_DOCUMENT_ERROR_INVALID,
+ _("Not an ePub document"));
+
return FALSE;
}
}
@@ -272,7 +373,7 @@ extract_epub_from_container (const gchar* uri, GError ** error)
return TRUE ;
}
-gboolean
+static gboolean
extract_one_file(GError ** error)
{
GFile * outfile ;
@@ -296,8 +397,7 @@ extract_one_file(GError ** error)
directory++;
gfilepath = g_string_new(tmp_dir) ;
- g_string_append(gfilepath,"/");
- g_string_append(gfilepath,currentfilename);
+ g_string_append_printf(gfilepath,"/%s",(gchar*)currentfilename);
/*if we encounter a directory, make a directory inside our temporary folder.*/
if (directory != NULL && *directory == '\0')
@@ -321,8 +421,226 @@ extract_one_file(GError ** error)
}
unzCloseCurrentFile (epubDocument) ;
- g_string_free(gfilepath);
+ g_string_free(gfilepath,TRUE);
g_free(currentfilename);
g_free(buffer);
}
+
+static gchar* get_uri_to_content(const gchar* uri,GError ** error)
+{
+ GError * err = NULL ;
+ gchar* containerpath = g_filename_from_uri(uri,NULL,&err);
+ GString* absolutepath = g_string_new(NULL);
+ gchar* content_uri ;
+ xmlNodePtr rootfileNode ;
+ xmlChar* relativepath;
+ if ( !containerpath )
+ {
+ if (err) {
+ g_propagate_error (error,err);
+ }
+ else {
+ g_set_error_literal (error,
+ EV_DOCUMENT_ERROR,
+ EV_DOCUMENT_ERROR_INVALID,
+ _("could not retrieve container file"));
+ }
+ return NULL ;
+ }
+
+ if ( open_xml_document(containerpath) == FALSE )
+ {
+ g_set_error_literal(error,
+ EV_DOCUMENT_ERROR,
+ EV_DOCUMENT_ERROR_INVALID,
+ _("could not open container file"));
+
+ return NULL ;
+ }
+
+ if ( set_xml_root_node("container") == FALSE) {
+
+ g_set_error_literal(error,
+ EV_DOCUMENT_ERROR,
+ EV_DOCUMENT_ERROR_INVALID,
+ _("container file is corrupt"));
+ return NULL ;
+ }
+
+ if ( (rootfileNode = xml_get_pointer_to_node("rootfile","media-type","application/oebps-package+xml")) == NULL)
+ {
+ g_set_error_literal(error,
+ EV_DOCUMENT_ERROR,
+ EV_DOCUMENT_ERROR_INVALID,
+ _("epub file is invalid or corrput"));
+ return NULL ;
+ }
+
+ relativepath = xml_get_data_from_node(rootfileNode,xmlattribute,(xmlChar*)"full-path") ;
+
+ if ( relativepath == NULL )
+ {
+ g_set_error_literal(error,
+ EV_DOCUMENT_ERROR,
+ EV_DOCUMENT_ERROR_INVALID,
+ _("epub file is corrupt,no container"));
+ return NULL ;
+ }
+ g_string_printf(absolutepath,"%s/%s",tmp_dir,relativepath);
+
+ content_uri = g_filename_to_uri(absolutepath->str,NULL,&err);
+ if ( !content_uri ) {
+ if (err) {
+ g_propagate_error (error,err);
+ }
+ else {
+ g_set_error_literal (error,
+ EV_DOCUMENT_ERROR,
+ EV_DOCUMENT_ERROR_INVALID,
+ _("could not retrieve container file"));
+ }
+ return NULL ;
+ }
+ free(absolutepath);
+
+ return content_uri ;
+}
+
+static GList*
+setup_document_tree(const gchar* content_uri, GError** error)
+{
+ GList* newlist = NULL ;
+ GError * err = NULL ;
+ gchar* contentOpf="/home/rootavish/Downloads/zlib/progit/content.opf";
+ xmlNodePtr manifest,spine,itemrefptr,itemptr ;
+ gboolean errorflag = FALSE;
+
+ gchar* relativepath ;
+ GString* absolutepath = g_string_new(NULL);
+
+ if ( open_xml_document(contentOpf) == FALSE )
+ {
+ g_set_error_literal(error,
+ EV_DOCUMENT_ERROR,
+ EV_DOCUMENT_ERROR_INVALID,
+ _("could not parse content manifest"));
+
+ return FALSE ;
+ }
+ if ( set_xml_root_node("package") == FALSE) {
+
+ g_set_error_literal(error,
+ EV_DOCUMENT_ERROR,
+ EV_DOCUMENT_ERROR_INVALID,
+ _("content file is invalid"));
+ return FALSE ;
+ }
+
+ if ( ( spine = xml_get_pointer_to_node("spine",NULL,NULL) )== NULL )
+ {
+ g_set_error_literal(error,
+ EV_DOCUMENT_ERROR,
+ EV_DOCUMENT_ERROR_INVALID,
+ _("epub file has no spine"));
+ return FALSE ;
+ }
+
+ if ( ( manifest = xml_get_pointer_to_node("manifest",NULL,NULL) )== NULL )
+ {
+ g_set_error_literal(error,
+ EV_DOCUMENT_ERROR,
+ EV_DOCUMENT_ERROR_INVALID,
+ _("epub file has no manifest"));
+ return FALSE ;
+ }
+
+ retval = NULL ;
+
+ /*Get first instance of itemref from the spine*/
+ xml_parse_children_of_node(spine,"itemref",NULL,NULL);
+
+ if ( retval != NULL )
+ itemrefptr = retval ;
+ else
+ {
+ errorflag=TRUE;
+ }
+ /*Parse the spine for remaining itemrefs*/
+ do
+ {
+ /*for the first time that we enter the loop, if errorflag is set we break*/
+ if ( errorflag )
+ {
+ break;
+ }
+ if ( xmlStrcmp(itemrefptr->name,"itemref") == 0)
+ {
+ DocumentTreeNode* newnode = g_malloc0(sizeof(newnode));
+ newnode->key = xml_get_data_from_node(itemrefptr,xmlattribute,(xmlChar*)"idref");
+
+ if ( newnode->key == NULL )
+ {
+ errorflag =TRUE;
+ break;
+ }
+ retval=NULL ;
+ xml_parse_children_of_node(manifest,"item","id",newnode->key);
+
+ if ( retval != NULL )
+ {
+ itemptr = retval ;
+ }
+ else
+ {
+ errorflag=TRUE;
+ break;
+ }
+ relativepath = xml_get_data_from_node(itemptr,xmlattribute,(xmlChar*)"href");
+
+ g_string_assign(absolutepath,tmp_dir);
+ g_string_append_printf(absolutepath,"/%s",relativepath);
+ newnode->value = g_filename_to_uri(absolutepath->str,NULL,&err);
+ if ( newnode->value == NULL )
+ {
+ errorflag =TRUE;
+ break;
+ }
+ newlist = g_list_prepend(newlist,newnode);
+ }
+ itemrefptr = itemrefptr->next ;
+ }
+ while ( itemrefptr != NULL );
+
+ if ( errorflag )
+ {
+ if ( err )
+ {
+ g_propagate_error(error,err);
+ }
+ else
+ {
+ g_set_error_literal(error,
+ EV_DOCUMENT_ERROR,
+ EV_DOCUMENT_ERROR_INVALID,
+ _("Could not set up document tree for loading, some files missing"));
+ }
+ /*free any nodes that were set up and return empty*/
+ g_string_free(absolutepath,TRUE);
+ g_list_free_full(newlist,(GDestroyNotify)free_tree_nodes);
+ return NULL ;
+ }
+
+ g_string_free(absolutepath,TRUE);
+ return newlist ;
+
+}
+
+static void
+free_tree_nodes(gpointer data)
+{
+ DocumentTreeNode* dataptr = data ;
+ g_free(dataptr->value);
+ g_free(dataptr->key);
+ g_free(dataptr);
+} \ No newline at end of file
diff --git a/backend/epub/epub-document.h b/backend/epub/epub-document.h
index 5fff2514..db9c99e5 100644
--- a/backend/epub/epub-document.h
+++ b/backend/epub/epub-document.h
@@ -11,7 +11,7 @@ G_BEGIN_DECLS
typedef struct _ePubDocument ePubDocument;
-GType ePub_document_get_type (void) G_GNUC_CONST;
+GType epub_document_get_type (void) G_GNUC_CONST;
G_MODULE_EXPORT GType register_atril_backend (GTypeModule *module);