diff options
Diffstat (limited to 'baobab/src/baobab-scan.c')
-rw-r--r-- | baobab/src/baobab-scan.c | 381 |
1 files changed, 381 insertions, 0 deletions
diff --git a/baobab/src/baobab-scan.c b/baobab/src/baobab-scan.c new file mode 100644 index 00000000..8c85d859 --- /dev/null +++ b/baobab/src/baobab-scan.c @@ -0,0 +1,381 @@ +/* + * baobab-scan.c + * This file is part of baobab + * + * Copyright (C) 2005-2006 Fabio Marzocca <[email protected]> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, + * Boston, MA 02110-1301 USA + */ + + +#include <config.h> + +#include <string.h> + +#include <glib.h> +#include <gtk/gtk.h> +#include <gio/gio.h> + +#include "baobab.h" +#include "baobab-utils.h" + + +/* + Hardlinks handling. + + As long as we're optimistic about hardlinks count + over the whole system (250 files with st_nlink > 1 here), + we keep linear search. If it turns out to be an bottleneck + we can switch to an hash table or tree. + + TODO: get real timings about this code. find out the average + number of files with st_nlink > 1 on average computer. + + To save memory, we store only { inode, dev } instead of full + GFileInfo. + + EDIT: /me stupid. I realize that this code was not called that often + 1 call per file with st_nlink > 1. BUT, i'm using pdumpfs to backup + my /etc. pdumpfs massively uses hard links. So there are more than + 5000 files with st_nlink > 1. I believe this is the worst case. +*/ + +typedef struct { + guint64 inode; + dev_t device; +} BaobabHardLink; + +typedef GArray BaobabHardLinkArray; + +static BaobabHardLinkArray * +baobab_hardlinks_array_create (void) +{ + return g_array_new (FALSE, FALSE, sizeof(BaobabHardLink)); +} + +static gboolean +baobab_hardlinks_array_has (BaobabHardLinkArray *a, + BaobabHardLink *s) +{ + guint i; + + for (i = 0; i < a->len; ++i) { + BaobabHardLink *cur = &g_array_index (a, BaobabHardLink, i); + + /* + * cur->st_dev == s->st_dev is the common case and may be more + * expansive than cur->st_ino == s->st_ino + * so keep this order */ + if (cur->inode == s->inode && cur->device == s->device) + return TRUE; + } + + return FALSE; +} + +/* return FALSE if the element was already in the array */ +static gboolean +baobab_hardlinks_array_add (BaobabHardLinkArray *a, + GFileInfo *s) +{ + + if (g_file_info_has_attribute (s, G_FILE_ATTRIBUTE_UNIX_INODE) && + g_file_info_has_attribute (s, G_FILE_ATTRIBUTE_UNIX_DEVICE)) + { + BaobabHardLink hl; + + hl.inode = g_file_info_get_attribute_uint64 (s, + G_FILE_ATTRIBUTE_UNIX_INODE); + hl.device = g_file_info_get_attribute_uint32 (s, + G_FILE_ATTRIBUTE_UNIX_DEVICE); + + if (baobab_hardlinks_array_has (a, &hl)) + return FALSE; + + g_array_append_val (a, hl); + + return TRUE; + } + else + { + g_warning ("Could not obtain inode and device for hardlink"); + } + + return FALSE; +} + +static void +baobab_hardlinks_array_free (BaobabHardLinkArray *a) +{ +/* g_print ("HL len was %d\n", a->len); */ + + g_array_free (a, TRUE); +} + +#define BLOCK_SIZE 512 + +struct allsizes { + goffset size; + goffset alloc_size; + gint depth; +}; + +static const char *dir_attributes = \ + G_FILE_ATTRIBUTE_STANDARD_NAME "," \ + G_FILE_ATTRIBUTE_STANDARD_DISPLAY_NAME "," \ + G_FILE_ATTRIBUTE_STANDARD_TYPE "," \ + G_FILE_ATTRIBUTE_STANDARD_SIZE "," \ + G_FILE_ATTRIBUTE_UNIX_BLOCKS "," \ + G_FILE_ATTRIBUTE_UNIX_NLINK "," \ + G_FILE_ATTRIBUTE_UNIX_INODE "," \ + G_FILE_ATTRIBUTE_UNIX_DEVICE "," \ + G_FILE_ATTRIBUTE_ACCESS_CAN_READ; + + +static gboolean +is_in_dot_gvfs (GFile *file) +{ + static GFile *dot_gvfs_dir = NULL; + GFile *parent; + gboolean res = FALSE; + + if (dot_gvfs_dir == NULL) + { + gchar *dot_gvfs; + + dot_gvfs = g_build_filename (g_get_home_dir (), ".gvfs", NULL); + + dot_gvfs_dir = g_file_new_for_path (dot_gvfs); + + g_free (dot_gvfs); + } + + parent = g_file_get_parent (file); + + if (parent != NULL) + { + res = g_file_equal (parent, dot_gvfs_dir); + g_object_unref (parent); + } + + return res; +} + +static struct allsizes +loopdir (GFile *file, + GFileInfo *info, + guint count, + BaobabHardLinkArray *hla, + gint current_depth) +{ + guint64 tempHLsize = 0; + gint elements = 0; + struct chan_data data; + struct allsizes retloop, temp; + GFileInfo *temp_info; + GFileEnumerator *file_enum; + gchar *dir_uri = NULL; + gchar *display_name = NULL; + gchar *parse_name = NULL; + GError *err = NULL; + + count++; + retloop.size = 0; + retloop.alloc_size = 0; + retloop.depth = 0; + + /* Skip the user excluded folders */ + if (baobab_is_excluded_location (file)) + goto exit; + + /* Skip the virtual file systems */ + if (is_virtual_filesystem (file)) + goto exit; + + /* FIXME: skip dirs in ~/.gvfs. It would be better to have a way + * to check if a file is a FUSE mountpoint instead of just + * hardcoding .gvfs */ + if (is_in_dot_gvfs (file)) + goto exit; + + parse_name = g_file_get_parse_name (file); + + if (g_file_info_has_attribute (info, G_FILE_ATTRIBUTE_STANDARD_SIZE)) + retloop.size = g_file_info_get_size (info); + + if (g_file_info_has_attribute (info, G_FILE_ATTRIBUTE_UNIX_BLOCKS)) + retloop.alloc_size = BLOCK_SIZE * + g_file_info_get_attribute_uint64 (info, + G_FILE_ATTRIBUTE_UNIX_BLOCKS); + + if (g_file_info_has_attribute (info, G_FILE_ATTRIBUTE_STANDARD_DISPLAY_NAME)) + display_name = g_strdup (g_file_info_get_display_name (info)); + else + /* paranoid fallback */ + display_name = g_filename_display_basename (g_file_info_get_name (info)); + + /* load up the file enumerator */ + file_enum = g_file_enumerate_children (file, + dir_attributes, + G_FILE_QUERY_INFO_NOFOLLOW_SYMLINKS, + NULL, + &err); + + if (file_enum == NULL) { + if (!g_error_matches (err, G_IO_ERROR, G_IO_ERROR_PERMISSION_DENIED)) { + g_warning ("couldn't get dir enum for dir %s: %s\n", + parse_name, err->message); + } + goto exit; + } + + /* All skipped folders (i.e. bad type, excluded, /proc) must be + skept *before* this point. Everything passes the prefill-model + will be part of the GUI. */ + + /* prefill the model */ + data.size = 1; + data.alloc_size = 1; + data.depth = count - 1; + data.elements = -1; + data.display_name = display_name; + data.parse_name = parse_name; + data.tempHLsize = tempHLsize; + baobab_fill_model (&data); + + g_clear_error (&err); + while ((temp_info = g_file_enumerator_next_file (file_enum, + NULL, + &err)) != NULL) { + GFileType temp_type = g_file_info_get_file_type (temp_info); + if (baobab.STOP_SCANNING) { + g_object_unref (temp_info); + g_object_unref (file_enum); + goto exit; + } + + /* is a directory? */ + if (temp_type == G_FILE_TYPE_DIRECTORY) { + GFile *child_dir = g_file_get_child (file, + g_file_info_get_name (temp_info)); + temp = loopdir (child_dir, temp_info, count, hla, current_depth+1); + retloop.size += temp.size; + retloop.alloc_size += temp.alloc_size; + retloop.depth = ((temp.depth + 1) > retloop.depth) ? temp.depth + 1 : retloop.depth; + elements++; + g_object_unref (child_dir); + } + + /* is it a regular file? */ + else if (temp_type == G_FILE_TYPE_REGULAR) { + + /* check for hard links only on local files */ + if (g_file_info_has_attribute (temp_info, + G_FILE_ATTRIBUTE_UNIX_NLINK) && + g_file_info_get_attribute_uint32 (temp_info, + G_FILE_ATTRIBUTE_UNIX_NLINK) > 1) { + + if (!baobab_hardlinks_array_add (hla, temp_info)) { + + /* we already acconted for it */ + tempHLsize += g_file_info_get_size (temp_info); + g_object_unref (temp_info); + continue; + } + } + + if (g_file_info_has_attribute (temp_info, G_FILE_ATTRIBUTE_UNIX_BLOCKS)) { + retloop.alloc_size += BLOCK_SIZE * + g_file_info_get_attribute_uint64 (temp_info, + G_FILE_ATTRIBUTE_UNIX_BLOCKS); + } + retloop.size += g_file_info_get_size (temp_info); + elements++; + } + + /* ignore other types (symlinks, sockets, devices, etc) */ + + g_object_unref (temp_info); + } + + /* won't be an error if we've finished normally */ + if (err != NULL) { + g_warning ("error in dir %s: %s\n", + parse_name, err->message); + } + + data.display_name = display_name; + data.parse_name = parse_name; + data.size = retloop.size; + data.alloc_size = retloop.alloc_size; + data.depth = count - 1; + data.elements = elements; + data.tempHLsize = tempHLsize; + baobab_fill_model (&data); + g_object_unref (file_enum); + + exit: + g_free (dir_uri); + g_free (display_name); + g_free (parse_name); + if (err) + g_error_free (err); + + return retloop; +} + +void +baobab_scan_execute (GFile *location) +{ + BaobabHardLinkArray *hla; + GFileInfo *info; + GError *err = NULL; + GFileType ftype; + struct allsizes sizes; + + g_return_if_fail (location != NULL); + + /* NOTE: for the root of the scan we follow symlinks */ + info = g_file_query_info (location, + dir_attributes, + G_FILE_QUERY_INFO_NONE, + NULL, + &err); + + if (info == NULL) { + char *parse_name = g_file_get_parse_name (location); + g_warning ("couldn't get info for dir %s: %s\n", + parse_name, err->message); + g_free (parse_name); + g_error_free (err); + + return; + } + + ftype = g_file_info_get_file_type (info); + + if (ftype == G_FILE_TYPE_DIRECTORY) { + hla = baobab_hardlinks_array_create (); + + sizes = loopdir (location, info, 0, hla, 0); + baobab.model_max_depth = sizes.depth; + + baobab_hardlinks_array_free (hla); + } + + g_object_unref (info); +} + |