/* * baobab-scan.c * This file is part of baobab * * Copyright (C) 2005-2006 Fabio Marzocca * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, * Boston, MA 02110-1301 USA */ #include #include #include #include #include #include "baobab.h" #include "baobab-utils.h" /* Hardlinks handling. As long as we're optimistic about hardlinks count over the whole system (250 files with st_nlink > 1 here), we keep linear search. If it turns out to be an bottleneck we can switch to an hash table or tree. TODO: get real timings about this code. find out the average number of files with st_nlink > 1 on average computer. To save memory, we store only { inode, dev } instead of full GFileInfo. EDIT: /me stupid. I realize that this code was not called that often 1 call per file with st_nlink > 1. BUT, i'm using pdumpfs to backup my /etc. pdumpfs massively uses hard links. So there are more than 5000 files with st_nlink > 1. I believe this is the worst case. */ typedef struct { guint64 inode; dev_t device; } BaobabHardLink; typedef GArray BaobabHardLinkArray; static BaobabHardLinkArray * baobab_hardlinks_array_create (void) { return g_array_new (FALSE, FALSE, sizeof(BaobabHardLink)); } static gboolean baobab_hardlinks_array_has (BaobabHardLinkArray *a, BaobabHardLink *s) { guint i; for (i = 0; i < a->len; ++i) { BaobabHardLink *cur = &g_array_index (a, BaobabHardLink, i); /* * cur->st_dev == s->st_dev is the common case and may be more * expansive than cur->st_ino == s->st_ino * so keep this order */ if (cur->inode == s->inode && cur->device == s->device) return TRUE; } return FALSE; } /* return FALSE if the element was already in the array */ static gboolean baobab_hardlinks_array_add (BaobabHardLinkArray *a, GFileInfo *s) { if (g_file_info_has_attribute (s, G_FILE_ATTRIBUTE_UNIX_INODE) && g_file_info_has_attribute (s, G_FILE_ATTRIBUTE_UNIX_DEVICE)) { BaobabHardLink hl; hl.inode = g_file_info_get_attribute_uint64 (s, G_FILE_ATTRIBUTE_UNIX_INODE); hl.device = g_file_info_get_attribute_uint32 (s, G_FILE_ATTRIBUTE_UNIX_DEVICE); if (baobab_hardlinks_array_has (a, &hl)) return FALSE; g_array_append_val (a, hl); return TRUE; } else { g_warning ("Could not obtain inode and device for hardlink"); } return FALSE; } static void baobab_hardlinks_array_free (BaobabHardLinkArray *a) { /* g_print ("HL len was %d\n", a->len); */ g_array_free (a, TRUE); } #define BLOCK_SIZE 512 struct allsizes { goffset size; goffset alloc_size; gint depth; }; static const char *dir_attributes = \ G_FILE_ATTRIBUTE_STANDARD_NAME "," \ G_FILE_ATTRIBUTE_STANDARD_DISPLAY_NAME "," \ G_FILE_ATTRIBUTE_STANDARD_TYPE "," \ G_FILE_ATTRIBUTE_STANDARD_SIZE "," \ G_FILE_ATTRIBUTE_UNIX_BLOCKS "," \ G_FILE_ATTRIBUTE_UNIX_NLINK "," \ G_FILE_ATTRIBUTE_UNIX_INODE "," \ G_FILE_ATTRIBUTE_UNIX_DEVICE "," \ G_FILE_ATTRIBUTE_ACCESS_CAN_READ; static gboolean is_in_dot_gvfs (GFile *file) { static GFile *dot_gvfs_dir = NULL; GFile *parent; gboolean res = FALSE; if (dot_gvfs_dir == NULL) { gchar *dot_gvfs; dot_gvfs = g_build_filename (g_get_home_dir (), ".gvfs", NULL); dot_gvfs_dir = g_file_new_for_path (dot_gvfs); g_free (dot_gvfs); } parent = g_file_get_parent (file); if (parent != NULL) { res = g_file_equal (parent, dot_gvfs_dir); g_object_unref (parent); } return res; } static struct allsizes loopdir (GFile *file, GFileInfo *info, guint count, BaobabHardLinkArray *hla, gint current_depth) { guint64 tempHLsize = 0; gint elements = 0; struct chan_data data; struct allsizes retloop, temp; GFileInfo *temp_info; GFileEnumerator *file_enum; gchar *dir_uri = NULL; gchar *display_name = NULL; gchar *parse_name = NULL; GError *err = NULL; count++; retloop.size = 0; retloop.alloc_size = 0; retloop.depth = 0; /* Skip the user excluded folders */ if (baobab_is_excluded_location (file)) goto exit; /* Skip the virtual file systems */ if (is_virtual_filesystem (file)) goto exit; /* FIXME: skip dirs in ~/.gvfs. It would be better to have a way * to check if a file is a FUSE mountpoint instead of just * hardcoding .gvfs */ if (is_in_dot_gvfs (file)) goto exit; parse_name = g_file_get_parse_name (file); if (g_file_info_has_attribute (info, G_FILE_ATTRIBUTE_STANDARD_SIZE)) retloop.size = g_file_info_get_size (info); if (g_file_info_has_attribute (info, G_FILE_ATTRIBUTE_UNIX_BLOCKS)) retloop.alloc_size = BLOCK_SIZE * g_file_info_get_attribute_uint64 (info, G_FILE_ATTRIBUTE_UNIX_BLOCKS); if (g_file_info_has_attribute (info, G_FILE_ATTRIBUTE_STANDARD_DISPLAY_NAME)) display_name = g_strdup (g_file_info_get_display_name (info)); else /* paranoid fallback */ display_name = g_filename_display_basename (g_file_info_get_name (info)); /* load up the file enumerator */ file_enum = g_file_enumerate_children (file, dir_attributes, G_FILE_QUERY_INFO_NOFOLLOW_SYMLINKS, NULL, &err); if (file_enum == NULL) { if (!g_error_matches (err, G_IO_ERROR, G_IO_ERROR_PERMISSION_DENIED)) { g_warning ("couldn't get dir enum for dir %s: %s\n", parse_name, err->message); } goto exit; } /* All skipped folders (i.e. bad type, excluded, /proc) must be skept *before* this point. Everything passes the prefill-model will be part of the GUI. */ /* prefill the model */ data.size = 1; data.alloc_size = 1; data.depth = count - 1; data.elements = -1; data.display_name = display_name; data.parse_name = parse_name; data.tempHLsize = tempHLsize; baobab_fill_model (&data); g_clear_error (&err); while ((temp_info = g_file_enumerator_next_file (file_enum, NULL, &err)) != NULL) { GFileType temp_type = g_file_info_get_file_type (temp_info); if (baobab.STOP_SCANNING) { g_object_unref (temp_info); g_object_unref (file_enum); goto exit; } /* is a directory? */ if (temp_type == G_FILE_TYPE_DIRECTORY) { GFile *child_dir = g_file_get_child (file, g_file_info_get_name (temp_info)); temp = loopdir (child_dir, temp_info, count, hla, current_depth+1); retloop.size += temp.size; retloop.alloc_size += temp.alloc_size; retloop.depth = ((temp.depth + 1) > retloop.depth) ? temp.depth + 1 : retloop.depth; elements++; g_object_unref (child_dir); } /* is it a regular file? */ else if (temp_type == G_FILE_TYPE_REGULAR) { /* check for hard links only on local files */ if (g_file_info_has_attribute (temp_info, G_FILE_ATTRIBUTE_UNIX_NLINK) && g_file_info_get_attribute_uint32 (temp_info, G_FILE_ATTRIBUTE_UNIX_NLINK) > 1) { if (!baobab_hardlinks_array_add (hla, temp_info)) { /* we already acconted for it */ tempHLsize += g_file_info_get_size (temp_info); g_object_unref (temp_info); continue; } } if (g_file_info_has_attribute (temp_info, G_FILE_ATTRIBUTE_UNIX_BLOCKS)) { retloop.alloc_size += BLOCK_SIZE * g_file_info_get_attribute_uint64 (temp_info, G_FILE_ATTRIBUTE_UNIX_BLOCKS); } retloop.size += g_file_info_get_size (temp_info); elements++; } /* ignore other types (symlinks, sockets, devices, etc) */ g_object_unref (temp_info); } /* won't be an error if we've finished normally */ if (err != NULL) { g_warning ("error in dir %s: %s\n", parse_name, err->message); } data.display_name = display_name; data.parse_name = parse_name; data.size = retloop.size; data.alloc_size = retloop.alloc_size; data.depth = count - 1; data.elements = elements; data.tempHLsize = tempHLsize; baobab_fill_model (&data); g_object_unref (file_enum); exit: g_free (dir_uri); g_free (display_name); g_free (parse_name); if (err) g_error_free (err); return retloop; } void baobab_scan_execute (GFile *location) { BaobabHardLinkArray *hla; GFileInfo *info; GError *err = NULL; GFileType ftype; struct allsizes sizes; g_return_if_fail (location != NULL); /* NOTE: for the root of the scan we follow symlinks */ info = g_file_query_info (location, dir_attributes, G_FILE_QUERY_INFO_NONE, NULL, &err); if (info == NULL) { char *parse_name = g_file_get_parse_name (location); g_warning ("couldn't get info for dir %s: %s\n", parse_name, err->message); g_free (parse_name); g_error_free (err); return; } ftype = g_file_info_get_file_type (info); if (ftype == G_FILE_TYPE_DIRECTORY) { hla = baobab_hardlinks_array_create (); sizes = loopdir (location, info, 0, hla, 0); baobab.model_max_depth = sizes.depth; baobab_hardlinks_array_free (hla); } g_object_unref (info); }