1. Look up all references to incremental distance join of Hjaltason and Samet in SIGMOD98 (Hjal98) and summarize them to see what is different. This includes a comparison with the work on reverse nearest neighbor queries which seems to be quite similar to the distance semi-join. Some experimental implementation and testing would make the project stronger. @InProceedings{Hjal98, folder = "HJALTASON", author = {G. R. Hjaltason and H. Samet}, title = {Incremental distance join algorithms for spatial databases}, booktitle = "Proceedings of the {ACM SIGMOD} Conference", editor = {L. Haas and A. Tiwary}, year = 1998, address = {Seattle, WA}, month = jun, pages = {237--248}, abstract = {Two new spatial join operations, distance join and distance semi-join, are introduced where the join output is ordered by the distance between the spatial attribute values of the joined tuples. Incremental algorithms are presented for computing these operations, which can be used in a pipelined fashion, thereby obviating the need to wait for their completing when only a few tuples are needed.}, keywords = {[spatial join; incremental nearest neighbor; incremental algorithms; distance join]} } @InProceedings{Mamo99, folder = "HJALTASON/DISTANCE SEMI-JOIN", author = {N. Mamoulis and D. Papadias}, title = {Integration of spatial join algorithms for processing multiple inputs}, booktitle = "Proceedings of the {ACM SIGMOD} Conference", year = 1999, address = {Philadelphia}, month = jun, pages = {1--12}, abstract = {In this paper we analyze previous work on spatial joins and propose a novel algorithm, called slot index spatial join (SISJ), that efficiently computes the spatial join between two inputs, only one of which is indexed by an R-tree.}, keywords = {[spatial join]} } @InProceedings{Korn00, folder = {KORN/RNN?}, author = {F. Korn and S. Muthukrishnan}, title = {Influence sets based on reverse nearest neighbor queries}, booktitle = "Proceedings of the {ACM SIGMOD} Conference", editor = {W. Chen and J. Naughton and P. A. Bernstein}, year = 2000, address = {Dallas}, month = may, pages = {201--212}, keywords = {[distance join; reverse nearest neighbor queries]} } @article{Lin00, folder = "HJALTASON/DISTANCE SEMI-JOIN", author = "X. Lin and X. Zhou and Ch. Liu", title = "Efficient computation of a proximity matching in spatial databases", journal = "Data and Knowledge Engineering", pages = "204--241", year = 2000, month = apr, volume = 33, number = 1, keywords = "[distance semi-join; proximity matching; spatial data mining]" } @InProceedings{Shin00, folder = {HJALTASON/DISTANCE SEMI-JOIN}, author = {H. Shin and B. Moon and S. Lee}, title = {Adaptive Multi-Stage Distance Join Processing}, booktitle = "Proceedings of the {ACM SIGMOD} Conference", editor = {W. Chen and J. Naughton and P. A. Bernstein}, year = 2000, address = {Dallas}, month = may, pages = {343--354}, keywords = {[distance join]} } @InProceedings{Stan00, folder = {KORN/RNN?}, author = {I. Stanoi and D. Agrawal and A. {El Abbadi}}, title = {Reverse nearest neighbor queries for dynamic databases}, booktitle = {Proceedings {ACM} {SIGMOD} Workshop on Research Issues in Data Mining and Knowledge Discovery}, editor = {D. Gunopulos and R. Rastogi}, address = {Dallas}, year = 2000, month = may, pages = {44--53}, keywords = {[distance join; reverse nearest neighbor queries]} } @Article{Mamo01, folder = "HJALTASON/DISTANCE SEMI-JOIN", author = {N. Mamoulis and D. Papadias}, title = {Multiway spatial joins}, journal = acmtods, year = 2001, month = dec, volume = 26, number = 4, pages = {424--475}, keywords = {[spatial join]} } @InProceedings{Stan01, folder = {KORN/RNN?}, author = {I. Stanoi and M. Riedewald and D. Agrawal and A. {El Abbadi}}, title = {Discovery of influence sets in frequently updated databases}, booktitle = "Proceedings of the 27th International Conference on Very Large Data Bases ({VLDB})", editor = {P. M. G. Apers and P. Atzeni and S. Ceri and S. Paraboschi and K. Ramamohanarao and R. T. Snodgrass}, address = {Roma, Italy}, year = 2001, month = sep, pages = {99--108}, keywords = {[distance join; reverse nearest neighbor queries]} } @InProceedings{Yang01, folder = {HJALTASON/DISTANCE SEMI-JOIN}, author = {C. Yang and K.-I. Lin}, title = {An index structure for efficient reverse nearest neighbor queries}, booktitle = "Proceedings of the 17th {IEEE} International Conference on Data Engineering", year = 2001, address = {Heidelberg, Germany}, month = apr, pages = {485--492}, keywords = {[points; reverse nearest neighbor; distance semi-join]} } @Article{Kosc02, folder = "HJALTASON/DISTANCE SEMI-JOIN", author = {H. Kosch and S. Atnafu}, title = {Processing a multimedia join through the method of nearest neighbor search}, journal = {Information Processing Letters}, year = 2002, volume = 82, number = 5, month = jun, pages = {269--276}, keywords = {[distance semi-join; nearest neighbor]} } @techreport{Jin02, folder = {HJALTASON/DISTANCE SEMI-JOIN}, author = {L. Jin and C. Li and S. Mehrotra}, title = {Efficient similarity string joins in large data sets}, institution = {Department of Information and Computer Science, University of California}, year = 2002, number = {TR-DB-02-04}, address = {Irvine, CA}, month = feb, keywords = {strings; distance semi-join; FastMap]} } @InProceedings{Yang02, folder = {HJALTASON/DISTANCE SEMI-JOIN}, author = {C. Yang and K.-I. Lin}, title = {An index structure for improving nearest closest pairs and related join queries in spatial databases}, editor = {M. A. Nascimento M. T. \"{O}zsu and O. E. Zaiane}, booktitle = {Proceedings of the International Database Engineering and Applications Symposium}, address = {Edmonton, Canada}, month = jul, year = {2002}, pages = {140--149}, keywords = {[points; reverse nearest neighbor; distance semi-join]} } ----------------------------------------------------------------------------- 2. Devise a marching cube algorithm with octrees. See the nice 2 page explanation of marching cubes by Matt Ward of Worcester Polytechnic University in Massachusetts at http://cs.wpi.edu/~matt/courses/cs563/talks/march_cub.html ----------------------------------------------------------------------------- 3. Investigate spatial data mining further. Erica Kolatch wrote a nice survey. One suggestion is to devise an incremental clustering algorithm where instead of getting the nearest neighbor, we get a cluster of neighbors which are somewhat similar. This project needs some research on spatial data mining and can start with Erica's survey paper. However, in this project you will have to develop an algorithm and find an application area to test it with respect to experiments. ----------------------------------------------------------------------------- 4. Applications of incremental nearest neighbor (INN) algorithm bioinformatics applications. This project would have to be in collaboration with a domain expert like Scott Federhen. ----------------------------------------------------------------------------- 5. Look at incremental nearest neighbor finding in a domain where we do not have a metric space. Prof. David Jacobs may have some suggested application domains that involve computer vision. ----------------------------------------------------------------------------- 6. Create a set of JAVA applets for distance indexing (i.e., just in a metric space) that is similar to the VASCO system for spatial indexes. One of the main complexity issues here is the visualization of the space partitions. The work can be tried with the vp-tree, mvp tree, GNAT, and the sa-tree. In other words, the visualization would be best done with a Euclidean metric although the ability to switch between different metrics including the Minkowski metrics such as $L_1$ and $L_\infty}. ----------------------------------------------------------------------------- 7. Look at the following texture octree papers in SIGGRAPH 2002. How does this work tie in to the quadtree variants discussed in class and in the two books by Samet. Can the results reported in these papers be improved by using other quadtree variants or even R-trees (i.e., bounding box hierarchies). Also, look at the following suggested improvements by Robert E. Webber on August 1, 2002: A. the ILM paper seems to have a nice problems/future work section which is worth considering. B. one question is exactly where are the differences between the two papers. since the ILM paper views painting as a problem and the other paper thinks it supports painting, there are clearly some differences between the papers that could bear a closer look. C. a difference between the 2-d and 3-d versions of this work is the discussions of normals found in both papers. these seem sort of hacky and so could probably bear refinement. D. both papers extend from MipMaps. an alternative technology is summed area tables, which one could then consider extending into octrees and comparing the results. E. more globally, it might be worthwhile seeing what alternative approaches there are out there for 3d textures. doubtless the authors made some choices about what 3d texture research they wanted to extend, leaving other 3d texture research still unextended. F. 4d textures (3d + time)? video textures? how do they connect? @article{Bens02, folder = {FRISKEN/ADF}, author = {D. Benson and J. Davis}, title = {Octree textures}, journal = acmtogs, volume = 21, number = 3, pages = "785--790", month = jul, year = 2002, note = "Also {\it Proceedings of the SIGGRAPH'02 Conference}, San Antonio, TX, July 2002", abstract = "Texture mapping using octrees.", keywords = {[surfaces; texture mapping; adaptively sampled distance fields]} } @article{DeBr02, folder = {FRISKEN/ADF}, author = {D. DeBry and J. Gibbs and D. D. Petty and N. Robins}, title = {Painting and rendering textures on unparameterized models}, journal = acmtogs, volume = 21, number = 3, pages = "763--768", month = jul, year = 2002, note = "Also {\it Proceedings of the SIGGRAPH'02 Conference}, San Antonio, TX, July 2002", abstract = "Texture mapping using octrees.", keywords = {[surfaces; texture mapping; adaptively sampled distance fields]} } ----------------------------------------------------------------------------- 8. Conduct a review of data structures for spatiotemporal databases. This means indexing methods. ----------------------------------------------------------------------------- 9. Conduct a review of data structures for indexing moving objects. For example, look at the June 2002 issue of the Data Engineering Bulletin. Also as an implementation project, consider using PK tree for moving object databases when have varying capacity so that don't always have to rebuild the tree when objects move. The PK tree permits a larger variation in the number of objects that can be stored in a bucket than methods that split whenever a bucket has more than x items. ----------------------------------------------------------------------------- 10. Conduct a review of Kinetic data structures. This may be too close to the topic that suggests a review of the data structures for moving objects. However, there is probably a difference. Part of this project is to design and implement a set of JAVA applets in the same spirit as the VASCO system that demonstrates how some of these methods work. This may overlap a bit the thesis work of Glenn Iwerks (iwerks@umiacs), so you need to contact him first to see what he has accomplished, if anything at all, so far. ----------------------------------------------------------------------------- 11. Survey use of hierarchical spatial data structures in Finite Element Analysis. There are many papers on this topic. A good starting point is the CRC Handbook of Grid Generation. See ... @InCollection{Bake99, folder = {FINITE ELEMENT/GRID GENERATION}, author = "T. J. Baker", title = {Delaunay-Vorono\"{i} methods}, booktitle = {Handbook of Grid Generation}, editor = "J. F. Thompson and B. K. Soni and N. P. Weatherill", publisher = {CRC Press}, address = "Boca Raton, FL", year = "1999", chapter = 16, keywords = {[finite element analysis; grid generation; Delaunay triangulation; Voronoi diagram; constrained Delaunay triangulation]}, } @InCollection{deCo99, folder = {FINITE ELEMENT/GRID GENERATION}, author = {H. L. {de Cougny}}, title = {Parallel unstructured grid generation}, booktitle = {Handbook of Grid Generation}, editor = "J. F. Thompson and B. K. Soni and N. P. Weatherill", publisher = {CRC Press}, address = "Boca Raton, FL", year = "1999", chapter = 24, keywords = {[finite element analysis; grid generation; unstructured meshes; octrees; parallel processing]}, } @InCollection{Form99, folder = {FINITE ELEMENT/GRID GENERATION}, author = {L. Formaggia}, title = {Data structures for unstructured mesh generation}, booktitle = {Handbook of Grid Generation}, editor = "J. F. Thompson and B. K. Soni and N. P. Weatherill", publisher = {CRC Press}, address = "Boca Raton, FL", year = "1999", chapter = 14, keywords = {[finite element analysis; grid generation; unstructured meshes; trees; quadtrees]}, } @InCollection{Hama99, folder = {FINITE ELEMENT/GRID GENERATION}, author = {B. Hamann and B. A. Jean and A. Razdan}, title = {Computer-aided geometric design techniques for surface grid generation}, booktitle = {Handbook of Grid Generation}, editor = "J. F. Thompson and B. K. Soni and N. P. Weatherill", publisher = {CRC Press}, address = "Boca Raton, FL", year = "1999", chapter = 29, keywords = {[finite element analysis; grid generation; surfaces; k-d trees; surface intersection]}, } @InCollection{Kall99, folder = {FINITE ELEMENT/GRID GENERATION}, author = "Y. Kallinderis", title = {Hybrid grids and their applications}, booktitle = {Handbook of Grid Generation}, editor = "J. F. Thompson and B. K. Soni and N. P. Weatherill", publisher = {CRC Press}, address = "Boca Raton, FL", year = "1999", chapter = 25, keywords = {[finite element analysis; grid generation; hybrid grids; octrees; tetrahedra]}, } @InCollection{Marc99, folder = {FINITE ELEMENT/GRID GENERATION}, author = "D. L. Marcum", title = {Unstructured grid generation using automatic point insertion and local reconnection}, booktitle = {Handbook of Grid Generation}, editor = "J. F. Thompson and B. K. Soni and N. P. Weatherill", publisher = {CRC Press}, address = "Boca Raton, FL", year = "1999", chapter = 18, keywords = {[finite element analysis; unstructured grid generation; surfaces; tetrahedra]}, } @InCollection{Peir99, folder = {FINITE ELEMENT/GRID GENERATION}, author = {J. Peir\'{o}}, title = {Surface grid generation}, booktitle = {Handbook of Grid Generation}, editor = "J. F. Thompson and B. K. Soni and N. P. Weatherill", publisher = {CRC Press}, address = "Boca Raton, FL", year = "1999", chapter = 19, keywords = {[finite element analysis; grid generation; surface grid]}, } @InCollection{Schn99, folder = {FINITE ELEMENT/GRID GENERATION}, author = {R. Schneiders}, title = {Quadrilateral and hexahedral element meshes}, booktitle = {Handbook of Grid Generation}, editor = "J. F. Thompson and B. K. Soni and N. P. Weatherill", publisher = {CRC Press}, address = "Boca Raton, FL", year = "1999", chapter = 21, keywords = {[finite element analysis; grid generation; quadrilateral meshes; hexahedral meshes]}, } @InCollection{Shep99, folder = {FINITE ELEMENT/GRID GENERATION}, author = {M. S. Shephard and H. L. {de Cougny} and R. M. O'Bara and M. W. Beall}, title = {Automatic grid generation using spatially based trees}, booktitle = {Handbook of Grid Generation}, editor = "J. F. Thompson and B. K. Soni and N. P. Weatherill", publisher = {CRC Press}, address = "Boca Raton, FL", year = "1999", chapter = 15, keywords = {[finite element analysis; grid generation; quadtrees; octrees]}, } @proceedings{Thom99a, title = gridgen99, editor = "J. F. Thompson and B. K. Soni and N. P. Weatherill", publisher = {CRC Press}, address = "Boca Raton, FL", year = "1999", keywords = {[finite element analysis; grid generation]} } @InCollection{Thom99b, folder = {FINITE ELEMENT/GRID GENERATION}, author = "J. F. Thompson and B. K. Soni and N. P. Weatherill", title = {Fundamental concepts and approaches}, booktitle = {Handbook of Grid Generation}, editor = "J. F. Thompson and B. K. Soni and N. P. Weatherill", publisher = {CRC Press}, address = "Boca Raton, FL", year = "1999", chapter = 1, keywords = {[finite element analysis; grid generation; overview]}, } ----------------------------------------------------------------------------- 12. Look at the following comparison paper on quadtrees and R-trees as implemented in Oracle Spatial (Koth02,Kant99b). Examine the type of data that they used and the type of quadtrees. It seems that they use MX quadtrees for polygons. The quadtree implementation is actually a linear quadtree with various levels of detail for each region (Bada99,Koth01). In other words, the region is represented by the set of blocks that it spans. The finer the coverage, the more blocks are used. Try with a PMR quadtree for polygons or the line segments that form their boundaries. Also try a PK tree implementation using a PMR quadtree or PR quadtree. Perhaps a better comparison would be using k-d tree variants of these structures. This project will be done in consultation with Frantisek Brabec (brabec@umiacs) who is familiar with getting ORACLE to run on our system. @InProceedings{Kant99b, folder = {KOTHURI/ORACLE}, author = {K. V. R. Kanth and S. Ravada and J. Sharma and J. Banerjee}, title = {Indexing medium-dimensionality data in oracle}, booktitle = "Proceedings of the {ACM SIGMOD} Conference", year = 1999, address = {Philadelphia}, month = jun, pages = {521--522}, abstract = {In this paper, we describe the implementation of a new index type, called the R-tree, to support medium- dimensionality data using the extensible indexing framework of Oracle8i.}, keywords = {[]} } @InProceedings{Koth02, folder = {KOTHURI/ORACLE}, author = {R. K. V. Kothuri and S. Ravada and D. Abugov}, title = {Quadtree and R-tree indexes in oracle spatial: a comparison using GIS data}, booktitle = "Proceedings of the {ACM SIGMOD} Conference", year = 2002, address = {Madison, WI}, month = jun, pages = {546--557}, abstract = {Comparison of quadtrees and R-trees as implemented in Oracle spatial.}, keywords = {[quadtrees' R-trees; comparison]} } @InProceedings{Bada99, author = {W.M. Badawy and W.G. Aref}, title = {On local heuristics to speed up polygon-polygon intersection tests}, booktitle = "Proceedings of the 7th International Symposium on Advances in Geographic Information Systems", year = 1999, address = {Kansas City, MO}, month = nov, editor = {Claudia Bauzer Medeiros}, pages = {97--102}, abstract = {The polygon-polygon intersection operation is CPU-intensive. Many data structures look into decomposing the polygons into multiple yet simple pieces to speed up the polygon-polygon intersection operation. This paper addresses local heuristics that can be adopted in these data structures by using local information about the simple polygon pieces to decide upon polygon-polygon intersections without having to perform this costly operation. The significance and effectiveness of each of the heuristics is studied. The paper also shows how these heuristics can be put together to perform a polygon join operation. Experiments are given to demonstrate the savings both in CPU and in I/O that result from these local heuristics.}, keywords = {[Spatial databases, polygon-polygon intersection, query processing]}, } @InProceedings{Koth01, folder = {KOTHURI/ORACLE}, author = {R. K. Kothuri and S. Ravada}, title = {Efficient processing of large spatial queries using interior approximations}, booktitle = "Advances in Spatial and Temporal Databases --- Seventh International Symposium, {SSTD}'01", editor = {C. S. Jensen and M. Schneider and B. Seeger and V. J. Tsotras}, year = 2001, address = {Redondo Beach, CA}, month = jul, macronote = lncs2121, pages = {404--421}, keywords = {[regions; quadtrees]} } ----------------------------------------------------------------------------- 13. The SAND (denoting Spatial And Nonspatial Database) System stores the spatial data in a spatial data structures (multiple structures are supported) while non-spatial data are stored in B-trees. The commands are expressed with a graphical user interface which translates them into SAND-Tcl (SAND scripting language) commands which are interpreted using a specially-developed relational database. Modify the existing SAND kernel so that non-spatial Sand-Tcl operations are translated into SQL commands that are passed to an SQL database for execution. Use 'mysql' (www.mysql.com) which is a popular free SQL database available on our Linux box. Also, do the same for the ORACLE spatial data option. This project will be done in consultation with Frantisek Brabec (brabec@umiacs) who is familiar with getting mysql to run on our system. @Article{Espe02, folder = {ESPERANCA}, author = {C. Esperan\c{c}a and H. Samet}, title = {Experience with {SAND/Tcl}: a scripting tool for spatial databases}, journal = "Journal of Visual Languages and Computing", year = 2002, month = apr, volume = 13, number = 2, pages = {229--255}, abstract = {The use of scripting makes it possible to overcome many important difficulties in the development of database applications. By extending a general-purpose scripting language with constructs derived both from the database kernel and from the intended application domain, issues such as query processing and user interfacing can be approached in an economical and flexible way. This is illustrated by describing our experience with {\em SAND-Tcl}, a scripting tool developed by us for building spatial database applications. SAND-Tcl is an extension of the Tcl embedded scripting language with the constructs of the SAND environment for developing applications involving both spatial and non-spatial data. SAND-Tcl acts as a ``glue'' to hold together all the subsystems of SAND. In fact, query evaluation plans are SAND-Tcl programs (or {\em scripts}) which are written on-the-fly by SAND in response to a query defined by the user. This permits the rapid prototyping of algorithms and makes SAND a useful tool both for applications and research. The focus is on data storage, retrieval operations, and spatial indexing. Implementations of operations such as spatial selection, ranking, and spatial join are given. In addition, tools are described to make possible the construction of graphical user interfaces to a spatial database as well as providing users the ability to view and interact with spatial objects in a graphical manner. This is achieved through the use of SAND-Tcl scripts and the Tk graphical user interface toolkit which is tightly coupled to Tcl.}, keywords = {[spatial databases; user interfaces; scripting]} } ----------------------------------------------------------------------------- 14. Get a copy of the ORACLE spatial data option which is available on my linux machines and compare with the SAND System which is accessed via the SAND Spatial Browser. Study what kind of spatial indexes they use and evaluate them. This means that you need to prepare multiple data sets of various types of data and import them into both systems. Develop benchmark test and perform thorough performance testing of both systems. Test SAND with multiple underlying data structures (R-tree, PK-tree, PMR-tree, ...) and their parameters (e.g. bucket size, thresholds, etc). Provide conclusions and suggest the best performing SAND structures and their parameters. In order to use the ORACLE database for the same data, you may need to develop some form of translating from the SAND-Tcl commands to a form of SQL which is compatible with ORACLE. You may want to adapt the results of Project 13 for mysql. This project will be done in consultation with Frantisek Brabec (brabec@umiacs) who is familiar with getting ORACLE to run on our system. ----------------------------------------------------------------------------- 15. For large enough datasets, zooming out beyond a certain point is impractical as drawing all the data takes long time and may completely fill the viewing area. Several layers representing different levels of detail are needed. Generalization is one method of providing such varying levels of detail. Investigate the generalization problem and the proposed solutions, and implement a tool that given a SAND data set will generate another data set that contains fewer elements (the reduction rate is an input parameter) and/or these elements are simplified (e.g., a complex polygon with hundreds of vertices will become a simpler with perhaps tens of vertices), smaller objects can be merged into one, etc. The new dataset needs to remain visually similar to the original one. This should be incorporated into the SAND Internet Browser. This project will be done in consultation with Frantisek Brabec (brabec@umiacs). ----------------------------------------------------------------------------- 16. The SAND Tcl browser loads all the spatial data in the relation into the main memory. This can result in running out of memory when the relation is sufficiently large. Modify the Tcl client memory management so that the viewer only stores limited amount of information in the memory and loads additional data from the SAND kernel when needed while possibly dropping data no longer needed. Thus the project is to provide memory caching for SAND Tcl. This project will be done in consultation with Frantisek Brabec (brabec@umiacs). ----------------------------------------------------------------------------- 17. The SAND Spatial Browser (Java app) allows users on Java-supporting platforms view and operate on spatial datasets stored on a remote server. The Java application downloads spatial data from the server when a certain view is requested by the user. This data is then cached on the client in case the same data is viewed again. Old data is dropped from the main memory when the memory is running low. For this project, you are to modify the client-based data management so that not only the main memory but also the local disk is used to store data. This can be beneficial not just to cache more data locally within a single session but also to quickly restart the client next time by tapping the local data before contacting the server. Use both memory and disk caching for maximum performance. This project will be done in consultation with Frantisek Brabec (brabec@umiacs). ----------------------------------------------------------------------------- 18. Study depth-first k-nearest neighbor finding with the M-tree by using pruning Rules 1-5 of Fukunaga/Narendra. Kamgar-Parsi/Kanal, and Samet. It seems that the original presentation of the M-tree only makes use of pruning Rule 1. The idea is to see if these additional pruning rules make a difference. @inproceedings{Ciac97, folder = {BURKHARD/KELLER/METRIC}, author = {P. Ciaccia and M. Patella and P. Zezula}, title = {M-tree: An efficient access method for similarity search in metric spaces}, booktitle = "Proceedings of the 23rd International Conference on Very Large Data Bases ({VLDB})", editor = "M. Jarke and M. J. Carey and K. R. Dittrich and F. H. Lochovsky and P. Loucopoulos and M. A. Jeusfeld", year = 1997, address = "Athens, Greece", month = aug, pages = {426--435}, keywords = {[metric spaces; search tree]}, abstract = {A new access method, called M-tree, is proposed to organize and search large data sets from a generic ``metric space'', i.e. where object proximity is only defined by a distance function satisfying the positivity, symmetry, and triangle inequality postulates.} } @article{Fuku75, folder = "BURKHARD/KELLER/METRIC", author = "K. Fukunaga and P. M. Narendra", title = "A Branch and Bound Algorithm for Computing $k$-Nearest Neighbors", journal = "IEEE Transactions on Computers", volume = 24, number = 7, pages = "750--753", month = jul, year = 1975, keywords = "[metric space; points; pattern recognition; feature points]" } @article{Kamg85, folder = "BURKHARD/KELLER/METRIC", author = "B. Kamgar-Parsi and L. N. Kanal", title = "An improved branch and bound algorithm for computing $k$-nearest neighbors", journal = prl, volume = 3, number = 1, month = jan, year = 1985, keywords = "[points; nearest neighbor]" } ----------------------------------------------------------------------------- 19. Compare the general incremental nearest neighbor algorithm of Hjaltason and Samet (Hjal99b,Hjal00b) with the fully-enhanced k-nearest neighbor algorithm of Fukunaga/Narendra, Kamgar-Parsi, and Samet for k-nearest neighbor finding both in a vector space and in an arbitrary metric space. @Article{Hjal99b, folder = "HJALTASON?", author = {G. R. Hjaltason and H. Samet}, title = {Distance Browsing in Spatial Databases}, journal = acmtods, year = 1999, month = jun, volume = 24, number = 2, pages = {265--318}, note = {Also Computer Science TR-3919, University of Maryland, College Park, MD}, abstract = {Two different techniques of browsing through a collection of spatial objects stored in an R-tree spatial data structure on the basis of their distances from an arbitrary spatial query object are compared. The conventional approach is one that makes use of a $k$-nearest neighbor algorithm where $k$ is known prior to the invocation of the algorithm. Thus if $m > k$ neighbors are needed, the $k$-nearest neighbor algorithm needs to be reinvoked for $m$ neighbors, thereby possibly performing some redundant computations. The second approach is incremental in the sense that having obtained the $k$ nearest neighbors, the $k+1^{\it st}$ neighbor can be obtained without having to calculate the $k+1$ nearest neighbors from scratch. The incremental approach finds use when processing complex queries where one of the conditions involves spatial proximity (e.g., the nearest city to Chicago with population greater than a million), in which case a query engine can make use of a pipelined strategy. A general incremental nearest neighbor algorithm is presented that is applicable to a large class of hierarchical spatial data structures. This algorithm is adapted to the R-tree and its performance is compared to an existing $k$-nearest neighbor algorithm for R-trees~\cite{Rous95}. Experiments show that the incremental nearest neighbor algorithm significantly outperforms the $k$-nearest neighbor algorithm for distance browsing queries in a spatial database that uses the R-tree as a spatial index. Moreover, the incremental nearest neighbor algorithm also usually outperforms the $k$-nearest neighbor algorithm when applied to the $k$-nearest neighbor problem for the R-tree, although the improvement is not nearly as large as for distance browsing queries.}, keywords = {[incremental nearest neighbor algorithm]} } @techreport{Hjal00b, folder = {HJALTASON?}, author = {G. R. Hjaltason and H. Samet}, title = {Incremental similarity search in multimedia databases}, institution = {University of Maryland}, address = "College Park, MD", number = {TR-4199}, type = {Computer Science Department}, month = nov, year = 2000, abstract = {}, keywords = {[points; high\hyp{}dimensional data; similarity searching; incremental nearest neighbor]} } @article{Fuku75, folder = "BURKHARD/KELLER/METRIC", author = "K. Fukunaga and P. M. Narendra", title = "A Branch and Bound Algorithm for Computing $k$-Nearest Neighbors", journal = "IEEE Transactions on Computers", volume = 24, number = 7, pages = "750--753", month = jul, year = 1975, keywords = "[metric space; points; pattern recognition; feature points]" } @article{Kamg85, folder = "BURKHARD/KELLER/METRIC", author = "B. Kamgar-Parsi and L. N. Kanal", title = "An improved branch and bound algorithm for computing $k$-nearest neighbors", journal = prl, volume = 3, number = 1, month = jan, year = 1985, keywords = "[points; nearest neighbor]" } ----------------------------------------------------------------------------- 20. Investigate the use of quadtree-like data structures for VLSI applications. You need to be well-versed in VLSI to do this project as the idea is to draw on your experience in the industry and try to adapt some of the techniques that we will discuss in class. ----------------------------------------------------------------------------- 21. Look at the game programming literature and try to find an application where use of quadtrees would be beneficial and implement it. The idea is to see if some performance could be enhanced with these modifications. This will require you to get up to speed on what types of operations must be executed quickly in the games and how spatial sorting may help. To get some background, you may want to consult the series of three books called "Game Programming Gems" published by Charles River Media. ----------------------------------------------------------------------------- 22. Investigate computing the Hausdorff distance between two sets using spatial data structures. Some information about the Hausdorff distance can be found at: http://www.cs.cornell.edu/vision/hausdorff/hausdist.html In essence, this is the farthest nearest neighbor! Given sets $A$ and $B$, we find the nearest neighbor in $B$ of each element of $a$ in $A$. We then take the distance to the farthest nearest neighbor. Call this $h(A,B)$ where: $h(A,B)=\max_{a \in A} \min_{b \in B} d(a,b)$. $h(A,B)$ is called the {\em directed Hausdorff distance}. It is not symmetric and thus is not a metric. Next, repeat the computation for the nearest neighbor in $A$ of each element $b$ in $B$. Again, we take the distance to the farthest neighbor. We call this $h(B,A)$ where $h(B,A)=\max_{b \in B} \min_{a \in A} d(b,a)$. The {\em Hausdorff distance} $H(A,B)$ is the maximum of $h(A,B)$ and $h(B,A)$ --- that is, $H(A,B) = \max (h(A,B),h(B,A))$. Notice that $h(A,B)$ is not symmetric. The Hausdorff distance is used to measure the degree of mismatch between two sets. In essence, if the Hausdorff distance between $A$ and $B$ is $\delta$, then every point of $A$ must be within distance $\delta$ of at least one point of $B$ and vice versa. The Hausdorff distance is a metric over all closed, bounded sets --- that is, it is non-negative, symmetric, and obeys the triangle inequality. The directed Hausdorff distance is useful in that given $h(A,B) \leq \delta$, then we know that if we compute the expansion of $B$ by $\delta$ (termed the Minkowski sum of $B$ and $\delta$) yielding $B'$, then we are guaranteed that every point in $A$ is in the expanded region $B'$ --- that is, $A \subset B'$. The first value returned by the incremental distance semi-join algorithm is $C(A,B) = \min_{a \in A} \min_{b \in B} d(a,b)$. This is the closest pair. It is a metric. Thus if we compute the expansion of $A$ ($B$) by $\delta = C(A,B)$ yielding $A'$ ($B'$), then we are guaranteed to have $A'$ and $B$ ($B'$ and $A$) as disjoint. There are a number of applications for the Hausdorff distance. For example, it can be used for buffer algorithms and to do matching. In computer graphics you often have simplification algorithms which take a triangular mesh that approximates a surface and remove some triangles and retriangulate, etc. This is done iteratively. You then may want to match the results of two simplifications of the same surface or even the original surface against the result of simplifying it. You want to know how bad is the error. The Hausdorff distance give you a measure of this error. As we see, the Hausdorff distance is expensive to compute. Perhaps given an appropriate spatial data structure for storing a surface, we can somehow use the incremental nearest neighbor algorithm to compute such error measures more quickly. One approach is to attach bounding box hierarchies to the simplifications. This type of work finds application in Visualization and Computer Graphics. As we have mentioned, the Hausdorff distance is somewhat related to the distance join and distance semi-join described by Hjaltason and Samet {Hjal98}. It would be interesting if some of the algorithms used for these two operations would be applicable here as well. @InProceedings{Hjal98, folder = "HJALTASON", author = {G. R. Hjaltason and H. Samet}, title = {Incremental distance join algorithms for spatial databases}, booktitle = "Proceedings of the {ACM SIGMOD} Conference", editor = {L. Haas and A. Tiwary}, year = 1998, address = {Seattle, WA}, month = jun, pages = {237--248}, abstract = {Two new spatial join operations, distance join and distance semi-join, are introduced where the join output is ordered by the distance between the spatial attribute values of the joined tuples. Incremental algorithms are presented for computing these operations, which can be used in a pipelined fashion, thereby obviating the need to wait for their completing when only a few tuples are needed.}, keywords = {[spatial join; incremental nearest neighbor; incremental algorithms; distance join]} } Another mention of its use is in the following paper on solid modeling where the metric is based on the points inside of a solid instead of on the surface. Such metrics are called {\em solid metrics}. @article{Andu02, folder = "BRUNET / NAVAZO", author = {C. And/'{u}jar and P. Brunet and D. Ayala}, title = {Topology-reducing surface simplification using a discrete solid representation}, journal = {{ACM} Transactions on Graphics}, volume = {21}, number = {2}, month = apr, year = {2002}, pages = {88--105}, abstract = {Topology-reducing surface simplification using octree models. Uses a Hausdorff distance metric for interior of solids.} keywords = {[volumes; surfaces; surface simplification]} } ----------------------------------------------------------------------------- 23. Try to incorporate distance-based indexes into the SAND kernel. This includes variants of metric trees such as M-trees, vp-trees, mvp-trees, and GNAT. This is somewhat related to the project on developing JAVA applets for such methods. @article{Uhlm91b, folder = "BURKHARD/KELLER/METRIC", author = "J. K. Uhlmann", title = "Metric trees", journal = "Applied Mathematics Letters", volume = 4, number = 5, pages = "61--62", year = 1991, abstract = {A new multidimensional search structure is described that is able to exploit metric information to efficiently satisfy a large class of proximity queries}, keywords = {[metric space; search trees; proximity queries]} } @article{Uhlm91c, folder = "BURKHARD/KELLER/METRIC", author = "J. K. Uhlmann", title = "Satisfying general proximity/similarity queries with metric trees", journal = ipl, year = 1991, volume = 40, number = 4, pages = "175--179", month = nov, abstract = {A new multidimensional search structure is described that is able to exploit metric information to efficiently satisfy a large class of proximity queries}, keywords = {[metric space; search trees; proximity queries; vp-trees]} } @inproceedings{Ciac97, folder = {BURKHARD/KELLER/METRIC}, author = {P. Ciaccia and M. Patella and P. Zezula}, title = {M-tree: An efficient access method for similarity search in metric spaces}, booktitle = "Proceedings of the 23rd International Conference on Very Large Data Bases ({VLDB})", editor = "M. Jarke and M. J. Carey and K. R. Dittrich and F. H. Lochovsky and P. Loucopoulos and M. A. Jeusfeld", year = 1997, address = "Athens, Greece", month = aug, pages = {426--435}, keywords = {[metric spaces; search tree]}, abstract = {A new access method, called M-tree, is proposed to organize and search large data sets from a generic ``metric space'', i.e. where object proximity is only defined by a distance function satisfying the positivity, symmetry, and triangle inequality postulates.} } ----------------------------------------------------------------------------- 24. Adapt the BV-tree to make it look like an R-tree. In particular, use object aggregation with the non-intersection property. This analogy is not quite perfect as the resulting BV-tree analog is NOT balanced. It is based on the principle of non-intersection. You will need to read the description of the BV-tree in the new text of Samet which will be made available to you. The basic idea is to build an R-tree with the principle that brothers may overlap but their boundaries do not intersect. The idea is to see if some of the principle used to construct an R-tree (e.g., split a node upon overflow) can also be used here. @inproceedings{Free95, folder = "FREESTON", key = "[Free95]", author = "M. Freeston", title = "A general solution of the n-dimensional B-tree problem", pages = "80--91", booktitle = acmsigmod95, address = "San Jose, CA", month = may, year = 1995, abstract = "We present a generic solution to a problem which lies at the heart of the unpredictable worst-case performance characteristics of a wide class of multi-dimensional index designs: those which employ a recursive partitioning of the data space. We then show how this solution can produce modified designs with fully predictable and controllable worst-case characteristics. In particular, we show how the recursive partitioning of an n-dimensional data space can be represented in such a way that the characteristics of the one-dimensional B-tree are preserved in n dimensions, as far as is topologically possible, i.e. a representation guaranteeing logarithmic access and update time, while also guaranteeing a one-third minimum occupancy of both data and index nodes.", keywords = "[points; k-d-b-trees; BV-tree]" } ----------------------------------------------------------------------------- 25. Look at the work with Andre Folkers on pictorial query specification and try to develop Fourier descriptors based on interiors of the objects instead of just their contours. @InProceedings{Folk02, folder = {FOLKERS}, author = {A. Folkers and H. Samet}, title = {Content-based Image Retrieval Using Fourier Descriptors on a Logo Database}, booktitle = "Proceedings of the 16th International Conference on Pattern Recognition", editor = {R. Kasturi and D.Laurendau and C.Suen}, year = 2002, address = {Quebec City, Canada}, month = aug, volume = 3, pages = {521--524}, abstract = {A system that enables the pictorial specification of queries in an image database is described. The queries are comprised of rectangle, polygon, ellipse, and B-spline shapes. The queries specify which shapes should appear in the target image as well as spatial constraints on the distance between them and their relative position. The retrieval process makes use of an abstraction of the contour of the shape which is invariant against translation, scale, rotation, and starting point that is based on the use of Fourier descriptors. These abstractions are used in a system to locate logos in an image database. The utility of this approach is illustrated using some sample queries.}, keywords = {[image databases; shape; Fourier descriptors]} } ----------------------------------------------------------------------------- 26. Look at the FBW approximation work with Andrzej Kochut and try to incorporate an idea proposed by Larry Davis idea for handling gray scale images. In this case, various threshold values are applied so that diferent color ranges are transmitted. One issue is that we can always break up $2^n$ gray levels into $n$ binary bits and transmit the FBW/FWB quadtree (octree) representation for each of the binary bits. Using the Kawaguchi (Kawa83) of encoding the various bit planes with Gray codes may reduce the storage requirements further. Of course, when we apply the different threshold values we must be careful not to use more than $n$ steps. @article{Same85c, author = {H. Samet}, title = {Data structures for quadtree approximation and compression}, journal = {Communications of the {ACM}}, volume = {28}, number = {9}, month = sep, year = {1985}, pages = {973--993}, keywords = {[regions]}, sametref = {A.8 A.8.2.2} } @article{Kawa83, folder = "KAWAGUCHI / DF-EXPRESSIONS", author = "E. Kawaguchi and T. Endo and J. Matsunaga", title = "Depth-first picture expression viewed from digital picture processing", journal = pami, volume = 5, number = 4, pages = "373--384", month = jul, year = 1983, abstract = "A picture coding strategy titled DF-expression is now studied from another point of view.(In reference to picture processing algorithm on the coded form). Our conclusion with this point is that the gray code is the best binary code system for DF-expression in information preserving sense.", keywords = "[regions; binary picture; data compaction; hierarchical picture representation; image processing; logical operation on picture; picture coding and decoding]", sametref = "D.1.2 A.1.3 A.2.2 A.6.3 A.8" } ----------------------------------------------------------------------------- 27. Can you apply the work of Maneewongvatana and Mount for approximate nearest neighbors to finding probabily approximately correct nearest neighbors for applications in databases. @InProceedings{Mane01a, folder = {MANEEWONGVATANA?}, author = "S. Maneewongvatana and D. M. Mount", title = "The analysis of a probabilistic approach to nearest neighbor searching", booktitle = {Proceedings of the 2001 Workshop on Algorithms and Data Structures ({WADS}'01)}, editor = {}, address = {Providence, RI}, year = 2001, month = aug, macronote = lncs2125, pages = "", keywords = {[points; approximate nearest neighbor searching; high\hyp{}dimensional data; Voronoi diagrams]}, note = {To appear}, } @InProceedings{Mane01c, folder = {MANEEWONGVATANA?}, author = "S. Maneewongvatana and D. M. Mount", title = "An empirical study of a new approach to nearest neighbor searching", booktitle = {Third Workshop on Algorithm Engineering and Experimentation ({ALENEX}'01)}, month = jan, year = 2001, address = {Washington, DC}, macronote = lncs2153, pages = {172--187}, url = "ftp.cs.umd.edu/pub/faculty/mount/Papers/alenex01.ps.gz", keywords = {[points; approximate nearest neighbor searchng; high\hyp{}dimensional data; Voronoi diagrams]} } @PhdThesis{Mane01b, folder = {MANEEWONGVATANA?}, author = "S. Maneewongvatana", title = {Multi-dimensional nearest neighbor searching with low-dimensional data}, school = {Computer Science Department, University of Maryland}, year = 2001, address = {College Park, MD}, abstract = {}, keywords = {[points; approximate nearest neighbor searching; high\hyp{}dimensional data; Voronoi diagrams; k-d trees; sliding-midpoint k-d tree; minimum-ambiguity k-d tree; os-tree; probably-approximately correct nearest neighbor searching]} } @InProceedings{Ciac00, folder = {CIACCIA}, author = {P. Ciaccia and M. Patella}, title = {{PAC} nearest neighbor queries: approximate and controlled search in high\hyp{}dimensional and metric spaces}, booktitle = "Proceedings of the 16th {IEEE} International Conference on Data Engineering", year = 2000, address = {San Diego, CA}, month = feb, pages = {244--255}, keywords = {[points; high\hyp{}dimensional data; incremental nearest neighbors; approximate nearest neighbors]} } ----------------------------------------------------------------------------- 28. Find some enhancement of the pictorial query tree work of Samet, Soffer, and Zotkin. @Article{Soff98a, folder = {PICTORIAL QUERY SPECIFICATION SOFFER/SAMET}, author = {A. Soffer and H. Samet}, title = {Pictorial query specification for browsing through spatially referenced image databases}, journal = jvlc, year = 1998, month = dec, volume = 9, number = 6, pages = {567--596}, abstract = {A pictorial query specification technique that enables the formulation of complex pictorial queries for browsing through a collection of spatially referenced images is presented.}, note = {Also an abbreviated version in {\em Proceedings of the Second International Conference on Visual Information Systems (VISUAL97)}, pages 117--124, San Diego, December 1997}, keywords = {[SAMET]} } @InProceedings{Soff98c, folder = {SOFFER}, author = {A. Soffer and H. Samet and D. Zotkin}, title = {Pictorial query trees for query specification in image databases}, booktitle = "Proceedings of the 14th International Conference on Pattern Recognition", editor = {A. K. Jain and S. Venkathesh and B. C. Lovell}, year = 1998, address = {Brisbane, Australia}, month = aug, volume = 1, pages = {919--921}, abstract = {A technique that enables specifying complex queries in image databases using pictorial query trees is presented. The leaves of a pictorial query tree correspond to individual pictorial queries that specify which objects should appear in the target images as well as how many occurrences of each object are required.}, keywords = {[image databases; pictorial query language]} }