nsf

changeset 120:94284c1ca133

.
author bshanks@bshanks.dyndns.org
date Tue Jul 07 15:47:43 2009 -0700 (16 years ago)
parents e61f822e0375
children 3aeb56c97327
files grant.bib grant.html grant.odt grant.pdf grant.txt grantBody.pdf postdoc.odt refs.pdf
line diff
1.1 --- a/grant.bib Tue Jul 07 14:57:48 2009 -0700 1.2 +++ b/grant.bib Tue Jul 07 15:47:43 2009 -0700 1.3 @@ -58,7 +58,7 @@ 1.4 author = {Carol L. Thompson and Sayan D. Pathak and Andreas Jeromin and Lydia L. Ng and Cameron R. {MacPherson} and Marty T. Mortrud and Allison Cusick and Zackery L. Riley and Susan M. Sunkin and Amy Bernard and Ralph B. Puchalski and Fred H. Gage and Allan R. Jones and Vladimir B. Bajic and Michael J. Hawrylycz and Ed S. Lein}, 1.5 month = dec, 1.6 year = {2008}, 1.7 - keywords = {{MOLNEURO,SYSBIO,SYSNEURO}}, 1.8 + keywords = {{MOLNEURO,} {SYSBIO,} {SYSNEURO}}, 1.9 pages = {1010--1021} 1.10 }, 1.11 1.12 @@ -66,6 +66,7 @@ 1.13 address = {San Diego, California}, 1.14 title = {{WikiGateway:} a library for interoperability and accelerated wiki development}, 1.15 isbn = {1-59593-111-2}, 1.16 + shorttitle = {{WikiGateway}}, 1.17 url = {http://portal.acm.org/citation.cfm?id=1104973.1104979}, 1.18 doi = {10.1145/1104973.1104979}, 1.19 abstract = {{WikiGateway} is an open-source suite of tools for automated interaction with wikis:• Python and Perl modules with functions like {getPage,} {putPage,} {getRecentChanges,} and more.• A mechanism to add {DAV,} Atom, or {XMLRPC} capabilities to any supported wiki server.• A command-line tool with functionality similar to the Perl and Python modules.• Demo applications built on top of these tools include a wiki copy command, a spam-cleaning bot, and a tool to recursively upload text files inside a directory structure as wiki {pages.All} {WikiGateway} tools are compatible with a number of different wiki engines. 
Developers can use {WikiGateway} to hide the differences between wiki engines and build applications which interoperate with many different wiki engines.}, 1.20 @@ -73,7 +74,7 @@ 1.21 publisher = {{ACM}}, 1.22 author = {Bayle Shanks}, 1.23 year = {2005}, 1.24 - keywords = {atom,client-side wiki,interoperability,interwiki,middleware,webdav,wiki,wikiclient,wikigateway,wikirpcinterface,wiki xmlrpc}, 1.25 + keywords = {atom, client-side wiki, interoperability, interwiki, middleware, webdav, wiki, wikiclient, wikigateway, wikirpcinterface, wiki xmlrpc}, 1.26 pages = {53--66} 1.27 }, 1.28 1.29 @@ -105,7 +106,7 @@ 1.30 author = {D C Van Essen and H A Drury and J Dickson and J Harwell and D Hanlon and C H Anderson}, 1.31 year = {2001}, 1.32 note = {{PMID:} 11522765}, 1.33 - keywords = {Anatomy, {Artistic,Anatomy,} {Cross-Sectional,Brain,Cerebral} {Cortex,Databases,} {Factual,Humans,Image} Processing, {Computer-Assisted,Magnetic} Resonance {Imaging,Medical} {Illustration,Neuroanatomy,Software,Systems} Integration}, 1.34 + keywords = {Anatomy, Artistic, Anatomy, {Cross-Sectional,} Brain, Cerebral Cortex, Databases, Factual, Humans, Image Processing, {Computer-Assisted,} Magnetic Resonance Imaging, Medical Illustration, Neuroanatomy, Software, Systems Integration}, 1.35 pages = {443--59} 1.36 }, 1.37 1.38 @@ -116,7 +117,7 @@ 1.39 author = {Sören Sonnenburg and Gunnar Raetsch and Christin Schaefer and Bernhard Schölkopf}, 1.40 year = {2006}, 1.41 note = {While classical kernel-based learning algorithms are based on a single kernel, in practice it is often desirable to use multiple kernels. Lanckriet et al. (2004) considered conic combinations of kernel matrices for classification, leading to a convex quadratically constrained quadratic program. We show that it can be rewritten as a semi-infinite linear program that can be efficiently solved by recycling the standard {SVM} implementations. 
Moreover, we generalize the formulation and our method to a larger class of problems, including regression and one-class classification. Experimental results show that the proposed algorithm works for hundred thousands of examples or hundreds of kernels to be combined, and helps for automatic model selection, improving the interpretability of the learning result. In a second part we discuss general speed up mechanism for {SVMs,} especially when used with sparse feature maps as appear for string kernels, allowing us to train a string kernel {SVM} on a 10 million real-world splice data set from computational biology. We integrated multiple kernel learning in our machine learning toolbox {SHOGUN} for which the source code is publicly available at http://www.fml.tuebingen.mpg.de/raetsch/projects/shogun.}, 1.42 - keywords = {{Learning/Statistics} \& {Optimisation,Multimodal} {Integration,Theory} \& Algorithms}, 1.43 + keywords = {{Learning/Statistics} \& Optimisation, Multimodal Integration, Theory \& Algorithms}, 1.44 howpublished = {http://eprints.pascal-network.org/archive/00003035/} 1.45 }, 1.46 1.47 @@ -124,6 +125,7 @@ 1.48 edition = {3}, 1.49 title = {Brain Maps: Structure of the Rat Brain}, 1.50 isbn = {0126105820}, 1.51 + shorttitle = {Brain Maps}, 1.52 publisher = {Academic Press}, 1.53 author = {Larry Swanson}, 1.54 month = nov, 1.55 @@ -153,7 +155,7 @@ 1.56 month = dec, 1.57 year = {2002}, 1.58 note = {{PMID:} 12466850}, 1.59 - keywords = {{Animals,Base} {Composition,Chromosomes,} {Mammalian,Conserved} {Sequence,CpG} {Islands,Evolution,} {Molecular,Gene} Expression {Regulation,Genes,Genetic} {Variation,Genome,Genome,} {Human,Genomics,Humans,Mice,Mice,} {Knockout,Mice,} {Transgenic,Models,} {Animal,Multigene} {Family,Mutagenesis,Neoplasms,Physical} Chromosome {Mapping,Proteome,Pseudogenes,Quantitative} Trait {Loci,Repetitive} Sequences, Nucleic {Acid,RNA,} {Untranslated,Selection} {(Genetics),Sequence} Analysis, {DNA,Sex} {Chromosomes,Species} 
{Specificity,Synteny}}, 1.60 + keywords = {Animals, Base Composition, Chromosomes, Mammalian, Conserved Sequence, {CpG} Islands, Evolution, Molecular, Gene Expression Regulation, Genes, Genetic Variation, Genome, Genome, Human, Genomics, Humans, Mice, Mice, Knockout, Mice, Transgenic, Models, Animal, Multigene Family, Mutagenesis, Neoplasms, Physical Chromosome Mapping, Proteome, Pseudogenes, Quantitative Trait Loci, Repetitive Sequences, Nucleic Acid, {RNA,} Untranslated, Selection {(Genetics),} Sequence Analysis, {DNA,} Sex Chromosomes, Species Specificity, Synteny}, 1.61 pages = {520--62} 1.62 }, 1.63 1.64 @@ -188,6 +190,7 @@ 1.65 @article{visel_genepaint.org:atlas_2004, 1.66 title = {{GenePaint.org:} an atlas of gene expression patterns in the mouse embryo}, 1.67 volume = {32}, 1.68 + shorttitle = {{GenePaint.org}}, 1.69 url = {http://nar.oxfordjournals.org/cgi/content/abstract/32/suppl_1/D552}, 1.70 doi = {10.1093/nar/gkh029}, 1.71 abstract = {High-throughput instruments were recently developed to determine gene expression patterns on tissue sections by {RNA} in situ hybridization. The resulting images of gene expression patterns, chiefly of E14.5 mouse embryos, are accessible to the public at http://www.genepaint.org. This relational database is searchable for gene identifiers and {RNA} probe sequences. Moreover, patterns and intensity of expression in [{\textasciitilde}]100 different embryonic tissues are annotated and can be searched using a standardized catalog of anatomical structures. 
A virtual microscope tool, the Zoom Image Server, was implemented in {GenePaint.org} and permits interactive zooming and panning across [{\textasciitilde}]15 000 high-resolution images.}, 1.72 @@ -201,6 +204,7 @@ 1.73 @article{magdaleno_bgem:in_2006, 1.74 title = {{BGEM:} An In Situ Hybridization Database of Gene Expression in the Embryonic and Adult Mouse Nervous System}, 1.75 volume = {4}, 1.76 + shorttitle = {{BGEM}}, 1.77 url = {http://dx.doi.org/10.1371%2Fjournal.pbio.0040086}, 1.78 doi = {10.1371/journal.pbio.0040086}, 1.79 number = {4}, 1.80 @@ -218,13 +222,14 @@ 1.81 booktitle = {Computational Systems Bioinformatics Conference, 2005. Workshops and Poster Abstracts. {IEEE}}, 1.82 author = {J. Carson and T. Ju and C. Thaller and M. Bello and I. Kakadiaris and J. Warren and G. Eichele and W. Chiu}, 1.83 year = {2005}, 1.84 - keywords = {atlas-based segmentation,automate robotic in situ hybridization image annotation,biological techniques,biological tissues,biology {computing,Brain,cell-cell} signaling,cell differentiation,cellular biophysics,cellular resolution,cluster analysis,data {mining,DNA} sequence database,functional genomics,gene expression pattern,genetics,image classification,image segmentation,mesh maps,pattern clustering,postnatal mouse brain,query interface,statistical analysis,tissue}, 1.85 + keywords = {atlas-based segmentation, automate robotic in situ hybridization image annotation, biological techniques, biological tissues, biology computing, Brain, cell-cell signaling, cell differentiation, cellular biophysics, cellular resolution, cluster analysis, data mining, {DNA} sequence database, functional genomics, gene expression pattern, genetics, image classification, image segmentation, mesh maps, pattern clustering, postnatal mouse brain, query interface, statistical analysis, tissue}, 1.86 pages = {358} 1.87 }, 1.88 1.89 @article{venkataraman_emage_2008, 1.90 title = {{EMAGE} Edinburgh Mouse Atlas of Gene Expression: 2008 update}, 1.91 volume 
= {36}, 1.92 + shorttitle = {{EMAGE} Edinburgh Mouse Atlas of Gene Expression}, 1.93 url = {http://nar.oxfordjournals.org/cgi/content/abstract/36/suppl_1/D860}, 1.94 doi = {10.1093/nar/gkm938}, 1.95 abstract = {{EMAGE} {(http://genex.hgu.mrc.ac.uk/Emage/database)} is a database of in situ gene expression patterns in the developing mouse embryo. Domains of expression from raw data images are spatially integrated into a set of standard {3D} virtual mouse embryos at different stages of development, allowing data interrogation by spatial methods. Sites of expression are also described using an anatomy ontology and data can be queried using text-based methods. Here we describe recent enhancements to {EMAGE} which include advances in spatial search methods including: a refined local spatial similarity search algorithm, a method to allow global spatial comparison of patterns in {EMAGE} and subsequent hierarchical-clustering, and spatial searches across multiple stages of development. In addition, we have extended data access by the introduction of web services and new {HTML-based} search interfaces, which allow access to data that has not yet been spatially annotated. 
We have also started incorporating full {3D} images of gene expression that have been generated using optical projection tomography {(OPT).}}, 1.96 @@ -235,7 +240,7 @@ 1.97 pages = {D860--865} 1.98 }, 1.99 1.100 -@inbook{hemert_matching_2008, 1.101 +@incollection{hemert_matching_2008, 1.102 series = {Communications in Computer and Information Science}, 1.103 title = {Matching Spatial Regions with Combinations of Interacting Gene Expression Patterns}, 1.104 volume = {13}, 1.105 @@ -252,7 +257,7 @@ 1.106 pages = {347--361} 1.107 }, 1.108 1.109 -@inbook{van_hemert_mining_2007, 1.110 +@incollection{van_hemert_mining_2007, 1.111 series = {Lecture Notes in Computer Science}, 1.112 title = {Mining Spatial Gene Expression Data for Association Rules}, 1.113 volume = {4414/2007}, 1.114 @@ -274,6 +279,7 @@ 1.115 title = {The Zebrafish Information Network: the zebrafish model organism database}, 1.116 volume = {34}, 1.117 issn = {1362-4962}, 1.118 + shorttitle = {The Zebrafish Information Network}, 1.119 url = {http://www.ncbi.nlm.nih.gov/pubmed/16381936}, 1.120 doi = {10.1093/nar/gkj086}, 1.121 abstract = {The Zebrafish Information Network {(ZFIN;} http://zfin.org) is a web based community resource that implements the curation of zebrafish genetic, genomic and developmental data. {ZFIN} provides an integrated representation of mutants, genes, genetic markers, mapping panels, publications and community resources such as meeting announcements and contact information. 
Recent enhancements to {ZFIN} include (i) comprehensive curation of gene expression data from the literature and from directly submitted data, (ii) increased support and annotation of the genome sequence, (iii) expanded use of ontologies to support curation and query forms, (iv) curation of morpholino data from the literature, and (v) increased versatility of gene pages, with new data types, links and analysis tools.}, 1.122 @@ -282,7 +288,7 @@ 1.123 author = {Judy Sprague and Leyla Bayraktaroglu and Dave Clements and Tom Conlin and David Fashena and Ken Frazer and Melissa Haendel and Douglas G Howe and Prita Mani and Sridhar Ramachandran and Kevin Schaper and Erik Segerdell and Peiran Song and Brock Sprunger and Sierra Taylor and Ceri E Van Slyke and Monte Westerfield}, 1.124 year = {2006}, 1.125 note = {{PMID:} 16381936}, 1.126 - keywords = {{Animals,Databases,} {Genetic,Gene} {Expression,Genomics,Internet,Models,} {Animal,Oligonucleotides,} {Antisense,Systems} {Integration,User-Computer} {Interface,Vocabulary,} {Controlled,Zebrafish,Zebrafish} Proteins}, 1.127 + keywords = {Animals, Databases, Genetic, Gene Expression, Genomics, Internet, Models, Animal, Oligonucleotides, Antisense, Systems Integration, {User-Computer} Interface, Vocabulary, Controlled, Zebrafish, Zebrafish Proteins}, 1.128 pages = {D581--5} 1.129 }, 1.130 1.131 @@ -340,6 +346,7 @@ 1.132 @article{barrett_ncbi_2007, 1.133 title = {{NCBI} {GEO:} mining tens of millions of expression profiles--database and tools update}, 1.134 volume = {35}, 1.135 + shorttitle = {{NCBI} {GEO}}, 1.136 url = {http://nar.oxfordjournals.org/cgi/content/abstract/35/suppl_1/D760}, 1.137 doi = {10.1093/nar/gkl887}, 1.138 abstract = {The Gene Expression Omnibus {(GEO)} repository at the National Center for Biotechnology Information {(NCBI)} archives and freely disseminates microarray and other forms of high-throughput data generated by the scientific community. 
The database has a minimum information about a microarray experiment {(MIAME)-compliant} infrastructure that captures fully annotated raw and processed data. Several data deposit options and formats are supported, including web forms, spreadsheets, {XML} and Simple Omnibus Format in Text {(SOFT).} In addition to data storage, a collection of user-friendly web-based interfaces and applications are available to help users effectively explore, visualize and download the thousands of experiments and tens of millions of gene expression patterns stored in {GEO.} This paper provides a summary of the {GEO} database structure and user facilities, and describes recent enhancements to database design, performance, submission format options, data query and retrieval utilities. {GEO} is accessible at http://www.ncbi.nlm.nih.gov/geo/}, 1.139 @@ -353,6 +360,7 @@ 1.140 @article{smith_mouse_2007, 1.141 title = {The mouse Gene Expression Database {(GXD):} 2007 update}, 1.142 volume = {35}, 1.143 + shorttitle = {The mouse Gene Expression Database {(GXD)}}, 1.144 url = {http://nar.oxfordjournals.org/cgi/content/abstract/35/suppl_1/D618}, 1.145 doi = {10.1093/nar/gkl1003}, 1.146 abstract = {The Gene Expression Database {(GXD)} provides the scientific community with an extensive and easily searchable database of gene expression information about the mouse. Its primary emphasis is on developmental studies. By integrating different types of expression data, {GXD} aims to provide comprehensive information about expression patterns of transcripts and proteins in wild-type and mutant mice. Integration with the other Mouse Genome Informatics {(MGI)} databases places the gene expression information in the context of genetic, sequence, functional and phenotypic information, enabling valuable insights into the molecular biology that underlies developmental and disease processes. 
In recent years the utility of {GXD} has been greatly enhanced by a large increase in data content, obtained from the literature and provided by researchers doing large-scale in situ and {cDNA} screens. In addition, we have continued to refine our query and display features to make it easier for users to interrogate the data. {GXD} is available through the {MGI} web site at http://www.informatics.jax.org/ or directly at http://www.informatics.jax.org/menus/expression\_menu.shtml.}, 1.147 @@ -374,7 +382,7 @@ 1.148 journal = {{NeuroImage}}, 1.149 author = {J. Annese and A. Pitiot and I. D. Dinov and A. W. Toga}, 1.150 year = {2004}, 1.151 - keywords = {Cerebral {Cortex,Cortical} {areas,Myelo-architecture}}, 1.152 + keywords = {Cerebral Cortex, Cortical areas, Myelo-architecture}, 1.153 pages = {15--26} 1.154 }, 1.155 1.156 @@ -382,6 +390,7 @@ 1.157 title = {A stereological approach to human cortical architecture: identification and delineation of cortical areas}, 1.158 volume = {20}, 1.159 issn = {0891-0618}, 1.160 + shorttitle = {A stereological approach to human cortical architecture}, 1.161 url = {http://www.sciencedirect.com/science/article/B6T02-43HDYPB-5/2/461101884330ed9e8b29a5f4195a349f}, 1.162 doi = {{10.1016/S0891-0618(00)00076-4}}, 1.163 abstract = {Stereology offers a variety of procedures to analyze quantitatively the regional and laminar organization in cytoarchitectonically defined areas of the human cerebral cortex. Conventional anatomical atlases are of little help in localizing specific cortical areas, since most of them are based on a single brain and use highly observer-dependent criteria for the delineation of cortical areas. In consequence, numerous cortical maps exist which greatly differ with respect to number, position, size and extent of cortical areas. We describe a novel algorithm-based procedure for the delineation of cortical areas, which exploits the automated estimation of volume densities of cortical cell bodies. 
Spatial sampling of the laminar pattern is performed with density profiles, followed by multivariate analysis of the profiles[`] shape, which locates the cytoarchitectonic borders between neighboring cortical areas at sites where the laminar pattern changes significantly. The borders are then mapped to a human brain atlas system comprising tools for three dimensional reconstruction, visualization and morphometric analysis. A sample of brains with labeled cortical areas is warped into the reference brain of the atlas system in order to generate a population map of the cortical areas, which describes the intersubject variability in spatial conformation of cortical areas. These population maps provide a novel tool for the interpretation of images obtained with functional imaging techniques.}, 1.164 @@ -390,7 +399,7 @@ 1.165 author = {A. Schleicher and K. Amunts and S. Geyer and T. Kowalski and T. Schormann and N. {Palomero-Gallagher} and K. Zilles}, 1.166 month = oct, 1.167 year = {2000}, 1.168 - keywords = {Cerebral {Cortex,Density} {profile,Multivariate} {statistics,Quantitative} {cytoarchitecture,Stereology-brain} mapping}, 1.169 + keywords = {Cerebral Cortex, Density profile, Multivariate statistics, Quantitative cytoarchitecture, Stereology-brain mapping}, 1.170 pages = {31--47} 1.171 }, 1.172 1.173 @@ -406,13 +415,14 @@ 1.174 author = {Oliver Schmitt and Lars Hömke and Lutz Dümbgen}, 1.175 month = may, 1.176 year = {2003}, 1.177 - keywords = {Brain {mapping,Cerebral} {Cortex,Cytoarchitecture,Excess} {mass,Lamination,Multiple} local rank {test,Neuroimaging,Profiles,Trajectories,Transition} {regions,Traverses}}, 1.178 + keywords = {Brain mapping, Cerebral Cortex, Cytoarchitecture, Excess mass, Lamination, Multiple local rank test, Neuroimaging, Profiles, Trajectories, Transition regions, Traverses}, 1.179 pages = {42--63} 1.180 }, 1.181 1.182 @article{schleicher_quantitative_2005, 1.183 title = {Quantitative architectural analysis: a new approach to cortical 
mapping}, 1.184 volume = {210}, 1.185 + shorttitle = {Quantitative architectural analysis}, 1.186 url = {http://dx.doi.org/10.1007/s00429-005-0028-2}, 1.187 doi = {10.1007/s00429-005-0028-2}, 1.188 abstract = {Abstract Recent progress in anatomical and functional {MRI} has revived the demand for a reliable, topographic map of the human cerebral 1.189 @@ -457,7 +467,7 @@ 1.190 pages = {251--264} 1.191 }, 1.192 1.193 -@inbook{adamson_tracking_2005, 1.194 +@incollection{adamson_tracking_2005, 1.195 series = {Lecture Notes in Computer Science}, 1.196 title = {A Tracking Approach to Parcellation of the Cerebral Cortex}, 1.197 volume = {3749/2005}, 1.198 @@ -485,7 +495,7 @@ 1.199 author = {Christopher J. Paciorek}, 1.200 month = may, 1.201 year = {2007}, 1.202 - keywords = {Bayesian {statistics,Disease} {mapping,Fourier} {basis,Generalized} linear mixed {model,Geostatistics,Risk} {surface,Spatial} {statistics,Spectral} basis}, 1.203 + keywords = {Bayesian statistics, Disease mapping, Fourier basis, Generalized linear mixed model, Geostatistics, Risk surface, Spatial statistics, Spectral basis}, 1.204 pages = {3631--3653} 1.205 }, 1.206 1.207 @@ -507,7 +517,7 @@ 1.208 title = {Home Page of Geoffrey Hinton}, 1.209 url = {http://www.cs.toronto.edu/~hinton/}, 1.210 howpublished = {http://www.cs.toronto.edu/{\textasciitilde}hinton/}, 1.211 - comment = {eep Boltzmann Machines.} 1.212 + annote = {eep Boltzmann Machines.} 1.213 }, 1.214 1.215 @misc{_dbm.pdf_????, 1.216 @@ -526,7 +536,7 @@ 1.217 booktitle = {{AAAI}}, 1.218 author = {C Kemp and {JB} Tenenbaum and {TL} Griffiths and T Yamada and N Ueda}, 1.219 year = {2006}, 1.220 - keywords = {infinite,model,relational} 1.221 + keywords = {infinite, model, relational} 1.222 }, 1.223 1.224 @article{serpico_new_2001, 1.225 @@ -556,6 +566,15 @@ 1.226 journal = {Geoscience and Remote Sensing, {IEEE} Transactions on}, 1.227 author = {{S.B.} Serpico and L. 
Bruzzone}, 1.228 year = {2001}, 1.229 - keywords = {algorithm,binary string,feature extraction,feature selection,geophysical measurement technique,geophysical signal processing,geophysical techniques,hyperspectral remote sensing,image processing,land surface,multidimensional signal processing,multispectral remote sensing,optical imaging,remote sensing,suboptimal search strategy,terrain mapping}, 1.230 + keywords = {algorithm, binary string, feature extraction, feature selection, geophysical measurement technique, geophysical signal processing, geophysical techniques, hyperspectral remote sensing, image processing, land surface, multidimensional signal processing, multispectral remote sensing, optical imaging, remote sensing, suboptimal search strategy, terrain mapping}, 1.231 pages = {1360--1367} 1.232 +}, 1.233 + 1.234 +@misc{boggs_spectral_2008, 1.235 + title = {Spectral Python}, 1.236 + url = {http://spectralpython.sourceforge.net/}, 1.237 + author = {Thomas Boggs}, 1.238 + month = jul, 1.239 + year = {2008}, 1.240 + howpublished = {http://spectralpython.sourceforge.net/} 1.241 } 1.242 \ No newline at end of file
2.1 --- a/grant.html Tue Jul 07 14:57:48 2009 -0700 2.2 +++ b/grant.html Tue Jul 07 15:47:43 2009 -0700 2.3 @@ -44,25 +44,25 @@ 2.4 Even the questions of how many areas should be recognized in cortex, and what their arrange- 2.5 ment is, are still not completely settled. A proposed division of the cortex into areas is called a 2.6 cortical map. In the rodent, the lack of a single agreed-upon map can be seen by contrasting the 2.7 -recent maps given by Swanson[21] on the one hand, and Paxinos and Franklin[16] on the other. 2.8 +recent maps given by Swanson[22] on the one hand, and Paxinos and Franklin[17] on the other. 2.9 While the maps are certainly very similar in their general arrangement, significant differences re- 2.10 main. 2.11 The Allen Mouse Brain Atlas dataset 2.12 - The Allen Mouse Brain Atlas (ABA) data[13] were produced by doing in-situ hybridization on 2.13 + The Allen Mouse Brain Atlas (ABA) data[14] were produced by doing in-situ hybridization on 2.14 slices of male, 56-day-old C57BL/6J mouse brains. Pictures were taken of the processed slice, 2.15 and these pictures were semi-automatically analyzed to create a digital measurement of gene 2.16 expression levels at each location in each slice. Per slice, cellular spatial resolution is achieved. 2.17 Using this method, a single physical slice can only be used to measure one single gene; many 2.18 different mouse brains were needed in order to measure the expression of many genes. 2.19 - Mus musculus is thought to contain about 22,000 protein-coding genes[26]. The ABA contains 2.20 + Mus musculus is thought to contain about 22,000 protein-coding genes[27]. The ABA contains 2.21 data on about 20,000 genes in sagittal sections, out of which over 4,000 genes are also measured 2.22 in coronal sections. Our dataset is derived from only the coronal subset of the ABA2. An auto- 2.23 mated nonlinear alignment procedure located the 2D data from the various slices in a single 3D 2.24 coordinate system. 
In the final 3D coordinate system, voxels are cubes with 200 microns on a 2.25 -side. There are 67x41x58 = 159,326 voxels, of which 51,533 are in the brain[15]. For each voxel 2.26 -and each gene, the expression energy[13] within that voxel is made available. 2.27 - The ABA is not the only large public spatial gene expression dataset[8][25][5][14][24][4][23][20][3]. 2.28 -However, with the exception of the ABA, GenePaint[25], and EMAGE[24], most of the other re- 2.29 +side. There are 67x41x58 = 159,326 voxels, of which 51,533 are in the brain[16]. For each voxel 2.30 +and each gene, the expression energy[14] within that voxel is made available. 2.31 + The ABA is not the only large public spatial gene expression dataset[9][26][6][15][25][4][24][21][3]. 2.32 +However, with the exception of the ABA, GenePaint[26], and EMAGE[25], most of the other re- 2.33 sources have not (yet) extracted the expression intensity from the ISH images and registered the 2.34 results into a single 3-D space. 2.35 The remainder of the background section will be divided into three parts, one for each major 2.36 @@ -79,9 +79,9 @@ 2.37 regions may be expressed as a function. The input to this function is a voxel, along with the gene 2.38 expression levels within that voxel; the output is the regional identity of the target voxel, that is, the 2.39 ____________________________________ 2.40 - 2The sagittal data do not cover the entire cortex, and also have greater registration error[15]. Genes were selected 2.41 + 2The sagittal data do not cover the entire cortex, and also have greater registration error[16]. Genes were selected 2.42 by the Allen Institute for coronal sectioning based on, “classes of known neuroscientific interest... or through post hoc 2.43 -identification of a marked non-ubiquitous expression pattern”[15]. 2.44 +identification of a marked non-ubiquitous expression pattern”[16]. 2.45 2 2.46 2.47 region to which the target voxel belongs. We call this function a classifier. 
In general, the input to 2.48 @@ -255,7 +255,7 @@ 2.49 is selectively underex- 2.50 pressed in area SS. As noted above, the GIS community has developed tools for supervised 2.51 classification and unsupervised clustering in the context of the analysis 2.52 - of hyperspectral imaging data. One tool is Spectral Python6. Spectral 2.53 + of hyperspectral imaging data. One tool is Spectral Python[5]. Spectral 2.54 Python implements various supervised and unsupervised classification 2.55 methods, as well as utility functions for loading, viewing, and saving 2.56 spatial data. Although Spectral Python has feature extraction methods 2.57 @@ -263,36 +263,35 @@ 2.58 new features computed based on the original features, it does not have 2.59 feature selection methods, that is, methods to select a small subset 2.60 out of the original features (although feature selection in hyperspectral 2.61 - imaging has been investigated by others[19]. 2.62 + imaging has been investigated by others[20]. 2.63 There is a substantial body of work on the analysis of gene expression data. Most of this con- 2.64 -cerns gene expression data which are not fundamentally spatial7. Here we review only that work 2.65 +cerns gene expression data which are not fundamentally spatial6. Here we review only that work 2.66 which concerns the automated analysis of spatial gene expression data with respect to anatomy. 2.67 - Relating to Goal 1, GeneAtlas[5] and EMAGE [24] allow the user to construct a search query by 2.68 + Relating to Goal 1, GeneAtlas[6] and EMAGE [25] allow the user to construct a search query by 2.69 demarcating regions and then specifying either the strength of expression or the name of another 2.70 gene or dataset whose expression pattern is to be matched. Neither GeneAtlas nor EMAGE allow 2.71 one to search for combinations of genes that define a region in concert. 
2.72 - Relating to Goal 2, EMAGE[24] allows the user to select a dataset from among a large number 2.73 + Relating to Goal 2, EMAGE[25] allows the user to select a dataset from among a large number 2.74 of alternatives, or by running a search query, and then to cluster the genes within that dataset. 2.75 EMAGE clusters via hierarchical complete linkage clustering. 2.76 - [15] describes AGEA, ”Anatomic Gene Expression Atlas”. AGEA has three components. Gene 2.77 + [16] describes AGEA, ”Anatomic Gene Expression Atlas”. AGEA has three components. Gene 2.78 Finder: The user selects a seed voxel and the system (1) chooses a cluster which includes the 2.79 seed voxel, (2) yields a list of genes which are overexpressed in that cluster. Correlation: The user 2.80 selects a seed voxel and the system then shows the user how much correlation there is between 2.81 the gene expression profile of the seed voxel and every other voxel. Clusters: AGEA includes a 2.82 +preset hierarchical clustering of voxels based on a recursive bifurcation algorithm with correlation 2.83 ____________________________________ 2.84 - 6http://spectralpython.sourceforge.net/ 2.85 - 7By “fundamentally spatial” we mean that there is information from a large number of spatial locations indexed by 2.86 + 6By “fundamentally spatial” we mean that there is information from a large number of spatial locations indexed by 2.87 spatial coordinates; not just data which have only a few different locations or which is indexed by anatomical label. 2.88 6 2.89 2.90 -preset hierarchical clustering of voxels based on a recursive bifurcation algorithm with correlation 2.91 as the similarity metric. AGEA has been applied to the cortex. The paper describes interesting 2.92 results on the structure of correlations between voxel gene expression profiles within a handful of 2.93 cortical areas. 
However, that analysis neither looks for genes marking cortical areas, nor does it 2.94 suggest a cortical map based on gene expression data. Neither of the other components of AGEA 2.95 can be applied to cortical areas; AGEA’s Gene Finder cannot be used to find marker genes for the 2.96 cortical areas; and AGEA’s hierarchical clustering does not produce clusters corresponding to the 2.97 -cortical areas8. 2.98 +cortical areas7. 2.99 2.100 2.101 Figure 3: The top row shows the two 2.102 @@ -303,11 +302,11 @@ 2.103 area AUD, according to gradient sim- 2.104 ilarity. From left to right and top to 2.105 bottom, the genes are Ssr1, Efcbp1, 2.106 -Ptk7, and Aph1a. [6] looks at the mean expression level of genes within 2.107 +Ptk7, and Aph1a. [7] looks at the mean expression level of genes within 2.108 anatomical regions, and applies a Student’s t-test to de- 2.109 termine whether the mean expression level of a gene is 2.110 significantly higher in the target region. This relates to 2.111 - our Goal 1. [6] also clusters genes, relating to our Goal 2.112 + our Goal 1. [7] also clusters genes, relating to our Goal 2.113 2. For each cluster, prototypical spatial expression pat- 2.114 terns were created by averaging the genes in the cluster. 2.115 The prototypes were analyzed manually, without cluster- 2.116 @@ -323,27 +322,27 @@ 2.117 sults). Figures 4, 2, and 3 in the Preliminary Results 2.118 section contain evidence that each of our three choices 2.119 is the right one. 2.120 - [10] describes a technique to find combinations of 2.121 + [11] describes a technique to find combinations of 2.122 marker genes to pick out an anatomical region. They 2.123 use an evolutionary algorithm to evolve logical operators which combine boolean (thresholded) 2.124 images in order to match a target image. They apply their technique for finding combinations of 2.125 marker genes for the purpose of clustering genes around a “seed gene”. 
2.126 Relating to our Goal 2, some researchers have attempted to parcellate cortex on the basis of 2.127 -non-gene expression data. For example, [17], [2], [18], and [1] associate spots on the cortex with 2.128 -the radial profile9 of response to some stain ([12] uses MRI), extract features from this profile, and 2.129 +non-gene expression data. For example, [18], [2], [19], and [1] associate spots on the cortex with 2.130 +the radial profile8 of response to some stain ([13] uses MRI), extract features from this profile, and 2.131 then use similarity between surface pixels to cluster. 2.132 - [22] describes an analysis of the anatomy of the hippocampus using the ABA dataset. In 2.133 + [23] describes an analysis of the anatomy of the hippocampus using the ABA dataset. In 2.134 addition to manual analysis, two clustering methods were employed, a modified Non-negative 2.135 Matrix Factorization (NNMF), and a hierarchical bifurcation clustering scheme using correlation as 2.136 +similarity. The paper yielded impressive results, proving the usefulness of computational genomic 2.137 ____________________________________ 2.138 - 8In both cases, the cause is that pairwise correlations between the gene expression of voxels in different areas but 2.139 + 7In both cases, the cause is that pairwise correlations between the gene expression of voxels in different areas but 2.140 the same layer are often stronger than pairwise correlations between the gene expression of voxels in different layers 2.141 but the same area. Therefore, a pairwise voxel correlation clustering algorithm will tend to create clusters representing 2.142 cortical layers, not areas. 2.143 - 9A radial profile is a profile along a line perpendicular to the cortical surface. 2.144 + 8A radial profile is a profile along a line perpendicular to the cortical surface. 2.145 7 2.146 2.147 -similarity. The paper yielded impressive results, proving the usefulness of computational genomic 2.148 anatomy. 
We have run NNMF on the cortical dataset, and while the results are promising, other 2.149 methods may perform as well or better (see Preliminary Results, Figure 6). 2.150 Comparing previous work with our Goal 1, there has been fruitful work on finding marker genes, 2.151 @@ -379,7 +378,7 @@ 2.152 Allen Brain Atlas raw data, and produce as output all 2.153 numbers and charts found in publications resulting from 2.154 the project. Source code to be released will include ex- 2.155 - tensions to Caret[7], an existing open-source scientific 2.156 + tensions to Caret[8], an existing open-source scientific 2.157 imaging program, and to Spectral Python. Data to be 2.158 released will include the 2-D “flat map” dataset. This 2.159 dataset will be submitted to a machine learning dataset 2.160 @@ -393,9 +392,9 @@ 2.161 area. Finding marker genes will be useful for drug discovery as well as for experimentation be- 2.162 cause marker genes can be used to design interventions which selectively target individual cortical 2.163 areas. 2.164 + The application of the marker gene finding algorithm to the cortex will also support the develop- 2.165 8 2.166 2.167 - The application of the marker gene finding algorithm to the cortex will also support the develop- 2.168 ment of new neuroanatomical methods. In addition to finding markers for each individual cortical 2.169 area, we will find a small panel of genes that can find many of the areal boundaries at once. 2.170 The method developed in Goal 2 will provide a genoarchitectonic viewpoint that will contribute 2.171 @@ -410,12 +409,12 @@ 2.172 _ 2.173 Preliminary Results 2.174 Format conversion between SEV, MATLAB, NIFTI 2.175 -We have created software to (politely) download all of the SEV files10 from the Allen Institute 2.176 -website. We have also created software to convert between the SEV, MATLAB, and NIFTI file 2.177 -formats, as well as some of Caret’s file formats.
2.178 +We have created software to (politely) download all of the SEV files9 from the Allen Institute web- 2.179 +site. We have also created software to convert between the SEV, MATLAB, and NIFTI file formats, 2.180 +as well as some of Caret’s file formats. 2.181 Flatmap of cortex 2.182 We downloaded the ABA data and selected only those voxels which belong to cerebral cortex. 2.183 -We divided the cortex into hemispheres. Using Caret[7], we created a mesh representation of the 2.184 +We divided the cortex into hemispheres. Using Caret[8], we created a mesh representation of the 2.185 surface of the selected voxels. For each gene, and for each node of the mesh, we calculated an 2.186 average of the gene expression of the voxels “underneath” that mesh node. We then flattened 2.187 the cortex, creating a two-dimensional mesh. We converted this grid into a MATLAB matrix. We 2.188 @@ -434,11 +433,11 @@ 2.189 Correlation Recall that the instances are surface pixels, and consider the problem of attempt- 2.190 ing to classify each instance as either a member of a particular anatomical area, or not. The target 2.191 area can be represented as a boolean mask over the surface pixels. 2.192 - 10SEV is a sparse format for spatial data. It is the format in which the ABA data is made available. 2.193 + We calculated the correlation between each gene and each cortical area. The top row of Figure 2.194 +1 shows the three genes most correlated with area SS. 2.195 + 9SEV is a sparse format for spatial data. It is the format in which the ABA data is made available. 2.196 9 2.197 2.198 - We calculated the correlation between each gene and each cortical area. The top row of Figure 2.199 -1 shows the three genes most correlated with area SS. 
2.200 Conditional entropy 2.201 For each region, we created and ran a forward stepwise procedure which attempted to find 2.202 pairs of genes such that the conditional entropy of the target area’s boolean mask, conditioned 2.203 @@ -547,9 +546,9 @@ 2.204 all genes at once, we ran a support vector machine to 2.205 classify cortical surface pixels based on their gene ex- 2.206 pression profiles. We achieved classification accuracy of 2.207 - about 81%11. However, as noted above, a classifier that 2.208 + about 81%10. However, as noted above, a classifier that 2.209 ____________________________________ 2.210 - 115-fold cross-validation. 2.211 + 105-fold cross-validation. 2.212 11 2.213 2.214 looks at all the genes at once isn’t as practically useful 2.215 @@ -577,7 +576,7 @@ 2.216 Our plan: what remains to be done 2.217 Flatmap cortex and segment cortical layers 2.218 There are multiple ways to flatten 3-D data into 2-D. We will compare mappings from manifolds to 2.219 -planes which attempt to preserve size (such as the one used by Caret[7]) with mappings which 2.220 +planes which attempt to preserve size (such as the one used by Caret[8]) with mappings which 2.221 preserve angle (conformal maps). We will also develop a segmentation algorithm to automatically 2.222 identify the layer boundaries. 2.223 Develop algorithms that find genetic markers for anatomical regions 2.224 @@ -646,9 +645,9 @@ 2.225 An area may be difficult to identify because the boundaries are misdrawn in the atlas, or be- 2.226 cause the shape of the natural domain of gene expression corresponding to the area is different 2.227 from the shape of the area as recognized by anatomists. 
We will develop extensions to our pro- 2.228 -cedure which (a) detect when a difficult area could be fit if its boundary were redrawn slightly12, 2.229 +cedure which (a) detect when a difficult area could be fit if its boundary were redrawn slightly11, 2.230 ____________________________________ 2.231 - 12Not just any redrawing is acceptable, only those which appear to be justified as a natural spatial domain of gene ex- 2.232 + 11Not just any redrawing is acceptable, only those which appear to be justified as a natural spatial domain of gene ex- 2.233 pression by multiple sources of evidence. Interestingly, the need to detect “natural spatial domains of gene expression” 2.234 in a data-driven fashion means that the methods of Goal 2 might be useful in achieving Goal 1, as well – particularly 2.235 13 2.236 @@ -685,7 +684,7 @@ 2.237 spond to interesting spatial regions. 2.238 Clustering and segmentation on pixels We will explore clustering and image segmentation 2.239 algorithms in order to segment the pixels into regions. We will explore k-means, spectral cluster- 2.240 -ing, gene shaving[9], recursive division clustering, multivariate generalizations of edge detectors, 2.241 +ing, gene shaving[10], recursive division clustering, multivariate generalizations of edge detectors, 2.242 multivariate generalizations of watershed transformations, region growing, active contours, graph 2.243 partitioning methods, and recursive agglomerative clustering with various linkage functions. These 2.244 methods can be combined with dimensionality reduction. 2.245 @@ -705,7 +704,7 @@ 2.246 identify spatial regions. It remains to be seen whether removal of redundancy would help or hurt 2.247 the ultimate goal of identifying interesting spatial regions. 2.248 Co-clustering We will explore some algorithms which simultaneously incorporate clustering 2.249 -on instances and on features (in our case, pixels and genes), for example, IRM[11]. 
These are 2.250 +on instances and on features (in our case, pixels and genes), for example, IRM[12]. These are 2.251 called co-clustering or biclustering algorithms. 2.252 Compare different methods In order to tell which method is best for genomic anatomy, for 2.253 each experimental method we will compare the cortical map found by unsupervised learning to a 2.254 @@ -729,7 +728,7 @@ 2.255 dence. There are three ways we will validate our marker genes to guard against this. First, we 2.256 will confirm that putative combinations of marker genes express the same pattern in both hemi- 2.257 spheres. Second, we will manually validate our final results on other gene expression datasets 2.258 -such as EMAGE, GeneAtlas, and GENSAT[8]. Third, we may conduct ISH experiments jointly with 2.259 +such as EMAGE, GeneAtlas, and GENSAT[9]. Third, we may conduct ISH experiments jointly with 2.260 collaborators to get further data on genes of particular interest. 2.261 Using the methods developed in Goal 2, we will present one or more hierarchical cortical 2.262 maps. We will identify and explain how the statistical structure in the gene expression data led to 2.263 @@ -746,8 +745,9 @@ 2.264 2.265 References Cited 2.266 [1] Chris Adamson, Leigh Johnston, Terrie Inder, Sandra Rees, Iven Mareels, and Gary Egan. 2.267 - A Tracking Approach to Parcellation of the Cerebral Cortex, volume 3749/2005 of Lecture 2.268 - Notes in Computer Science, pages 294–301. Springer Berlin / Heidelberg, 2005. 2.269 + A tracking approach to parcellation of the cerebral cortex. In Medical Image Computing 2.270 + and Computer-Assisted Intervention MICCAI 2005, volume 3749/2005 of Lecture Notes in 2.271 + Computer Science, pages 294–301. Springer Berlin / Heidelberg, 2005. 2.272 [2] J. Annese, A. Pitiot, I. D. Dinov, and A. W. Toga. A myelo-architectonic method for the struc- 2.273 tural classification of cortical areas. NeuroImage, 21(1):15–26, 2004. 2.274 [3] Tanya Barrett, Dennis B. Troup, Stephen E. 
Wilhite, Pierre Ledoux, Dmitry Rudnev, Carlos 2.275 @@ -757,34 +757,36 @@ 2.276 [4] George W. Bell, Tatiana A. Yatskievych, and Parker B. Antin. GEISHA, a whole-mount in 2.277 situ hybridization gene expression screen in chicken embryos. Developmental Dynamics, 2.278 229(3):677–687, 2004. 2.279 - [5] James P Carson, Tao Ju, Hui-Chen Lu, Christina Thaller, Mei Xu, Sarah L Pallas, Michael C 2.280 + [5] Thomas Boggs. Spectral python. http://spectralpython.sourceforge.net/, July 2008. 2.281 + [6] James P Carson, Tao Ju, Hui-Chen Lu, Christina Thaller, Mei Xu, Sarah L Pallas, Michael C 2.282 Crair, Joe Warren, Wah Chiu, and Gregor Eichele. A digital atlas to characterize the mouse 2.283 brain transcriptome. PLoS Comput Biol, 1(4):e41, 2005. 2.284 - [6] Mark H. Chin, Alex B. Geng, Arshad H. Khan, Wei-Jun Qian, Vladislav A. Petyuk, Jyl Boline, 2.285 + [7] Mark H. Chin, Alex B. Geng, Arshad H. Khan, Wei-Jun Qian, Vladislav A. Petyuk, Jyl Boline, 2.286 Shawn Levy, Arthur W. Toga, Richard D. Smith, Richard M. Leahy, and Desmond J. Smith. 2.287 A genome-scale map of expression for a mouse brain section obtained using voxelation. 2.288 Physiol. Genomics, 30(3):313–321, August 2007. 2.289 - [7] D C Van Essen, H A Drury, J Dickson, J Harwell, D Hanlon, and C H Anderson. An integrated 2.290 + [8] D C Van Essen, H A Drury, J Dickson, J Harwell, D Hanlon, and C H Anderson. An integrated 2.291 software suite for surface-based analyses of cerebral cortex. Journal of the American Medical 2.292 Informatics Association: JAMIA, 8(5):443–59, 2001. PMID: 11522765. 2.293 - [8] Shiaoching Gong, Chen Zheng, Martin L. Doughty, Kasia Losos, Nicholas Didkovsky, Uta B. 2.294 + [9] Shiaoching Gong, Chen Zheng, Martin L. Doughty, Kasia Losos, Nicholas Didkovsky, Uta B. 2.295 Schambra, Norma J. Nowak, Alexandra Joyner, Gabrielle Leblanc, Mary E. Hatten, and 2.296 Nathaniel Heintz. A gene expression atlas of the central nervous system based on bacte- 2.297 rial artificial chromosomes. 
Nature, 425(6961):917–925, October 2003. 2.298 - [9] Trevor Hastie, Robert Tibshirani, Michael Eisen, Ash Alizadeh, Ronald Levy, Louis Staudt, 2.299 +[10] Trevor Hastie, Robert Tibshirani, Michael Eisen, Ash Alizadeh, Ronald Levy, Louis Staudt, 2.300 Wing Chan, David Botstein, and Patrick Brown. ’Gene shaving’ as a method for identifying dis- 2.301 tinct sets of genes with similar expression patterns. Genome Biology, 1(2):research0003.1– 2.302 research0003.21, 2000. 2.303 -[10] Jano Hemert and Richard Baldock. Matching Spatial Regions with Combinations of Interact- 2.304 - ing Gene Expression Patterns, volume 13 of Communications in Computer and Information 2.305 - Science, pages 347–361. Springer Berlin Heidelberg, 2008. 2.306 -[11] C Kemp, JB Tenenbaum, TL Griffiths, T Yamada, and N Ueda. Learning systems of concepts 2.307 +[11] Jano Hemert and Richard Baldock. Matching spatial regions with combinations of interact- 2.308 + ing gene expression patterns. In Bioinformatics Research and Development, volume 13 of 2.309 + Communications in Computer and Information Science, pages 347–361. Springer Berlin Hei- 2.310 + delberg, 2008. 2.311 + 16 2.312 + 2.313 +[12] C Kemp, JB Tenenbaum, TL Griffiths, T Yamada, and N Ueda. Learning systems of concepts 2.314 with an infinite relational model. In AAAI, 2006. 2.315 -[12] F. Kruggel, M. K. Brückner, Th. Arendt, C. J. Wiggins, and D. Y. von Cramon. Analyzing the 2.316 +[13] F. Kruggel, M. K. Brückner, Th. Arendt, C. J. Wiggins, and D. Y. von Cramon. Analyzing the 2.317 neocortical fine-structure. Medical Image Analysis, 7(3):251–264, September 2003. 2.318 - 16 2.319 - 2.320 -[13] Ed S. Lein, Michael J. Hawrylycz, Nancy Ao, Mikael Ayres, Amy Bensinger, Amy Bernard, 2.321 +[14] Ed S. Lein, Michael J. Hawrylycz, Nancy Ao, Mikael Ayres, Amy Bensinger, Amy Bernard, 2.322 Andrew F. Boe, Mark S. Boguski, Kevin S. Brockway, Emi J. Byrnes, Lin Chen, Li Chen, 2.323 Tsuey-Ming Chen, Mei Chi Chin, Jimmy Chong, Brian E.
Crook, Aneta Czaplinska, Chinh N. 2.324 Dang, Suvro Datta, Nick R. Dee, Aimee L. Desaki, Tsega Desta, Ellen Diep, Tim A. Dolbeare, 2.325 @@ -806,49 +808,49 @@ 2.326 Yaylaoglu, Rob C. Young, Brian L. Youngstrom, Xu Feng Yuan, Bin Zhang, Theresa A. Zwing- 2.327 man, and Allan R. Jones. Genome-wide atlas of gene expression in the adult mouse brain. 2.328 Nature, 445(7124):168–176, 2007. 2.329 -[14] Susan Magdaleno, Patricia Jensen, Craig L. Brumwell, Anna Seal, Karen Lehman, Andrew 2.330 +[15] Susan Magdaleno, Patricia Jensen, Craig L. Brumwell, Anna Seal, Karen Lehman, Andrew 2.331 Asbury, Tony Cheung, Tommie Cornelius, Diana M. Batten, Christopher Eden, Shannon M. 2.332 Norland, Dennis S. Rice, Nilesh Dosooye, Sundeep Shakya, Perdeep Mehta, and Tom Cur- 2.333 ran. BGEM: an in situ hybridization database of gene expression in the embryonic and adult 2.334 mouse nervous system. PLoS Biology, 4(4):e86 EP –, April 2006. 2.335 -[15] Lydia Ng, Amy Bernard, Chris Lau, Caroline C Overly, Hong-Wei Dong, Chihchau Kuan, 2.336 +[16] Lydia Ng, Amy Bernard, Chris Lau, Caroline C Overly, Hong-Wei Dong, Chihchau Kuan, 2.337 Sayan Pathak, Susan M Sunkin, Chinh Dang, Jason W Bohland, Hemant Bokil, Partha P 2.338 Mitra, Luis Puelles, John Hohmann, David J Anderson, Ed S Lein, Allan R Jones, and Michael 2.339 Hawrylycz. An anatomic gene expression atlas of the adult mouse brain. Nat Neurosci, 2.340 12(3):356–362, March 2009. 2.341 -[16] George Paxinos and Keith B.J. Franklin. The Mouse Brain in Stereotaxic Coordinates. Aca- 2.342 +[17] George Paxinos and Keith B.J. Franklin. The Mouse Brain in Stereotaxic Coordinates. Aca- 2.343 demic Press, 2 edition, July 2001. 2.344 -[17] A. Schleicher, N. Palomero-Gallagher, P. Morosan, S. Eickhoff, T. Kowalski, K. Vos, 2.345 +[18] A. Schleicher, N. Palomero-Gallagher, P. Morosan, S. Eickhoff, T. Kowalski, K. Vos, 2.346 K. Amunts, and K. Zilles. Quantitative architectural analysis: a new approach to cortical 2.347 mapping. 
Anatomy and Embryology, 210(5):373–386, December 2005. 2.348 -[18] Oliver Schmitt, Lars Hömke, and Lutz Dümbgen. Detection of cortical transition regions utilizing 2.349 + 17 2.350 + 2.351 +[19] Oliver Schmitt, Lars Hömke, and Lutz Dümbgen. Detection of cortical transition regions utilizing 2.352 statistical analyses of excess masses. NeuroImage, 19(1):42–63, May 2003. 2.353 -[19] S.B. Serpico and L. Bruzzone. A new search algorithm for feature selection in hyperspec- 2.354 +[20] S.B. Serpico and L. Bruzzone. A new search algorithm for feature selection in hyperspec- 2.355 tral remote sensing images. Geoscience and Remote Sensing, IEEE Transactions on, 2.356 39(7):1360–1367, 2001. 2.357 - 17 2.358 - 2.359 -[20] Constance M. Smith, Jacqueline H. Finger, Terry F. Hayamizu, Ingeborg J. McCright, Janan T. 2.360 +[21] Constance M. Smith, Jacqueline H. Finger, Terry F. Hayamizu, Ingeborg J. McCright, Janan T. 2.361 Eppig, James A. Kadin, Joel E. Richardson, and Martin Ringwald. The mouse gene expres- 2.362 sion database (GXD): 2007 update. Nucl. Acids Res., 35(suppl_1):D618–623, 2007. 2.363 -[21] Larry Swanson. Brain Maps: Structure of the Rat Brain. Academic Press, 3 edition, November 2.364 +[22] Larry Swanson. Brain Maps: Structure of the Rat Brain. Academic Press, 3 edition, November 2.365 2003. 2.366 -[22] Carol L. Thompson, Sayan D. Pathak, Andreas Jeromin, Lydia L. Ng, Cameron R. MacPher- 2.367 +[23] Carol L. Thompson, Sayan D. Pathak, Andreas Jeromin, Lydia L. Ng, Cameron R. MacPher- 2.368 son, Marty T. Mortrud, Allison Cusick, Zackery L. Riley, Susan M. Sunkin, Amy Bernard, 2.369 Ralph B. Puchalski, Fred H. Gage, Allan R. Jones, Vladimir B. Bajic, Michael J. Hawrylycz, 2.370 and Ed S. Lein. Genomic anatomy of the hippocampus. Neuron, 60(6):1010–1021, Decem- 2.371 ber 2008.
2.372 -[23] Pavel Tomancak, Amy Beaton, Richard Weiszmann, Elaine Kwan, ShengQiang Shu, 2.373 +[24] Pavel Tomancak, Amy Beaton, Richard Weiszmann, Elaine Kwan, ShengQiang Shu, 2.374 Suzanna E Lewis, Stephen Richards, Michael Ashburner, Volker Hartenstein, Susan E Cel- 2.375 niker, and Gerald M Rubin. Systematic determination of patterns of gene expression during 2.376 drosophila embryogenesis. Genome Biology, 3(12):research0088.1–research0088.14, 2002. PMC151190. 2.377 -[24] Shanmugasundaram Venkataraman, Peter Stevenson, Yiya Yang, Lorna Richardson, 2.378 +[25] Shanmugasundaram Venkataraman, Peter Stevenson, Yiya Yang, Lorna Richardson, 2.379 Nicholas Burton, Thomas P. Perry, Paul Smith, Richard A. Baldock, Duncan R. Davidson, 2.380 and Jeffrey H. Christiansen. EMAGE edinburgh mouse atlas of gene expression: 2008 up- 2.381 date. Nucl. Acids Res., 36(suppl_1):D860–865, 2008. 2.382 -[25] Axel Visel, Christina Thaller, and Gregor Eichele. GenePaint.org: an atlas of gene expression 2.383 +[26] Axel Visel, Christina Thaller, and Gregor Eichele. GenePaint.org: an atlas of gene expression 2.384 patterns in the mouse embryo. Nucl. Acids Res., 32(suppl_1):D552–556, 2004.
2.385 -[26] Robert H Waterston, Kerstin Lindblad-Toh, Ewan Birney, Jane Rogers, Josep F Abril, Pankaj 2.386 +[27] Robert H Waterston, Kerstin Lindblad-Toh, Ewan Birney, Jane Rogers, Josep F Abril, Pankaj 2.387 Agarwal, Richa Agarwala, Rachel Ainscough, Marina Alexandersson, Peter An, Stylianos E 2.388 Antonarakis, John Attwood, Robert Baertsch, Jonathon Bailey, Karen Barlow, Stephan Beck, 2.389 Eric Berry, Bruce Birren, Toby Bloom, Peer Bork, Marc Botcherby, Nicolas Bray, Michael R 2.390 @@ -863,6 +865,8 @@ 2.391 cinda A Fulton, Robert S Fulton, Terrence S Furey, Diane Gage, Richard A Gibbs, Gustavo 2.392 Glusman, Sante Gnerre, Nick Goldman, Leo Goodstadt, Darren Grafham, Tina A Graves, 2.393 Eric D Green, Simon Gregory, Roderic Guig, Mark Guyer, Ross C Hardison, David Haussler, 2.394 + 18 2.395 + 2.396 Yoshihide Hayashizaki, LaDeana W Hillier, Angela Hinrichs, Wratko Hlavina, Timothy Holzer, 2.397 Fan Hsu, Axin Hua, Tim Hubbard, Adrienne Hunt, Ian Jackson, David B Jaffe, L Steven John- 2.398 son, Matthew Jones, Thomas A Jones, Ann Joy, Michael Kamal, Elinor K Karlsson, Donna 2.399 @@ -870,8 +874,6 @@ 2.400 drew Kirby, Diana L Kolbe, Ian Korf, Raju S Kucherlapati, Edward J Kulbokas, David Kulp, 2.401 Tom Landers, J P Leger, Steven Leonard, Ivica Letunic, Rosie Levine, Jia Li, Ming Li, Chris- 2.402 tine Lloyd, Susan Lucas, Bin Ma, Donna R Maglott, Elaine R Mardis, Lucy Matthews, Evan 2.403 - 18 2.404 - 2.405 Mauceli, John H Mayer, Megan McCarthy, W Richard McCombie, Stuart McLaren, Kirsten 2.406 McLay, John D McPherson, Jim Meldrim, Beverley Meredith, Jill P Mesirov, Webb Miller, Tra- 2.407 cie L Miner, Emmanuel Mongin, Kate T Montgomery, Michael Morgan, Richard Mott, James C
3.1 Binary file grant.odt has changed
4.1 Binary file grant.pdf has changed
5.1 --- a/grant.txt Tue Jul 07 14:57:48 2009 -0700 5.2 +++ b/grant.txt Tue Jul 07 15:47:43 2009 -0700 5.3 @@ -266,7 +266,7 @@ 5.4 5.5 %%As noted above, there has been much work in the machine learning literature on both supervised and unsupervised learning and there are many available algorithms for each. However, the algorithms require the scientist to provide a framework for representing the problem domain, and the way that this framework is set up has a large impact on performance. Creating a good framework can require creatively reconceptualizing the problem domain, and is not merely a mechanical "fine-tuning" of numerical parameters. For example, we believe that domain-specific scoring measures (such as gradient similarity, which is discussed in Preliminary Results) may be necessary in order to achieve the best results in this application. So, the project involves more than the blind application of existing machine learning analysis programs to a new dataset. 5.6 5.7 -As noted above, the GIS community has developed tools for supervised classification and unsupervised clustering in the context of the analysis of hyperspectral imaging data. One tool is Spectral Python\footnote{http://spectralpython.sourceforge.net/}. Spectral Python implements various supervised and unsupervised classification methods, as well as utility functions for loading, viewing, and saving spatial data. Although Spectral Python has feature extraction methods (such as principal components analysis) which create a small set of new features computed based on the original features, it does not have feature selection methods, that is, methods to select a small subset out of the original features (although feature selection in hyperspectral imaging has been investigated by others\cite{serpico_new_2001}. %%We intend to extend Spectral Python's reportoire of supervised and unsupervised machine learning methods, as well as to add feature selection methods. 
5.8 +As noted above, the GIS community has developed tools for supervised classification and unsupervised clustering in the context of the analysis of hyperspectral imaging data. One tool is Spectral Python\cite{boggs_spectral_2008}. Spectral Python implements various supervised and unsupervised classification methods, as well as utility functions for loading, viewing, and saving spatial data. Although Spectral Python has feature extraction methods (such as principal components analysis) which create a small set of new features computed based on the original features, it does not have feature selection methods, that is, methods to select a small subset out of the original features (although feature selection in hyperspectral imaging has been investigated by others\cite{serpico_new_2001}). %%We intend to extend Spectral Python's repertoire of supervised and unsupervised machine learning methods, as well as to add feature selection methods. 5.9 5.10 There is a substantial body of work on the analysis of gene expression data. Most of this concerns gene expression data which are not fundamentally spatial\footnote{By "__fundamentally__ spatial" we mean that there is information from a large number of spatial locations indexed by spatial coordinates; not just data which have only a few different locations or which is indexed by anatomical label.}. Here we review only that work which concerns the automated analysis of spatial gene expression data with respect to anatomy. 5.11 5.12 @@ -368,7 +368,7 @@ 5.13 \caption{Upper left: $wwc1$. Upper right: $mtif2$. Lower left: wwc1 + mtif2 (each pixel's value on the lower left is the sum of the corresponding pixels in the upper row).} 5.14 \label{MOcombo}\end{wrapfigure} 5.15 5.16 -We are enthusiastic about the sharing of methods and data, and at the conclusion of the project, we will make all of our data and computer source code publically available, either in supplemental attachments to publications, or on a website.
The source code will be released under the GNU Public License. We intend to include a software program which, when run, will take as input the Allen Brain Atlas raw data, and produce as output all numbers and charts found in publications resulting from the project. Source code to be released will include extensions to Caret\cite{van_essen_integrated_2001}, an existing open-source scientific imaging program, and to Spectral Python. Data to be released will include the 2-D "flat map" dataset. This dataset will be submitted to a machine learning dataset repository. 5.17 +We are enthusiastic about the sharing of methods and data, and at the conclusion of the project, we will make all of our data and computer source code publicly available, either in supplemental attachments to publications, or on a website. The source code will be released under the GNU Public License. We intend to include a software program which, when run, will take as input the Allen Brain Atlas raw data, and produce as output all numbers and charts found in publications resulting from the project. Source code to be released will include extensions to Caret\cite{van_essen_integrated_2001}, an existing open-source scientific imaging program, and to Spectral Python. Data to be released will include the 2-D "flat map" dataset. This dataset will be submitted to a machine learning dataset repository. 5.18 5.19 %% Our goal is that replicating our results, or applying the methods we develop to other targets, will be quick and easy for other investigators. 5.20
6.1 Binary file grantBody.pdf has changed
7.1 Binary file postdoc.odt has changed
8.1 Binary file refs.pdf has changed