% Survey article. The DOI is stored bare (no resolver URL) so that the
% bibliography style decides how to link it.
@article{Elmagarmid07,
  author    = {Elmagarmid, Ahmed K. and Ipeirotis, Panagiotis G. and Verykios, Vassilios S.},
  title     = {Duplicate Record Detection: A Survey},
  journal   = {IEEE Transactions on Knowledge and Data Engineering},
  volume    = {19},
  number    = {1},
  pages     = {1--16},
  year      = {2007},
  issn      = {1041-4347},
  doi       = {10.1109/TKDE.2007.9},
  publisher = {IEEE Educational Activities Department},
  address   = {Piscataway, NJ, USA},
}

% Accented letters in names are written as braced LaTeX accent commands
% ({\'e}, {\c{c}}) so that 8-bit BibTeX treats each as a single letter.
@inproceedings{Carvalho08b,
  author    = {de Carvalho, Mois{\'e}s G. and Laender, Alberto H. F. and
               Gon{\c{c}}alves, Marcos Andr{\'e} and Porto, Thiago C.},
  title     = {The Impact of Parameter Setup on a Genetic Programming Approach to
               Record Deduplication},
  booktitle = {Proceedings of the 23rd Brazilian Symposium on Databases},
  pages     = {91--105},
  address   = {Campinas, SP, Brazil},
  year      = {2008},
}

% Accented letters use braced LaTeX accent commands; the DOI is stored bare
% (no resolver URL) so the bibliography style can format the link.
@inproceedings{Carvalho06,
  author    = {de Carvalho, Mois{\'e}s G. and Gon{\c{c}}alves, Marcos Andr{\'e} and
               Laender, Alberto H. F. and da Silva, Altigran S.},
  title     = {Learning to Deduplicate},
  booktitle = {Proceedings of the Sixth ACM/IEEE-CS Joint Conference on Digital Libraries},
  pages     = {41--50},
  address   = {Chapel Hill, NC, USA},
  year      = {2006},
  isbn      = {1-59593-354-9},
  doi       = {10.1145/1141753.1141760},
}

% Journal article. The DOI is stored bare (no resolver URL).
@article{BilenkoEtAl03,
  author    = {Bilenko, Mikhail and Mooney, Raymond and Cohen, William and
               Ravikumar, Pradeep and Fienberg, Stephen},
  title     = {Adaptive Name Matching in Information Integration},
  journal   = {IEEE Intelligent Systems},
  volume    = {18},
  number    = {5},
  pages     = {16--23},
  year      = {2003},
  issn      = {1541-1672},
  doi       = {10.1109/MIS.2003.1234765},
  publisher = {IEEE Educational Activities Department},
  address   = {Piscataway, NJ, USA},
}

% Conference paper. The DOI is stored bare (no resolver URL).
@inproceedings{BilenkoMooney03,
  author    = {Bilenko, Mikhail and Mooney, Raymond J.},
  title     = {Adaptive Duplicate Detection Using Learnable String Similarity Measures},
  booktitle = {Proceedings of the Ninth ACM SIGKDD International Conference on
               Knowledge Discovery and Data Mining},
  pages     = {39--48},
  address   = {Washington, DC, USA},
  year      = {2003},
  isbn      = {1-58113-737-0},
  doi       = {10.1145/956750.956759},
}

% Textbook on genetic programming (four authors, Morgan Kaufmann, 1998).
@book{Banzhaf98,
  author    = {Banzhaf, Wolfgang and Francone, Frank D. and Keller, Robert E. and Nordin, Peter},
  title     = {Genetic Programming: An Introduction on the Automatic Evolution of Computer Programs and Its Applications},
  publisher = {Morgan Kaufmann Publishers Inc.},
  address   = {San Francisco, CA, USA},
  year      = {1998},
  isbn      = {1-55860-510-X},
}
 
% Industry white paper. The kind of report goes in `type`, so `institution`
% names only the issuing organisation.
@techreport{BellDravis06,
  author      = {Bell, Royce and Dravis, Frank},
  title       = {Is Your Data Dirty?: (And Does That Matter?)},
  type        = {White Paper},
  institution = {Accenture},
  year        = {2006},
  note        = {Available at http://www.accenture.com},
}
 
% Workshop paper on iterative record linkage (2004).
@inproceedings{Bhattacharya04,
  author    = {Bhattacharya, Indrajit and Getoor, Lise},
  title     = {Iterative Record Linkage for Cleaning and Integration},
  booktitle = {Proceedings of the Ninth ACM SIGMOD Workshop on Research Issues in Data Mining and Knowledge Discovery},
  pages     = {11--18},
  address   = {Paris, France},
  year      = {2004},
}
 
% Conference paper. The DOI is stored bare (no resolver URL).
@inproceedings{Chaudhuri03,
  author    = {Chaudhuri, Surajit and Ganjam, Kris and Ganti, Venkatesh and Motwani, Rajeev},
  title     = {Robust and Efficient Fuzzy Match for Online Data Cleaning},
  booktitle = {Proceedings of the 2003 ACM SIGMOD International Conference on Management of Data},
  pages     = {313--324},
  address   = {San Diego, CA, USA},
  year      = {2003},
  isbn      = {1-58113-634-X},
  doi       = {10.1145/872757.872796},
}
 
% Accented letters use braced LaTeX accent commands; the DOI is stored bare
% (no resolver URL) so the bibliography style can format the link.
@inproceedings{Carvalho08a,
  author    = {de Carvalho, Mois{\'e}s G. and Laender, Alberto H. F. and
               Gon{\c{c}}alves, Marcos Andr{\'e} and da Silva, Altigran S.},
  title     = {Replica Identification Using Genetic Programming},
  booktitle = {Proceedings of the 2008 ACM Symposium on Applied Computing},
  pages     = {1801--1806},
  address   = {Fortaleza, CE, Brazil},
  year      = {2008},
  isbn      = {978-1-59593-753-7},
  doi       = {10.1145/1363686.1364118},
}

% Journal article. Bibliographic fields come first; the citeulike-* and
% posted-at/priority fields are import metadata that standard styles ignore.
@article{Fellegi69,
  author               = {Fellegi, Ivan P. and Sunter, Alan B.},
  title                = {A Theory for Record Linkage},
  journal              = {Journal of the American Statistical Association},
  volume               = {64},
  number               = {328},
  pages                = {1183--1210},
  year                 = {1969},
  doi                  = {10.2307/2286061},
  url                  = {http://dx.doi.org/10.2307/2286061},
  keywords             = {record\_linkage},
  citeulike-article-id = {590229},
  citeulike-linkout-0  = {http://dx.doi.org/10.2307/2286061},
  citeulike-linkout-1  = {http://www.jstor.org/stable/2286061},
  posted-at            = {2006-04-18 08:45:58},
  priority             = {2},
}

% Conference paper. The DOI is stored bare (no resolver URL).
@inproceedings{Koudas06,
  author    = {Koudas, Nick and Sarawagi, Sunita and Srivastava, Divesh},
  title     = {Record Linkage: Similarity Measures and Algorithms},
  booktitle = {Proceedings of the 2006 ACM SIGMOD International Conference on Management of Data},
  pages     = {802--803},
  address   = {Chicago, IL, USA},
  year      = {2006},
  isbn      = {1-59593-434-0},
  doi       = {10.1145/1142473.1142599},
}
 
% Book. The given name is stored in full so the style decides whether to
% abbreviate; the address follows the "City, ST, USA" form used across this file.
% The citeulike-*/posted-at/priority fields are import metadata that standard
% styles ignore.
@book{Koza92,
  author               = {Koza, John R.},
  title                = {Genetic Programming: On the Programming of Computers By Means of Natural Selection},
  publisher            = {The MIT Press},
  address              = {Cambridge, MA, USA},
  year                 = {1992},
  keywords             = {bibtex-import},
  citeulike-article-id = {1505719},
  posted-at            = {2007-07-26 20:53:07},
  priority             = {0},
}
 
% Journal article. Verykios is spelled as in the Elmagarmid07 entry so the
% same person renders identically; {Bayesian} is braced to survive sentence
% casing; the DOI is stored bare (no resolver URL).
% NOTE(review): the expanded given names of Moustakides and Elfeky are not
% shown elsewhere in this file -- confirm against the paper.
@article{Verykios03,
  author    = {Verykios, Vassilios S. and Moustakides, George V. and Elfeky, Mohamed G.},
  title     = {A {Bayesian} Decision Model for Cost Optimal Record Matching},
  journal   = {The VLDB Journal},
  volume    = {12},
  number    = {1},
  pages     = {28--40},
  year      = {2003},
  issn      = {1066-8888},
  doi       = {10.1007/s00778-002-0072-y},
  publisher = {Springer-Verlag New York, Inc.},
  address   = {Secaucus, NJ, USA},
}
 
% The month uses the predefined BibTeX macro (unquoted) so styles can
% localise or abbreviate it; the author's initial carries its period.
% `pubcat` is not a standard field and is ignored by standard styles.
@techreport{Wheatley04,
  author      = {Wheatley, M.},
  title       = {Operation Clean Data},
  institution = {CIO Asia Magazine},
  month       = aug,
  year        = {2004},
  note        = {Available at http://www.cio-asia.com},
  pubcat      = {techreport},
}

% Book. The month uses the predefined BibTeX macro (unquoted). Bibliographic
% fields come first; the remaining fields are import metadata that standard
% styles ignore.
@book{Jain91,
  author               = {Jain, R. K.},
  title                = {The Art of Computer Systems Performance Analysis: Techniques for
                          Experimental Design, Measurement, Simulation, and Modeling},
  publisher            = {Wiley},
  address              = {New York, NY, USA},
  month                = apr,
  year                 = {1991},
  isbn                 = {0471503363},
  url                  = {http://www.amazon.com/exec/obidos/redirect?tag=citeulike07-20&path=ASIN/0471503363},
  keywords             = {allocation, resource},
  day                  = {30},
  howpublished         = {Hardcover},
  citeulike-article-id = {5190414},
  posted-at            = {2009-07-17 09:36:14},
  priority             = {2},
}

% Paper in a conference volume, so the venue is a booktitle rather than a
% journal name.
% NOTE(review): series and publisher are inferred from the 10.1007 DOI and the
% volume number (same pattern as the Christen05 parent volume) -- confirm.
@inproceedings{GuBaxter06,
  author               = {Gu, Lifang and Baxter, Rohan},
  title                = {Decision Models for Record Linkage},
  booktitle            = {Selected Papers from Australasian Data Mining Conference},
  series               = {Lecture Notes in Computer Science},
  volume               = {3755},
  pages                = {146--160},
  publisher            = {Springer},
  year                 = {2006},
  doi                  = {10.1007/11677437_12},
  url                  = {http://dx.doi.org/10.1007/11677437_12},
  citeulike-article-id = {2200851},
  posted-at            = {2008-01-06 20:31:06},
  priority             = {2},
}

% Conference paper that inherits missing fields from its crossref parent
% (DBLP:conf/ideal/2005); classic BibTeX requires that parent to appear later
% in the file. The page range uses an en-dash (--), and the DOI from the `ee`
% link is also given as a bare `doi`.
@inproceedings{Christen05,
  author    = {Christen, Peter},
  title     = {Probabilistic Data Generation for Deduplication and Data Linkage},
  booktitle = {Proceedings of Intelligent Data Engineering and Automated Learning},
  pages     = {109--116},
  year      = {2005},
  doi       = {10.1007/11508069_15},
  ee        = {http://dx.doi.org/10.1007/11508069_15},
  crossref  = {DBLP:conf/ideal/2005},
  bibsource = {DBLP, http://dblp.uni-trier.de},
}

% Proceedings volume used as a crossref parent; classic BibTeX needs it to
% come after every entry that crossrefs it.
@proceedings{DBLP:conf/ideal/2005,
  editor    = {Gallagher, Marcus and Hogan, James M. and Maire, Fr{\'e}d{\'e}ric},
  title     = {Intelligent Data Engineering and Automated Learning - IDEAL 2005,
               6th International Conference, Brisbane, Australia, July 6-8, 2005,
               Proceedings},
  booktitle = {IDEAL},
  series    = {Lecture Notes in Computer Science},
  volume    = {3578},
  publisher = {Springer},
  year      = {2005},
  isbn      = {3-540-26972-X},
  bibsource = {DBLP, http://dblp.uni-trier.de},
}

% Information retrieval textbook (Addison-Wesley, 1999).
@book{BaezaBerthier99,
  author    = {Baeza-Yates, Ricardo A. and Ribeiro-Neto, Berthier},
  title     = {Modern Information Retrieval},
  publisher = {Addison-Wesley Longman Publishing Co., Inc.},
  address   = {Boston, MA, USA},
  year      = {1999},
  isbn      = {020139829X},
}
 
% Journal article. Names use the unambiguous "Last, First" form with a space
% between initials, so abbreviating styles keep every initial; the month uses
% the predefined BibTeX macro (unquoted).
@article{Newcombe59,
  author   = {Newcombe, Howard B. and Kennedy, James M. and Axford, S. J. and James, A. P.},
  title    = {Automatic Linkage of Vital Records},
  journal  = {Science},
  volume   = {130},
  number   = {3381},
  pages    = {954--959},
  month    = oct,
  year     = {1959},
  biburl   = {http://www.bibsonomy.org/bibtex/2613c244f0c03d065f214a0fb6a886f48/pirot},
  keywords = {imported},
}

% Conference paper. The DOI is stored bare (no resolver URL).
@inproceedings{Tejada02,
  author    = {Tejada, Sheila and Knoblock, Craig A. and Minton, Steven},
  title     = {Learning Domain-Independent String Transformation Weights for High Accuracy Object Identification},
  booktitle = {Proceedings of the Eighth ACM SIGKDD International Conference on Knowledge Discovery and Data Mining},
  pages     = {350--359},
  address   = {Edmonton, AB, Canada},
  year      = {2002},
  isbn      = {1-58113-567-X},
  doi       = {10.1145/775047.775099},
}

% Journal article. The DOI is stored bare (no resolver URL); the publisher
% address names the country once.
@article{Tejada01,
  author    = {Tejada, Sheila and Knoblock, Craig A. and Minton, Steven},
  title     = {Learning Object Identification Rules for Information Integration},
  journal   = {Information Systems},
  volume    = {26},
  number    = {8},
  pages     = {607--633},
  year      = {2001},
  issn      = {0306-4379},
  doi       = {10.1016/S0306-4379(01)00042-4},
  publisher = {Elsevier Science Ltd.},
  address   = {Oxford, UK},
}

% Journal article. The DOI is stored bare (no resolver URL).
@article{Cohen00,
  author    = {Cohen, William W.},
  title     = {Data Integration Using Similarity Joins and a Word-Based Information Representation Language},
  journal   = {ACM Transactions on Information Systems},
  volume    = {18},
  number    = {3},
  pages     = {288--321},
  year      = {2000},
  issn      = {1046-8188},
  doi       = {10.1145/352595.352598},
  publisher = {ACM},
  address   = {New York, NY, USA},
}

% Workshop paper. The title is stored in Title Case, including the first word
% after the colon, matching the other titles in this file.
@inproceedings{Christen08,
  author    = {Christen, Peter},
  title     = {Febrl: A Freely Available Record Linkage System with a Graphical User Interface},
  booktitle = {Proceedings of the Second Australasian Workshop on Health Data and Knowledge Management},
  pages     = {17--25},
  address   = {Wollongong, NSW, Australia},
  year      = {2008},
  isbn      = {978-1-920682-61-3},
}

% Portuguese-language conference paper. Every accented letter is a braced
% LaTeX accent command so 8-bit BibTeX handles it as one letter; names use the
% "von Last, First" form; the page range uses an en-dash (--).
@inproceedings{Goncalves09,
  author    = {Gon{\c{c}}alves, Gabriel Silva and
               de Carvalho, Mois{\'e}s G. and
               Laender, Alberto H. F. and
               Gon{\c{c}}alves, Marcos Andr{\'e}},
  title     = {Sele{\c{c}}{\~a}o Autom{\'a}tica de Exemplos de Treino
               para um M{\'e}todo de Deduplica{\c{c}}{\~a}o de Registros
               baseado em Programa{\c{c}}{\~a}o Gen{\'e}tica},
  booktitle = {XXIV Simp{\'o}sio Brasileiro de Banco de Dados},
  pages     = {76--90},
  address   = {Fortaleza, CE, Brasil},
  year      = {2009},
  ee        = {http://www.lbd.dcc.ufmg.br:8080/colecoes/sbbd/2009/006.pdf},
}

% Book on text classification with support vector machines (Kluwer, 2002).
@book{Joachims02,
  author    = {Joachims, Thorsten},
  title     = {Learning to Classify Text Using Support Vector Machines: Methods, Theory and Algorithms},
  publisher = {Kluwer Academic Publishers},
  address   = {Norwell, MA, USA},
  year      = {2002},
  isbn      = {079237679X},
}

% Magazine article. The DOI is stored bare (no resolver URL).
@article{Geer08,
  author    = {Geer, David},
  title     = {Reducing the Storage Burden via Data Deduplication},
  journal   = {IEEE Computer},
  volume    = {41},
  number    = {12},
  pages     = {15--17},
  year      = {2008},
  issn      = {0018-9162},
  doi       = {10.1109/MC.2008.502},
  publisher = {IEEE Computer Society Press},
  address   = {Los Alamitos, CA, USA},
}
 
% Technical report from the U.S. Census Bureau's Statistical Research Division (1999).
@techreport{Winkler99,
  author      = {Winkler, William E.},
  title       = {The State of Record Linkage and Current Research Problems},
  institution = {Statistical Research Division, U.S. Census Bureau},
  year        = {1999},
}

% Book on automatic text processing (Addison-Wesley, 1989).
@book{Salton89,
  author    = {Salton, Gerard},
  title     = {Automatic Text Processing: The Transformation, Analysis, and Retrieval of Information by Computer},
  publisher = {Addison-Wesley Longman Publishing Co., Inc.},
  address   = {Boston, MA, USA},
  year      = {1989},
  isbn      = {0-201-12227-8},
}

% Workshop paper. The DOI is stored bare (no resolver URL). `location` holds
% the event venue and `address` the publisher's city, as in the original entry.
@inproceedings{CarvalhoS03,
  author    = {Carvalho, Joyce C. P. and da Silva, Altigran S.},
  title     = {Finding Similar Identities among Objects from Multiple Web Sources},
  booktitle = {Proceedings of the Fifth ACM International Workshop on Web Information and Data Management},
  pages     = {90--93},
  location  = {New Orleans, Louisiana, USA},
  year      = {2003},
  isbn      = {1-58113-725-7},
  doi       = {10.1145/956699.956719},
  publisher = {ACM},
  address   = {New York, NY, USA},
}