% Conference paper by Hirose and Suzuki in an edited LNCS proceedings volume.
% Typed as inproceedings (not inbook) so that styles print the paper title,
% the proceedings title and the volume editors in their proper roles.
% The citation key is the exporter's opaque id; it is kept unchanged so that
% existing citations keep resolving.
% NOTE(review): booktitle and volume were not present in the exported record
% (its booktitle merely repeated the series name); confirm both against the
% DOI landing page before relying on them.
% address is the publisher's city; the export only carried the country.
@inproceedings{5fff2300c3914af8a29386a9446f73de,
  author    = {Hirose, Masayuki and Suzuki, Einoshin},
  title     = {Using {WWW}-distribution of words in detecting peculiar web pages},
  editor    = {Suzuki, Einoshin and Arikawa, Setsuo},
  booktitle = {Discovery Science: 7th International Conference, {DS} 2004, Padova, Italy, October 2--5, 2004, Proceedings},
  series    = {Lecture Notes in Computer Science},
  volume    = {3245},
  pages     = {355--362},
  publisher = {Springer},
  address   = {Berlin, Heidelberg},
  year      = {2004},
  doi       = {10.1007/978-3-540-30214-8_31},
  isbn      = {9783540233572},
  language  = {English},
  abstract  = {In this paper, we propose TFIGF, a method which detects peculiar web pages using distribution of words in WWW given a set of keywords. Our TFIGF detects a set of index words which represent a WWW page by estimating their importance in the WWW page and their rareness in WWW. Experiments using both English and Japanese WWW pages clearly show superiority of our approach over a traditional method which employs a limited number of WWW pages in the estimation.},
}