Stop words

Publications (208)

more >>

  • 1
    Experiments in 8 European Languages with Hummingbird SearchServer™ at CLEF2002
    specifies a stop file containing typically a couple hundred stop words to not index; the stop file also contains instructions...
  • 2
    First experiments with CLEF
    following add-ons to process the queries: · Stop words We used the stop words supplied by the CLEF project · Stemming....
  • 3
    Dictionary-based Amharic - English Information Retrieval
    based on their IDF value, the other uses a list of English stop words to perform the same task. The resulting translated (English)...
  • 4
    DCU at WikipediaMM 2009: Document Expansion from Wikipedia Abstracts
    For the DBpedia, we use the text as index and remove the stop words computed from itself. An example document from DBpedia...
  • 5
    On the Evaluation of Snippet Selection for Information Retrieval
    system of last year. This bug affects the filtering of stop words, which is a part of the similarity-based centrality ranking...
  • 6
    DCU @ CLEF-IP 2009: Exploring Standard IR Techniques on Patent Retrieval
    words1, 2. Digits, and 3. Field-specific stop words To get the fields stop words, the field frequency for terms is calculated...
  • 7
    Amharic-English Information Retrieval with Pseudo Relevance Feedback
    Term bigrams were then looked up in the dictionaries and stop words were removed from the remaining Amharic query words based...
  • 8
    Charles University at CLEF 2007 Ad-Hoc Track
    , <heading>, <text> Word forms: original + lemmas <em>Stop words</em>: 40 most frequent original forms + 40 most frequent lemmas...</div> </div> </li> <li> <div class="pos">9</div> <div class="list_item"> <a href="/clef/document/dei_unipd_it_documents_71612_86356_CLEF2003wn-adhoc-BrandEt2003/">Océ at CLEF 2003</a> <div class="paper_details">2003 - <a class="minor_link" href="/clef/person/roel_brand/">Roel Brand</a>,<a class="minor_link" href="/clef/person/marvin_brunner/">Marvin Brunner</a>,<a class="minor_link" href="/clef/person/samuel_driessen/">Samuel Driessen</a>,<a class="minor_link" href="/clef/person/pascha_iljin/">Pascha Iljin</a>,<a class="minor_link" href="/clef/person/jakob_klok/">Jakob Klok</a></div> <div class="paper_highlight"> were removed from indexes. We have decided to leave the <em>stop words</em> in the indexes because of the following reasons. It is...</div> </div> </li> <li> <div class="pos">10</div> <div class="list_item"> <a href="/clef/document/dei_unipd_it_documents_71612_85150_CLEF2009wn-ImageCLEF-DemerdashEt2009/">CLaC at ImageCLEF 2009</a> <div class="paper_details">2009 - <a class="minor_link" href="/clef/person/osama_el_demerdash/">Osama El Demerdash</a>,<a class="minor_link" href="/clef/person/sabine_bergler/">Sabine Bergler</a>,<a class="minor_link" href="/clef/person/leila_kosseim/">Leila Kosseim</a></div> <div class="paper_highlight"> Retrieval The text is tokenized and preprocessed by removing <em>stop words</em> (grammatical words which do not contribute to the meaning)...</div> </div> </li> </ul> </div> </div> <div style="clear: both"></div> </div> </div> <div id="page_footer"> <div> <a href="http://www.insight-centre.org/"><img width="81" height="40" id="insight_logo" alt="INSIGHT" src="/clef/static/img/insight.png" /></a> <a href="http://www.nuigalway.ie/"><img width="176" height="50" id="nui_logo" alt="NUI Galway" src="/clef/static/img/nuigalway.png" /></a> <a href="http://www.sfi.ie/"><img width="175" height="75" 
id="sfi_logo" alt="SFI" src="/clef/static/img/sfi.png" /></a> <p>This resource has been funded by Science Foundation Ireland under Grant No. SFI/08/CE/I1380 (Lion-2) and by Grant No. SFI/12/RC/2289 (INSIGHT)</p> <p>INSIGHT - National University of Ireland, Galway</p> </div> </div> </div>
<script type="text/javascript">
// Trend chart for the current topic.
//
// Draws, inside the element with id "trend_chart", one bar per year plus a
// smoothed area/line overlay showing how many papers mention the topic.
// When NORMALISE_COUNTS is true the per-year count is divided by the total
// number of papers of that year (all_paper_stats) and shown as a percentage.
// Clicking a bar navigates to that year's view of the topic; clicking the
// bar of the year currently shown (topic_year) navigates back to the overall
// topic view.
//
// Requires d3 (v3 API: d3.scale.*, d3.svg.*) and jQuery to be loaded before
// this script. All top-level names are intentionally left as globals.

var NORMALISE_COUNTS = true;
var LABEL_OFFSET = NORMALISE_COUNTS ? -47 : -30;
var LEFT_MARGIN = NORMALISE_COUNTS ? 55 : 40;

var margin = {top: 20, bottom: 20, left: LEFT_MARGIN, right: 20},
    width = 830;
var aspectratio = width / 16,
    height = aspectratio * 9; // 16:9

// Inner drawing area: the chart only uses half of the 16:9 height.
width = width - margin.left - margin.right;
height = (height / 2) - margin.top - margin.bottom;

var svg = d3.select("#trend_chart").append("svg")
    .attr("width", width + margin.left + margin.right)
    .attr("height", height + margin.top + margin.bottom)
  .append("g")
    .attr("transform", "translate(" + margin.left + "," + margin.top + ")");

// Number of papers mentioning the topic, per year.
var topics_by_year = {
  "topics_by_year": [
    { "num_papers": 2, "year": 2000 },
    { "num_papers": 9, "year": 2001 },
    { "num_papers": 4, "year": 2002 },
    { "num_papers": 8, "year": 2003 },
    { "num_papers": 13, "year": 2004 },
    { "num_papers": 17, "year": 2005 },
    { "num_papers": 19, "year": 2006 },
    { "num_papers": 21, "year": 2007 },
    { "num_papers": 15, "year": 2008 },
    { "num_papers": 21, "year": 2009 },
    { "num_papers": 15, "year": 2010 },
    { "num_papers": 16, "year": 2011 }
  ]
};
var data = topics_by_year["topics_by_year"];

// Total number of papers per year (normalisation denominator).
var all_paper_stats = {
  "2000": 19,
  "2001": 38,
  "2002": 41,
  "2003": 59,
  "2004": 79,
  "2005": 123,
  "2006": 120,
  "2007": 117,
  "2008": 129,
  "2009": 145,
  "2010": 123,
  "2011": 107
};

// Year currently being viewed; 0 means the overall (all years) topic view.
var topic_year = 0;

if (NORMALISE_COUNTS) {
  for (var i = 0; i < data.length; i++) {
    var total_papers = all_paper_stats[data[i].year];
    // A missing or zero total would yield NaN/Infinity and break the scales.
    data[i].num_papers = total_papers ? data[i].num_papers / total_papers : 0;
  }
}

// Add an explicit zero entry for every year that has papers overall but
// none for this topic, so the x axis has no gaps.
var years = data.map(function (d) { return d.year; });
for (var year_key in all_paper_stats) {
  if (!Object.prototype.hasOwnProperty.call(all_paper_stats, year_key)) {
    continue;
  }
  var year = parseInt(year_key, 10);
  if (years.indexOf(year) === -1) {
    data.push({year: year, num_papers: 0});
  }
}

//Sort by year again
data.sort(function (a, b) {
  return d3.ascending(a.year, b.year);
});

var x = d3.scale.linear().range([0, width]);
var x2 = d3.scale.ordinal().rangeBands([0, width], .15);
var y = d3.scale.linear().range([height, 0]);

var min_year = d3.min(data, function (d) { return d.year; });
var max_year = d3.max(data, function (d) { return d.year; });
var max_value = d3.max(data, function (d) { return d.num_papers; });

x.domain([min_year, max_year]);
y.domain([0, max_value]);
x2.domain(data.map(function (d) { return d.year; }));

//Ugly hack to make the line chart go to the start and end of the graph:
//pad the series with sentinel points pinned to the left and right edges.
var data_line = [].concat(
  [{num_papers: data[0].num_papers, year: 'START'}],
  data,
  [{num_papers: data[data.length - 1].num_papers, year: 'END'}]
);

// Horizontal position of a data_line point: the sentinels map to the chart
// edges, every real year maps to the centre of its bar.
function trendX(d) {
  if (d.year === 'START') {
    return 0;
  }
  if (d.year === 'END') {
    return width;
  }
  return x2(d.year) + x2.rangeBand() / 2;
}

var area = d3.svg.area()
    .interpolate("bundle").tension(0.95)
    .x(trendX)
    .y0(height)
    .y1(function (d) { return y(d.num_papers); });

var line = d3.svg.line()
    .interpolate("bundle").tension(0.95)
    .x(trendX)
    .y(function (d) { return y(d.num_papers); });

// Converts 0-255 colour components to a "#rrggbb" string. Adding 0x1000000
// forces seven hex digits so leading zeros survive; the extra leading "1"
// is then dropped.
function RGB2HTML(red, green, blue) {
  var decColor = 0x1000000 + blue + 0x100 * green + 0x10000 * red;
  return '#' + decColor.toString(16).slice(1);
}

svg.append("path")
    .datum(data_line)
    .attr("class", "area")
    .attr("d", area);

svg.append("path")
    .datum(data_line)
    .attr("class", "line")
    .attr("d", line);

//X Axis
var xAxis = d3.svg.axis()
    .scale(x2)
    .orient("bottom");

//If there are many years, use the short year form e.g. 2006 -> '06
if (data.length > 25) {
  xAxis.tickFormat(function (n) {
    return "'" + ('' + n).substring(2);
  });
}

svg.append('g')
    .attr('class', 'x axis')
    .attr('transform', 'translate(0,' + height + ')')
    .call(xAxis);

//Y Axis
var format = ".1%";
var yAxis = d3.svg.axis()
    .scale(y)
    .orient('left')
    .ticks(8);

if (NORMALISE_COUNTS) {
  yAxis.tickFormat(d3.format(format));
} else {
  yAxis.tickFormat(d3.format("d"));
}

svg.append("g")
    .attr("class", "y axis")
    .call(yAxis)
  .append("text")
    .attr("transform", "rotate(-90)")
    .attr("y", LABEL_OFFSET)
    .attr("x", "-2.71em")
    .style("text-anchor", "end")
    // The axis label text is taken from the #tmplDocFreq element of the page.
    .text($('#tmplDocFreq').text());

var defaultColor = "#4682B4"; // steelblue
var clickedColor = "#C6DAF2";

// True when the bar's year is the year currently being viewed. Decided from
// the data rather than from the rendered fill colour, whose string form
// depends on how the browser serialises it.
function isSelectedYear(d) {
  return Number(d.year) === Number(topic_year);
}

svg.selectAll('.bar').data(data).enter().append('rect')
    .attr('class', 'bar')
    .attr('x', function (d) { return x2(d.year); })
    .attr('width', x2.rangeBand())
    .attr('y', function (d) { return y(d.num_papers); })
    .attr('height', function (d) { return height - y(d.num_papers); })
    .style('fill', function (d) {
      return isSelectedYear(d) ? clickedColor : defaultColor;
    })
    .on("click", function (d) {
      var clickedYear = d.year;
      var href = window.location.href;
      // NOTE(review): assumes a year view's URL is the topic URL followed by
      // exactly five characters (presumably "YYYY/") - confirm against the
      // server's URL scheme. Query strings or fragments are not handled.
      var topicUrl = Number(topic_year) === 0
          ? href
          : href.substring(0, href.length - 5);
      var redirectUrl;

      if (isSelectedYear(d)) {
        //change the view back to overall topic
        d3.select(this).style("fill", defaultColor);
        redirectUrl = topicUrl;
      } else {
        //change the view per year
        d3.selectAll("rect").style("fill", defaultColor);
        d3.select(this).style("fill", clickedColor);
        redirectUrl = topicUrl + clickedYear;
      }

      window.location.replace(redirectUrl);
    });
</script> </body> </html>