using System;
using System.IO;
using System.Collections;
using System.Xml.XPath;

namespace blurxml
{
    //////////////////////////////////////////////////////////////////////////////////////////////////////////
    // HEADLINE CLASS
    //////////////////////////////////////////////////////////////////////////////////////////////////////////

    /// <summary>
    /// Represents a newsfeed headline: the story title and the URL it links to.
    /// </summary>
    public class Headline
    {
        private string title;
        private string link;

        /// <summary>
        /// Creates an empty headline; <see cref="Title"/> and <see cref="Link"/> start out null.
        /// </summary>
        public Headline( )
        {
        }

        /// <summary>
        /// Creates a headline with the given title and link.
        /// </summary>
        /// <param name="title">Story title.</param>
        /// <param name="link">Story URL.</param>
        public Headline( string title, string link )
        {
            this.title = title;
            this.link = link;
        }

        /// <summary>URL of the story.</summary>
        public string Link
        {
            get { return link; }
            set { link = value; }
        }

        /// <summary>Title of the story.</summary>
        public string Title
        {
            get { return title; }
            set { title = value; }
        }
    }//Headline class

    //////////////////////////////////////////////////////////////////////////////////////////////////////////
    // NEWSFEED STRUCT
    //////////////////////////////////////////////////////////////////////////////////////////////////////////

    /// <summary>
    /// Represents a newsfeed: where to fetch it from and how to pick headlines out of it.
    /// </summary>
    public struct Newsfeed
    {
        private string siteurl;
        private string link;
        private string title;
        private string xpath;

        /// <summary>
        /// Creates a newsfeed description.
        /// </summary>
        /// <param name="url">Location of the feed document.</param>
        /// <param name="title">Name of the element that holds a story's title.</param>
        /// <param name="link">Name of the element that holds a story's URL.</param>
        /// <param name="xpath">XPath expression selecting one node per story.</param>
        public Newsfeed( string url, string title, string link, string xpath )
        {
            this.siteurl = url;
            this.title = title;
            this.link = link;
            this.xpath = xpath;
        }

        /// <summary>Location of the feed document.</summary>
        public string Url
        {
            get { return siteurl; }
            set { siteurl = value; }
        }

        /// <summary>Name of the element that holds a story's URL.</summary>
        public string Link
        {
            get { return link; }
            set { link = value; }
        }

        /// <summary>Name of the element that holds a story's title.</summary>
        public string Title
        {
            get { return title; }
            set { title = value; }
        }

        /// <summary>XPath expression selecting one node per story.</summary>
        public string XPath
        {
            get { return xpath; }
            set { xpath = value; }
        }
    }//struct newsfeed

    //////////////////////////////////////////////////////////////////////////////////////////////////////////
    // HEADLINEFILTER CLASS
    //////////////////////////////////////////////////////////////////////////////////////////////////////////

    /// <summary>
    /// Operates on Headline arrays and maintains an internal list of the already
    /// processed headlines, so that only unseen headlines are reported.
    /// </summary>
    public class HeadlineFilter
    {
        // The last array of headlines we looked at (null until the filter has seen one).
        private Headline[] current;

        /// <summary>
        /// Creates a filter that treats <paramref name="seed"/> as already seen.
        /// </summary>
        /// <param name="seed">Headlines already processed; may be null for an unseeded filter.</param>
        public HeadlineFilter( Headline[] seed )
        {
            // Keep a private copy so later changes to the caller's array cannot alter our state.
            current = ( seed == null ) ? null : (Headline[]) seed.Clone( );
        }

        /// <summary>
        /// Returns the headlines in <paramref name="newest"/> whose title does not occur in the
        /// previously remembered array, then remembers <paramref name="newest"/> for the next call.
        /// </summary>
        /// <param name="newest">Most recently fetched headlines.</param>
        /// <returns>
        /// The unseen headlines, in their original order. If the filter has nothing remembered
        /// yet, <paramref name="newest"/> itself is returned. A null <paramref name="newest"/>
        /// yields an empty array and leaves the remembered headlines untouched.
        /// </returns>
        public Headline[] filter_for_new( Headline[] newest )
        {
            if( newest == null )
            {
                // Nothing was fetched: report nothing new and keep the previous state.
                return new Headline[ 0 ];
            }

            if( current == null )
            {
                current = (Headline[]) newest.Clone( );
                Console.WriteLine("current was null");
                return newest;
            }

            ArrayList fresh = new ArrayList( newest.Length ); //worst-case: all are new
            foreach( Headline candidate in newest )
            {
                // Null slots carry no headline, so there is nothing to report for them.
                if( candidate != null && !is_known( candidate.Title ) )
                {
                    fresh.Add( candidate );
                }
            }

            //now *this* is the last array of Headlines we looked at
            current = (Headline[]) newest.Clone( );
            return (Headline[]) fresh.ToArray( typeof( Headline ) );
        }

        // Tells whether a headline with exactly this title is among the remembered ones.
        private bool is_known( string title )
        {
            foreach( Headline seen in current )
            {
                // Ordinal, null-safe equality: titles are identifiers here, not text to be collated.
                if( seen != null && String.Equals( seen.Title, title ) )
                {
                    return true;
                }
            }
            return false;
        }
    }

    //////////////////////////////////////////////////////////////////////////////////////////////////////////
    // RSSREADER CLASS
    //////////////////////////////////////////////////////////////////////////////////////////////////////////

    /// <summary>
    /// Reads in and operates on the RSS XML feed. Carries a few predefined feeds as public fields.
    /// </summary>
    public class RSSReader
    {
        public Newsfeed slashdot;
        public Newsfeed wirednews;
        public Newsfeed anandtech;
        public Newsfeed newsforge;

        /// <summary>
        /// Creates a reader and fills in the predefined feeds.
        /// </summary>
        public RSSReader( )
        {
            //testing...
            //slashdot = new Newsfeed( "http://spinlock/slashdot.xml",
            //                         "title","url","/backslash/story" );
            slashdot = new Newsfeed( "http://slashdot.org/slashdot.xml",
                                     "title","url","/backslash/story" );
            wirednews = new Newsfeed( "http://www.wired.com/news_drop/netcenter/netcenter.rdf",
                                      "title","link","/rss/channel/item" );
            anandtech = new Newsfeed( "http://www.newsisfree.com/HPE/xml/feeds/61/161.xml",
                                      "title","link","/rss/channel/item" );
            newsforge = new Newsfeed( "http://newsforge.com/newsforge.xml",
                                      "title","url","/backslash/story" );
        }

        /// <summary>
        /// Builds a headline from one story node by scanning its descendant elements for the
        /// feed's title and link element names.
        /// </summary>
        /// <param name="nav">Navigator positioned on the story node.</param>
        /// <param name="feed">Feed description supplying the element names to look for.</param>
        /// <returns>
        /// A headline whose Title/Link hold the value of the last matching element;
        /// a property stays null when no element of that name is found.
        /// </returns>
        /// <exception cref="ArgumentNullException"><paramref name="nav"/> is null.</exception>
        public Headline grab_headline_from_node( XPathNavigator nav , Newsfeed feed )
        {
            if( nav == null )
            {
                throw new ArgumentNullException( "nav" );
            }

            Headline headline = new Headline( );
            XPathNodeIterator iter = nav.SelectDescendants( XPathNodeType.Element, false );
            while( iter.MoveNext() )
            {
                string name = iter.Current.Name;
                if( name == feed.Title )
                {
                    headline.Title = iter.Current.Value;
                }
                // Deliberately not 'else': a feed may use one element name for both roles.
                if( name == feed.Link )
                {
                    headline.Link = iter.Current.Value;
                }
            }
            return headline;
        }

        /// <summary>
        /// Loads the feed document and returns one headline per node selected by the feed's XPath.
        /// </summary>
        /// <param name="feed">Feed to load.</param>
        /// <returns>
        /// The headlines read. Loading is best-effort: if anything fails, the error message is
        /// written to the console and the headlines gathered so far (possibly none) are returned.
        /// </returns>
        public Headline[] process_feed( Newsfeed feed )
        {
            // Growable list: the former fixed buffer of 15 silently truncated larger feeds.
            ArrayList headlines = new ArrayList( );
            try
            {
                XPathDocument xpdoc = new XPathDocument( feed.Url );
                XPathNavigator nav = xpdoc.CreateNavigator( );
                XPathNodeIterator iter = nav.Select( feed.XPath );
                while( iter.MoveNext() )
                {
                    headlines.Add( grab_headline_from_node( iter.Current, feed ) );
                }
            }
            catch( Exception ex )
            {
                Console.WriteLine( ex.Message );
            }
            return (Headline[]) headlines.ToArray( typeof( Headline ) );
        }

        /*
        public static void Main( )
        {
            // usage:
            //   create a Headline array to store the headlines
            //   create an RSSReader instance
            //   call process_feed on any of the built-in websites, and grab the returned array
            //   you can check how many headlines you have by calling .Length on the array
            //
            Headline[] hlns;
            RSSReader xrr = new RSSReader( );   // need one RSSReader
            HeadlineFilter filter;

            Newsfeed feed = new Newsfeed("c:\\program files\\bsnews\\headlines.xml",
                                         "title","link","/headlines/headline");
            hlns = xrr.process_feed( feed );
            foreach( Headline h in hlns )
                Console.WriteLine(h.Title+" :: "+h.Link );

            // EXAMPLE 1 : SLASHDOT
            Console.WriteLine("//// Slashdot ////");
            // THE ONLY PARTS THAT CHANGE:
            hlns = xrr.process_feed( xrr.slashdot );   // process slashdot
            Console.WriteLine( hlns.Length.ToString() );
            filter = new HeadlineFilter( hlns );
            foreach( Headline h in hlns )
                Console.WriteLine(h.Title+" :: "+h.Link );

            //NOW FILTER THE RESULTS (we're checking again soon, so they'll prolly be the same)
            //now filter slashdot by checking the website again and using it as input to the filter
            Headline[] filtered = filter.filter_for_new( xrr.process_feed(xrr.slashdot));
            foreach( Headline h in filtered )
                Console.WriteLine(h.Title+" "+h.Link);

            // EXAMPLE 2 : ANANDTECH
            Console.WriteLine("//// Anandtech News ////");
            hlns = xrr.process_feed( xrr.anandtech );   // process anandtech
            Console.WriteLine( hlns.Length.ToString() );
            foreach( Headline h in hlns )
                Console.WriteLine(h.Title+" :: "+h.Link );

            // EXAMPLE 3 : NEWSFORGE
            Console.WriteLine("//// Newsforge ////");
            // process Newsforge
            hlns = xrr.process_feed( xrr.newsforge );
            Console.WriteLine( hlns.Length.ToString() );
            foreach( Headline h in hlns )
                Console.WriteLine(h.Title+" :: "+h.Link );
        }
        */
    }
}//blurxml namespace