/** \file
A program to read in the NYT's headline feed and produce a simple HTML
page from the headlines. */

#include <stdio.h>
#include <curl/curl.h>
#include <libxml/xpath.h>
#include "stopif.h"

/** \mainpage
The front page of the Grey Lady's web site is as gaudy as can be, including
several headlines and sections trying to get your attention, various
formatting schemes, and even photographs--in color. This program reads in
the NYT Headlines RSS feed, and writes a simple list in plain HTML. You can
then click through to the headline that modestly piques your attention.

For notes on compilation, see the \ref compilation page.
*/

/** \page compilation Compiling the program

Save the following code to \c makefile.

Notice that cURL has a program, \c curl-config, that behaves like
\c pkg-config, but is cURL-specific.

\code
CFLAGS =-g -Wall -O3 `curl-config --cflags` -I/usr/include/libxml2
LDLIBS=`curl-config --libs ` -lxml2
CC=c99
nyt_feed:
\endcode

Having saved your makefile, use <tt>make nyt_feed</tt> to compile.

Of course, you have to have the development packages for libcurl and
libxml2 installed for this to work.
*/

//These have in-line Doxygen documentation. The < points to the prior text
//being documented.
char *rss_url = "http://rss.nytimes.com/services/xml/rss/nyt/HomePage.xml";
                                     /**< The URL for an NYT RSS. */
char *rssfile = "nytimes_feeds.rss"; /**< A local file to write the RSS to.*/
char *outfile = "now.html";          /**< The output file to open in your browser.*/

/** Print a list of headlines in HTML format to the outfile, which is overwritten.

\param urls The list of URLs. This should have been tested for non-NULLness.
\param titles The list of titles, also pre-tested to be non-NULL. If either
list is \c NULL, or the \c urls list is shorter than the \c titles list,
this will crash.
*/
void print_to_html(xmlXPathObjectPtr urls, xmlXPathObjectPtr titles){
    FILE *f = fopen(outfile, "w");
    for (int i=0; i< titles->nodesetval->nodeNr; i++)
        fprintf(f, "<a href=\"%s\">%s</a><br>\n"
                 , xmlNodeGetContent(urls->nodesetval->nodeTab[i])
                 , xmlNodeGetContent(titles->nodesetval->nodeTab[i]));
    fclose(f);
}
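/* A note on memory, not part of the original listing: xmlNodeGetContent()
   returns a freshly allocated string that the caller owns, so the loop above
   leaks one string per URL and title. That is harmless in a program this
   short-lived, but a leak-free loop body would be a sketch like:

       xmlChar *u = xmlNodeGetContent(urls->nodesetval->nodeTab[i]);
       xmlChar *t = xmlNodeGetContent(titles->nodesetval->nodeTab[i]);
       fprintf(f, "<a href=\"%s\">%s</a><br>\n", u, t);
       xmlFree(u);   //libxml2's deallocator for strings it hands out
       xmlFree(t);
*/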
\n" , xmlNodeGetContent(urls->nodesetval->nodeTab[i]) , xmlNodeGetContent(titles->nodesetval->nodeTab[i])); fclose(f); } /** Parse an RSS feed on the hard drive. This will parse the XML, then find all nodes matching the XPath for the title elements and all nodes matching the XPath for the links. Then, it will write those to the outfile. \param infile The RSS file in. */ int parse(char const *infile){ const xmlChar *titlepath= (xmlChar*)"//item/title"; const xmlChar *linkpath= (xmlChar*)"//item/link"; xmlDocPtr doc = xmlParseFile(infile); Stopif(!doc, return -1, "Error: unable to parse file \"%s\"\n", infile); xmlXPathContextPtr context = xmlXPathNewContext(doc); Stopif(!context, return -2, "Error: unable to create new XPath context\n"); xmlXPathObjectPtr titles = xmlXPathEvalExpression(titlepath, context); xmlXPathObjectPtr urls = xmlXPathEvalExpression(linkpath, context); Stopif(!titles || !urls, return -3, "either the Xpath '//item/title' " "or '//item/link' failed."); print_to_html(urls, titles); xmlXPathFreeObject(titles); xmlXPathFreeObject(urls); xmlXPathFreeContext(context); xmlFreeDoc(doc); return 0; } /** Use cURL's easy interface to download the current RSS feed. \param url The URL of the NY Times RSS feed. Any of the ones listed at \url http://www.nytimes.com/services/xml/rss/nyt/ should work. \param outfile The headline file to write to your hard drive. First save the RSS feed to this location, then overwrite it with the short list of links. \return 1==OK, 0==failure. */ int get_rss(char const *url, char const *outfile){ FILE *feedfile = fopen(outfile, "w"); if (!feedfile) return -1; CURL *curl = curl_easy_init(); if(!curl) return -1; curl_easy_setopt(curl, CURLOPT_URL, url); curl_easy_setopt(curl, CURLOPT_WRITEDATA, feedfile); CURLcode res = curl_easy_perform(curl); if (res) return -1; curl_easy_cleanup(curl); fclose(feedfile); return 0; } int main(void) { Stopif(get_rss(rss_url, rssfile), return 1, "failed to download %s to %s.\n", rss_url, rssfile); parse(rssfile); printf("Wrote headlines to %s. Have a look at it in your browser.\n", outfile); }