Home Page   |   Products   |   Customer Service   |   About Us   |   Contact Us   |   Search

HTML TO XML - CNN Example Source Code

 
Back to HTML TO XML main page
 

// Example: download the CNN front page, parse it with the Noviway HTML
// parser, convert the parsed document to XML and save it as "Output.xml".

// HTML unicode string
string html = string.Empty;

// Encoding of the HTML
System.Text.Encoding encoding = null;

bool result = false;

// Get html from site
// NOTE(review): the last argument is presumably a file the raw HTML is
// written to -- confirm against GetHTMLFromSite.
result = GetHTMLFromSite( "http://edition.cnn.com", ref html, out encoding, "output.html" );

// Create logger, leave the file name empty if no log is needed
Noviway.IO.Logger logger = new Noviway.IO.Logger( "Log.txt" );

// Create the HTML parser
Noviway.HTMLParser.Parser parser = new Noviway.HTMLParser.Parser( logger, html );

// Process the data
parser.Process();

// Create our special HTML document structure
// (this declaration had been swallowed by the comment line above it, which
// left "doc" undeclared for every statement that follows)
Noviway.HTMLParser.HTMLDocument doc = new Noviway.HTMLParser.HTMLDocument( logger, parser.Tags, encoding );

// Process our HTML document
result = doc.Process();

// Create a XMLDocument object
XmlDocument xmlDoc = null;

// Now we create the XML document
result = doc.CreateXml( out xmlDoc );

// Fail with a clear message instead of a bare NullReferenceException when
// no XML document was produced.
if ( xmlDoc == null )
{
    throw new System.InvalidOperationException( "The HTML document could not be converted to XML." );
}

// Save the xml or anything else you want to
xmlDoc.Save("Output.xml");

// Collect the headline texts listed after the "More stories" heading and
// print them to the console.
XmlElement element = null;

// Find the element by its text
result = doc.FindElementByText( xmlDoc, "More stories", out element );

// The headlines are read from the node that follows the match. "as" yields
// null (instead of throwing) when nothing was found, when there is no next
// sibling, or when the sibling is not an element.
element = ( element != null ) ? element.NextSibling as XmlElement : null;

ArrayList news = new ArrayList();

if ( element != null )
{
    foreach ( XmlNode item in element.ChildNodes )
    {
        // The second child of each item is expected to be the anchor; the
        // node-list indexer returns null when the item has fewer children.
        XmlElement a = item.ChildNodes[1] as XmlElement;

        // Skip items without a proper link. This keeps the same filter the
        // original applied implicitly through its empty catch block, without
        // hiding unrelated errors.
        if ( a == null || !a.HasAttribute( "href" ) )
        {
            continue;
        }

        news.Add( a.InnerText );
    }
}
else
{
    // Reported on stderr so the regular output format stays unchanged.
    Console.Error.WriteLine( "Section \"More stories\" was not found; no headlines extracted." );
}

Console.WriteLine("CNN news:");

Console.WriteLine("----------------------:");

foreach( string strNews in news )
{
    Console.WriteLine( strNews );
}

Console.WriteLine("----------------------:");




 
Share with others:   
 
  Webmaster: Eran Aharonovich © All rights reserved to Eran Aharonovich 2007  
</HTML>