I am creating an XML document using LINQ to XML, but the output is not in the required format. Here is my code:
// Pages to scan for images.
var pageUrls = new List<string>
{
    "http://example.com/sample.html",
    "http://example.com/new.html"
};

foreach (string pageUrl in pageUrls)
{
    // Download and parse the page, then keep the non-empty "src" value of every <img> element.
    var page = new HtmlWeb().Load(pageUrl);
    List<string> imageSources = page.DocumentNode
        .Descendants("img")
        .Select(img => img.GetAttributeValue("src", null))
        .Where(src => !String.IsNullOrEmpty(src))
        .ToList();

    // One GenerateXml call per page.
    GenerateXml(pageUrl, imageSources);
}
and
/// <summary>
/// Writes a sitemap document containing a single <c>url</c> element for <paramref name="url"/>,
/// with one <c>image:image</c> child per entry of <paramref name="listitems"/>, and saves it to
/// the path that "/Static/sitemaps/Sitemap-image.xml" maps to on the server.
/// </summary>
/// <param name="url">Page address written into the <c>loc</c> element.</param>
/// <param name="listitems">Image addresses, each written into its own <c>image:loc</c> element.</param>
protected void GenerateXml(string url, List<string> listitems)
{
    XNamespace sitemapNs = "http://www.sitemaps.org/schemas/sitemap/0.9";
    XNamespace imageNs = "http://www.google.com/schemas/sitemap-image/1.1";

    // One <image:image><image:loc>…</image:loc></image:image> element per image address.
    var imageElements = listitems.Select(imageUrl =>
        new XElement(imageNs + "image",
            new XElement(imageNs + "loc", imageUrl)));

    var urlElement = new XElement(sitemapNs + "url",
        new XElement(sitemapNs + "loc", url),
        imageElements);

    // The two attributes declare the default namespace and the "image" prefix on the root element.
    var root = new XElement(sitemapNs + "urlset",
        new XAttribute("xmlns", sitemapNs),
        new XAttribute(XNamespace.Xmlns + "image", imageNs),
        urlElement);

    var document = new XDocument(new XDeclaration("1.0", "UTF-8", ""), root);

    string outputPath = System.Web.HttpContext.Current.Server.MapPath("/Static/sitemaps/Sitemap-image.xml");
    document.Save(outputPath);
}
I need it in the below format
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9" xmlns:image="http://www.google.com/schemas/sitemap-image/1.1">
<url>
<loc>http://example.com/sample.html</loc>
<image:image>
<image:loc>http://example.com/image.jpg</image:loc>
</image:image>
<image:image>
<image:loc>http://example.com/photo.jpg</image:loc>
</image:image>
</url>
<url>
<loc>http://example.com/new.html</loc>
<image:image>
<image:loc>http://example.com/newimage.jpg</image:loc>
</image:image>
<image:image>
<image:loc>http://example.com/newphoto.jpg</image:loc>
</image:image>
</url>
</urlset>
But I am getting only a single url tag. How can I achieve this? Any suggestions?
It sounds like this is really just a case of wanting to fetch all the URLs (from all the source documents) before you call GenerateXml
at all - and remember where each one came from. That's as simple as:
// Pages to scan for images.
var sources = new List<string>
{
    "http://example.com/sample.html",
    "http://example.com/new.html"
};

// Map each page URL to the non-empty "src" values of its <img> elements, so that every page
// has been collected before GenerateXml is called a single time.
var imagesBySource = sources
    .ToDictionary(source => source,
                  // Load the page named by the lambda parameter (no "url" variable exists here).
                  source => new HtmlWeb().Load(source)
                                         .DocumentNode.Descendants("img")
                                         .Select(e => e.GetAttributeValue("src", null))
                                         .Where(s => !String.IsNullOrEmpty(s))
                                         .ToList());

GenerateXml(imagesBySource);
You'd then need to change GenerateXml to take a Dictionary<string, List<string>>. Something like (untested):
/// <summary>
/// Writes a sitemap document with one <c>url</c> element per dictionary entry and saves it to
/// the path that "/Static/sitemaps/Sitemap-image.xml" maps to on the server.
/// </summary>
/// <param name="imagesByUrl">
/// Keys are page addresses (written into <c>loc</c>); each value is the list of image addresses
/// for that page (each written into its own <c>image:image</c>/<c>image:loc</c> element).
/// </param>
protected void GenerateXml(Dictionary<string, List<string>> imagesByUrl)
{
    XNamespace nsSitemap = "http://www.sitemaps.org/schemas/sitemap/0.9";
    XNamespace nsImage = "http://www.google.com/schemas/sitemap-image/1.1";
    var sitemap = new XDocument(new XDeclaration("1.0", "UTF-8", ""));

    // The two attributes declare the default namespace and the "image" prefix on the root element.
    var urlSet = new XElement(nsSitemap + "urlset",
        new XAttribute("xmlns", nsSitemap),
        new XAttribute(XNamespace.Xmlns + "image", nsImage),
        // One <url> element per page, each holding its own <loc> and its images.
        imagesByUrl.Select(entry =>
            new XElement(nsSitemap + "url",
                new XElement(nsSitemap + "loc", entry.Key),
                from urlNode in entry.Value
                select new XElement(nsImage + "image",
                    new XElement(nsImage + "loc", urlNode)
                )
            )
        )
    ); // closes the "urlset" element; the original snippet was missing this parenthesis

    sitemap.Add(urlSet);
    var path = HttpContext.Current.Server.MapPath("/Static/sitemaps/Sitemap-image.xml");
    sitemap.Save(path);
}
Note that this won't guarantee that the order of the sources is preserved, because Dictionary<TKey, TValue> does not define the order in which its entries are enumerated. If you need that, you should probably create a class with Url and Images properties, and pass a list of those to GenerateXml instead.
See more on this question at Stackoverflow