How to export formatted CSV data to a file?
I have a Python script that parses all the URLs from an XML sitemap and prints them in CSV format. It does this, but it doesn't create an actual CSV file. How do I write this output to an actual CSV file?
Here's the script repo https://github.com/daveoconnor/site-map-parser
Here's the code:
from sitemapparser import SiteMapParser
import pandas as pd
import csv
# Fetch and parse the sitemap published at this URL.
sm = SiteMapParser('https://shop.busybeaver.com/sitemaps/bb_shop_sitemap.xml')

# Library usage example: pull whichever kind of entry the parser found.
# Neither result is used further down; the exporter reads from `sm` itself.
if not sm.has_sitemaps():
    urls = sm.get_urls()  # iterator of Url instances
else:
    sitemaps = sm.get_sitemaps()  # iterator of Sitemap instances

from sitemapparser.exporters import CSVExporter

# Hand the already-parsed sitemap to the CSV exporter and print the CSV
# text it returns. Nothing is written to disk at this point.
csv_exporter = CSVExporter(sm)
if sm.has_sitemaps():
    print(csv_exporter.export_sitemaps())
elif sm.has_urls():
    print(csv_exporter.export_urls())
And here's the class used to format the data (CSVExporter):
import csv
import io
from ..exporter import Exporter
from ..url import Url
from ..sitemap import Sitemap
from datetime import datetime
class CSVExporter(Exporter):
    """Exporter that renders the parsed sitemap data held in ``self.data`` as CSV text.

    Both export methods return a string rather than writing a file. Rows are
    terminated with the csv module's default ``'\\r\\n'``, so a caller that
    saves the text should open the target file with ``newline=''`` to avoid
    doubled line endings (blank rows) on platforms that translate newlines.
    """

    short_name = 'csv'

    def _export(self, items, fields):
        """Serialize *items* to CSV text with one column per name in *fields*.

        Args:
            items: iterable of objects exposing every name in *fields* as an
                attribute.
            fields: ordered column names; also used as the header row.

        Returns:
            The CSV document as a string, header first, with trailing
            whitespace (including the final line terminator) stripped.
        """
        buffer = io.StringIO()
        writer = csv.DictWriter(
            buffer,
            delimiter=",",
            fieldnames=fields,
            quoting=csv.QUOTE_NONNUMERIC,
        )
        writer.writeheader()
        for item in items:
            row = {}
            for field in fields:
                value = getattr(item, field)
                # isinstance (not an exact type check) so datetime subclasses
                # are also emitted in ISO 8601 form.
                row[field] = (
                    value.isoformat() if isinstance(value, datetime) else value
                )
            writer.writerow(row)
        return buffer.getvalue().rstrip()

    def export_sitemaps(self):
        """
        returns csv data with format:
        url: string
        lastmod: ISO8601 format date

        Columns are taken from ``Sitemap.fields``; one row is produced per
        entry yielded by ``self.data.get_sitemaps()``.
        """
        return self._export(self.data.get_sitemaps(), Sitemap.fields)

    def export_urls(self):
        """
        returns csv data with format:
        url: string
        lastmod: ISO8601 format date
        changefreq: string
        priority: float, 0-1

        Columns are taken from ``Url.fields``; one row is produced per entry
        yielded by ``self.data.get_urls()``.
        """
        return self._export(self.data.get_urls(), Url.fields)
UPDATE:
I added the following to the bottom of the script:
# The exported text already ends each row with the csv module's '\r\n'.
# Opening the file with newline='' stops text mode from translating that
# into '\r\r\n', which is what shows up as a blank row between records when
# the file is opened with a plain open('file.csv', 'w') on Windows.
# The with-block also guarantees the file is flushed and closed.
with open('file.csv', 'w', newline='') as csv_file:
    csv_file.write(csv_exporter.export_urls())
This creates a csv, but there is an extra row between each url. How can I write without that extra row?
Sources
This article follows the attribution requirements of Stack Overflow and is licensed under CC BY-SA 3.0.
Source: Stack Overflow
| Solution | Source |
|---|---|