Python Scrapy - save only Item values and not dict lists?
<p>I have a spider I set up to parse through RSS feeds and save the results
to a database, which works perfectly. However, all the values I save to
the database look like the following:</p>
<p><code>{http://feeds.reuters.com/~r/reuters/businessNews/~3/_mdwUbG4B88/story01.htm}</code></p>
<p>My question is, where and how do I edit the item (I'm assuming in the
pipeline?) so that only the text is saved and not the {} or .</p>
<p>Here's
the pipeline code I'm using:</p>
<pre><code>from sqlalchemy.orm import sessionmaker
from models import Feeds, db_connect, create_feeds_table
import MySQLdb
import settings


class Feeds2Pipeline(object):
    def __init__(self):
        """Initialize database connection"""
        engine = db_connect()
        create_feeds_table(engine)
        self.Session = sessionmaker(bind=engine)

    def process_item(self, item, spider):
        """Save the feeds into the database"""
        session = self.Session()
        feed = Feeds(**item)
        try:
            session.add(feed)
            session.commit()
        except:
            session.rollback()
            raise
        finally:
            session.close()
        return item
</code></pre>
<p>And here's the additional database back-end:</p>
<pre><code>from sqlalchemy import create_engine
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.engine.url import URL
from sqlalchemy import create_engine, Column, Integer, String, Date
import settings

DeclarativeBase = declarative_base()


def db_connect():
    return create_engine(URL(**settings.DATABASE))


def create_feeds_table(engine):
    DeclarativeBase.metadata.create_all(engine)


class Feeds(DeclarativeBase):
    __tablename__ = "rawFeeds"
    id = Column(Integer, primary_key=True)
    link = Column('link', String)
    date = Column('date', String)
    source = Column('source', String)
</code></pre>
No comments:
Post a Comment