# Python example: data source, data sink, and transformer
# The four classes below implement a simple DataPipeline. Their bodies are placeholders that the user must fill in.
class WordDoc:
    """Placeholder type representing a Word document.

    The body is intentionally left empty; the user supplies the implementation.
    """

    ...
class PDF:
    """Placeholder type representing a PDF document.

    The body is intentionally left empty; the user supplies the implementation.
    """

    ...
class SQLDatabase(DataSource, DataSink):
    """Example SQL-backed store that is both a data source and a data sink.

    It registers one handler that provides ``WordDoc`` objects and one that
    stores them. The method bodies are placeholders for the user to fill in.
    """

    # NOTE(review): neither handler declares ``self``. Confirm whether the
    # ``register`` dispatch expects plain functions or instance methods
    # before filling in the bodies.

    @get.register(WordDoc)  # Tells the DataPipeline that this SQL database can provide a WordDoc
    def get_word_doc(query: Dict[str, Any]) -> WordDoc:
        """Return a WordDoc from the SQL database based on the `filename` in the query."""

    @put.register(WordDoc)  # Tells the DataPipeline that this SQL database can store a WordDoc
    def put_word_doc(doc: WordDoc, query: Dict[str, Any]) -> None:
        """Store the document in the SQL database using the query as an identifier."""
class DocumentTransformer(Transformer):
    """Example transformer that registers a WordDoc-to-PDF conversion.

    The method body is a placeholder for the user to fill in.
    """

    # NOTE(review): the handler does not declare ``self``. Confirm whether the
    # ``register`` dispatch expects a plain function or an instance method.

    @transform.register(WordDoc, PDF)  # Tells the DataPipeline that we know how to convert a WordDoc to a PDF
    def Word_to_PDF(doc: WordDoc) -> PDF:
        """Convert a WordDoc to a PDF and return the PDF."""
# The line of code below can now be used to request a PDF from the pipeline.
# The WordDoc with the filename `find_me` is pulled from the SQL database, converted to a PDF, and returned to the user.
my_pdf = pipeline.get(PDF, query={"filename": "find_me"})
# Note: because SQLDatabase also implements a `put` handler for WordDoc, it will store any WordDocs that pass through the pipeline but are not already in the database.