Bases: DataSource
A simple JSON writer for writing data to Databricks DBFS.
Examples:
>>> import pyspark.sql.functions as sf
>>> df = spark.range(0, 10, 1, 2).withColumn("value", sf.expr("concat('value_', id)"))
Register the data source.
>>> from pyspark_datasources import SimpleJsonDataSource
>>> spark.dataSource.register(SimpleJsonDataSource)
Append the DataFrame to a DBFS path as JSON files.
>>> (
... df.write.format("simplejson")
... .mode("append")
... .option("databricks_url", "https://your-databricks-instance.cloud.databricks.com")
... .option("databricks_token", "your-token")
... .save("/path/to/output")
... )
Write the DataFrame to a DBFS path as JSON files in overwrite mode.
>>> (
... df.write.format("simplejson")
... .mode("overwrite")
... .option("databricks_url", "https://your-databricks-instance.cloud.databricks.com")
... .option("databricks_token", "your-token")
... .save("/path/to/output")
... )
Source code in pyspark_datasources/simplejson.py
class SimpleJsonDataSource(DataSource):
    """
    A simple JSON writer for writing data to Databricks DBFS.

    The data source is registered under the short name ``simplejson``.
    This class defines only the write path (``writer``); the options set on
    the DataFrame writer (for example ``databricks_url`` and
    ``databricks_token``, as used in the examples below) are handed to
    ``SimpleJsonWriter`` unchanged.

    Examples
    --------
    >>> import pyspark.sql.functions as sf
    >>> df = spark.range(0, 10, 1, 2).withColumn("value", sf.expr("concat('value_', id)"))

    Register the data source.

    >>> from pyspark_datasources import SimpleJsonDataSource
    >>> spark.dataSource.register(SimpleJsonDataSource)

    Append the DataFrame to a DBFS path as JSON files.

    >>> (
    ...     df.write.format("simplejson")
    ...     .mode("append")
    ...     .option("databricks_url", "https://your-databricks-instance.cloud.databricks.com")
    ...     .option("databricks_token", "your-token")
    ...     .save("/path/to/output")
    ... )

    Write the DataFrame to a DBFS path as JSON files in overwrite mode.

    >>> (
    ...     df.write.format("simplejson")
    ...     .mode("overwrite")
    ...     .option("databricks_url", "https://your-databricks-instance.cloud.databricks.com")
    ...     .option("databricks_token", "your-token")
    ...     .save("/path/to/output")
    ... )
    """

    @classmethod
    def name(cls) -> str:
        """Return the short format name, as used in ``df.write.format("simplejson")``."""
        return "simplejson"

    def writer(self, schema: StructType, overwrite: bool) -> "SimpleJsonWriter":
        """
        Build the writer that performs the actual write.

        Parameters
        ----------
        schema : StructType
            Schema passed in by the caller; forwarded to the writer as-is.
        overwrite : bool
            Overwrite flag passed in by the caller; forwarded to the writer
            as-is. (Presumably ``True`` for ``mode("overwrite")`` and
            ``False`` for ``mode("append")`` -- confirm against the PySpark
            data source API.)

        Returns
        -------
        SimpleJsonWriter
            A writer constructed from ``schema``, this data source's
            ``options``, and ``overwrite``.
        """
        # The return annotation is a string so it is not evaluated at class
        # creation time, regardless of where SimpleJsonWriter is defined.
        return SimpleJsonWriter(schema, self.options, overwrite)