Skip to content

SimpleJsonDataSource

Bases: DataSource

A simple JSON writer for writing data to Databricks DBFS.

Examples:

>>> import pyspark.sql.functions as sf
>>> df = spark.range(0, 10, 1, 2).withColumn("value", sf.expr("concat('value_', id)"))

Register the data source.

>>> from pyspark_datasources import SimpleJsonDataSource
>>> spark.dataSource.register(SimpleJsonDataSource)

Append the DataFrame to a DBFS path as JSON files.

>>> (
...     df.write.format("simplejson")
...     .mode("append")
...     .option("databricks_url", "https://your-databricks-instance.cloud.databricks.com")
...     .option("databricks_token", "your-token")
...     .save("/path/to/output")
... )

Write the DataFrame to a DBFS path as JSON files using overwrite mode.

>>> (
...     df.write.format("simplejson")
...     .mode("overwrite")
...     .option("databricks_url", "https://your-databricks-instance.cloud.databricks.com")
...     .option("databricks_token", "your-token")
...     .save("/path/to/output")
... )
Source code in pyspark_datasources/simplejson.py
class SimpleJsonDataSource(DataSource):
    """
    A simple JSON writer for writing data to Databricks DBFS.

    The data source is registered under the format name ``"simplejson"``.
    Writing is delegated to ``SimpleJsonWriter``, which receives the
    DataFrame schema, the options set on the writer, and the overwrite flag.

    Examples
    --------

    >>> import pyspark.sql.functions as sf
    >>> df = spark.range(0, 10, 1, 2).withColumn("value", sf.expr("concat('value_', id)"))

    Register the data source.

    >>> from pyspark_datasources import SimpleJsonDataSource
    >>> spark.dataSource.register(SimpleJsonDataSource)

    Append the DataFrame to a DBFS path as JSON files.

    >>> (
    ...     df.write.format("simplejson")
    ...     .mode("append")
    ...     .option("databricks_url", "https://your-databricks-instance.cloud.databricks.com")
    ...     .option("databricks_token", "your-token")
    ...     .save("/path/to/output")
    ... )

    Write the DataFrame to a DBFS path as JSON files using overwrite mode.

    >>> (
    ...     df.write.format("simplejson")
    ...     .mode("overwrite")
    ...     .option("databricks_url", "https://your-databricks-instance.cloud.databricks.com")
    ...     .option("databricks_token", "your-token")
    ...     .save("/path/to/output")
    ... )
    """

    @classmethod
    def name(cls) -> str:
        """Return the format name passed to ``df.write.format(...)``."""
        return "simplejson"

    def writer(self, schema: StructType, overwrite: bool) -> "SimpleJsonWriter":
        """
        Build the writer that handles a write to this data source.

        Parameters
        ----------
        schema : StructType
            Schema of the data being written.
        overwrite : bool
            Overwrite flag supplied by Spark; forwarded unchanged to the
            writer (presumably ``True`` for ``mode("overwrite")`` -- confirm
            against the PySpark data source API).

        Returns
        -------
        SimpleJsonWriter
            A writer constructed from ``schema``, ``self.options`` and
            ``overwrite``.
        """
        # self.options carries the values set via .option(...); the examples
        # above use "databricks_url" and "databricks_token".
        return SimpleJsonWriter(schema, self.options, overwrite)