Apache Spark
Overview
Installing
Install via Apolo CLI
apolo app-template get spark -o myspark.yaml  # writes the template scaffold to myspark.yaml; example contents below
# Example myspark.yaml — Spark application template for the Apolo platform.
# Indentation reconstructed from the field semantics:
#   input.spark_application_config — the job itself (entrypoint, args, deps, volumes)
#   input.spark_auto_scaling_config — dynamic executor scaling bounds
#   input.driver_config / executor_config — presets for driver and executor pods
#   input.image — Spark container image coordinates
template_name: spark
template_version: v1.0.0
display_name: myspark
input:
  spark_application_config:
    type: Python
    main_application_file:
      # Entrypoint script, resolved from platform storage.
      path: "storage:my-spark-job/main.py"
    # CLI arguments passed through to main.py.
    arguments:
      - "--input-path"
      - "storage:datasets/input.csv"
      - "--output-path"
      - "storage:results/"
    dependencies:
      # Python packages installed into the Spark runtime.
      pypi_packages:
        - "pandas==2.0.3"
        - "numpy>=1.24.0"
      # JVM (Maven) packages; coordinates are group:artifact:version.
      packages:
        - "org.apache.spark:spark-sql_2.12:3.5.3"
    # Storage volumes mounted into driver/executor pods.
    volumes:
      - storage_uri:
          path: "storage:datasets/"
        mount_path:
          path: /data/input
        mode:
          mode: r   # read-only input data
      - storage_uri:
          path: "storage:results/"
        mount_path:
          path: /data/output
        mode:
          mode: rw  # writable output location
  spark_auto_scaling_config:
    initial_executors: 2
    min_executors: 1
    max_executors: 10
    # Seconds to keep an idle executor alive for shuffle data — TODO confirm unit against platform docs.
    shuffle_tracking_timeout: 60
  driver_config:
    preset:
      name: cpu-medium
  executor_config:
    instances: 3
    preset:
      name: cpu-large
  image:
    repository: spark
    # Quoted so the version is never retyped by a YAML parser.
    tag: "3.5.3"

References
Last updated
Was this helpful?