I am trying to create a Google Cloud Batch job through Python; this is my creation script:
def submit_download_sra_fastq(sra_batch_file, batch_name, project_id, region, job_name, docker_image, cpus, memory_mib,
                              machine_type, max_retry_count, run_minutes, parallelism):
    """Submit a Google Cloud Batch job that downloads SRA FASTQ files.

    One Batch task is created per line of *sra_batch_file*; each task runs
    ``docker_image`` on a SPOT VM of ``machine_type`` with a newly created
    100 GB pd-ssd disk mounted at ``/mnt/disks/sra-download`` and bind-mounted
    into the container at ``/sra-download``.

    Args:
        sra_batch_file: Local path to a text file with one SRA accession per line.
        batch_name: Basename for the GCS copy of the batch file, or ``None`` to
            derive one from the current Unix time.
        project_id: GCP project that will own the job.
        region: Region the job runs in (forms the job's parent resource).
        job_name: Batch job ID.
        docker_image: Container image URI to run for each task.
        cpus: Whole CPUs requested per task.
        memory_mib: Memory requested per task, in MiB.
        machine_type: Compute Engine machine type for the VMs.
        max_retry_count: Per-task retry limit.
        run_minutes: Per-task wall-clock limit, in minutes.
        parallelism: Maximum number of tasks run concurrently.

    Returns:
        The created ``batch_v1.Job``.

    Raises:
        subprocess.CalledProcessError: If the ``gsutil cp`` upload fails.
    """
    # One Batch task per accession: count the lines in the input file.
    with open(sra_batch_file) as infile:
        sra_count = sum(1 for _ in infile)

    # Stage the batch file in GCS so the worker tasks can read it.
    if batch_name is None:
        # int(time.time()) is the integer Unix timestamp (same value the
        # original str(time.time()).split('.')[0] produced).
        gcs_batch_file = f'gs://sra-fastq/batches/sra-batch{int(time.time())}.txt'
    else:
        gcs_batch_file = f'gs://sra-fastq/batches/{batch_name}.txt'
    # Pass argv as a list (no shell, no whitespace-splitting surprises) and
    # fail loudly if the copy fails; the original ignored the return code,
    # so a failed upload surfaced only as a confusing downstream job error.
    subprocess.run(['gsutil', 'cp', sra_batch_file, gcs_batch_file], check=True)

    client = batch_v1.BatchServiceClient()

    # Define what each task runs: the container bind-mounts the attached
    # disk's VM mount point into the container.
    runnable = batch_v1.Runnable()
    runnable.container = batch_v1.Runnable.Container()
    runnable.container.image_uri = docker_image
    runnable.container.options = '--mount type=bind,src=/mnt/disks/sra-download/,target=/sra-download'
    runnable.container.entrypoint = "/bin/sh"
    # $BATCH_TASK_INDEX selects this task's accession from the batch file.
    runnable.container.commands = ["-c", f"hsubatch download-sra-fastq {gcs_batch_file} $BATCH_TASK_INDEX"]

    task = batch_v1.TaskSpec()
    task.runnables = [runnable]

    # Mount the attached disk inside the VM. device_name must match
    # AttachedDisk.device_name below so Batch can locate the device.
    # NOTE(review): the "wrong fs type / bad superblock" failure in the logs
    # occurs at this VM-level mount, right after mke2fs appears to run more
    # than once on the device — verify the mount_options are supported and
    # that the disk is not being formatted/mounted concurrently.
    volume = batch_v1.Volume()
    volume.device_name = 'sra-download'
    volume.mount_path = "/mnt/disks/sra-download/"
    volume.mount_options = "rw,async"
    task.volumes = [volume]

    # Per-task resource requests.
    resources = batch_v1.ComputeResource()
    resources.cpu_milli = cpus * 1000  # in milli-CPUs: 1000 == one whole CPU
    resources.memory_mib = memory_mib  # in MiB
    task.compute_resource = resources
    task.max_retry_count = max_retry_count
    task.max_run_duration = f"{run_minutes * 60}s"

    # Tasks are grouped inside a job using TaskGroups.
    # Currently it's possible to have only one task group.
    group = batch_v1.TaskGroup()
    group.task_spec = task
    group.task_count = sra_count
    group.parallelism = parallelism

    # Allocation policy: SPOT VMs of the requested machine type, each with a
    # new 100 GB pd-ssd data disk.
    # Machine types: https://cloud.google.com/compute/docs/machine-types
    policy = batch_v1.AllocationPolicy.InstancePolicy()
    policy.machine_type = machine_type
    policy.provisioning_model = "SPOT"
    newdisk = batch_v1.AllocationPolicy.AttachedDisk()
    newdisk.new_disk.size_gb = 100
    newdisk.new_disk.type_ = 'pd-ssd'
    newdisk.device_name = 'sra-download'  # must match Volume.device_name above
    policy.disks = [newdisk]
    instances = batch_v1.AllocationPolicy.InstancePolicyOrTemplate()
    instances.policy = policy
    allocation_policy = batch_v1.AllocationPolicy()
    allocation_policy.instances = [instances]

    job = batch_v1.Job()
    job.task_groups = [group]
    job.allocation_policy = allocation_policy
    job.labels = {"env": "testing", "type": "container"}
    # Cloud Logging is the out-of-the-box log destination.
    job.logs_policy = batch_v1.LogsPolicy()
    job.logs_policy.destination = batch_v1.LogsPolicy.Destination.CLOUD_LOGGING

    create_request = batch_v1.CreateJobRequest()
    create_request.job = job
    create_request.job_id = job_name
    # The job's parent is the region in which the job will run.
    create_request.parent = f"projects/{project_id}/locations/{region}"
    created_job = client.create_job(create_request)
    print(f"Created job: {job_name}")
    return created_job
You can see that I am trying to attach a new disk to the instance and then run a Docker image that bind-mounts that new disk. I get these errors when I run the command:
ERROR 2023-01-12T07:21:24.898106623Z mke2fs 1.46.5 (30-Dec-2021)
INFO 2023-01-12T07:21:24.901911705Z Discarding device blocks: 0/26214400
INFO 2023-01-12T07:21:24.966901031Z done
INFO 2023-01-12T07:21:24.967126864Z Creating filesystem with 26214400 4k blocks and 6553600 inodes Filesystem UUID: a26b88d9-e73b-4a34-ba38-5b11c9b6f996 Superblock backups stored on blocks: 32768, 98304, 163840, 229376, 294912
INFO 2023-01-12T07:21:24.967145137Z , 819200, 884736, 1605632, 2654208, 4096000
INFO 2023-01-12T07:21:24.967153330Z , 7962624,
INFO 2023-01-12T07:21:24.967160683Z 11239424
INFO 2023-01-12T07:21:24.967173601Z , 20480000
INFO 2023-01-12T07:21:24.967189740Z , 23887872 Allocating group tables: 0/800
INFO 2023-01-12T07:21:24.968106547Z done
INFO 2023-01-12T07:21:24.968778284Z Writing inode tables: 0/800
INFO 2023-01-12T07:21:25.002198029Z 68/800
INFO 2023-01-12T07:21:25.174615487Z f927477c601c: Pull complete
INFO 2023-01-12T07:21:25.245055966Z 7eb0e5940f70: Pull complete
INFO 2023-01-12T07:21:25.257080266Z Digest: sha256:3be61617119fbccfd39a8b892debe155abb6e2d96ba7dcd1f1f911b24847a619
INFO 2023-01-12T07:21:25.262943714Z Status: Downloaded newer image for gcr.io/mdurrant/hsulab-gcp-batch:latest
INFO 2023-01-12T07:21:25.266026529Z gcr.io/mdurrant/hsulab-gcp-batch:latest
ERROR 2023-01-12T07:21:25.278811140Z mke2fs 1.46.5 (30-Dec-2021)
INFO 2023-01-12T07:21:25.282597747Z Discarding device blocks: 0/26214400
INFO 2023-01-12T07:21:25.342568710Z done
INFO 2023-01-12T07:21:25.342952775Z Creating filesystem with 26214400 4k blocks and 6553600 inodes Filesystem UUID: 572c3a05-08af-4efb-9e14-24833c696273 Superblock backups stored on blocks: 32768, 98304, 163840, 229376, 294912, 819200, 884736,
INFO 2023-01-12T07:21:25.342978304Z 1605632, 2654208, 4096000, 7962624, 11239424, 20480000, 23887872
INFO 2023-01-12T07:21:25.342985559Z Allocating group tables:
INFO 2023-01-12T07:21:25.342992886Z 0/800
INFO 2023-01-12T07:21:25.343864852Z done
INFO 2023-01-12T07:21:25.344516947Z Writing inode tables: 0/800
INFO 2023-01-12T07:21:25.377867873Z done
INFO 2023-01-12T07:21:25.382698785Z Creating journal (131072 blocks):
INFO 2023-01-12T07:21:25.384877891Z done Writing superblocks and filesystem accounting information:
INFO 2023-01-12T07:21:25.385367312Z 0/800
INFO 2023-01-12T07:21:25.387779978Z
INFO 2023-01-12T07:21:25.395778084Z done
ERROR 2023-01-12T07:21:25.497736569Z mount: /mnt/disks/sra-download: wrong fs type, bad option, bad superblock on /dev/sdb, missing codepage or helper program, or other error.
It looks like there is an issue with mounting the new disk. Any idea what is going wrong?