I don’t want to get banned, but I have to say it seems like a reasonable question to ask whether there are any GPUs available. The instructions say to try another region or try again later, but I have now tried on two separate days, at multiple times of day, and have written a Python script that tries every region.
First I tried:
gcloud compute machine-types list --filter="name=a2-highgpu-2g"
gcloud compute machine-types list --filter="name=a2-highgpu-1g"
Then, thinking perhaps there is too much contention for higher-end GPUs, I just tried:
PROJECT="…"
INSTANCE_NAME="piper-train"
MACHINE_TYPE="g2-standard-4"
IMAGE_FAMILY="common-cu120-ubuntu-2204-py310"
IMAGE_PROJECT="deeplearning-platform-release"
BOOT_DISK_SIZE="200GB"
METADATA="install-nvidia-driver=True"
MAINT_POLICY="TERMINATE"
I will attach my Python script. Am I doing something wrong? It was quite easy to get a lightweight GCP server, but getting anything with a GPU seems (so far) impossible.
Apologies: preformatting here appears to eat indentation? I’m not sure how to paste in code. I am going to manually edit.
#
# GCP Utils
import argparse
import shlex
import subprocess
from pathlib import Path
# Default input files live next to this script, regardless of the current
# working directory.
_SCRIPT_DIR = Path(__file__).parent
DEFAULT_PARAMS_FILE = _SCRIPT_DIR / "gcp_params.dat"
DEFAULT_ZONES_FILE = _SCRIPT_DIR / "gcp_zones.txt"
def read_zones(zonefile):
    """Read zone names from a text file, one zone per line.

    Everything from a ``#`` to the end of a line is treated as a comment, so
    both full-line and trailing comments are ignored. Blank lines are
    skipped and surrounding whitespace is stripped. Duplicate zones are
    dropped, keeping the first occurrence, so file order is preserved.

    Args:
        zonefile: Path of the zones file; it is opened as UTF-8 text.

    Returns:
        A list of unique zone names in the order they first appear.
    """
    # A dict keeps insertion order, which gives ordered de-duplication
    # without maintaining a separate "seen" set.
    unique_zones = {}
    with open(zonefile, "r", encoding="utf-8") as file:
        for raw_line in file:
            zone = raw_line.split("#", 1)[0].strip()
            if zone:
                unique_zones.setdefault(zone, None)
    return list(unique_zones)
def read_gcp_params(param_file):
    """Parse a shell-style ``KEY=value`` parameter file into a dict.

    Blank lines and lines starting with ``#`` are skipped. Each remaining
    line is split on the first ``=``. The value is tokenised with
    ``shlex.split`` so surrounding quotes are removed; only the first token
    is kept, which means anything after the first unquoted whitespace
    (for example a trailing comment) is discarded. A line with nothing after
    the ``=`` yields an empty string.

    Args:
        param_file: Path of the parameter file; it is opened as UTF-8 text.

    Returns:
        A dict mapping each stripped key to its first value token.

    Raises:
        ValueError: If a line has no ``=``, has an empty key, or has a value
            that ``shlex`` cannot tokenise (e.g. an unclosed quote). The
            message names the offending line number.
    """
    params = {}
    with open(param_file, "r", encoding="utf-8") as file:
        for lineno, raw_line in enumerate(file, start=1):
            line = raw_line.strip()
            if not line or line.startswith("#"):
                continue
            key, separator, value = line.partition("=")
            key = key.strip()
            if not separator or not key:
                # Report the stripped line so the message carries no newline.
                raise ValueError(f"Invalid parameter line {lineno}: {line!r}")
            try:
                tokens = shlex.split(value, posix=True)
            except ValueError as err:
                # shlex reports e.g. "No closing quotation" without saying
                # where; add the file position for the user.
                raise ValueError(
                    f"Invalid parameter line {lineno}: {line!r} ({err})"
                ) from err
            params[key] = tokens[0] if tokens else ""
    return params
def fetch_machine_type_zones(machine_type):
    """Ask gcloud which zones list the given machine type.

    Runs ``gcloud compute machine-types list`` filtered by name and reads
    one zone per output line. Note that this reports where the machine type
    is *offered*; it says nothing about whether capacity or quota is
    currently available there.

    Args:
        machine_type: Machine type name, e.g. ``"g2-standard-4"``.

    Returns:
        A list of unique zone names in the order gcloud printed them. An
        empty list is returned (after printing the error) when gcloud exits
        with a non-zero status or cannot be started at all.
    """
    cmd = [
        "gcloud",
        "compute",
        "machine-types",
        "list",
        f"--filter=name={machine_type}",
        "--format=value(zone)",
    ]
    try:
        result = subprocess.run(
            cmd,
            check=True,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            text=True,
        )
    except subprocess.CalledProcessError as e:
        print(f"Error fetching zones for machine type {machine_type}: {e.stderr.strip() or e.stdout.strip()}")
        return []
    except OSError as e:
        # gcloud is not installed, not on PATH, or not executable.
        print(f"Error fetching zones for machine type {machine_type}: {e}")
        return []

    stripped = (line.strip() for line in result.stdout.splitlines())
    # dict.fromkeys de-duplicates while preserving first-seen order.
    return list(dict.fromkeys(zone for zone in stripped if zone))
def try_create_instance(zone, instance_name, project, machine_type, image_family, image_project, boot_disk_size, metadata, maintenance_policy):
    """Attempt to create one Compute Engine instance in a single zone.

    Runs ``gcloud compute instances create`` with the given settings. The
    command's output is captured; on failure the captured error text is
    printed.

    Args:
        zone: Zone to create the instance in.
        instance_name: Name of the new instance.
        project: Project ID passed as ``--project``.
        machine_type: Value for ``--machine-type``.
        image_family: Value for ``--image-family``.
        image_project: Value for ``--image-project``.
        boot_disk_size: Value for ``--boot-disk-size``, e.g. ``"200GB"``.
        metadata: Value for ``--metadata``, e.g. ``"key=value"``.
        maintenance_policy: Value for ``--maintenance-policy``.

    Returns:
        True if gcloud exited successfully, False if it exited with a
        non-zero status or could not be started.
    """
    cmd = [
        "gcloud",
        "compute",
        "instances",
        "create",
        instance_name,
        f"--project={project}",
        f"--zone={zone}",
        f"--machine-type={machine_type}",
        f"--image-family={image_family}",
        f"--image-project={image_project}",
        f"--boot-disk-size={boot_disk_size}",
        f"--metadata={metadata}",
        f"--maintenance-policy={maintenance_policy}",
        # stderr is captured, so an interactive prompt would be invisible
        # and the run would appear to hang; make gcloud fail instead.
        "--quiet",
    ]
    try:
        subprocess.run(
            cmd,
            check=True,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            text=True,
        )
    except subprocess.CalledProcessError as e:
        print(f"Error creating instance in zone {zone}: {e.stderr.strip() or e.stdout.strip()}")
        return False
    except OSError as e:
        # gcloud is not installed, not on PATH, or not executable.
        print(f"Error creating instance in zone {zone}: {e}")
        return False
    return True
def find_available_zone(zones, params):
    """Try each zone in order until an instance is created.

    Args:
        zones: Iterable of zone names to try, in priority order.
        params: Mapping that must contain the keys ``INSTANCE_NAME``,
            ``PROJECT``, ``MACHINE_TYPE``, ``IMAGE_FAMILY``,
            ``IMAGE_PROJECT``, ``BOOT_DISK_SIZE``, ``METADATA`` and
            ``MAINT_POLICY``.

    Returns:
        The first zone in which ``try_create_instance`` succeeded, or None
        if every zone failed or ``zones`` was empty.

    Raises:
        KeyError: If ``params`` lacks any required key. All missing keys are
            named in one message. The check runs when the first zone is
            attempted, so an empty ``zones`` never raises.
    """
    # Listed in the positional order try_create_instance expects after `zone`.
    required_keys = (
        "INSTANCE_NAME",
        "PROJECT",
        "MACHINE_TYPE",
        "IMAGE_FAMILY",
        "IMAGE_PROJECT",
        "BOOT_DISK_SIZE",
        "METADATA",
        "MAINT_POLICY",
    )
    create_args = None
    for zone in zones:
        if create_args is None:
            # The arguments are the same for every zone; resolve them once.
            missing = [key for key in required_keys if key not in params]
            if missing:
                raise KeyError(f"Missing required parameter(s): {', '.join(missing)}")
            create_args = [params[key] for key in required_keys]
        print(f"Trying to create instance in zone: {zone}")
        if try_create_instance(zone, *create_args):
            return zone
    return None
def list_servers(project):
    """Run ``gcloud compute instances list`` for a project.

    Args:
        project: Project ID passed as ``--project``.

    Returns:
        The ``subprocess.CompletedProcess`` of the gcloud call; the listing
        is in its ``stdout`` attribute as text and is not printed here.

    Raises:
        subprocess.CalledProcessError: If gcloud exits with a non-zero
            status.
    """
    cmd = [
        "gcloud",
        "compute",
        "instances",
        "list",
        f"--project={project}",
    ]
    return subprocess.run(
        cmd,
        check=True,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        text=True,
    )
def shut_all_servers(project):
    """Stop every running Compute Engine instance in a project.

    ``gcloud compute instances stop`` requires at least one instance name,
    so the running instances are listed first (as resource URIs, which
    carry each instance's zone) and then passed to a single ``stop`` call.

    Args:
        project: Project ID passed as ``--project``.

    Returns:
        The ``subprocess.CompletedProcess`` of the ``stop`` command, or of
        the ``list`` command when no instance is running and there is
        nothing to stop.

    Raises:
        subprocess.CalledProcessError: If either gcloud call exits with a
            non-zero status.
    """
    run_options = {
        "check": True,
        "stdout": subprocess.PIPE,
        "stderr": subprocess.PIPE,
        "text": True,
    }
    listing = subprocess.run(
        [
            "gcloud",
            "compute",
            "instances",
            "list",
            f"--project={project}",
            "--filter=status=RUNNING",
            "--uri",
        ],
        **run_options,
    )
    # --uri prints one resource URI per line; URIs contain no whitespace.
    instance_uris = listing.stdout.split()
    if not instance_uris:
        return listing

    # NOTE(review): relies on gcloud accepting instance URIs in place of
    # names, so no --zone is needed - confirm against the gcloud reference.
    cmd = [
        "gcloud",
        "compute",
        "instances",
        "stop",
        *instance_uris,
        f"--project={project}",
        # Output is captured, so never wait on an interactive prompt.
        "--quiet",
    ]
    return subprocess.run(cmd, **run_options)
def parse_args(argv=None):
    """Parse the command-line options for this script.

    Args:
        argv: Optional list of argument strings. When None (the default),
            argparse reads ``sys.argv[1:]``, which is the previous behaviour.

    Returns:
        An ``argparse.Namespace`` with ``params_file`` (a ``Path``,
        defaulting to ``DEFAULT_PARAMS_FILE``) and ``zones_file`` (a
        ``Path``, or None when the option is not given).
    """
    parser = argparse.ArgumentParser(
        description="Try to create a Compute Engine instance, zone by zone, "
        "until one creation succeeds.",
    )
    parser.add_argument(
        "--params-file",
        default=DEFAULT_PARAMS_FILE,
        type=Path,
        help="KEY=value file with the instance settings (default: %(default)s)",
    )
    parser.add_argument(
        "--zones-file",
        type=Path,
        help="file listing the zones to try, one per line; when omitted the "
        "zones are looked up with gcloud from the machine type",
    )
    return parser.parse_args(argv)
def main():
    """Create an instance in the first zone where creation succeeds.

    Reads the parameter file, takes the candidate zones from ``--zones-file``
    when given (otherwise from gcloud, based on ``MACHINE_TYPE``), and tries
    the zones in order.

    Returns:
        0 if an instance was created, 1 if there were no zones to try or
        creation failed in every zone. The value is used as the process exit
        status so shell scripts can detect failure.
    """
    args = parse_args()
    params = read_gcp_params(args.params_file)

    if args.zones_file:
        zones = read_zones(args.zones_file)
    else:
        zones = fetch_machine_type_zones(params["MACHINE_TYPE"])

    if not zones:
        print("No zones available for requested machine type")
        return 1

    if find_available_zone(zones, params):
        print("Instance created successfully")
        return 0

    print("Failed to create instance in all zones")
    return 1


if __name__ == "__main__":
    # Propagate main()'s result as the exit status.
    raise SystemExit(main())

