vendor: replace third_party/nature-id gitlink with tracked files

This commit is contained in:
NODA1 System
2026-02-21 11:00:42 +01:00
parent a91309de11
commit 69486a92be
18 changed files with 1376 additions and 1 deletions

110
third_party/nature-id/inat_api.py vendored Normal file
View File

@@ -0,0 +1,110 @@
import json, os, pickle, requests, shelve, sys, time
#############################################################################
# #
# API calls to obtain taxonomic information. Used in case of name changes. #
# #
# See documentation at https://api.inaturalist.org/v1/docs/#/Taxa #
# #
# We throttle the number of calls to less than 60 per minute. We also #
# implement a cache to avoid repeated lookups of the same taxa across runs. #
# Cache entries include time stamps and they expire after two weeks. #
# #
#############################################################################
API_HOST = "https://api.inaturalist.org/v1"
CACHE_EXPIRATION = 14 * 24 * 3600  # cache expires after 2 weeks (seconds)
TOO_MANY_API_CALLS_DELAY = 60  # wait this long (seconds) after error 429

# The cache stores the json responses. It lives in a per-user directory so
# that it persists across runs.
if sys.platform == 'win32':
    DATA_DIR = os.path.join(os.path.expanduser('~'),
                            'AppData', 'Local', 'inat_api')
else:
    DATA_DIR = os.path.join(os.path.expanduser('~'), '.cache', 'inat_api')

# exist_ok=True creates the directory only when needed, without the
# check-then-create race of a separate os.path.exists() test.
os.makedirs(DATA_DIR, exist_ok=True)

# Persistent mapping: request key -> (time stamp, decoded json response).
cache = shelve.open(os.path.join(DATA_DIR, 'api.cache'))
# API call throttling.
class Throttle:
    """Limit API calls to API_MAX_CALLS within any API_INTERVAL seconds.

    The times of recent calls are remembered; when the limit is reached,
    wait() sleeps until the oldest remembered call is older than
    API_INTERVAL before admitting the next one.
    """

    API_MAX_CALLS = 60  # max 60 calls per minute
    API_INTERVAL = 60  # 1 minute

    def __init__(self):
        # Times of recent api calls, oldest first. They are readings of
        # time.monotonic(), so they are only meaningful relative to each
        # other, not as calendar time.
        self.callTimes = []

    def wait(self):
        """Block if necessary, then record the current call.

        Waits so that no more than API_MAX_CALLS calls are made within
        API_INTERVAL seconds. A monotonic clock is used so that adjustments
        of the system clock cannot cause excessively long sleeps.
        """
        while len(self.callTimes) >= self.API_MAX_CALLS:
            # seconds until the oldest recorded call leaves the interval
            waitTime = (self.callTimes[0] + self.API_INTERVAL
                        - time.monotonic())
            if waitTime > 0:
                print('Throttling API calls, '
                      f'sleeping for {waitTime:.1f} seconds.')
                time.sleep(waitTime)
                continue  # re-check; the sleep may have ended early
            # oldest call is outside the interval, forget it
            del self.callTimes[0]
        self.callTimes.append(time.monotonic())


api_call_throttle = Throttle()
# argument is an id or a list of id's
def get_taxa_by_id(id):
    """Look up taxa by id, using the persistent cache when possible.

    A cached response is returned if it is younger than CACHE_EXPIRATION
    seconds. Otherwise the API is called (throttled by api_call_throttle);
    after a 429 response the call is retried with a delay that doubles on
    each attempt.

    Args:
        id: a single taxon id, or a list/tuple of ids that are requested
            together in one call.

    Returns:
        The decoded json response, or None if the API answered with a
        status other than 200 (the response text is printed in that case).

    Raises:
        requests.exceptions.RequestException: if the request fails at the
            network level, including when it times out.
    """
    if isinstance(id, (list, tuple)):
        # all ids go into one path segment, joined by an encoded comma
        url = API_HOST + '/taxa/' + '%2C'.join(str(i) for i in id)
    else:
        url = API_HOST + f'/taxa/{id}'

    now = time.time()
    # Fetch the entry once; every read of the shelf unpickles the response.
    entry = cache.get(url)
    if entry is not None and entry[0] >= now - CACHE_EXPIRATION:
        return entry[1]

    delay = TOO_MANY_API_CALLS_DELAY
    headers = {'Content-type': 'application/json'}
    while True:
        api_call_throttle.wait()
        # The timeout keeps a stalled connection from blocking forever.
        response = requests.get(url, headers=headers, timeout=60)
        if response.status_code != requests.codes.too_many:
            break
        time.sleep(delay)
        delay *= 2

    if response.status_code != requests.codes.ok:
        print(response.text)
        return None

    result = response.json()
    cache[url] = (now, result)
    return result
# returns taxa by name
def get_taxa(params):
    """Query the /taxa endpoint, using the persistent cache when possible.

    A cached response is returned if it is younger than CACHE_EXPIRATION
    seconds. Otherwise the API is called (throttled by api_call_throttle);
    after a 429 response the call is retried with a delay that doubles on
    each attempt.

    Args:
        params: dict of query parameters. Boolean values are sent as the
            strings 'true' / 'false'. The dict itself is not modified.

    Returns:
        The decoded json response, or None if the API answered with a
        status other than 200 (the response text is printed in that case).

    Raises:
        requests.exceptions.RequestException: if the request fails at the
            network level, including when it times out.
    """
    url = API_HOST + '/taxa'
    # Normalise into a copy so that the caller's dict is left untouched.
    query = {
        name: ('true' if val else 'false') if isinstance(val, bool) else val
        for name, val in params.items()
    }
    # The cache key covers the url and the normalised parameters.
    key = pickle.dumps((url, query)).hex()

    now = time.time()
    # Fetch the entry once; every read of the shelf unpickles the response.
    entry = cache.get(key)
    if entry is not None and entry[0] >= now - CACHE_EXPIRATION:
        return entry[1]

    delay = TOO_MANY_API_CALLS_DELAY
    headers = {'Content-type': 'application/json'}
    while True:
        api_call_throttle.wait()
        # The timeout keeps a stalled connection from blocking forever.
        response = requests.get(url, headers=headers, params=query,
                                timeout=60)
        if response.status_code != requests.codes.too_many:
            break
        time.sleep(delay)
        delay *= 2

    if response.status_code != requests.codes.ok:
        print(response.text)
        return None

    result = response.json()
    cache[key] = (now, result)
    return result
if __name__ == '__main__':
    # This file is meant to be imported, not run. An explicit SystemExit is
    # used because an assert would be stripped when running with python -O.
    raise SystemExit('Not a top-level Python module!')