vendor: replace third_party/nature-id gitlink with tracked files

2026-02-21 11:00:42 +01:00
parent a91309de11
commit 69486a92be
18 changed files with 1376 additions and 1 deletions
--- a/third_party/nature-id/inat_api.py
+++ b/third_party/nature-id/inat_api.py
@@ -0,0 +1,110 @@
+import json, os, pickle, requests, shelve, sys, time
+
+#############################################################################
+#                                                                           #
+# API calls to obtain taxonomic information. Used in case of name changes.  #
+#                                                                           #
+# See documentation at https://api.inaturalist.org/v1/docs/#/Taxa            #
+#                                                                           #
+# We throttle the number of calls to less than 60 per minute. We also       #
+# implement a cache to avoid repeated lookups of the same taxa across runs. #
+# Cache entries include time stamps and they expire after two weeks.        #
+#                                                                           #
+#############################################################################
+
+API_HOST                 = "https://api.inaturalist.org/v1"
+CACHE_EXPIRATION         = 14 * 24 * 3600  # seconds; entries expire after 2 weeks
+TOO_MANY_API_CALLS_DELAY = 60              # first wait (seconds) after error 429
+
+# The cache is a shelve file in a per-user directory; it stores json responses.
+
+if sys.platform == 'win32':
+    DATA_DIR  = os.path.join(os.path.expanduser('~'),
+                             'AppData', 'Local', 'inat_api')
+else:
+    DATA_DIR  = os.path.join(os.path.expanduser('~'), '.cache', 'inat_api')
+
+# exist_ok=True: no failure if another process creates the directory first.
+os.makedirs(DATA_DIR, exist_ok=True)
+
+cache = shelve.open(os.path.join(DATA_DIR, 'api.cache'))
+
+# API call throttling.
+
+class Throttle:
+    """Keeps API calls to at most API_MAX_CALLS per API_INTERVAL seconds."""
+    API_MAX_CALLS = 60   # calls allowed within one interval
+    API_INTERVAL  = 60   # length of the interval in seconds
+
+    def __init__(self):
+        self.callTimes = []   # time stamps of recorded calls, oldest first
+
+    def wait(self):
+        """Sleep until another call fits into the interval, then record it."""
+        while len(self.callTimes) >= self.API_MAX_CALLS:
+            remaining = self.callTimes[0] - (time.time() - self.API_INTERVAL)
+            if remaining <= 0:
+                self.callTimes = self.callTimes[1:]   # oldest call has aged out
+            else:
+                print('Throttling API calls, '
+                      f'sleeping for {remaining:.1f} seconds.')
+                time.sleep(remaining)
+        self.callTimes.append(time.time())
+
+api_call_throttle = Throttle()
+
+# Looks up taxa by id; the argument is an id or a list of id's. Returns the
+# json response (possibly from the cache) or None if the API reports an error.
+def get_taxa_by_id(id):
+    ids = '%2C'.join(map(str, id)) if type(id) is list else f'{id}'
+    url = API_HOST + '/taxa/' + ids
+    now = time.time()
+    # Answer from the cache unless the entry is missing or has expired.
+    if url in cache and cache[url][0] >= now - CACHE_EXPIRATION:
+        return cache[url][1]
+    backoff = TOO_MANY_API_CALLS_DELAY
+    headers = {'Content-type' : 'application/json' }
+    # Repeat the request for as long as the API answers with error 429.
+    while True:
+        api_call_throttle.wait()
+        response = requests.get(url, headers=headers)
+        if response.status_code != requests.codes.too_many:
+            break
+        time.sleep(backoff)
+        backoff *= 2          # wait twice as long after the next error 429
+    if response.status_code != requests.codes.ok:
+        print(response.text)
+        return None
+    cache[url] = (now, response.json())
+    return cache[url][1]
+
+# Returns taxa by name. 'params' maps query parameter names to values; bools
+# are sent as 'true'/'false' and the caller's dict is left unmodified. Returns
+# the json response (possibly cached) or None if the API reports an error.
+def get_taxa(params):
+    url = API_HOST + '/taxa'
+    query = {k: ('true' if v else 'false') if isinstance(v, bool) else v
+             for k, v in params.items()}
+    key = pickle.dumps((url, query)).hex()
+    tim = time.time()
+    if key not in cache or cache[key][0] < tim - CACHE_EXPIRATION:
+        delay = TOO_MANY_API_CALLS_DELAY
+        headers = {'Content-type' : 'application/json' }
+        while True:
+            api_call_throttle.wait()
+            response = requests.get(url, headers=headers, params=query)
+            if response.status_code != requests.codes.too_many:
+                break
+            time.sleep(delay)   # error 429: back off, doubling every time
+            delay *= 2
+        if response.status_code == requests.codes.ok:
+            cache[key] = (tim, response.json())
+        else:
+            print(response.text)
+            return None
+    return cache[key][1]
+
+
+if __name__ == '__main__':
+    # This file is not a script. Unlike assert, raise also works under -O.
+    raise SystemExit('Not a top-level Python module!')