misc/aqe: Initial import.

2018-06-01 00:15:10 -07:00
parent c5401e5128
commit b51e9456c6
9 changed files with 415 additions and 0 deletions
--- a/README.md
+++ b/README.md
@@ -0,0 +1,41 @@
+## AQE: A Query Engine
+
+This is an implementation of a knowledge base, hacked together in Python
+3 (it won't work in Python 2 because of the modules it depends on) for
+now, to quickly iterate on ideas.
+
+There are a few key points:
+
+ A `KnowledgeBase` contains facts.
+ A fact is a tuple: (relationship, subject, object). For example,
+  `('is', 'sky', 'blue')`.
+ A `KnowledgeBase` has three core methods: ask, retract, and tell.
+ The `ask` method queries the `KnowledgeBase` to ascertain whether
+  a fact is true. Either the subject or the object may be `None`,
+  in which case all satisfiable facts are returned.
+ The `retract` method tells the `KnowledgeBase` that the fact is
+  no longer true. If it's rainy, we might retract our fact about the
+  sky being blue.
+ The `tell` method tells the `KnowledgeBase` that the fact is
+  now true. For example, if it's rainy (and we've retracted the previous
+  'sky is blue' fact), we might tell the `KnowledgeBase` that
+  `('is', 'sky', 'grey')`.
+ A `KnowledgeBase` can also perform substitutions.
+ An action contains positive and negative preconditions, retractions,
+  and updates. The positive condition list contains facts that must
+  be true for a knowledge base, and the negative condition list contains
+  facts that must be false. If these preconditions hold, the retractions
+  are applied, followed by the updates.
+ See `test_actions.py` for an example.
+
+### Limitations
+
+ Singleton facts aren't supported; that is, there is no way to make a
+  `KnowledgeBase` assert that there is only one relationship → subject
+  mapping. For example, the `KnowledgeBase` will admit that
+  `('is', 'schrödingers cat', 'alive')` and
+  `('is', 'schrödingers cat', 'dead')` are both true simultaneously.
+
+### TODO
+
+ Rewrite in C++?
--- a/init.py
+++ b/init.py
--- a/actions.py
+++ b/actions.py
@@ -0,0 +1,33 @@
+import copy
+import logging
+
+class Action:
+    """A guarded change that can be applied to a knowledge base.
+
+    An action holds four lists of fact templates: positive preconditions,
+    negative preconditions, retractions, and updates. Every template is
+    resolved with the knowledge base's ``subst`` method before it is used.
+    """
+
+    def __init__(self, pos_precond, neg_precond, retracts, updates):
+        """Keep private deep copies of the four template lists."""
+        (self.pos_precond,
+         self.neg_precond,
+         self.retracts,
+         self.updates) = (copy.deepcopy(templates) for templates in
+                          (pos_precond, neg_precond, retracts, updates))
+
+    def satisfied(self, kb, subject, obj):
+        """Return True when every precondition holds in ``kb``.
+
+        A positive template must produce a truthy answer from ``kb.ask``;
+        a negative template must produce a falsy one. The first template
+        that fails is logged as a warning and False is returned.
+        """
+        for template in self.pos_precond:
+            answer = kb.ask(kb.subst(template, subject, obj))
+            if answer:
+                continue
+            logging.warning('{} is not valid in the current knowledgebase'.format(template))
+            return False
+
+        for template in self.neg_precond:
+            answer = kb.ask(kb.subst(template, subject, obj))
+            if not answer:
+                continue
+            logging.warning('{} is valid in the current knowledgebase'.format(template))
+            return False
+
+        return True
+
+    def perform(self, kb, subject, obj):
+        """Apply the action to a copy of ``kb``.
+
+        Returns None when the preconditions are not satisfied. Otherwise
+        returns a deep copy of ``kb`` with the retractions applied first
+        and the updates second; ``kb`` itself is left untouched. Templates
+        are resolved against the original ``kb``, not the copy.
+        """
+        if self.satisfied(kb, subject, obj):
+            shadow = copy.deepcopy(kb)
+            for template in self.retracts:
+                shadow.retract(kb.subst(template, subject, obj))
+            for template in self.updates:
+                shadow.tell(kb.subst(template, subject, obj))
+            return shadow
+        return None
+
--- a/data/corpus.json
+++ b/data/corpus.json
--- a/kb.py
+++ b/kb.py
@@ -0,0 +1,159 @@
+"""
+AQE: A Query Engine
+
+This is a proof of concept of a baseline query engine for AI work.
+"""
+
+class InvalidQuery(Exception):
+    """Exception type for queries the knowledge base cannot answer.
+
+    NOTE(review): presumably meant for malformed queries; nothing in this
+    module raises it yet.
+    """
+    pass
+
+class Inconsistency(Exception):
+    """Signals that a fact is not stored consistently.
+
+    The offending fact is available as the ``fact`` attribute.
+    """
+
+    def __init__(self, fact):
+        # Forward the fact to Exception so ``args`` is populated; without
+        # this, copying or pickling the exception fails because it is
+        # rebuilt from an empty ``args`` tuple.
+        super().__init__(fact)
+        self.fact = fact
+
+    def __str__(self):
+        return 'Inconsistency: {}'.format(self.fact)
+
+class KnowledgeBase:
+    """An in-memory store of ``(relationship, subject, object)`` facts.
+
+    Every fact is recorded three times: in a flat set of fact tuples, in a
+    per-relationship subject -> objects index, and in a per-relationship
+    object -> subjects index. ``is_consistent`` checks that the three
+    agree.
+    """
+
+    def __init__(self):
+        # TODO(kyle): support loading an initial set of facts.
+        # relationship -> {'subjects': {subject: {objects}},
+        #                  'objects': {object: {subjects}}}
+        self.__kb__ = {}
+        # Flat set of every known fact tuple.
+        self.__facts__ = set()
+
+    def tell(self, fact):
+        """Record ``fact`` as true.
+
+        All three parts of the fact must be truthy.
+        """
+        relationship, subject, obj = fact
+
+        # NB: in the future, these assertions may not need to be true; there
+        # might be space in the world for "fuzzy" facts.
+        assert(relationship)
+        assert(subject)
+        assert(obj)
+
+        index = self.__kb__.setdefault(relationship,
+                                       {'subjects': {}, 'objects': {}})
+        index['subjects'].setdefault(subject, set()).add(obj)
+        index['objects'].setdefault(obj, set()).add(subject)
+        self.__facts__.add(fact)
+
+    def retract(self, fact):
+        """Record that ``fact`` is no longer true.
+
+        Retracting a fact that is not known is silently ignored. All three
+        parts of the fact must be truthy.
+        """
+        relationship, subject, obj = fact
+
+        # For now, these assertions are required. In the future, it would be
+        # interesting to say something to the effect of "forget everything you
+        # know about X".
+        assert(relationship)
+        assert(subject)
+        assert(obj)
+
+        # TODO(kyle): answer existential question: if I delete all the objects
+        # from a subject (or vice versa), should that subject/object be kept or
+        # removed entirely? This is the difference between "I have no concept
+        # of X" and "I am aware that X exists but I don't know anything about it".
+        # For now, I'm electing to keep the entry.
+        #
+        # Similarly, if the relationship is empty, we could make the argument
+        # for removing it --- at the expense of now saying that we have no
+        # concept of this relationship.
+        try:
+            self.__kb__[relationship]['subjects'][subject].remove(obj)
+            self.__kb__[relationship]['objects'][obj].remove(subject)
+            self.__facts__.remove(fact)
+        except KeyError:
+            # Being told to forget something about something you don't know
+            # isn't an error.
+            pass
+
+    def ask(self, fact):
+        """Return the list of known facts matching ``fact``.
+
+        The relationship is required. A falsy subject or object acts as a
+        wildcard. A fully specified fact yields ``[fact]`` or ``[]``; a
+        query with one wildcard yields every fact matching the other two
+        parts; a query with both wildcards yields every fact stored for
+        the relationship. Unknown relationships, subjects, or objects
+        yield ``[]`` rather than raising.
+        """
+        relationship, subject, obj = fact
+
+        # A future milestone will remove this requirement to support free
+        # variables.
+        assert(relationship)
+
+        if subject and obj:
+            return [fact] if fact in self.__facts__ else []
+
+        index = self.__kb__.get(relationship)
+        if index is None:
+            # Nothing has ever been told about this relationship.
+            return []
+
+        if subject:
+            return [(relationship, subject, _obj)
+                    for _obj in index['subjects'].get(subject, ())]
+
+        if obj:
+            return [(relationship, _subject, obj)
+                    for _subject in index['objects'].get(obj, ())]
+
+        # Both subject and object are wildcards.
+        return [(relationship, _subject, _obj)
+                for _subject, _objs in index['subjects'].items()
+                for _obj in _objs]
+
+    def facts(self):
+        """Return a list containing every known fact."""
+        return list(self.__facts__)
+
+    def is_consistent(self):
+        """Check that the fact set and both indexes agree.
+
+        Returns True when they do. Raises Inconsistency carrying the
+        offending fact otherwise.
+        """
+        current = None
+        try:
+            # Every fact must appear in both indexes.
+            for current in self.__facts__:
+                relationship, subject, obj = current
+                index = self.__kb__[relationship]
+                if obj not in index['subjects'][subject]:
+                    raise Inconsistency(current)
+                if subject not in index['objects'][obj]:
+                    raise Inconsistency(current)
+
+            # Every index entry must correspond to a known fact.
+            for relationship, index in self.__kb__.items():
+                # Reported if the index entry itself is malformed.
+                current = (relationship, None, None)
+                for subject, objs in index['subjects'].items():
+                    for obj in objs:
+                        current = (relationship, subject, obj)
+                        if current not in self.__facts__:
+                            raise Inconsistency(current)
+
+                for obj, subjects in index['objects'].items():
+                    for subject in subjects:
+                        current = (relationship, subject, obj)
+                        if current not in self.__facts__:
+                            raise Inconsistency(current)
+        except KeyError:
+            # A missing index entry is itself an inconsistency.
+            raise Inconsistency(current) from None
+
+        return True
+
+    def __len__(self):
+        return len(self.__facts__)
+
+    @staticmethod
+    def _bind(term, default, subject, obj):
+        """Resolve one template term against the supplied bindings."""
+        if term is None:
+            term = default
+        if term == '?any':
+            return None
+        if term == '?subject':
+            return subject
+        if term == '?object':
+            return obj
+        return term
+
+    def subst(self, fact, subject, obj):
+        """Resolve the placeholders in a fact template.
+
+        In the subject and object positions, ``None`` takes the
+        corresponding argument, ``'?any'`` becomes ``None`` (a wildcard
+        for ``ask``), ``'?subject'`` and ``'?object'`` take the ``subject``
+        and ``obj`` arguments, and ``'?current'`` is replaced by the single
+        value currently known for that position given the rest of the
+        fact. Exactly one such value must exist. Only one position may be
+        ``'?current'``; the subject position is checked first.
+
+        Returns the resolved ``(relationship, subject, object)`` tuple.
+        """
+        relationship, _subject, _obj = fact
+        _subject = self._bind(_subject, subject, subject, obj)
+        _obj = self._bind(_obj, obj, subject, obj)
+
+        if _subject == '?current':
+            possibilities = self.ask((relationship, None, _obj))
+            assert(len(possibilities) == 1)
+            _, _subject, _ = possibilities[0]
+        elif _obj == '?current':
+            # Look up the current object of the *resolved* subject, which
+            # may differ from the ``subject`` argument when the template
+            # names its own subject.
+            possibilities = self.ask((relationship, _subject, None))
+            assert(len(possibilities) == 1)
+            _, _, _obj = possibilities[0]
+
+        return (relationship, _subject, _obj)
+
+
+def from_facts(facts):
+    """Return a new KnowledgeBase that has been told each fact in ``facts``."""
+    knowledge = KnowledgeBase()
+    for entry in facts:
+        knowledge.tell(entry)
+    return knowledge
--- a/sample.py
+++ b/sample.py
@@ -0,0 +1,47 @@
+import base64
+import itertools
+import json
+import kb
+import pickle
+import random
+
+# Base64 text that load() decodes and unpickles into the list of facts used
+# to seed the sample KnowledgeBase.
+FACTS = """
+gANdcQAoWAIAAABpc3EBWAgAAABhaXJsaW5lcnECWAUAAABGbHllcnEDh3EEaAFYBwAAAG9ha2xh
+bmRxBVgHAAAAQWlycG9ydHEGh3EHaAFoBVgEAAAAQ2l0eXEIh3EJaAFYBgAAAGRlbnZlcnEKaAaH
+cQtoAWgKaAiHcQxoAVgGAAAAY2JyNjAwcQ1YBgAAAERyaXZlcnEOh3EPaAFYBwAAAHRyb29wZXJx
+EGgOh3ERWAIAAABhdHESaAJoCodxE2gSaA1oBYdxFGUu
+"""
+
+def load():
+    """Build the sample KnowledgeBase from the embedded FACTS blob.
+
+    FACTS is base64-decoded and unpickled, and every resulting fact is
+    told to a fresh KnowledgeBase, which is returned.
+    """
+    raw = base64.decodebytes(FACTS.encode('ascii'))
+    decoded = pickle.loads(raw)
+
+    skb = kb.KnowledgeBase()
+    for entry in decoded:
+        skb.tell(entry)
+    return skb
+
+def load_facts(corpus_path='data/corpus.json', is_count=1000000):
+    """Generate a set of facts from a JSON corpus file.
+
+    When the corpus has both 'nouns' and 'adjectives', up to ``is_count``
+    (noun, adjective) pairs are drawn at random, with replacement, and
+    added as ``('is', noun, adjective)`` facts. The number of draws is
+    capped at the number of distinct pairs, and because draws can repeat,
+    the result may hold fewer facts than draws. When the corpus has
+    'cities', each one is added as ``('is', city, 'City')``.
+
+    Returns the set of generated fact tuples.
+    """
+    # Close the corpus file deterministically instead of leaking the handle.
+    with open(corpus_path) as corpus_file:
+        corpus = json.load(corpus_file)
+
+    facts = set()
+    if 'nouns' in corpus and 'adjectives' in corpus:
+        perms = list(itertools.product(corpus['nouns'],
+                                       corpus['adjectives']))
+        if perms:
+            # Cap at len(perms), not len(perms) - 1, so a corpus with a
+            # single pair still produces its one fact.
+            pool = random.choices(perms, k=min(is_count, len(perms)))
+            for noun, adjective in pool:
+                facts.add(('is', noun, adjective))
+
+    if 'cities' in corpus:
+        for city in corpus['cities']:
+            facts.add(('is', city, 'City'))
+
+    return facts
+
+def generate_tail_number():
+    """Return a random tail number: 'N', two digits, two capital letters."""
+    alphabet = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
+    # Draw in a fixed order: the number first, then each letter.
+    number = random.randint(10, 99)
+    first = random.choice(alphabet)
+    second = random.choice(alphabet)
+    return 'N{}{}{}'.format(number, first, second)
--- a/test_actions.py
+++ b/test_actions.py
@@ -0,0 +1,67 @@
+import actions
+import kb
+import unittest
+
+# Facts used to seed the knowledge base before each test: three planes and
+# five packages, each 'at' one of three airports.
+INITIAL_FACTS = [
+    ('is', 'N29EO', 'Plane'),
+    ('at', 'N29EO', 'dia'),
+    ('is', 'N10IV', 'Plane'),
+    ('at', 'N10IV', 'oak'),
+    ('is', 'N33FR', 'Plane'),
+    ('at', 'N33FR', 'lga'),
+    ('is', '1Z12345E0205271688', 'Package'),
+    ('at', '1Z12345E0205271688', 'dia'),
+    ('is', '1Z12345E6605272234', 'Package'),
+    ('at', '1Z12345E6605272234', 'dia'),
+    ('is', '1Z12345E0305271640', 'Package'),
+    ('at', '1Z12345E0305271640', 'oak'),
+    ('is', '1Z12345E1305277940', 'Package'),
+    ('at', '1Z12345E1305277940', 'lga'),
+    ('is', '1Z12345E6205277936', 'Package'),
+    ('at', '1Z12345E6205277936', 'lga'),
+    ('is', 'dia', 'Airport'),
+    ('is', 'lga', 'Airport'),
+    ('is', 'oak', 'Airport'),
+]
+
+# The templates below use the '?subject', '?object' and '?current'
+# placeholders, which KnowledgeBase.subst resolves when the action runs.
+
+# Must hold: the subject is a plane and the object is an airport.
+FLY_POS_PRECONDS = [
+    ('is', '?subject', 'Plane'),
+    ('is', '?object', 'Airport'),
+]
+
+# Must not hold: the subject is already at the object.
+FLY_NEG_PRECONDS = [
+    ('at', '?subject', '?object'),
+]
+
+# Removed on success: wherever the subject currently is.
+FLY_RETRACTIONS = [
+    ('at', '?subject', '?current'), 
+]
+
+# Added on success: the subject is now at the object.
+FLY_UPDATES = [
+    ('at', '?subject', '?object'),
+]
+
+# The action under test, built from the four template lists above.
+fly = actions.Action(FLY_POS_PRECONDS, FLY_NEG_PRECONDS,
+                     FLY_RETRACTIONS, FLY_UPDATES)
+
+class ActionTestSuite(unittest.TestCase):
+    """Exercises the ``fly`` action against the INITIAL_FACTS world."""
+
+    def setUp(self):
+        # Each test starts from a freshly built knowledge base.
+        self.kb = kb.from_facts(INITIAL_FACTS)
+
+    def test_a_flight(self):
+        """Flying N10IV from oak to lga changes only the returned copy."""
+        origin = ('at', 'N10IV', 'oak')
+        destination = ('at', 'N10IV', 'lga')
+
+        self.assertTrue(self.kb.ask(origin))
+        self.assertFalse(self.kb.ask(destination))
+
+        shadow = fly.perform(self.kb, 'N10IV', 'lga')
+        self.assertTrue(shadow)
+
+        # Shadow should reflect the updates and retractions.
+        self.assertTrue(shadow.ask(destination))
+        self.assertFalse(shadow.ask(origin))
+
+        # The original shouldn't be touched.
+        self.assertTrue(self.kb.ask(origin))
+        self.assertFalse(self.kb.ask(destination))
+
+
--- a/test_kb.py
+++ b/test_kb.py
@@ -0,0 +1,59 @@
+import copy
+import kb
+import random
+import sample
+import unittest
+
+
+class KnowledgeBaseTestSuite(unittest.TestCase):
+    """Tests for KnowledgeBase using the sample knowledge base."""
+
+    def setUp(self):
+        self.kb = sample.load()
+
+    def test_a_sanity_check(self):
+        """The sample data is consistent and every fact can be asked."""
+        # Use a unittest assertion rather than a bare assert, which is
+        # stripped when Python runs with -O.
+        self.assertTrue(self.kb.is_consistent())
+        for fact in self.kb.__facts__:
+            self.assertTrue(self.kb.ask(fact))
+
+    def test_tell(self):
+        """A newly told fact is returned by ask."""
+        new_fact = ('is', 'berkeley', 'City')
+
+        # make sure it's not something we already know
+        self.assertFalse(self.kb.ask(new_fact))
+        self.kb.tell(new_fact)
+        answer = self.kb.ask(new_fact)
+        self.assertListEqual(answer, [new_fact,])
+
+    def test_inconsistency(self):
+        """Corrupting any one of the three stores is detected."""
+        badkb = copy.deepcopy(self.kb)
+        badfact = random.choice(badkb.facts())
+        relationship, subject, obj = badfact
+
+        # muck with subjects part
+        badkb.__kb__[relationship]['subjects'][subject].remove(obj)
+        with self.assertRaises(kb.Inconsistency):
+            badkb.is_consistent()
+
+        # muck with objects part
+        badkb = copy.deepcopy(self.kb)
+        badkb.__kb__[relationship]['objects'][obj].remove(subject)
+        with self.assertRaises(kb.Inconsistency):
+            badkb.is_consistent()
+
+        # muck with facts part
+        badkb = copy.deepcopy(self.kb)
+        badkb.__facts__.remove(badfact)
+        with self.assertRaises(kb.Inconsistency):
+            badkb.is_consistent()
+
+        # inject false data into the subject
+        badkb = copy.deepcopy(self.kb)
+        badkb.__kb__[relationship]['subjects'][subject].add('false memory')
+        with self.assertRaises(kb.Inconsistency):
+            badkb.is_consistent()
+
+        # inject false data into the object
+        badkb = copy.deepcopy(self.kb)
+        badkb.__kb__[relationship]['objects'][obj].add('false memory')
+        with self.assertRaises(kb.Inconsistency):
+            badkb.is_consistent()
--- a/util.py
+++ b/util.py
@@ -0,0 +1,8 @@
+import random
+
+def generate_tail_number():
+    """Return a random tail number: 'N', two digits, two capital letters."""
+    letters = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
+    # randint returns an int, so convert it before concatenating; adding
+    # it to a str directly raises TypeError.
+    tailno = 'N' + str(random.randint(10, 99))
+    tailno += random.choice(letters)
+    tailno += random.choice(letters)
+    return tailno