# QA Retrieval with PGVector

In [1]:
!pip install langchain openai python-dotenv datasets pgvector psycopg2-binary



In [2]:
from dotenv import load_dotenv
load_dotenv()

True

In [3]:
from datasets import load_dataset

data = load_dataset("wikipedia", "20220301.simple", split='train[:10]')

  from .autonotebook import tqdm as notebook_tqdm
Found cached dataset wikipedia (/Users/edd/.cache/huggingface/datasets/wikipedia/20220301.simple/2.0.0/aa542ed919df55cc5d3347f42dd4521d05ca68751f50dbc32bae2a7f1e167559)


In [4]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=400,
    chunk_overlap=20,
)

In [5]:
text_splitter.split_text(data[6]['text'])[:3]

['Alan Mathison Turing OBE FRS (London, 23 June 1912 – Wilmslow, Cheshire, 7 June 1954) was an English mathematician and computer scientist. He was born in Maida Vale, London.\n\nEarly life and family \nAlan Turing was born in Maida Vale, London on 23 June 1912. His father was part of a family of merchants from Scotland. His mother, Ethel Sara, was the daughter of an engineer.',
 'Education \nTuring went to St. Michael\'s, a school at 20 Charles Road, St Leonards-on-sea, when he was five years old.\n"This is only a foretaste of what is to come, and only the shadow of what is going to be.” – Alan Turing.',
 'The Stoney family were once prominent landlords, here in North Tipperary. His mother Ethel Sara Stoney (1881–1976) was daughter of Edward Waller Stoney (Borrisokane, North Tipperary) and Sarah Crawford (Cartron Abbey, Co. Longford); Protestant Anglo-Irish gentry.']

In [6]:
query = "Where was Alaning Turing born and what is an Engima machine?"

In [7]:
from langchain.docstore.document import Document

documents = \
    [Document(page_content=chunk_text, metadata={"id": record['id'] + str(chunk_idx), "source": record['url']})
     for record in data
     for (chunk_idx, chunk_text) in enumerate(text_splitter.split_text(record['text']))]

Start a local Postgres instance with the PGVector extension available

```bash
docker run --rm -it --name vector-store -p 5432:5432 -e POSTGRES_USER=postgres -e POSTGRES_PASSWORD=postgres -e POSTGRES_DB=postgres ankane/pgvector -c log_statement=all
```

In [8]:
from langchain.vectorstores.pgvector import PGVector
import sqlalchemy

connection_string = PGVector.connection_string_from_db_params(
    driver="psycopg2",
    host="0.0.0.0",
    port=5432,
    database="postgres",
    user="postgres",
    password="postgres",
)

engine = sqlalchemy.create_engine(connection_string) 
with engine.connect() as conn:
    conn.execute(sqlalchemy.sql.text('CREATE EXTENSION IF NOT EXISTS vector; COMMIT;'))

In [9]:
from langchain.embeddings.openai import OpenAIEmbeddings

embeddings = OpenAIEmbeddings(model='text-embedding-ada-002')

store = PGVector.from_documents(
    embedding=embeddings,
    documents=documents,
    collection_name="wikipedia",
    connection_string=connection_string,
)

In [10]:
store.similarity_search(query)

[Document(page_content='Alan Mathison Turing OBE FRS (London, 23 June 1912 – Wilmslow, Cheshire, 7 June 1954) was an English mathematician and computer scientist. He was born in Maida Vale, London.\n\nEarly life and family \nAlan Turing was born in Maida Vale, London on 23 June 1912. His father was part of a family of merchants from Scotland. His mother, Ethel Sara, was the daughter of an engineer.', metadata={'id': '130', 'source': 'https://simple.wikipedia.org/wiki/Alan%20Turing'}),
 Document(page_content='Educated in Dublin at Alexandra School and College; on October 1st 1907 she married Julius Mathison Turing, latter son of Reverend John Robert Turing and Fanny Boyd, in Dublin. Born on June 23rd 1912, Alan Turing would go on to be regarded as one of the greatest figures of the twentieth century.', metadata={'id': '133', 'source': 'https://simple.wikipedia.org/wiki/Alan%20Turing'}),
 Document(page_content='A brilliant mathematician and cryptographer Alan was to become the founder of

In [11]:
from langchain.chat_models import ChatOpenAI

llm = ChatOpenAI(
    model_name='gpt-3.5-turbo',
    temperature=0
)

## Stuff

In [12]:
from langchain.chains import RetrievalQA

qa = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=store.as_retriever()
)

qa.run(query)

'Alan Turing was born in Maida Vale, London. \n\nAn Enigma machine was a cipher machine used by the Nazi German military during World War II to encrypt and decrypt secret messages. It was considered highly secure at the time and was used to protect sensitive communications. Turing played a crucial role in breaking the Enigma code, which greatly aided the Allied forces in their efforts during the war.'

In [13]:
from langchain.chains import RetrievalQAWithSourcesChain

qa_with_sources = RetrievalQAWithSourcesChain.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=store.as_retriever()
)

qa_with_sources(query)

{'question': 'Where was Alaning Turing born and what is an Engima machine?',
 'answer': 'Alan Turing was born in Maida Vale, London. An Enigma machine was a device designed by Turing at Bletchley Park to break secret Enigma encrypted messages used by the Nazi German war machine during World War 2.\n',
 'sources': '\n- https://simple.wikipedia.org/wiki/Alan%20Turing'}

## Map-Reduce

In [14]:
from langchain.chains.question_answering import load_qa_chain

qa = RetrievalQA(
    combine_documents_chain=load_qa_chain(llm, chain_type="map_reduce"), 
    retriever=store.as_retriever()
)

qa.run(query)

'The given portion of the document does not provide any information about where Alan Turing was born or what an Enigma machine is.'

## Conversational

In [16]:
from langchain.chains import ConversationalRetrievalChain
from langchain.memory import ConversationBufferMemory

memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)

qa = ConversationalRetrievalChain.from_llm(llm, store.as_retriever(), memory=memory)

qa({"question": query})

{'question': 'Where was Alaning Turing born and what is an Engima machine?',
 'chat_history': [HumanMessage(content='Where was Alaning Turing born and what is an Engima machine?', additional_kwargs={}, example=False),
  AIMessage(content='Alan Turing was born in Maida Vale, London. \n\nAn Enigma machine was a cipher machine used by the Nazi German military during World War II to encrypt and decrypt secret messages. It was considered highly secure at the time and was used to protect sensitive communications. Turing played a crucial role in breaking the Enigma code, which greatly aided the Allied forces in their efforts during the war.', additional_kwargs={}, example=False)],
 'answer': 'Alan Turing was born in Maida Vale, London. \n\nAn Enigma machine was a cipher machine used by the Nazi German military during World War II to encrypt and decrypt secret messages. It was considered highly secure at the time and was used to protect sensitive communications. Turing played a crucial role in

In [17]:
qa = ConversationalRetrievalChain.from_llm(llm, store.as_retriever(), return_source_documents=True)

qa({"question": query, "chat_history": []})

{'question': 'Where was Alaning Turing born and what is an Engima machine?',
 'chat_history': [],
 'answer': 'Alan Turing was born in Maida Vale, London. \n\nAn Enigma machine was a cipher machine used by the Nazi German military during World War II to encrypt and decrypt secret messages. It was considered highly secure at the time and was used to protect sensitive communications. Turing played a crucial role in breaking the Enigma code, which greatly aided the Allied forces in their efforts during the war.',
 'source_documents': [Document(page_content='Alan Mathison Turing OBE FRS (London, 23 June 1912 – Wilmslow, Cheshire, 7 June 1954) was an English mathematician and computer scientist. He was born in Maida Vale, London.\n\nEarly life and family \nAlan Turing was born in Maida Vale, London on 23 June 1912. His father was part of a family of merchants from Scotland. His mother, Ethel Sara, was the daughter of an engineer.', metadata={'id': '130', 'source': 'https://simple.wikipedia.o