Skip to content
Image to Text with Gemini Pro Vision Model in Python Notebook
  • AI Chat
  • Code
  • Report
  • !pip install -q -U google-generativeai
    import pathlib
    import textwrap
    
    import google.generativeai as genai
    
    from IPython.display import display
    from IPython.display import Markdown
    
    
    def to_markdown(text):
      """Wrap *text* in an IPython ``Markdown`` object, formatted as a block quote.

      Each '•' character is replaced with '  *' (a Markdown list marker), and
      every line of the result -- blank lines included -- is prefixed with '> '.
      """
      bulleted = text.replace('•', '  *')

      def every_line(_line):
        # textwrap.indent skips whitespace-only lines by default; accepting
        # every line keeps the '> ' prefix on blank lines as well.
        return True

      quoted = textwrap.indent(bulleted, '> ', predicate=every_line)
      return Markdown(quoted)
    import os
    
    # Read the API key from the GEMINI_API_KEY environment variable instead of
    # hard-coding it in the notebook. The lookup raises KeyError if it is unset.
    gemini_api_key = os.environ["GEMINI_API_KEY"]
    # Pass the key to the google.generativeai module (imported above as `genai`).
    genai.configure(api_key=gemini_api_key)
    # Print the name of every model whose supported generation methods
    # include 'generateContent'.

    for m in genai.list_models():
      if 'generateContent' not in m.supported_generation_methods:
        continue  # this model does not list generateContent; skip it
      print(m.name)
    # NOTE(review): 'gemini-pro-vision' is a hard-coded model name and may have
    # been retired by the API since this notebook was written -- confirm it still
    # appears in the genai.list_models() output before relying on it.
    model = genai.GenerativeModel('gemini-pro-vision')
    import PIL.Image
    
    # Relative path: presumably invoice_test.jpg sits in the notebook's working
    # directory -- TODO confirm.
    img = PIL.Image.open('invoice_test.jpg')
    # Bare expression: presumably the end of a notebook cell, where it renders
    # the image inline. It has no effect when run as a plain script.
    img
    
    
    # Only the image is passed to the model; no text prompt accompanies it.
    response = model.generate_content(img)
    
    # Format the response text with to_markdown (defined earlier in this file).
    # Like `img` above, this is a bare expression, so the result is only shown
    # when it is the last expression of a notebook cell.
    to_markdown(response.text)