Merge branch 'main' into fix/make-convert-token-to-string-pickleable

saattrupdan · web-flow · commit fb73f5c0c91f · 2024-11-28T07:22:18.000+01:00
diff --git a/README.md b/README.md
@@ -300,6 +300,33 @@ print(add(**result))
 
 A great advantage of passing functions directly to specify the structure is that the structure of the LLM will change with the function's definition. No need to change the code at several places!
 
+You can also wrap several functions in an enum and let the model generate the parameters for one of them:
+
+```python
+from enum import Enum
+from functools import partial
+
+import outlines
+
+
+def add(a: int, b: int) -> int:
+    return a + b
+
+def mul(c: float, d: float) -> float:
+    return c * d
+
+class Operation(Enum):
+    add = partial(add)
+    mul = partial(mul)
+
+model = outlines.models.transformers("WizardLM/WizardMath-7B-V1.1")
+generator = outlines.generate.json(model, Operation)
+result = generator("Return json with two float named c and d respectively. c is negative and d greater than 1.0.")
+
+print(result)
+# {'c': -3.14, 'd': 1.5}
+```
+
 ## Prompting
 
 Building prompts can get messy. **Outlines** makes it easier to write and manage
diff --git a/benchmarks/bench_processors.py b/benchmarks/bench_processors.py
@@ -9,6 +9,12 @@
 except ImportError:
     pass
 
+try:
+    import jax
+    import jax.numpy as jnp
+except ImportError:
+    pass
+
 
 def is_mlx_lm_allowed():
     try:
@@ -18,6 +24,14 @@ def is_mlx_lm_allowed():
     return mx.metal.is_available()
 
 
+def is_jax_allowed():  # True when `jax` can be imported, False otherwise
+    try:
+        import jax  # noqa: F401
+    except ImportError:
+        return False
+    return True
+
+
 def get_mock_processor_inputs(array_library, num_tokens=30000):
     """
     logits: (4, 30,000 ) dtype=float
@@ -43,6 +57,13 @@ def get_mock_processor_inputs(array_library, num_tokens=30000):
         input_ids = mx.random.randint(
             low=0, high=num_tokens, shape=(4, 2048), dtype=mx.int32
         )
+    elif array_library == "jax":  # samplers live in jax.random; jnp has no `random`
+        logits = jax.random.uniform(
+            key=jax.random.PRNGKey(0), shape=(4, num_tokens), dtype=jnp.float32
+        )
+        input_ids = jax.random.randint(
+            key=jax.random.PRNGKey(0), minval=0, maxval=num_tokens, shape=(4, 2048)
+        )
     else:
         raise ValueError
 
@@ -67,6 +88,8 @@ class LogitsProcessorPassthroughBenchmark:
         params += ["mlx"]
     if torch.cuda.is_available():
         params += ["torch_cuda"]
+    if is_jax_allowed():
+        params += ["jax"]
 
     def setup(self, array_library):
         self.logits_processor = HalvingLogitsProcessor()
diff --git a/docs/cookbook/extract_event_details.md b/docs/cookbook/extract_event_details.md
@@ -0,0 +1,34 @@
+This recipe demonstrates how to use the `outlines` library to extract structured event details from a text message.
+We will extract the title, location, and start date and time from messages like the following:
+
+```plaintext
+Hello Kitty, my grandmother will be here, I think it's better to postpone
+our appointment to review math lessons to next Friday at 2pm at the same
+place, 3 avenue des tanneurs, one hour will be enough see you 😘
+```
+
+Let's see how to extract the event details from the message with the MLX
+library, which is dedicated to Apple Silicon processors (M series).
+
+```python
+--8<-- "docs/cookbook/extract_event_details.py"
+```
+
+The output will be:
+
+```plaintext
+Today: Saturday 16 November 2024 and it's 10:55
+```
+
+and the extracted event information will be:
+
+```json
+{
+  "title":"Math Review",
+  "location":"3 avenue des tanneurs",
+  "start":"2024-11-22T14:00:00Z"
+}
+```
+
+
+To find out more about this use case, we recommend the [ICS Generator](https://github.com/jrudoler/ics-generator) project developed by [Joseph Rudoler](https://x.com/JRudoler).
diff --git a/docs/cookbook/extract_event_details.py b/docs/cookbook/extract_event_details.py
@@ -0,0 +1,46 @@
+from datetime import datetime
+
+from pydantic import BaseModel, Field
+
+from outlines import generate, models
+
+# Load the model
+model = models.mlxlm("mlx-community/Hermes-3-Llama-3.1-8B-8bit")
+
+
+# Define the event schema using Pydantic
+class Event(BaseModel):  # target schema handed to `generate.json` further down
+    title: str = Field(description="title of the event")
+    location: str  # no default is given for this field
+    start: datetime = Field(  # default=None: the field may be left unset
+        default=None, description="date of the event if available in iso format"
+    )
+
+
+# Get the current date and time (lets the model resolve relative dates)
+now = datetime.now().strftime("%A %d %B %Y and it's %H:%M")
+
+# Define the prompt
+prompt = f"""
+Today's date and time are {now}
+Given a user message, extract information of the event like date and time in iso format, location and title.
+If the given date is relative, think step by step to find the right date.
+Here is the message:
+"""
+
+# Sample message
+message = """Hello Kitty, my grandmother will be here , I think it's better to postpone our
+appointment to review math lessons to next Friday at 2pm at the same place, 3 avenue des tanneurs, I think that one hour will be enough
+see you 😘 """
+
+# Create the generator (output is constrained to the `Event` schema)
+generator = generate.json(model, Event)
+
+# Extract the event information
+event = generator(prompt + message)
+
+# Print the current date and time
+print(f"Today: {now}")
+
+# Print the extracted event as JSON (`.json()` is deprecated in Pydantic v2)
+print(event.model_dump_json())
diff --git a/examples/beam-cloud/README.md b/examples/beam-cloud/README.md
@@ -0,0 +1,5 @@
+## Deploy Outlines on Beam
+
+1. Create an account [here](https://beam.cloud) and install the Beam SDK
+2. Download the `app.py` file to your computer
+3. Deploy it as a serverless API by running: `beam deploy app.py:predict`
diff --git a/examples/beam-cloud/app.py b/examples/beam-cloud/app.py
@@ -0,0 +1,39 @@
+from beam import Image, endpoint, env
+
+if env.is_remote():
+    import outlines
+
+
+# Pre-load models when the container first starts
+def load_models():
+    """Load the Phi-3 model; registered as the endpoint's `on_start` hook."""
+    import outlines
+
+    return outlines.models.transformers("microsoft/Phi-3-mini-4k-instruct")
+
+
+@endpoint(
+    name="outlines-serverless",
+    gpu="A10G",
+    cpu=1,
+    memory="16Gi",
+    on_start=load_models,
+    image=Image().add_python_packages(
+        ["outlines", "torch", "transformers", "accelerate"]
+    ),
+)
+def predict(context, **inputs):
+    """Label the `prompt` input (or a built-in sample review) Positive/Negative."""
+    fallback_prompt = """You are a sentiment-labelling assistant.
+    Is the following review positive or negative?
+
+    Review: This restaurant is just awesome!
+    """
+
+    review_prompt = inputs.get("prompt", fallback_prompt)
+
+    # The value returned by the `on_start` hook is exposed on the context
+    cached_model = context.on_start_value
+    # Generate one of the two allowed labels for the prompt
+    classify = outlines.generate.choice(cached_model, ["Positive", "Negative"])
+    return {"answer": classify(review_prompt)}
diff --git a/mkdocs.yml b/mkdocs.yml
@@ -74,6 +74,7 @@ markdown_extensions:
   - pymdownx.emoji:
       emoji_index: !!python/name:material.extensions.emoji.twemoji
       emoji_generator: !!python/name:material.extensions.emoji.to_svg
+  - pymdownx.snippets:
 
 
 extra_css:
@@ -123,6 +124,7 @@ nav:
       - Structured Generation from PDFs: cookbook/read-pdfs.md
       - Earnings reports to CSV: cookbook/earnings-reports.md
       - Digitizing receipts with vision models: cookbook/receipt-digitization.md
+      - Extract event details from text: cookbook/extract_event_details.md
       - Run on the cloud:
           - BentoML: cookbook/deploy-using-bentoml.md
           - Cerebrium: cookbook/deploy-using-cerebrium.md
diff --git a/outlines/base.py b/outlines/base.py
@@ -5,12 +5,23 @@
 from typing import Callable, Optional
 
 import numpy as np
-from numpy.lib.function_base import (
-    _calculate_shapes,
-    _parse_gufunc_signature,
-    _parse_input_dimensions,
-    _update_dim_sizes,
-)
+
+# Import required functions based on NumPy version
+np_major_version = int(np.__version__.split(".")[0])
+if np_major_version >= 2:
+    from numpy.lib._function_base_impl import (
+        _calculate_shapes,
+        _parse_gufunc_signature,
+        _parse_input_dimensions,
+        _update_dim_sizes,
+    )
+else:
+    from numpy.lib.function_base import (
+        _calculate_shapes,
+        _parse_gufunc_signature,
+        _parse_input_dimensions,
+        _update_dim_sizes,
+    )
 
 # Allow nested loops for running in notebook. We don't enable it globally as it
 # may interfere with other libraries that use asyncio.
diff --git a/outlines/fsm/json_schema.py b/outlines/fsm/json_schema.py
@@ -2,6 +2,7 @@
 import json
 import re
 import warnings
+from enum import Enum
 from typing import Callable, Optional, Tuple, Type, Union
 
 from jsonschema.protocols import Validator
@@ -306,6 +307,8 @@ def to_regex(
         for choice in instance["enum"]:
             if type(choice) in [int, float, bool, type(None), str]:
                 choices.append(re.escape(json.dumps(choice)))
+            elif isinstance(choice, dict):
+                choices.append(to_regex(resolver, choice, whitespace_pattern))
             else:
                 raise TypeError(f"Unsupported data type in enum: {type(choice)}")
         return f"({'|'.join(choices)})"
@@ -524,7 +527,7 @@ def to_regex(
     )
 
 
-def get_schema_from_signature(fn: Callable) -> str:
+def get_schema_from_signature(fn: Callable) -> dict:
     """Turn a function signature into a JSON schema.
 
     Every JSON object valid to the output JSON Schema can be passed
@@ -550,3 +553,16 @@ def get_schema_from_signature(fn: Callable) -> str:
     model = create_model(fn_name, **arguments)
 
     return model.model_json_schema()
+
+
+def get_schema_from_enum(myenum: Type[Enum]) -> dict:
+    """Return a JSON schema listing each member's value (or signature schema)."""
+    if len(myenum) == 0:  # explicit length check: enum classes are always truthy
+        raise ValueError(
+            f"Your enum class {myenum.__name__} has 0 members. If you are working with an enum of functions, do not forget to register them as callable (using `partial` for instance)"
+        )
+    choices = [  # `partial` members are unwrapped via `.func`; other callables as-is
+        get_schema_from_signature(getattr(v, "func", v)) if callable(v) else v
+        for v in (member.value for member in myenum)
+    ]
+    return {"title": myenum.__name__, "enum": choices}
diff --git a/outlines/generate/json.py b/outlines/generate/json.py
@@ -1,10 +1,15 @@
 import json as pyjson
+from enum import Enum
 from functools import singledispatch
 from typing import Callable, Optional, Union
 
 from pydantic import BaseModel
 
-from outlines.fsm.json_schema import build_regex_from_schema, get_schema_from_signature
+from outlines.fsm.json_schema import (
+    build_regex_from_schema,
+    get_schema_from_enum,
+    get_schema_from_signature,
+)
 from outlines.generate.api import SequenceGeneratorAdapter
 from outlines.models import OpenAI
 from outlines.samplers import Sampler, multinomial
@@ -48,6 +53,11 @@ def json(
         regex_str = build_regex_from_schema(schema, whitespace_pattern)
         generator = regex(model, regex_str, sampler)
         generator.format_sequence = lambda x: schema_object.parse_raw(x)
+    elif isinstance(schema_object, type(Enum)):
+        schema = pyjson.dumps(get_schema_from_enum(schema_object))
+        regex_str = build_regex_from_schema(schema, whitespace_pattern)
+        generator = regex(model, regex_str, sampler)
+        generator.format_sequence = lambda x: pyjson.loads(x)
     elif callable(schema_object):
         schema = pyjson.dumps(get_schema_from_signature(schema_object))
         regex_str = build_regex_from_schema(schema, whitespace_pattern)
diff --git a/outlines/models/__init__.py b/outlines/models/__init__.py
@@ -16,4 +16,4 @@
 from .transformers_vision import TransformersVision, transformers_vision
 from .vllm import VLLM, vllm
 
-LogitsGenerator = Union[Transformers, LlamaCpp, ExLlamaV2Model, MLXLM, VLLM]
+LogitsGenerator = Union[Transformers, LlamaCpp, OpenAI, ExLlamaV2Model, MLXLM, VLLM]
diff --git a/outlines/models/mlxlm.py b/outlines/models/mlxlm.py
@@ -167,12 +167,7 @@ def sample(logits: "mx.array") -> Tuple["mx.array", float]:
             prob = softmax_logits[0, token]
             return token, prob
 
-        kv_heads = (
-            [self.model.n_kv_heads] * len(self.model.layers)
-            if isinstance(self.model.n_kv_heads, int)
-            else self.model.n_kv_heads
-        )
-        cache = [mlx_lm.models.base.KVCache(self.model.head_dim, n) for n in kv_heads]
+        cache = mlx_lm.models.cache.make_prompt_cache(self.model)
 
         # kv cache contains processed input IDs, we pass the unprocessed inputs and cache to model()
         unprocessed_input_ids = prompt
diff --git a/outlines/processors/base_logits_processor.py b/outlines/processors/base_logits_processor.py
@@ -20,6 +20,16 @@ def is_mlx_array_type(array_type):
     return issubclass(array_type, mx.array)
 
 
+def is_jax_array_type(array_type):  # array_type: a class; False if jaxlib is absent
+    try:
+        import jaxlib  # imported here, so a missing jaxlib only yields False
+    except ImportError:
+        return False
+    return issubclass(array_type, jaxlib.xla_extension.ArrayImpl) or isinstance(
+        array_type, jaxlib.xla_extension.ArrayImpl
+    )  # NOTE(review): issubclass raises on non-classes first; confirm fallback is reachable
+
+
 class OutlinesLogitsProcessor(Protocol):
     """
     Base class for logits processors which normalizes types of logits:
@@ -101,6 +111,12 @@ def _to_torch(tensor_like: Array) -> torch.Tensor:
             # https://ml-explore.github.io/mlx/build/html/usage/numpy.html
             return torch.from_dlpack(tensor_like)
 
+        elif is_jax_array_type(type(tensor_like)):
+            import jax
+
+            torch_tensor = torch.from_dlpack(jax.dlpack.to_dlpack(tensor_like))
+            return torch_tensor
+
         else:
             raise TypeError(
                 "LogitsProcessor must be called with either np.NDArray, "
@@ -129,6 +145,11 @@ def _from_torch(tensor: torch.Tensor, target_type: Type) -> Array:
             # numpy doesn't support bfloat16, mlx doesn't support direct conversion from torch
             return mx.array(tensor.float().numpy())
 
+        elif is_jax_array_type(target_type):
+            import jax
+
+            return jax.dlpack.from_dlpack(tensor)
+
         else:
             raise TypeError(
                 f"Failed to convert torch tensors to target_type `{target_type}`"
diff --git a/pyproject.toml b/pyproject.toml
diff --git a/tests/fsm/test_json_schema.py b/tests/fsm/test_json_schema.py
diff --git a/tests/generate/test_integration_transformers.py b/tests/generate/test_integration_transformers.py
diff --git a/tests/models/test_mlxlm.py b/tests/models/test_mlxlm.py
diff --git a/tests/processors/test_base_processor.py b/tests/processors/test_base_processor.py