diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 5978f85345c7e5739af03e45a80a2936fcbbd447..6c960fc5114bc9e2a7f398415852fdb0ca2d4f18 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -12,28 +12,15 @@ before_script: nix-store --import < ./cache.nar || true fi - nix-env -iA git -f '<nixpkgs>' - nix-env -iA cachix -f https://github.com/NixOS/nixpkgs/tarball/db557aab7b690f5e0e3348459f2e4dc8fd0d9298 - - mkdir -p $HOME/.config/nix - cat > $HOME/.config/nix/nix.conf <<EOF - substituters = https://cache.nixos.org https://trec-car-tools.cachix.org - trusted-public-keys = cache.nixos.org-1:6NCHdD59X431o0gWypbMrAURkbJ16ZPMQFGspcDShjY= trec-car-tools.cachix.org-1:zlHkXRIQit3P6UdWD/tC7rHI4OmorVE/e2d9HcE80dQ= - EOF - - mkdir -p $HOME/.config/cachix - cat > $HOME/.config/cachix/config.dhall <<EOF - { authToken = $CACHIX_AUTH_TOKEN - , binaryCaches = - [ { name = "trec-car-tools" - , secretKey = $CACHIX_SECRET_KEY - } - ] : List { name : Text, secretKey : Text } - } - EOF + - nix-env -iA git -f '<nixpkgs>' + - nix-env -iA cachix -f '<nixpkgs>' + - cachix use trec-car-tools + - cachix authtoken $CACHIX_AUTH_TOKEN + - nix-build '<nixpkgs>' -A hello | cachix push trec-car-tools validate: - image: nixos/nix + # Use 2.3.12 due to https://github.com/NixOS/nix/issues/5797 + image: nixos/nix:2.3.12 script: - nix-build . 
-A env --cores 1 -j $THREADS | cachix push trec-car-tools - nix-store --export $(nix-store -qR ./result) > ./cache.nar diff --git a/cabal.project b/cabal.project index d1addb9430e8a20d047b9b6b8cf3982f1552b6c8..38dee44ad00023401aec860917a372cf1bc2d18b 100644 --- a/cabal.project +++ b/cabal.project @@ -1,7 +1,9 @@ allow-newer: - base, transformers, ghc-boot, template-haskell, time, + base, transformers, ghc-boot, template-haskell, time, bytestring, + lens, pipes, aeson + packages: simplir/*.cabal simplir-*/*.cabal diff --git a/default.nix b/default.nix index d15e19064175d35f03689fdbc03ab275d44e9041..d71b7bf7536b7de52a4600a80e88d9d1eb4bf199 100644 --- a/default.nix +++ b/default.nix @@ -8,7 +8,7 @@ let localDir = nixpkgs.nix-gitignore.gitignoreSourcePure [ ./.gitignore ]; - trec-eval = nixpkgs.enableDebugging (nixpkgs.callPackage ./trec-eval.nix {}); + trec-eval = nixpkgs.callPackage ./trec-eval.nix {}; all-cabal-hashes = let @@ -60,6 +60,7 @@ let lzma = dontCheck super.lzma; ListLike = doJailbreak super.ListLike; text-icu = dontCheck super.text-icu; + streaming-commons = dontCheck super.streaming-commons; pipes-zlib = doJailbreak super.pipes-zlib; pipes-text = doJailbreak (super.callHackage "pipes-text" "0.0.2.5" {}); pipes-bzip = dontCheck (doJailbreak (super.callHackage "pipes-bzip" "0.2.0.4" { bzlib = null; })); @@ -67,17 +68,12 @@ let pipes-interleave = doJailbreak (super.callHackage "pipes-interleave" "1.1.3" {}); html-parse = self.callCabal2nix "html-parse" ./vendor/html-parse {}; b-tree = doJailbreak (self.callHackage "b-tree" "0.1.4" {}); - log-domain = self.callCabal2nix "log-domain" (fetchFromGitHub { - owner = "ekmett"; - repo = "log-domain"; - rev = "f0b5e8528965ba1cf8a2f47ea8b2750285914b6d"; - sha256 = "0d46bkymf8sz01cq4pizrs5dn0xn5yd3chqgczbad4yaqjridjl7"; - }) {}; - warc = self.callCabal2nix "warc" (fetchFromGitHub { - owner = "bgamari"; + warc = self.callCabal2nix "warc" (nixpkgs.fetchFromGitLab { + domain = "git.smart-cactus.org"; + owner = "ben"; 
repo = "warc"; - rev = "76ce71f4e5e0bb51ea22c1210215ec194e33b442"; - sha256 = "1p3zmyhrrj44bj9l5zrcw34bzkqg12g94nbb4q5d9l8dirg837j6"; + rev = "3c052804e4f17bf76f2d24c565e7ab975b0ad7e1"; + sha256 = "sha256:027b3zjvrav7qbjgn40xkdacv62745y8n0krwkwdkhqs577rznfs"; }) {}; pinch = self.callCabal2nix "pinch" ./vendor/pinch {}; @@ -85,7 +81,7 @@ let }; in otherOverrides // simplirPackages // { simplirPackages = simplirPackages; }; - ghcVersion = "ghc883"; + ghcVersion = "ghc8107"; haskellPackages = nixpkgs.haskell.packages."${ghcVersion}".override { inherit all-cabal-hashes; overrides = haskellOverrides; diff --git a/graph-algorithms/graph-algorithms.cabal b/graph-algorithms/graph-algorithms.cabal index 4ec44fbe42e2dae1af479c18fd9f5803b554143d..c6640230c18d1236ba20791a24da9df59f3bdb4f 100644 --- a/graph-algorithms/graph-algorithms.cabal +++ b/graph-algorithms/graph-algorithms.cabal @@ -24,11 +24,11 @@ library MultiWayIf, GeneralizedNewtypeDeriving, BangPatterns, GADTs, DeriveFunctor - build-depends: base >=4.10 && <4.15, - hashable >=1.2 && <1.4, + build-depends: base >=4.10 && <4.17, + hashable >=1.2 && <1.5, unordered-containers >=0.2 && <0.3, containers >=0.5 && <0.7, - transformers >=0.5 && <0.6, + transformers >=0.5 && <0.7, vector >=0.12 && <0.13, vector-th-unbox, indexed-vector, diff --git a/nix/sources.json b/nix/sources.json new file mode 100644 index 0000000000000000000000000000000000000000..67c3ff36a6d44c764ead4f8ad73f34f69bd7a1ac --- /dev/null +++ b/nix/sources.json @@ -0,0 +1,26 @@ +{ + "niv": { + "branch": "master", + "description": "Easy dependency management for Nix projects", + "homepage": "https://github.com/nmattia/niv", + "owner": "nmattia", + "repo": "niv", + "rev": "5830a4dd348d77e39a0f3c4c762ff2663b602d4c", + "sha256": "1d3lsrqvci4qz2hwjrcnd8h5vfkg8aypq3sjd4g3izbc8frwz5sm", + "type": "tarball", + "url": "https://github.com/nmattia/niv/archive/5830a4dd348d77e39a0f3c4c762ff2663b602d4c.tar.gz", + "url_template": 
"https://github.com/<owner>/<repo>/archive/<rev>.tar.gz" + }, + "nixpkgs": { + "branch": "release-21.11", + "description": "Nix Packages collection", + "homepage": "", + "owner": "NixOS", + "repo": "nixpkgs", + "rev": "08370e1e271f6fe00d302bebbe510fe0e2c611ca", + "sha256": "1s9g0vry5jrrvvna250y538i99zy12xy3bs7m3gb4iq64qhyd6bq", + "type": "tarball", + "url": "https://github.com/NixOS/nixpkgs/archive/08370e1e271f6fe00d302bebbe510fe0e2c611ca.tar.gz", + "url_template": "https://github.com/<owner>/<repo>/archive/<rev>.tar.gz" + } +} diff --git a/nix/sources.nix b/nix/sources.nix new file mode 100644 index 0000000000000000000000000000000000000000..1938409dddb0b57d9f298046cf51875060283df2 --- /dev/null +++ b/nix/sources.nix @@ -0,0 +1,174 @@ +# This file has been generated by Niv. + +let + + # + # The fetchers. fetch_<type> fetches specs of type <type>. + # + + fetch_file = pkgs: name: spec: + let + name' = sanitizeName name + "-src"; + in + if spec.builtin or true then + builtins_fetchurl { inherit (spec) url sha256; name = name'; } + else + pkgs.fetchurl { inherit (spec) url sha256; name = name'; }; + + fetch_tarball = pkgs: name: spec: + let + name' = sanitizeName name + "-src"; + in + if spec.builtin or true then + builtins_fetchTarball { name = name'; inherit (spec) url sha256; } + else + pkgs.fetchzip { name = name'; inherit (spec) url sha256; }; + + fetch_git = name: spec: + let + ref = + if spec ? ref then spec.ref else + if spec ? branch then "refs/heads/${spec.branch}" else + if spec ? tag then "refs/tags/${spec.tag}" else + abort "In git source '${name}': Please specify `ref`, `tag` or `branch`!"; + in + builtins.fetchGit { url = spec.repo; inherit (spec) rev; inherit ref; }; + + fetch_local = spec: spec.path; + + fetch_builtin-tarball = name: throw + ''[${name}] The niv type "builtin-tarball" is deprecated. You should instead use `builtin = true`. 
+ $ niv modify ${name} -a type=tarball -a builtin=true''; + + fetch_builtin-url = name: throw + ''[${name}] The niv type "builtin-url" will soon be deprecated. You should instead use `builtin = true`. + $ niv modify ${name} -a type=file -a builtin=true''; + + # + # Various helpers + # + + # https://github.com/NixOS/nixpkgs/pull/83241/files#diff-c6f540a4f3bfa4b0e8b6bafd4cd54e8bR695 + sanitizeName = name: + ( + concatMapStrings (s: if builtins.isList s then "-" else s) + ( + builtins.split "[^[:alnum:]+._?=-]+" + ((x: builtins.elemAt (builtins.match "\\.*(.*)" x) 0) name) + ) + ); + + # The set of packages used when specs are fetched using non-builtins. + mkPkgs = sources: system: + let + sourcesNixpkgs = + import (builtins_fetchTarball { inherit (sources.nixpkgs) url sha256; }) { inherit system; }; + hasNixpkgsPath = builtins.any (x: x.prefix == "nixpkgs") builtins.nixPath; + hasThisAsNixpkgsPath = <nixpkgs> == ./.; + in + if builtins.hasAttr "nixpkgs" sources + then sourcesNixpkgs + else if hasNixpkgsPath && ! hasThisAsNixpkgsPath then + import <nixpkgs> {} + else + abort + '' + Please specify either <nixpkgs> (through -I or NIX_PATH=nixpkgs=...) or + add a package called "nixpkgs" to your sources.json. + ''; + + # The actual fetching function. + fetch = pkgs: name: spec: + + if ! builtins.hasAttr "type" spec then + abort "ERROR: niv spec ${name} does not have a 'type' attribute" + else if spec.type == "file" then fetch_file pkgs name spec + else if spec.type == "tarball" then fetch_tarball pkgs name spec + else if spec.type == "git" then fetch_git name spec + else if spec.type == "local" then fetch_local spec + else if spec.type == "builtin-tarball" then fetch_builtin-tarball name + else if spec.type == "builtin-url" then fetch_builtin-url name + else + abort "ERROR: niv spec ${name} has unknown type ${builtins.toJSON spec.type}"; + + # If the environment variable NIV_OVERRIDE_${name} is set, then use + # the path directly as opposed to the fetched source. 
+ replace = name: drv: + let + saneName = stringAsChars (c: if isNull (builtins.match "[a-zA-Z0-9]" c) then "_" else c) name; + ersatz = builtins.getEnv "NIV_OVERRIDE_${saneName}"; + in + if ersatz == "" then drv else + # this turns the string into an actual Nix path (for both absolute and + # relative paths) + if builtins.substring 0 1 ersatz == "/" then /. + ersatz else /. + builtins.getEnv "PWD" + "/${ersatz}"; + + # Ports of functions for older nix versions + + # a Nix version of mapAttrs if the built-in doesn't exist + mapAttrs = builtins.mapAttrs or ( + f: set: with builtins; + listToAttrs (map (attr: { name = attr; value = f attr set.${attr}; }) (attrNames set)) + ); + + # https://github.com/NixOS/nixpkgs/blob/0258808f5744ca980b9a1f24fe0b1e6f0fecee9c/lib/lists.nix#L295 + range = first: last: if first > last then [] else builtins.genList (n: first + n) (last - first + 1); + + # https://github.com/NixOS/nixpkgs/blob/0258808f5744ca980b9a1f24fe0b1e6f0fecee9c/lib/strings.nix#L257 + stringToCharacters = s: map (p: builtins.substring p 1 s) (range 0 (builtins.stringLength s - 1)); + + # https://github.com/NixOS/nixpkgs/blob/0258808f5744ca980b9a1f24fe0b1e6f0fecee9c/lib/strings.nix#L269 + stringAsChars = f: s: concatStrings (map f (stringToCharacters s)); + concatMapStrings = f: list: concatStrings (map f list); + concatStrings = builtins.concatStringsSep ""; + + # https://github.com/NixOS/nixpkgs/blob/8a9f58a375c401b96da862d969f66429def1d118/lib/attrsets.nix#L331 + optionalAttrs = cond: as: if cond then as else {}; + + # fetchTarball version that is compatible between all the versions of Nix + builtins_fetchTarball = { url, name ? 
null, sha256 }@attrs: + let + inherit (builtins) lessThan nixVersion fetchTarball; + in + if lessThan nixVersion "1.12" then + fetchTarball ({ inherit url; } // (optionalAttrs (!isNull name) { inherit name; })) + else + fetchTarball attrs; + + # fetchurl version that is compatible between all the versions of Nix + builtins_fetchurl = { url, name ? null, sha256 }@attrs: + let + inherit (builtins) lessThan nixVersion fetchurl; + in + if lessThan nixVersion "1.12" then + fetchurl ({ inherit url; } // (optionalAttrs (!isNull name) { inherit name; })) + else + fetchurl attrs; + + # Create the final "sources" from the config + mkSources = config: + mapAttrs ( + name: spec: + if builtins.hasAttr "outPath" spec + then abort + "The values in sources.json should not have an 'outPath' attribute" + else + spec // { outPath = replace name (fetch config.pkgs name spec); } + ) config.sources; + + # The "config" used by the fetchers + mkConfig = + { sourcesFile ? if builtins.pathExists ./sources.json then ./sources.json else null + , sources ? if isNull sourcesFile then {} else builtins.fromJSON (builtins.readFile sourcesFile) + , system ? builtins.currentSystem + , pkgs ? mkPkgs sources system + }: rec { + # The sources, i.e. the attribute set of spec name to spec + inherit sources; + + # The "pkgs" (evaluated nixpkgs) to use for e.g. 
non-builtin fetchers + inherit pkgs; + }; + +in +mkSources (mkConfig {}) // { __functor = _: settings: mkSources (mkConfig settings); } diff --git a/nixpkgs.nix b/nixpkgs.nix index d3281398199b62a19b5141ffedf0b61cf949c2f5..4ccc1ca73615772b11f718c9f890919a61213849 100644 --- a/nixpkgs.nix +++ b/nixpkgs.nix @@ -1,8 +1 @@ -let - rev = "e065200fc90175a8f6e50e76ef10a48786126e1c"; - sha256 = "sha256:157ih9h1j9r4rf5ppv4yhin73k664bzclsy9c791crx9j5db0l7a"; - tarball = builtins.fetchTarball { - url = "https://github.com/nixos/nixpkgs/archive/${rev}.tar.gz"; - inherit sha256; - }; -in import tarball +import (import ./nix/sources.nix).nixpkgs diff --git a/simplir-data-source/SimplIR/DataSource/Internal.hs b/simplir-data-source/SimplIR/DataSource/Internal.hs index a9599b51869bc9325d8f2999cdfe6fcce13e0225..705a0347bd817cd4e7432238d37ce510a980ba20 100644 --- a/simplir-data-source/SimplIR/DataSource/Internal.hs +++ b/simplir-data-source/SimplIR/DataSource/Internal.hs @@ -59,7 +59,10 @@ localFile = DataSourceParser parse = Just $ DataSource ("file://"<>t) (run $ T.unpack t) run path = - bracket (liftIO $ openFile path ReadMode) (liftIO . 
hClose) P.BS.fromHandle + bracket + (liftIO $ openFile path ReadMode) + (\hdl -> liftIO $ hClose hdl) + (\hdl -> P.BS.fromHandle hdl) dataLocationReadM :: DataSourceParser m -> ReadM (DataSource m) dataLocationReadM p = do diff --git a/simplir-data-source/simplir-data-source.cabal b/simplir-data-source/simplir-data-source.cabal index 2ec27d2a1a0424cb28e13a9eda4416926e9e7bf9..d73561d9b42238f5bde09999627914786b278de3 100644 --- a/simplir-data-source/simplir-data-source.cabal +++ b/simplir-data-source/simplir-data-source.cabal @@ -20,10 +20,10 @@ library SimplIR.DataSource.Gpg other-extensions: OverloadedStrings, FlexibleContexts build-depends: base >=4.9 && <5, - bytestring >=0.10 && <0.11, + bytestring >=0.10 && <0.12, text >=1.2 && <1.3, filepath >=1.4 && <1.5, - optparse-applicative >=0.14 && <0.16, + optparse-applicative >=0.14 && <0.17, pipes >=4.3 && <4.5, pipes-safe >=2.2 && <2.4, zlib, diff --git a/simplir-disk-index/simplir-disk-index.cabal b/simplir-disk-index/simplir-disk-index.cabal index 71e5870b2e396974bbad213e546dac912251d523..c9d88270313bf5940a07bba938ad7887c6f3fd16 100644 --- a/simplir-disk-index/simplir-disk-index.cabal +++ b/simplir-disk-index/simplir-disk-index.cabal @@ -47,7 +47,7 @@ library base >=4.8 && <5, pipes-bytestring >=2.1 && <2.2, pipes, - bytestring >=0.10 && <0.11, + bytestring >=0.10 && <0.12, deepseq, split, dlist, @@ -58,7 +58,7 @@ library filepath >=1.4 && <1.5, directory, unordered-containers >=0.2 && <0.3, - hashable >= 1.2 && <1.4, + hashable >= 1.2 && <1.5, log-domain, profunctors >= 5.1, foldl >= 1.4, @@ -92,7 +92,7 @@ executable dump-postings base, pipes-bytestring >=2.1 && <2.2, pipes, - bytestring >=0.10 && <0.11, + bytestring, transformers, mtl, binary, diff --git a/simplir-disk-index/src/SimplIR/DiskIndex/Posting2/Internal.hs b/simplir-disk-index/src/SimplIR/DiskIndex/Posting2/Internal.hs index 7fba821f188c9f9f8a2d51329614de43e7c4831c..4b0ebe2566e56bb33f864f0d5f23e58b15c16358 100644 --- 
a/simplir-disk-index/src/SimplIR/DiskIndex/Posting2/Internal.hs +++ b/simplir-disk-index/src/SimplIR/DiskIndex/Posting2/Internal.hs @@ -46,7 +46,7 @@ data PostingIndex term p = PostingIndex { _metadata :: !Metadata , postingIndex :: !(CL.CborList (TermPostings term p)) } -data TermPostings term p = TermPostings { termPostingsTerm :: ! term +data TermPostings term p = TermPostings { termPostingsTerm :: !term , termPostingsChunks' :: !(ELC.EncodedList (PostingsChunk p)) } deriving (Show) diff --git a/simplir-eval/simplir-eval.cabal b/simplir-eval/simplir-eval.cabal index e7d7642e22862c2bdfe8a96b185ab20365140210..abf4c12dd252456110df2a2c4120ae7abc5e1c90 100644 --- a/simplir-eval/simplir-eval.cabal +++ b/simplir-eval/simplir-eval.cabal @@ -33,7 +33,7 @@ executable simplir-eval -- other-modules: build-depends: base >=4.10 && <5, containers >=0.5 && <0.7, - optparse-applicative >=0.14 && <0.16, + optparse-applicative >=0.14 && <0.17, simplir, simplir-io, simplir-eval diff --git a/simplir-io/simplir-io.cabal b/simplir-io/simplir-io.cabal index 97bb81504171672d587bb14f961cb8b6338cf3f4..5fa19a4c54a5e7116cfd2fe71b1ab470381e2d4f 100644 --- a/simplir-io/simplir-io.cabal +++ b/simplir-io/simplir-io.cabal @@ -17,10 +17,10 @@ library SimplIR.Types.Relevance SimplIR.Format.JsonRunQrels other-extensions: RecordWildCards, OverloadedStrings, ScopedTypeVariables, GeneralizedNewtypeDeriving - build-depends: base >=4.11 && <4.15, + build-depends: base >=4.11 && <4.17, deepseq >=1.4 && <1.5, text >=1.2 && <1.3, - hashable >=1.2 && <1.4, + hashable >=1.2 && <1.5, unordered-containers >=0.2 && <0.3, bytestring, zlib >=0.6, diff --git a/simplir-kyoto-index/simplir-kyoto-index.cabal b/simplir-kyoto-index/simplir-kyoto-index.cabal index 83203ae57fbc8d1eef620cf16852e3e2ec4195e0..c1033c004f0fac95a861f979c096d6642d555c18 100644 --- a/simplir-kyoto-index/simplir-kyoto-index.cabal +++ b/simplir-kyoto-index/simplir-kyoto-index.cabal @@ -23,7 +23,7 @@ library transformers >= 0.5, binary >=0.8 && 
<0.11, hashable >= 1.2, - bytestring >=0.10 && <0.11, + bytestring >=0.10 && <0.12, vector >=0.12 && <0.13, exceptions >= 0.8, async, diff --git a/simplir-learning-to-rank/simplir-learning-to-rank.cabal b/simplir-learning-to-rank/simplir-learning-to-rank.cabal index 0344423f0355a496c7d4a1f74e0905176b86df3c..cc3e3482e86da1cdb83e1de2d690cf7a15b2bee0 100644 --- a/simplir-learning-to-rank/simplir-learning-to-rank.cabal +++ b/simplir-learning-to-rank/simplir-learning-to-rank.cabal @@ -20,15 +20,15 @@ library SimplIR.LearningToRank.Tests, SimplIR.TrainUtils other-extensions: RoleAnnotations, TupleSections, DataKinds, TypeOperators, BangPatterns, TemplateHaskell, TypeFamilies, FlexibleInstances, GeneralizedNewtypeDeriving, MultiParamTypeClasses, StandaloneDeriving, ExistentialQuantification, ScopedTypeVariables, RankNTypes, GADTs, DeriveGeneric, PartialTypeSignatures, FlexibleContexts, DerivingStrategies - build-depends: base >=4.11 && <4.15, + build-depends: base >=4.11 && <4.17, deepseq >=1.4 && <1.5, primitive >=0.6 && <0.8, - transformers >=0.5 && <0.6, + transformers >=0.5 && <0.7, containers >=0.5 && <0.7, vector >=0.12 && <0.13, text >=1.2 && <1.3, unordered-containers >=0.2 && <0.3, - random >=1.1 && <1.2, + random >=1.2 && <1.3, parallel >=3.2 && <3.3, split, indexed-vector, @@ -37,7 +37,7 @@ library tasty, tasty-hunit, linear, - aeson, + aeson >=1.5 && <2.0, simplir, simplir-io, simplir-eval diff --git a/simplir-learning-to-rank/src/SimplIR/LearningToRankWrapper.hs b/simplir-learning-to-rank/src/SimplIR/LearningToRankWrapper.hs index 4358ffa4e584e5cd874bd0214fa529934811402a..b8d28a58e407c00b995a5b6d9d0e6f18d1804cc4 100644 --- a/simplir-learning-to-rank/src/SimplIR/LearningToRankWrapper.hs +++ b/simplir-learning-to-rank/src/SimplIR/LearningToRankWrapper.hs @@ -136,7 +136,7 @@ instance (Aeson.FromJSONKey k, Aeson.FromJSON v) => Aeson.FromJSON (Assocs k v) flip (Aeson.withObject "association list") v $ (fmap Assocs . mapM (fromKV (pure . f)) . 
HM.toList) Aeson.FromJSONKeyTextParser f -> flip (Aeson.withObject "association list") v $ (fmap Assocs . mapM (fromKV f) . HM.toList) - Aeson.FromJSONKeyCoerce _ -> + Aeson.FromJSONKeyCoerce -> flip (Aeson.withObject "association list") v $ (fmap Assocs . mapM (fromKV (pure . unsafeCoerce)) .HM.toList) where fromKV :: (T.Text -> Aeson.Parser k) -> (T.Text, Aeson.Value) -> Aeson.Parser (k, v) diff --git a/simplir-leveldb-index/simplir-leveldb-index.cabal b/simplir-leveldb-index/simplir-leveldb-index.cabal index 9789fa2599214ae55ebd9a988d10aea8d92e26f1..24c78012f548656da620cf40577e62967a9f6d2b 100644 --- a/simplir-leveldb-index/simplir-leveldb-index.cabal +++ b/simplir-leveldb-index/simplir-leveldb-index.cabal @@ -23,7 +23,7 @@ library transformers >= 0.5, binary >=0.8 && <0.11, hashable >= 1.2, - bytestring >=0.10 && <0.11, + bytestring >=0.10 && <0.12, vector >=0.12 && <0.13, exceptions >= 0.8, async, diff --git a/simplir-pipes-utils/simplir-pipes-utils.cabal b/simplir-pipes-utils/simplir-pipes-utils.cabal index 1d7162be280dd4c1981248206d3f64f4f71e103f..9844fce7e73e6dbb853e324b62c9af2a19b53662 100644 --- a/simplir-pipes-utils/simplir-pipes-utils.cabal +++ b/simplir-pipes-utils/simplir-pipes-utils.cabal @@ -20,8 +20,8 @@ library Control.Foldl.Vector, Data.ByteString.Lazy.Progress other-extensions: RankNTypes, OverloadedStrings, BangPatterns - build-depends: base >=4.11 && <4.15, - bytestring >=0.10 && <0.11, + build-depends: base >=4.11 && <4.17, + bytestring >=0.10 && <0.12, vector, primitive, containers, diff --git a/simplir-stop-words/simplir-stop-words.cabal b/simplir-stop-words/simplir-stop-words.cabal index 4cb5d3aef7bac19d4f7f1c2ecfd142292c369a27..0d213bbd2e888cc835d7c3e848fcc3d42c76231f 100644 --- a/simplir-stop-words/simplir-stop-words.cabal +++ b/simplir-stop-words/simplir-stop-words.cabal @@ -17,10 +17,10 @@ library exposed-modules: SimplIR.StopWords, SimplIR.StopWords.Read -- other-modules: other-extensions: TemplateHaskell, QuasiQuotes - build-depends: 
base >=4.11 && <4.15, + build-depends: base >=4.11 && <4.17, text >=1.2 && <1.3, unordered-containers >=0.2 && <0.3, - template-haskell >=2.10 && <2.17, + template-haskell >=2.10 && <2.19, filepath >=1.4 && <1.5 hs-source-dirs: src default-language: Haskell2010 diff --git a/simplir-stop-words/src/SimplIR/StopWords/Read.hs b/simplir-stop-words/src/SimplIR/StopWords/Read.hs index 315dbaf899444bc8c2281d20c3516813cd719297..888ce1cdd316a42220f19472804bd05c554c2ce4 100644 --- a/simplir-stop-words/src/SimplIR/StopWords/Read.hs +++ b/simplir-stop-words/src/SimplIR/StopWords/Read.hs @@ -1,3 +1,4 @@ +{-# LANGUAGE CPP #-} {-# LANGUAGE QuasiQuotes #-} {-# LANGUAGE TemplateHaskell #-} @@ -13,7 +14,14 @@ import qualified Data.HashSet as HS stopWordDir :: FilePath stopWordDir = "data" +#if MIN_VERSION_template_haskell(2,17,0) +readStopWords :: FilePath -> Code Q (HS.HashSet T.Text) +readStopWords fname = liftCode $ do + stopwords <- runIO $ readFile $ stopWordDir </> fname + examineCode [e|| HS.fromList $ T.lines $ T.pack $$(liftTyped $ stopwords) ||] +#else readStopWords :: FilePath -> Q (TExp (HS.HashSet T.Text)) readStopWords fname = do stopwords <- runIO $ readFile $ stopWordDir </> fname [e|| HS.fromList $ T.lines $ T.pack $$(unsafeTExpCoerce $ lift $ stopwords) ||] +#endif diff --git a/simplir-trec-streaming/Extract.hs b/simplir-trec-streaming/Extract.hs index 4c11aa1fb86754f1a518e9199324e164e8812c5a..0abe31230c9baa4fa824a7d04c284fb8eae78b6e 100644 --- a/simplir-trec-streaming/Extract.hs +++ b/simplir-trec-streaming/Extract.hs @@ -70,7 +70,7 @@ takeDocuments docs = go | docName `S.member` docs = val : go bs' | otherwise = go bs' where - (bs', val) = either error id $ Pinch.deserializeValue' Pinch.binaryProtocol bs + (bs', val) = either error id $ Pinch.decodeWithLeftovers Pinch.binaryProtocol bs item = either error id $ Pinch.runParser (parseStreamItem val) docName = DocName (Utf8.fromText $ Kba.getDocumentId $ Kba.documentId item) diff --git 
a/simplir-trec-streaming/Streaming.hs b/simplir-trec-streaming/Streaming.hs index 2e7eeb0a22bb14429dfadc9760d0fff575f5b6c9..8ccff69fa1b4a14cb53e3a6439d7fc21776150ed 100644 --- a/simplir-trec-streaming/Streaming.hs +++ b/simplir-trec-streaming/Streaming.hs @@ -477,7 +477,7 @@ findPhrases phrases terms = trie = Trie.fromList $ map (\x -> (x,x)) phrases mergeMatches :: ([(Term, Position)], [Term]) -> ([Term], Position) mergeMatches (matchedTerms, phrase) = - (phrase, fromMaybe (error "findPhrases: Empty phrase") $ getOption $ foldMap (Option . Just . snd) matchedTerms) + (phrase, fromMaybe (error "findPhrases: Empty phrase") $ foldMap (Just . snd) matchedTerms) newtype DocumentFrequency = DocumentFrequency Int deriving (Show, Eq, Ord, Binary) diff --git a/simplir-trec-streaming/simplir-trec-streaming.cabal b/simplir-trec-streaming/simplir-trec-streaming.cabal index 7f9fe5f8ef944e183cd4f776d9b19def8a133307..e80a93e2c25548064f2cea20a55469e02fceae25 100644 --- a/simplir-trec-streaming/simplir-trec-streaming.cabal +++ b/simplir-trec-streaming/simplir-trec-streaming.cabal @@ -22,17 +22,16 @@ library Fac.Types other-extensions: GeneralizedNewtypeDeriving, DeriveGeneric, RecordWildCards build-depends: base >=4.9 && <5, - bytestring >=0.10 && <0.11, + bytestring >=0.10 && <0.12, unordered-containers >= 0.2 && < 0.3, containers, trifecta >= 1.5, monad-control, text >=1.2 && <1.3, - bytestring, - time >=1.6 && <1.11, - pinch >=0.3 && <0.4, + time >=1.6 && <1.14, + pinch >=0.3 && <0.5, binary, - aeson >=1.0 && <1.5, + aeson >=1.0 && <1.6, yaml, hashable, attoparsec, @@ -72,7 +71,7 @@ executable index-fac base, pipes-bytestring >=2.1 && <2.2, pipes, - bytestring >=0.10 && <0.11, + bytestring, transformers, filepath, directory, @@ -107,7 +106,7 @@ executable kba-streaming base, pipes-bytestring >=2.1 && <2.2, pipes, - bytestring >=0.10 && <0.11, + bytestring, transformers, trifecta, filepath, @@ -148,7 +147,7 @@ executable merge-kba-rankings base, pipes-bytestring >=2.1 && <2.2, 
pipes, - bytestring >=0.10 && <0.11, + bytestring, transformers, filepath, directory, @@ -186,7 +185,7 @@ executable extract-kba-documents base, pipes-bytestring >=2.1 && <2.2, pipes, - bytestring >=0.10 && <0.11, + bytestring, transformers, monoidal-containers, monad-control, diff --git a/simplir-trec-streaming/src/ReadKba.hs b/simplir-trec-streaming/src/ReadKba.hs index 07dc527120646d8c4c6a38191bc8969e7692f9ab..3c8bfe225e1cfcdf6eea86ed35e0f1ffbeb09fe8 100644 --- a/simplir-trec-streaming/src/ReadKba.hs +++ b/simplir-trec-streaming/src/ReadKba.hs @@ -10,7 +10,7 @@ import qualified Data.ByteString.Lazy as BS.L import qualified Pipes.ByteString as P.BS import qualified Data.Text as T import SimplIR.DataSource as DataSource -import SimplIR.DataSource.Compression +import SimplIR.DataSource.Compression.Pipes import qualified SimplIR.DataSource.Gpg as Gpg readKbaFile :: (MonadSafe m, MonadBaseControl IO m, MonadFail m) diff --git a/simplir-trec-streaming/src/SimplIR/TrecStreaming.hs b/simplir-trec-streaming/src/SimplIR/TrecStreaming.hs index 1fef2ed1c4c1bcc990fa1c5d9a57b0b9c5ff678c..c4265a29262c47ad62b3c06e35d3179fd242d62f 100644 --- a/simplir-trec-streaming/src/SimplIR/TrecStreaming.hs +++ b/simplir-trec-streaming/src/SimplIR/TrecStreaming.hs @@ -96,6 +96,6 @@ readItems = go go bs | BS.null bs = [] | otherwise = - let (bs', val) = either error id $ deserializeValue' binaryProtocol bs + let (bs', val) = either error id $ decodeWithLeftovers binaryProtocol bs item = either error id $ runParser (parseStreamItem val) in item : go bs' diff --git a/simplir-trec/simplir-trec.cabal b/simplir-trec/simplir-trec.cabal index 5288832e0f4fe78455a18a170f0fbc77d10cdc4f..9de497cf1b5766e16536ebc5d64e52e7393c8ac3 100644 --- a/simplir-trec/simplir-trec.cabal +++ b/simplir-trec/simplir-trec.cabal @@ -18,14 +18,14 @@ library SimplIR.TREC.News other-extensions: OverloadedStrings, RecordWildCards default-language: Haskell2010 - build-depends: base >=4.8 && <4.15, + build-depends: base >=4.8 && 
<4.17, pipes >=4.1 && <4.4, text >=1.2 && <1.3, - transformers >=0.4 && <0.6, + transformers >=0.4 && <0.7, case-insensitive >=1.2 && <1.3, pipes-attoparsec >=0.5 && <0.6, pipes-parse >=3.0 && <3.1, - lens >=4.13 && <4.20, + lens >=4.13 && <5.2, html-parse >=0.2 && <0.3, attoparsec >= 0.13 ghc-options: -Wall diff --git a/simplir/simplir.cabal b/simplir/simplir.cabal index 3af1507aaae9224f270b0dbddd68c6b0db85f1c2..b978b47e92708795c08f995d24e2070f4b7a6e9e 100644 --- a/simplir/simplir.cabal +++ b/simplir/simplir.cabal @@ -55,13 +55,13 @@ library build-depends: ghc-compact build-depends: - base >=4.8 && <4.15, + base >=4.8 && <4.17, ghc-prim, template-haskell, pipes-bytestring >=2.1 && <2.2, pipes, - bytestring >=0.10 && <0.11, - transformers >=0.4 && <0.6, + bytestring >=0.10 && <0.12, + transformers >=0.4 && <0.7, contravariant >= 1.3 && <1.6, charset >= 0.3, mtl, @@ -80,17 +80,17 @@ library pipes-zlib >= 0.4.4.2 && <0.5, pipes-lzma, pipes-safe, - lens >=4.14 && <4.20, + lens >=4.19 && <5.2, http-types >=0.9 && <0.13, http-media >=0.6 && <0.9, http-parsers >= 0.1 && <0.2, - attoparsec >=0.13 && <0.14, + attoparsec >=0.13 && <0.17, filepath >=1.4 && <1.5, directory, text-icu >=0.7 && <0.8, deepseq >=1.4 && <1.5, unordered-containers >=0.2 && <0.3, - hashable >= 1.2 && <1.4, + hashable >= 1.2 && <1.5, logging-facade >= 0.1 && <0.4, log-domain >= 0.13, linear, diff --git a/simplir/src/SimplIR/Bag.hs b/simplir/src/SimplIR/Bag.hs index 4d527e3ae5556221c15aa637fce2bc543f2ca1d6..ee89b0e8990ebeb4cf5a251c2ce47e4f6be45283 100644 --- a/simplir/src/SimplIR/Bag.hs +++ b/simplir/src/SimplIR/Bag.hs @@ -16,7 +16,7 @@ module SimplIR.Bag ) where import Data.Hashable -import Data.List +import Data.List (sortBy) import Data.Ord import Data.Tuple import Data.Semigroup diff --git a/simplir/src/SimplIR/RetrievalModels/TfIdf.hs b/simplir/src/SimplIR/RetrievalModels/TfIdf.hs index 28acc6b624047fcc55b9f056743c6d744ebf624e..b05bdf4cf1ebb4f165a24004bc156c48e7cc64d2 100644 --- 
a/simplir/src/SimplIR/RetrievalModels/TfIdf.hs +++ b/simplir/src/SimplIR/RetrievalModels/TfIdf.hs @@ -8,10 +8,10 @@ module SimplIR.RetrievalModels.TfIdf , tfIdfAccum -- * TF-IDF variants: + -- ** @log tf / smoothed idf@ , tfIdf_log_tf_smoothed_idf - -- ** @log tf / smoothed idf@ + -- ** @raw tf / smoothed idf@ , tfIdf_raw_tf_smoothed_idf - -- ** @raw tf / smoothed idf@ ) where import Data.Maybe @@ -64,4 +64,4 @@ tfIdfAccum tfIdfVariant stats queryTerms terms = let filteredTerms = HM.filterWithKey (\k _ -> k `HS.member` queryTerms) terms in sum $ fmap (\(term,count) -> tfIdf tfIdfVariant stats term count) - $ HM.toList filteredTerms \ No newline at end of file + $ HM.toList filteredTerms diff --git a/vendor/fork-map b/vendor/fork-map index 186ccf6012912573c87c2ea0f3d9c844b3b7db6b..69132f91185ef269b678c0f4eaa324180744ec6c 160000 --- a/vendor/fork-map +++ b/vendor/fork-map @@ -1 +1 @@ -Subproject commit 186ccf6012912573c87c2ea0f3d9c844b3b7db6b +Subproject commit 69132f91185ef269b678c0f4eaa324180744ec6c diff --git a/vendor/html-parse b/vendor/html-parse index b6fab6ecb8cd135b13af36468277d03292ca013f..8bf66f073bb7ccf00eb6752971cb9376fbbcc303 160000 --- a/vendor/html-parse +++ b/vendor/html-parse @@ -1 +1 @@ -Subproject commit b6fab6ecb8cd135b13af36468277d03292ca013f +Subproject commit 8bf66f073bb7ccf00eb6752971cb9376fbbcc303 diff --git a/vendor/http-parsers b/vendor/http-parsers index 7a07dc724c34de14f1aeb4c6a86a416a9df1a48a..fe3cd42782912355f2a5d4e602182584dd37c9e0 160000 --- a/vendor/http-parsers +++ b/vendor/http-parsers @@ -1 +1 @@ -Subproject commit 7a07dc724c34de14f1aeb4c6a86a416a9df1a48a +Subproject commit fe3cd42782912355f2a5d4e602182584dd37c9e0 diff --git a/vendor/indexed-vector b/vendor/indexed-vector index 6c9012b612c8951429c93c97c1a441bdf2345a60..651e8aa284b27d35946a4bf502790d625893c964 160000 --- a/vendor/indexed-vector +++ b/vendor/indexed-vector @@ -1 +1 @@ -Subproject commit 6c9012b612c8951429c93c97c1a441bdf2345a60 +Subproject commit 651e8aa284b27d35946a4bf502790d625893c964 diff --git 
a/vendor/pinch b/vendor/pinch index cb6569b383fdc87e8b2cadf2ec4b0c9e03e0e3ad..8af208272aeacaadf1c9aa6314bc132427c99d2f 160000 --- a/vendor/pinch +++ b/vendor/pinch @@ -1 +1 @@ -Subproject commit cb6569b383fdc87e8b2cadf2ec4b0c9e03e0e3ad +Subproject commit 8af208272aeacaadf1c9aa6314bc132427c99d2f