Commit 6db64478 authored by Ole Voldsæter
Browse files

Merge branch '14-finish-collapse' into 'develop'

Resolve "Finish support for collapse command"

Closes #14

See merge request c2metadata/stata-sdtl-converter!18
parents 16cc80ad fa482141
Pipeline #14306 passed with stages
in 1 minute and 44 seconds
......@@ -238,7 +238,35 @@ collapse_list_default_part
;
(* One parenthesised statistic followed by the variables it applies to.
   The parentheses are wrapped in angle brackets.
   NOTE(review): two ':' alternatives are listed below; the first (bare ID)
   looks like the pre-change form that the collapse_stat line replaces -
   confirm that only the collapse_stat form is meant to remain. *)
collapse_element
: <"("> ID <")"> assignment_varlist
: <"("> collapse_stat <")"> assignment_varlist
;
(* The statistic named inside the parentheses of a collapse element.
   The four specialised forms are grouped as plain alternatives; the "/"
   makes collapse_other the lower-priority fallback, so a generic identifier
   is only used when none of the specialised forms applies.
   The rule name is written in angle brackets - presumably so that only the
   chosen child node appears in the parse tree; confirm against the parser
   library's hiding rules. *)
<collapse_stat>
: ( collapse_percentile
| collapse_semean
| collapse_sebinomial
| collapse_sepoisson )
/ collapse_other
;
(* Percentile statistic: the letter "p" followed by one or two digits, the
   first of which is non-zero (p1 .. p99). *)
collapse_percentile
: #"p[1-9]\d?"
;
(* "semean", or any abbreviation of it down to "sem".
   This has to be a regex terminal (#"..."): written as a plain string
   terminal, the pattern text itself would be the only accepted input and
   the rule would never match a real statistic name. *)
collapse_semean
: #"sem(e(an?)?)?"
;
(* "sebinomial", or any abbreviation of it down to "seb".
   This has to be a regex terminal (#"..."): written as a plain string
   terminal, the pattern text itself would be the only accepted input and
   the rule would never match a real statistic name. *)
collapse_sebinomial
: #"seb(i(n(o(m(i(al?)?)?)?)?)?)?"
;
(* "sepoisson", or any abbreviation of it down to "sep".
   This has to be a regex terminal (#"..."): written as a plain string
   terminal, the pattern text itself would be the only accepted input and
   the rule would never match a real statistic name. *)
collapse_sepoisson
: #"sep(o(i(s(s(on?)?)?)?)?)?"
;
(* Fallback statistic: any identifier (ID is defined elsewhere in the
   grammar) that was not claimed by one of the specialised collapse_* rules. *)
collapse_other
: ID
;
assignment_varlist
......
......@@ -191,25 +191,17 @@
)
:reshape_wide (fn [& _] (throw (UnsupportedOperationException. "reshape wide modifies variable list in an unpredictable way. Cannot finish processing")))
:stublist (fn [& args] args)
:collapse (fn [& args]
(let [options (nth args 3 nil)
by-vars (expand-varlist (first (:by options)) *variables*)]
{:command "collapse"
:aggregations (first args)
:condition (second args)
:weight (nth args 2)
:cellwise (contains? options :cw)
:by by-vars
:mod-varlist (concat
by-vars
(extract-target-vars-from-aggregations (first args))
)
}
)
:collapse collapse-vars
:collapse_list (fn [& args] args)
:collapse_list_default_part (fn [& [vars]] {:aggstat "mean" :variables vars})
:collapse_element (fn [& [aggstat vars]]
(merge {:variables vars} aggstat)
)
:collapse_list (fn [& args] (reduce conj [(first args)] (rest args)))
:collapse_list_default_part (fn [& args] {:aggstat "mean" :variables (first args)})
:collapse_element (fn [& args] {:aggstat (first args) :variables (second args)})
;; The matched token is "p" followed by one or two digits; drop the leading
;; "p" and parse the remainder as an integer. Long/parseLong is used rather
;; than read-string so the general Clojure reader is never run on script text.
:collapse_percentile (fn [& [arg]] {:aggstat "p" :p (Long/parseLong (subs arg 1))})
:collapse_semean (fn [& _] {:aggstat "semean"})
:collapse_sebinomial (fn [& _] {:aggstat "sebinomial"})
:collapse_sepoisson (fn [& _] {:aggstat "sepoisson"})
:collapse_other (fn [& [arg]] {:aggstat arg})
:merge (fn [& args]
(let [options (nth args 3 nil)
gen-merge (get-var-length-option :generate 3 options)
......
......@@ -106,4 +106,64 @@
(defn extract-target-vars-from-aggregations
  "Returns the names of all variables produced by `aggs`, in order.

  Each aggregation contributes the entries of its :variables collection.
  An entry that is a plain string is returned as-is; any other entry
  contributes the value under its :target key."
  [aggs]
  (for [entry (mapcat :variables aggs)]
    (if (string? entry)
      entry
      (:target entry))))
;; Lookup table from a collapse statistic (as a keyword) to the function name
;; that make-compute-list places in the :function field of the generated
;; compute expression. Some statistics share a target name (:sum/:rawsum,
;; :first/:firstnm, :last/:lastnm); the inline notes mark the entries the
;; author flagged as lacking a dedicated function in the function library.
(def collapse-stats
{:mean "agg_mean"
:median "agg_median"
:p "agg_pctile"
:sd "col_sd"
:semean "col_semean"
:sebinomial "col_sebinomial"
:sepoisson "col_sepoisson"
:sum "col_sum"
:rawsum "col_sum" ;;; not supported in function lib
:count "agg_count"
:percent "agg_pc"
:max "agg_max"
:min "agg_min"
:iqr "agg_iqr"
:firstnm "agg_first"
:lastnm "agg_last"
:first "agg_first" ;;; not supported in function lib
:last "agg_last" ;;; not supported in function lib
}
)
(defn make-compute-list
  "Builds one \"compute\" command map per entry in (:variables aggregation).

  A plain-string entry is used as both source and target variable; any other
  entry supplies them under :source and :target. The function name is looked
  up in `collapse-stats` by the keywordized :aggstat (nil when there is no
  entry for it). When the aggregation carries a :p value, that value is
  appended to the argument vector after the source-variable expression."
  [aggregation]
  (let [function-name (get collapse-stats (keyword (:aggstat aggregation)))
        percentile    (:p aggregation)]
    (for [entry (:variables aggregation)
          :let [plain?    (string? entry)
                source    (if plain? entry (:source entry))
                target    (if plain? entry (:target entry))
                base-args [{"$type" "variableSymbolExpression"
                            :variableName source}]]]
      {:command    "compute"
       :variables  [target]
       :expression {"$type"     "functionCallExpression"
                    :function   function-name
                    :isSdtlName true
                    :arguments  (if percentile
                                  (conj base-args percentile)
                                  base-args)}})))
(defn collapse-vars
  "Builds the \"collapse\" command map.

  Positional arguments (all optional):
    aggregations - collection of aggregation maps, each expanded into
                   compute commands by `make-compute-list`
    filter       - stored unchanged under :condition
    weight       - stored unchanged under :weight
    options      - map; (first (:by options)) is expanded against *variables*
                   into the group-by variables, and the presence of a :cw key
                   sets :cellwise

  :mod-varlist lists the group-by variables first, followed by the target
  variables of every aggregation."
  [& [aggregations filter weight options]]
  (let [by-vars (expand-varlist (first (:by options)) *variables*)]
    {:command            "collapse"
     ;; mapcat expands every aggregation exactly once. The earlier pairwise
     ;; reduce was only right for exactly two aggregations: with one it
     ;; returned the raw aggregation map, and with three or more it fed the
     ;; already-built result back into make-compute-list, dropping it.
     :aggregateVariables (mapcat make-compute-list aggregations)
     :condition          filter
     :weight             weight
     :cellwise           (contains? options :cw)
     :groupByVariables   by-vars
     :mod-varlist        (concat
                           by-vars
                           (extract-target-vars-from-aggregations aggregations))}))
\ No newline at end of file
......@@ -185,13 +185,38 @@
)
(is
(= (parse-and-transform "collapse a b=c (max) d [fw=f], by(e) cw")
{:command "collapse",
:aggregations [{:aggstat "mean", :variables ["a" {:target "b", :source "c"}]} {:aggstat "max", :variables ["d"]}],
:condition nil,
:weight {:wtype "fw", :expression {:variableName "f", "$type" "VariableSymbolExpression"}},
:cellwise true,
:by ["e"],
:mod-varlist ["e" "a" "b" "d"]}
'{:aggregateVariables ({:command "compute"
:expression {"$type" "functionCallExpression"
:arguments [{"$type" "variableSymbolExpression"
:variableName "a"}]
:function "agg_mean"
:isSdtlName true}
:variables ["a"]}
{:command "compute"
:expression {"$type" "functionCallExpression"
:arguments [{"$type" "variableSymbolExpression"
:variableName "c"}]
:function "agg_mean"
:isSdtlName true}
:variables ["b"]}
{:command "compute"
:expression {"$type" "functionCallExpression"
:arguments [{"$type" "variableSymbolExpression"
:variableName "d"}]
:function "agg_max"
:isSdtlName true}
:variables ["d"]})
:cellwise true
:command "collapse"
:condition nil
:groupByVariables ("e")
:mod-varlist ("e"
"a"
"b"
"d")
:weight {:expression {"$type" "VariableSymbolExpression"
:variableName "f"}
:wtype "fw"}}
)
"Should return a collapse object. Variable 'e' must be first in :mod-varlist"
)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment