Commit 6c071163 authored by Sigbjørn Revheim's avatar Sigbjørn Revheim
Browse files

Fixed handling of option lists

parent 7de09a24
Pipeline #22964 failed with stages
in 1 minute and 19 seconds
......@@ -309,16 +309,19 @@ filelist
;
options
: <","> option+
: <","> (option | numlist_option)+
;
option
: ID (<"("> option_param ( <","> option_param ) * <")">)?
: ID (<"("> option_param ( <","> option_param)* <")">)?
;
numlist_option
: ID <"("> numlist <")">
;
option_param
: varlist
| numlist
| string
| display_format
;
......@@ -341,7 +344,7 @@ varRange
;
numlist
: (number | range)+
: (number | range)+ ( <","> (number | range)+ )*
;
local_command
......
(ns ^{
:author "Ole Voldsæter"
:author "Ole Voldsæter, Sigbjørn Revheim"
:doc "Translator STATA -> SDTL"
} stata2sdtl.core
(:require
......@@ -36,18 +36,19 @@
;; mapping of grammar production names to transformation functions
(def transformations
{
:options (fn [& args] (reduce #(assoc %1 (keyword (first %2)) (apply concat (second %2))) {} args))
:option (fn [name & args] [name args])
:option_param (fn [& args] args)
:options (fn [& args] (into {} args))
:option (fn [name & args] {(keyword name) args})
:option_param (fn [args] args)
:numlist_option (fn [name & args] {(keyword name) args})
:analysis (fn [& args] {"$type" "Analysis"
:Command "analysis"
:message (str "Ignoring \"" (-> args first trim) "\"")})
:invalid (fn [& args] {"$type" "Invalid"
:Command "invalid"
:Message (str "Invalid/unexpected syntax for \"" (-> args first trim) "\"")
:Messages [{:Severity "Error"
:Messages [{:Severity "Error"
:MessageText (str "Invalid syntax for " (first args))}]})
:unsupported (fn [& args] {"$type" "Unsupported"
:unsupported (fn [& args] {"$type" "Unsupported"
:Command "unsupported"})
:unrecognized (fn [& args] (first args))
:generate (fn [& [varname expr condition]]
......@@ -84,7 +85,7 @@
:Labels (map (fn [lab] {:Value (first lab) :Label (second lab)}) labels)}
(not labels) (assoc
:Messages
[{:Severity "warning"
[{:Severity "warning"
:MessageText (str "Undefined label list " codelistname)}])
)
)
......@@ -262,7 +263,8 @@
:assignment (fn [& args] {:target (first args) :source (second args)})
:weight (fn [& args]
(when (first args)
{:wtype (let [wtype (first args)]
{:Expression (second args)
:Type (let [wtype (first args)]
(cond
(or (starts-with? wtype "fw") (starts-with? wtype "freq")) "fw"
(or (starts-with? wtype "aw") (starts-with? wtype "cell")) "aw"
......@@ -270,8 +272,7 @@
(starts-with? wtype "iw") "iw"
:default "default"
)
)
:expression (second args)}))
)}))
:append (fn [& args]
(let [files (first args)
options (second args)
......@@ -294,9 +295,11 @@
:filelist (fn [& args] args)
:varlist (fn [& args] args)
:numlist (fn [& args]
{"$type" "ValueListExpression"
:Values (map numeric-constant-expression args)}
)
(let [values (map numeric-constant-expression args)]
(if (> (count values) 1)
{"$type" "ValueListExpression"
:Values values}
(first values))))
:simple_range (fn [& [from to]]
(number-range-expression from to 1))
:step_range (fn [& [from step to]]
......@@ -306,11 +309,19 @@
:varRange (fn [& args] {:first (first args) :last (second args)})
:rules (fn [& args] args)
:simple_rules (fn [& args] args)
:elementlist (fn [& args] args)
:elementlist (fn [& args]
(let [values (map transform-recode-element args)]
(if (> (count values) 1)
{"$type" "ValueListExpression"
:Values values}
(first values))))
:nonmissing_values (fn [& _] {"$type" "UnhandledNonmissingValuesExpression"})
:missing_values (fn [& _] {"$type" "UnhandledMissingValuesExpression"})
:other_values (fn [& _] {"$type" "UnhandledValuesExpression"})
:element_range (fn [& args] {:first (first args) :last (second args)})
:element_range (fn [& args] (number-range-expression
(transform-recode-element (first args))
(transform-recode-element (second args))
1))
:rule transform-recode-rule
:simple_rule transform-recode-rule
:egen_varlist egen-varlist
......
(ns ^{
:author "Ole Voldsæter"
:author "Ole Voldsæter, Sigbjørn Revheim"
:doc "Egen commands are handled here"
} stata2sdtl.egen
(:require [stata2sdtl.utils :refer :all])
......@@ -10,50 +10,50 @@
; group
(def compute-funcs {
:anycount "anycount"
:anymatch "anymatch"
:anyvalue "anyvalue"
:diff "diff"
:rowfirst "rowfirst"
:rowlast "rowlast"
:rowmax "max"
:rowmean "mean"
:rowmedian "median"
:rowmin "min"
:rowmiss "rowmiss"
:rownonmiss "rownonmiss"
:rowpctile "rowpctile"
:rowsd "standard_deviation"
:rowtotal "rowtotal"
:std "std_val"
:tag "tag"
:concat "concat"
:cut "cut"
:group "group"
:ends "ends"
})
:anycount "anycount"
:anymatch "anymatch"
:anyvalue "anyvalue"
:diff "diff"
:rowfirst "rowfirst"
:rowlast "rowlast"
:rowmax "max"
:rowmean "mean"
:rowmedian "median"
:rowmin "min"
:rowmiss "rowmiss"
:rownonmiss "rownonmiss"
:rowpctile "rowpctile"
:rowsd "standard_deviation"
:rowtotal "rowtotal"
:std "std_val"
:tag "tag"
:concat "concat"
:cut "cut"
:group "group"
:ends "ends"
})
(def aggregate-funcs {
:count "agg_count"
:iqr "agg_iqr"
:kurt "agg_kurt"
:mad "agg_mad"
:max "agg_max"
:mdev "agg_mdev"
:mean "agg_mean"
:median "agg_median"
:min "agg_min"
:pctile "agg_pctile"
:sd "agg_sd"
:skew "agg_skew"
:total "agg_total"
})
:count "agg_count"
:iqr "agg_iqr"
:kurt "agg_kurt"
:mad "agg_mad"
:max "agg_max"
:mdev "agg_mdev"
:mean "agg_mean"
:median "agg_median"
:min "agg_min"
:pctile "agg_pctile"
:sd "agg_sd"
:skew "agg_skew"
:total "agg_total"
})
(defn unsupported-func [funcname]
{"$type" "Unsupported"
:Command "unsupported"
:Message (str "Unsupported egen function \"" funcname "\"")
:Messages [{:Severity "Error"
{"$type" "Unsupported"
:Command "unsupported"
:Message (str "Unsupported egen function \"" funcname "\"")
:Messages [{:Severity "Error"
:MessageText (str "Unsupported egen function \"" funcname "\"")}]}
)
......@@ -61,19 +61,19 @@
{"$type" "VariableListExpression" :Variables (map variable-symbol-expression (expand-varlist varlist *variables*))})
(defn make-compute [varname funcname args _filter]
{"$type" "Compute"
:Command "compute"
:Variable (variable-symbol-expression varname)
:Expression {"$type" "FunctionCallExpression"
:Function funcname
:IsSdtlName true
:Arguments (map-indexed
(fn [i arg] {:ArgumentName (str "EXP" (inc i))
:ArgumentValue arg})
args
)}
:Condition _filter
:mod-varlist (vec (conj *variables* varname))}
{"$type" "Compute"
:Command "compute"
:Variable (variable-symbol-expression varname)
:Expression {"$type" "FunctionCallExpression"
:Function funcname
:IsSdtlName true
:Arguments (map-indexed
(fn [i arg] {:ArgumentName (str "EXP" (inc i))
:ArgumentValue arg})
args
)}
:Condition _filter
:mod-varlist (vec (conj *variables* varname))}
)
(defn make-any [varname funcname varlist options _filter]
......@@ -81,14 +81,16 @@
varname
funcname
[(make-varlist-expression varlist)
{"$type" "NumberListExpression" :Values (->> (get-var-length-option :Values 1 options) (get options) first)}]
(->> (get-var-length-option :values 1 options)
(get options)
first)]
_filter
)
)
(defn make-aggregate [by varname funcname expr _filter]
{"$type" "Aggregate"
:Command "aggregate"
{"$type" "Aggregate"
:Command "aggregate"
:GroupByVariables (map variable-symbol-expression by)
:AggregateVariables [(dissoc (make-compute varname funcname expr _filter) :mod-varlist)]
:mod-varlist (vec (conj *variables* varname))}
......@@ -99,42 +101,44 @@
:rowpctile (make-compute
varname
"rowpctile"
(let [p (:p options '({:Values (50)}))]
[(make-varlist-expression varlist)
(numeric-constant-expression (-> p first :Values first))]
)
[(make-varlist-expression varlist)
(get-numeric-constant (first (:p options [(numeric-constant-expression 50)])))]
_filter)
:rowtotal (make-compute
varname
"rowtotal"
[(make-varlist-expression varlist) (-> (get-var-length-option :missing 1 options) nil? not)]
_filter)
:tag (make-compute
varname
"tag"
[(make-varlist-expression varlist) (-> (get-var-length-option :missing 1 options) nil? not)]
_filter)
:concat (make-compute
varname
"concat"
[(make-varlist-expression varlist)
(->> (get-var-length-option :format 1 options) (get options) first)
(->> (get-var-length-option :decode 1 options) nil? not)
(->> (get-var-length-option :maxlength 4 options) (get options) first :Values first)
(->> (get-var-length-option :punct 1 options) (get options) first)]
_filter)
:cut (make-compute
varname
"cut"
[(first varlist)
(->> options :at (transduce (map #(:Values %)) concat) (assoc {"$type" "NumberListExpression"} :Values))
(->> (get-var-length-option :group 1 options) (get options) first :Values first)
(->> (get-var-length-option :icodes 2 options) nil? not)
(->> (get-var-length-option :label 3 options) nil? not)]
_filter)
:anycount (make-any varname funcname varlist options _filter)
:anymatch (make-any varname funcname varlist options _filter)
:anyvalue (make-any varname funcname varlist options _filter)
:tag (make-compute
varname
"tag"
[(make-varlist-expression varlist) (-> (get-var-length-option :missing 1 options) nil? not)]
_filter)
:concat (make-compute
varname
"concat"
[(make-varlist-expression varlist)
(->> (get-var-length-option :format 1 options) (get options) first)
(->> (get-var-length-option :decode 1 options) nil? not)
(->> (get-var-length-option :maxlength 4 options) (get options) first :Values first)
(->> (get-var-length-option :punct 1 options) (get options) first)]
_filter)
:cut (make-compute
varname
"cut"
[(first varlist)
(first (:at options))
(->>
(get-var-length-option :group 1 options)
(get options)
first
get-numeric-constant)
(->> (get-var-length-option :icodes 2 options) nil? not)
(->> (get-var-length-option :label 3 options) nil? not)]
_filter)
:anycount (make-any varname funcname varlist options _filter)
:anymatch (make-any varname funcname varlist options _filter)
:anyvalue (make-any varname funcname varlist options _filter)
; default
(make-compute
......@@ -173,53 +177,53 @@
(defn egen-mtr [& [varname exp1 exp2 _filter]]
(make-compute
varname
"mtr"
[exp1 exp2]
_filter)
varname
"mtr"
[exp1 exp2]
_filter)
)
(defn egen-expr [& [by-clause varname funcname expr _filter options]]
(let [funcname (keyword funcname)]
(case funcname
:std (make-compute
varname
"std_val"
(let [m (get-var-length-option :mean 1 options)
s (get-var-length-option :std 1 options)
m (if m (get options m) '({:Values [0]}))
s (if s (get options s) '({:Values [1]}))]
[expr
(numeric-constant-expression (-> m first :Values first))
(numeric-constant-expression (-> s first :Values first))]
)
_filter)
:std (make-compute
varname
"std_val"
(let [m (get-var-length-option :mean 1 options)
s (get-var-length-option :std 1 options)
m (if m (get options m) [(numeric-constant-expression 0)])
s (if s (get options s) [(numeric-constant-expression 1)])]
[expr
(get-numeric-constant (first m))
(get-numeric-constant (first s))]
)
_filter)
:pctile (make-aggregate
by-clause
varname
(get aggregate-funcs (keyword funcname))
[expr
(numeric-constant-expression (-> (:p options '({:Values [50]})) first :Values first))]
(get-numeric-constant (first (:p options [(numeric-constant-expression 50)])))]
_filter)
:pc (make-aggregate
by-clause
varname
(get aggregate-funcs (keyword funcname))
[expr
(-> options :prop nil? not)]
_filter)
:rank (make-aggregate
by-clause
varname
(get aggregate-funcs (keyword funcname))
[expr
(string-constant-expression (cond
(get-var-length-option :field 1 options) "field"
(get-var-length-option :track 1 options) "track"
(get-var-length-option :unique 1 options) "unique"
:else "default"
))]
_filter)
:pc (make-aggregate
by-clause
varname
(get aggregate-funcs (keyword funcname))
[expr
(-> options :prop nil? not)]
_filter)
:rank (make-aggregate
by-clause
varname
(get aggregate-funcs (keyword funcname))
[expr
(string-constant-expression (cond
(get-var-length-option :field 1 options) "field"
(get-var-length-option :track 1 options) "track"
(get-var-length-option :unique 1 options) "unique"
:else "default"
))]
_filter)
;;; default
(make-aggregate
......
(ns ^{
:author "Ole Voldsæter"
:doc "Stata functions are handled here"
} stata2sdtl.functions)
} stata2sdtl.functions
(:require
[stata2sdtl.utils :refer :all]))
(def function-mapping
{"abs" "absolute_value"
......@@ -33,13 +35,13 @@
(defn function-lookup [& args]
(let [canonical (get function-mapping (first args))]
{"$type" "FunctionCallExpression"
:Function (or canonical (first args))
:Arguments (map-indexed
(fn [i arg] {:ArgumentName (str "EXP" (inc i))
:ArgumentValue arg})
(rest args)
)
:IsSdtlName (some? canonical)}
{"$type" "FunctionCallExpression"
:Function (or canonical (first args))
:Arguments (map-indexed
(fn [i arg] {:ArgumentName (str "EXP" (inc i))
:ArgumentValue arg})
(rest args)
)
:IsSdtlName (some? canonical)}
)
)
\ No newline at end of file
(ns ^{
:author "Ole Voldsæter"
:author "Ole Voldsæter, Sigbjørn Revheim"
:doc "Functions are put here to limit complexity in the transformation map in stata2sdtl.core"
} stata2sdtl.transform-helpers
(:require
......@@ -41,67 +41,28 @@
(filter (comp not nil?) (map #(:target %) variables))
)
(defn transform-recode-element
[element]
(cond
(= element "min") [{"$type" "NumericMinimumValueExpression"}]
(= element "max") [{"$type" "NumericMaximumValueExpression"}]
(= element "missing") [{"$type" "UnhandledMissingValuesExpression"}]
(map? element) element
:default (numeric-constant-expression element)))
(defn transform-recode-rule
[& args]
(m/filter-vals
some?
{:FromValue (cond
(= (first (first args)) "min") [{"$type" "NumericMinimumValueExpression"}]
(= (first (first args)) "max") [{"$type" "NumericMaximumValueExpression"}]
(= (first (first args)) "missing") [{"$type" "UnhandledMissingValuesExpression"}]
(= (first (first args)) "nonmissing") [{"$type" "UnhandledNonmissingValuesExpression"}]
(= (first (first args)) "else") [{"$type" "UnhandledValuesExpression"}]
:default args)
:To (second args)
{"$type" "RecodeRule"
:FromValue (cond
(= (first args) "missing") [{"$type" "UnhandledMissingValuesExpression"}]
(= (first args) "nonmissing") [{"$type" "UnhandledNonmissingValuesExpression"}]
(= (first args) "else") [{"$type" "UnhandledValuesExpression"}]
:default (first args))
:To (transform-recode-element (second args))
:Label (if (> (count args) 2) (nth args 2))}))
(defn transform-recode-rule2
[& args]
(m/filter-vals
some?
{:FromValue (cond
(= (first (first args)) "min") [{"$type" "NumericMinimumValueExpression"}]
(= (first (first args)) "max") [{"$type" "NumericMaximumValueExpression"}]
:default (let [values (filter number? (first args))
ranges (filter map? (first args))]
(cond-> {:args args :fTo (second args), :Label (if (> (count args) 2) (nth args 2))}
(not-empty values) (assoc :FromValue values)
(not-empty ranges) (assoc :fromValueRange ranges)
)
))
:To (second args)
:Label (if (> (count args) 2) (nth args 2))}))
(defn transform-recode-rule_test
[& args]
(m/filter-vals
some?
{:specialFromValue (first args)
:to (second args)
:label (if (> (count args) 2) (nth args 2))})
)
(defn transform-recode-rule_original
[& args]
(m/filter-vals
some?
(if (string? (first args))
{:specialFromValue (first args)
:to (second args)
:label (if (> (count args) 2) (nth args 2))}
;; else
(let [values (filter number? (first args))
ranges (filter map? (first args))]
(cond-> {:to (second args), :label (if (> (count args) 2) (nth args 2))}
(not-empty values) (assoc :fromValue values)
(not-empty ranges) (assoc :fromValueRange ranges)
)
)
))
)
(defn make-seq-comparable [s]
(let [digit-set #{\0 \1 \2 \3 \4 \5 \6 \7 \8 \9}]
{:content s
......@@ -201,7 +162,7 @@
:Command "collapse"
:GroupByVariables (map variable-symbol-expression by-vars)
:AggregateVariables (reduce #(concat (make-compute-list %1) (make-compute-list %2)) aggregations)
:WeightVariable (variable-symbol-expression weight)
:WeightVariable weight
:Condition filter
:Cellwise (contains? options :cw)
:mod-varlist (concat
......
(ns ^{
:author "Ole Voldsæter"
:author "Ole Voldsæter, Sigbjørn Revheim"
:doc "Misc. functions and dynamic variables"
} stata2sdtl.utils
(:require
[clj-diff.core :refer [diff]])